image viewer app

2025-08-11 01:35:14 +01:00
parent 245cc81289
commit 7ca86d7751
3 changed files with 288 additions and 0 deletions
--- a/apps/image_viewer/README.md
+++ b/apps/image_viewer/README.md
@@ -0,0 +1,59 @@
+# Image Dataset Viewer
+
+A simple Streamlit application to browse images from your Discord chat dataset.
+
+## Features
+
+- 📋 Dropdown to select different channels
+- 🖼️ View images with navigation controls
+- ⬅️➡️ Previous/Next buttons and slider navigation
+- 📊 Display metadata for each image
+- 📱 Responsive layout
+
+## Setup and Usage
+
+### Option 1: Using the run script (Recommended)
+```bash
+./run.sh
+```
+
+### Option 2: Manual setup
+1. Create a virtual environment:
+   ```bash
+   python3 -m venv venv
+   source venv/bin/activate
+   ```
+
+2. Install dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+
+3. Run the application:
+   ```bash
+   streamlit run image_viewer.py
+   ```
+
+## How it works
+
+The application:
+1. Loads the `images_dataset.json` file from the sibling `images_dataset/` directory (`../images_dataset/images_dataset.json`)
+2. Extracts unique channel names from the dataset
+3. Allows you to select a channel from a dropdown
+4. Displays images from that channel with navigation controls
+5. Shows metadata including author, timestamp, and message content
+
+## Dataset Structure
+
+The app expects a JSON object with an `images` array (and, optionally, a `metadata.summary` object). Each entry in `images` should have:
+- `channel`: The channel name
+- `url` or `base64_data`: The image location (an HTTP(S) URL or a local path) or the base64-encoded image bytes
+- `author_name`: The message author (optional)
+- `timestamp_utc`: When the message was sent (optional)
+- `content`: The message text (optional)
+
+## Troubleshooting
+
+- If images don't load, check that the URLs in your dataset are accessible
+- For local images, ensure the paths are relative to the parent of the `image_viewer/` directory (i.e. `apps/`)
+- Large datasets may take a moment to load initially
--- a/apps/image_viewer/image_viewer.py
+++ b/apps/image_viewer/image_viewer.py
@@ -0,0 +1,226 @@
+import streamlit as st
+import json
+import os
+from pathlib import Path
+import requests
+from PIL import Image
+from io import BytesIO
+
+# Configure the page title and icon, and use the full-width ("wide") layout.
+# NOTE(review): some Streamlit releases require this to be the first Streamlit
+# command executed in the script — confirm against the pinned version before
+# moving any `st.*` call above it.
+st.set_page_config(
+    page_title="Image Dataset Viewer",
+    page_icon="🖼️",
+    layout="wide"
+)
+
+# Cache the dataset loading
+@st.cache_data
+def load_dataset():
+    """Load the images dataset JSON file"""
+    dataset_path = "../images_dataset/images_dataset.json"
+    
+    try:
+        with open(dataset_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+        return data
+    except Exception as e:
+        st.error(f"Error loading dataset: {e}")
+        return {}
+
+@st.cache_data
+def get_channels(data):
+    """Extract unique channels from the dataset"""
+    # First try to get channels from metadata
+    if isinstance(data, dict) and 'metadata' in data and 'summary' in data['metadata']:
+        channels = data['metadata']['summary'].get('channels', [])
+        if channels:
+            return sorted(channels)
+    
+    # Fallback: extract from images array
+    channels = set()
+    images = data.get('images', []) if isinstance(data, dict) else []
+    
+    for item in images:
+        if isinstance(item, dict) and 'channel' in item:
+            channels.add(item['channel'])
+    
+    return sorted(list(channels))
+
+def display_image(image_url, caption="", base64_data=None):
+    """Display an image from URL, local path, or base64 data"""
+    try:
+        if base64_data and base64_data != "image datta ...........":
+            # Load image from base64 data
+            import base64
+            image_data = base64.b64decode(base64_data)
+            image = Image.open(BytesIO(image_data))
+        elif image_url and image_url.startswith(('http://', 'https://')):
+            # Load image from URL
+            response = requests.get(image_url, timeout=10)
+            response.raise_for_status()
+            image = Image.open(BytesIO(response.content))
+        elif image_url:
+            # Load local image
+            image_path = Path(__file__).parent.parent / image_url
+            if image_path.exists():
+                image = Image.open(image_path)
+            else:
+                st.error(f"Image not found: {image_url}")
+                return False
+        else:
+            st.error("No valid image source found")
+            return False
+        
+        st.image(image, caption=caption, use_column_width=True)
+        return True
+    except Exception as e:
+        st.error(f"Error loading image: {e}")
+        return False
+
+def main():
+    st.title("🖼️ Image Dataset Viewer")
+    st.markdown("Browse images from your dataset by channel")
+    
+    # Load dataset
+    with st.spinner("Loading dataset..."):
+        data = load_dataset()
+    
+    if not data:
+        st.error("No data loaded. Please check your dataset file.")
+        return
+    
+    # Display dataset summary if available
+    if isinstance(data, dict) and 'metadata' in data:
+        metadata = data['metadata']
+        if 'summary' in metadata:
+            summary = metadata['summary']
+            col1, col2, col3, col4 = st.columns(4)
+            with col1:
+                st.metric("Total Images", summary.get('total_images', 'Unknown'))
+            with col2:
+                st.metric("Channels", len(summary.get('channels', [])))
+            with col3:
+                st.metric("Authors", len(summary.get('authors', [])))
+            with col4:
+                size_mb = summary.get('total_size_bytes', 0) / (1024 * 1024)
+                st.metric("Total Size", f"{size_mb:.1f} MB")
+    
+    # Get channels
+    channels = get_channels(data)
+    
+    if not channels:
+        st.error("No channels found in the dataset.")
+        return
+    
+    # Channel selection
+    selected_channel = st.selectbox(
+        "Select a channel:",
+        channels,
+        help="Choose a channel to view its images"
+    )
+    
+    # Filter images by channel
+    channel_images = []
+    images = data.get('images', []) if isinstance(data, dict) else []
+    
+    for i, item in enumerate(images):
+        if isinstance(item, dict) and item.get('channel') == selected_channel:
+            if 'url' in item or 'base64_data' in item:
+                channel_images.append({
+                    'id': i,
+                    'data': item
+                })
+    
+    if not channel_images:
+        st.warning(f"No images found for channel: {selected_channel}")
+        return
+    
+    st.success(f"Found {len(channel_images)} images in #{selected_channel}")
+    
+    # Image navigation
+    if len(channel_images) > 1:
+        col1, col2, col3 = st.columns([1, 2, 1])
+        
+        with col1:
+            if st.button("⬅️ Previous", use_container_width=True):
+                if 'image_index' in st.session_state and st.session_state.image_index > 0:
+                    st.session_state.image_index -= 1
+                else:
+                    st.session_state.image_index = len(channel_images) - 1
+        
+        with col2:
+            # Initialize or get current index
+            if 'image_index' not in st.session_state:
+                st.session_state.image_index = 0
+            
+            # Image selector
+            st.session_state.image_index = st.slider(
+                "Image",
+                0,
+                len(channel_images) - 1,
+                st.session_state.image_index,
+                help=f"Navigate through {len(channel_images)} images"
+            )
+        
+        with col3:
+            if st.button("Next ➡️", use_container_width=True):
+                if 'image_index' in st.session_state and st.session_state.image_index < len(channel_images) - 1:
+                    st.session_state.image_index += 1
+                else:
+                    st.session_state.image_index = 0
+    else:
+        st.session_state.image_index = 0
+    
+    # Display current image
+    current_image = channel_images[st.session_state.image_index]
+    image_data = current_image['data']
+    
+    # Get image URL and base64 data
+    image_url = image_data.get('url')
+    base64_data = image_data.get('base64_data')
+    
+    if image_url or base64_data:
+        # Create two columns for image and metadata
+        col1, col2 = st.columns([2, 1])
+        
+        with col1:
+            st.subheader(f"Image {st.session_state.image_index + 1} of {len(channel_images)}")
+            caption = f"Channel: #{selected_channel}"
+            if 'author_name' in image_data:
+                caption += f" | Author: {image_data['author_name']}"
+            if 'timestamp_utc' in image_data:
+                caption += f" | Time: {image_data['timestamp_utc']}"
+            
+            display_image(image_url, caption, base64_data)
+        
+        with col2:
+            st.subheader("Metadata")
+            
+            # Display metadata in an organized way
+            metadata_to_show = {
+                'ID': current_image['id'],
+                'Channel': image_data.get('channel', 'Unknown'),
+                'Author': image_data.get('author_name', 'Unknown'),
+                'Nickname': image_data.get('author_nickname', 'Unknown'),
+                'Author ID': image_data.get('author_id', 'Unknown'),
+                'Message ID': image_data.get('message_id', 'Unknown'),
+                'Timestamp': image_data.get('timestamp_utc', 'Unknown'),
+                'File Extension': image_data.get('file_extension', 'Unknown'),
+                'File Size': f"{image_data.get('file_size', 0):,} bytes" if image_data.get('file_size') else 'Unknown',
+                'Message': image_data.get('content', 'No message'),
+            }
+            
+            for key, value in metadata_to_show.items():
+                if value and value != 'Unknown':
+                    st.write(f"**{key}:** {value}")
+            
+            # Show all other metadata
+            st.subheader("Raw Data")
+            with st.expander("Show all metadata"):
+                st.json(image_data)
+    else:
+        st.error("No image URL or base64 data found in this entry")
+        st.json(image_data)
+
+# Build the page only when this file is executed as a script (for example via
+# `streamlit run image_viewer.py`), not when it is imported as a module.
+if __name__ == "__main__":
+    main()
--- a/apps/image_viewer/requirements.txt
+++ b/apps/image_viewer/requirements.txt
@@ -0,0 +1,3 @@
+streamlit>=1.28.0
+requests>=2.31.0
+Pillow>=10.0.0