image viewer app

2025-08-11 01:35:14 +01:00
parent 245cc81289
commit 7ca86d7751
3 changed files with 288 additions and 0 deletions
--- a/apps/image_viewer/README.md
+++ b/apps/image_viewer/README.md
@@ -0,0 +1,59 @@
 # Image Dataset Viewer
 A simple Streamlit application to browse images from your Discord chat dataset.
 ## Features
 - 📋 Dropdown to select different channels
 - 🖼️ View images with navigation controls
 - ⬅️➡️ Previous/Next buttons and slider navigation
 - 📊 Display metadata for each image
 - 📱 Responsive layout
 ## Setup and Usage
 ### Option 1: Using the run script (Recommended)
 ```bash
 ./run.sh
 ```
 ### Option 2: Manual setup
 1. Create a virtual environment:
   ```bash
   python3 -m venv venv
   source venv/bin/activate
   ```
 2. Install dependencies:
   ```bash
   pip install -r requirements.txt
   ```
 3. Run the application:
   ```bash
   streamlit run image_viewer.py
   ```
 ## How it works
 The application:
 1. Loads the `images_dataset.json` file from the `images_dataset/` directory that sits next to the app directory (`../images_dataset/images_dataset.json`)
 2. Extracts unique channel names from the dataset
 3. Allows you to select a channel from a dropdown
 4. Displays images from that channel with navigation controls
 5. Shows metadata including author, timestamp, and message content
 ## Dataset Structure
 The app expects a JSON object with an `images` list (and, optionally, a `metadata.summary` object used for the summary metrics). Each entry in `images` should have:
 - `channel`: The channel name
 - `url` or `base64_data`: The image location (an http(s) URL or a local path), or the base64-encoded image bytes
 - `author_name`: The message author (optional)
 - `timestamp_utc`: When the message was sent (optional)
 - `content`: The message text (optional)
 ## Troubleshooting
 - If images don't load, check that the URLs in your dataset are accessible
 - For local images, ensure the paths are relative to the parent of the app directory (the folder that contains `image_viewer/`)
 - Large datasets may take a moment to load initially
--- a/apps/image_viewer/image_viewer.py
+++ b/apps/image_viewer/image_viewer.py
@@ -0,0 +1,226 @@
 import streamlit as st
 import json
 import os
 from pathlib import Path
 import requests
 from PIL import Image
 from io import BytesIO
 # Set page config: browser-tab title and icon, and the "wide" layout so the
 # image and metadata columns can use the full page width.
 # NOTE(review): Streamlit documents restrictions on where set_page_config may
 # be called (historically it had to be the first Streamlit command on the
 # page) - keep this ahead of any other st.* call; confirm against the
 # Streamlit version in use.
 st.set_page_config(
    page_title="Image Dataset Viewer",
    page_icon="🖼️",
    layout="wide"
 )
 # Cache the dataset loading
@st.cache_data
 def load_dataset():
    """Load the images dataset JSON file.

    The dataset is first looked up at ``../images_dataset/images_dataset.json``
    relative to the current working directory (the original behaviour). If no
    file exists there, the same location is resolved relative to this script
    instead, so the app also works when Streamlit is launched from another
    directory. Local image paths are resolved relative to the script as well.

    Returns:
        The parsed JSON document, or an empty dict when the file cannot be
        read or parsed. In the failure case an error is shown in the UI.
    """
    candidates = (
        Path("../images_dataset/images_dataset.json"),
        Path(__file__).resolve().parent.parent / "images_dataset" / "images_dataset.json",
    )
    # Prefer the first candidate that exists; fall back to the original path
    # so a missing file is still reported with the familiar location.
    dataset_path = next((p for p in candidates if p.is_file()), candidates[0])
    try:
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: invalid JSON
        # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
        st.error(f"Error loading dataset: {e}")
        return {}
@st.cache_data
 def get_channels(data):
    """Return the sorted, de-duplicated channel names found in *data*.

    Lookup order:
      1. ``data['metadata']['summary']['channels']`` when it is present and
         non-empty.
      2. Otherwise, the ``'channel'`` value of every dict entry in
         ``data['images']``.

    Args:
        data: The loaded dataset. Anything that is not a dict yields ``[]``.

    Returns:
        A sorted list of unique channel names (possibly empty).
    """
    if not isinstance(data, dict):
        return []
    # First try to get channels from metadata. Each level is type-checked so
    # a null or malformed "metadata"/"summary" falls through to the scan
    # below instead of raising.
    metadata = data.get('metadata')
    summary = metadata.get('summary') if isinstance(metadata, dict) else None
    listed = summary.get('channels') if isinstance(summary, dict) else None
    if listed:
        # set() drops repeated names so each channel is offered only once.
        return sorted(set(listed))
    # Fallback: extract from images array ("or []" tolerates "images": null).
    images = data.get('images') or []
    found = {
        item['channel']
        for item in images
        if isinstance(item, dict) and 'channel' in item
    }
    return sorted(found)
 def display_image(image_url, caption="", base64_data=None):
    """Render one image in the Streamlit page and report whether it worked.

    The image source is chosen in this order:
      1. ``base64_data`` when it is truthy and not the literal placeholder
         string checked below (presumably left in datasets whose image bytes
         were stripped - confirm with the dataset producer).
      2. ``image_url`` starting with ``http://`` or ``https://``: downloaded
         with a 10 second timeout.
      3. Any other non-empty ``image_url``: treated as a path relative to the
         parent of this script's directory.

    Args:
        image_url: Remote URL or local relative path; may be empty/None.
        caption: Caption shown with the image.
        base64_data: Optional base64-encoded image bytes.

    Returns:
        True when the image was displayed; False when no usable source was
        found or loading failed (an error is shown in the UI in both cases).
    """
    try:
        if base64_data and base64_data != "image datta ...........":
            # Inline payload: decode and open it straight from memory.
            import base64
            picture = Image.open(BytesIO(base64.b64decode(base64_data)))
        elif not image_url:
            st.error("No valid image source found")
            return False
        elif image_url.startswith(('http://', 'https://')):
            # Remote image: fetch it, failing on HTTP error statuses.
            reply = requests.get(image_url, timeout=10)
            reply.raise_for_status()
            picture = Image.open(BytesIO(reply.content))
        else:
            # Local image, resolved against the parent of the app directory.
            local_file = Path(__file__).parent.parent / image_url
            if not local_file.exists():
                st.error(f"Image not found: {image_url}")
                return False
            picture = Image.open(local_file)
        st.image(picture, caption=caption, use_column_width=True)
        return True
    except Exception as e:
        # UI boundary: any decode/network/IO failure becomes an on-page error.
        st.error(f"Error loading image: {e}")
        return False
 def main():
    """Render the Image Dataset Viewer page.

    Flow: load the dataset, show the optional summary metrics, let the user
    pick a channel, then page through that channel's images with
    Previous/Next buttons and a slider while showing each image's metadata.

    Session state used:
        image_index: position of the displayed image within the selected
            channel's image list.
        image_channel: the channel ``image_index`` belongs to, so the
            position can be reset when another channel is selected.
    """
    st.title("🖼️ Image Dataset Viewer")
    st.markdown("Browse images from your dataset by channel")
    # Load dataset
    with st.spinner("Loading dataset..."):
        data = load_dataset()
    if not data:
        st.error("No data loaded. Please check your dataset file.")
        return
    # Display dataset summary if available. Each level is type-checked so a
    # null or malformed "metadata"/"summary" simply hides the metrics.
    metadata = data.get('metadata') if isinstance(data, dict) else None
    summary = metadata.get('summary') if isinstance(metadata, dict) else None
    if isinstance(summary, dict):
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric("Total Images", summary.get('total_images', 'Unknown'))
        with col2:
            # "or []" / "or 0" tolerate keys that are present but null.
            st.metric("Channels", len(summary.get('channels') or []))
        with col3:
            st.metric("Authors", len(summary.get('authors') or []))
        with col4:
            size_mb = (summary.get('total_size_bytes') or 0) / (1024 * 1024)
            st.metric("Total Size", f"{size_mb:.1f} MB")
    # Get channels
    channels = get_channels(data)
    if not channels:
        st.error("No channels found in the dataset.")
        return
    # Channel selection
    selected_channel = st.selectbox(
        "Select a channel:",
        channels,
        help="Choose a channel to view its images"
    )
    # Filter images by channel; 'id' is the entry's index in the full
    # images list, not its position within the channel.
    images = (data.get('images') or []) if isinstance(data, dict) else []
    channel_images = [
        {'id': i, 'data': item}
        for i, item in enumerate(images)
        if isinstance(item, dict)
        and item.get('channel') == selected_channel
        and ('url' in item or 'base64_data' in item)
    ]
    if not channel_images:
        st.warning(f"No images found for channel: {selected_channel}")
        return
    st.success(f"Found {len(channel_images)} images in #{selected_channel}")
    last_index = len(channel_images) - 1
    # Start from the first image whenever a different channel is selected.
    if ('image_channel' not in st.session_state
            or st.session_state.image_channel != selected_channel):
        st.session_state.image_channel = selected_channel
        st.session_state.image_index = 0
    # Keep the stored position inside this channel's range: a stale index
    # from a larger channel would otherwise be handed to st.slider as a
    # value above max_value, which the slider rejects.
    if 'image_index' not in st.session_state:
        st.session_state.image_index = 0
    st.session_state.image_index = min(max(st.session_state.image_index, 0), last_index)
    # Image navigation
    if last_index > 0:
        col1, col2, col3 = st.columns([1, 2, 1])
        with col1:
            if st.button("⬅️ Previous", use_container_width=True):
                # Wrap around from the first image to the last.
                if st.session_state.image_index > 0:
                    st.session_state.image_index -= 1
                else:
                    st.session_state.image_index = last_index
        with col2:
            # Image selector; its value doubles as the stored position.
            st.session_state.image_index = st.slider(
                "Image",
                0,
                last_index,
                st.session_state.image_index,
                help=f"Navigate through {len(channel_images)} images"
            )
        with col3:
            if st.button("Next ➡️", use_container_width=True):
                # Wrap around from the last image to the first.
                if st.session_state.image_index < last_index:
                    st.session_state.image_index += 1
                else:
                    st.session_state.image_index = 0
    else:
        st.session_state.image_index = 0
    # Display current image
    current_image = channel_images[st.session_state.image_index]
    image_data = current_image['data']
    # Get image URL and base64 data
    image_url = image_data.get('url')
    base64_data = image_data.get('base64_data')
    if image_url or base64_data:
        # Create two columns for image and metadata
        col1, col2 = st.columns([2, 1])
        with col1:
            st.subheader(f"Image {st.session_state.image_index + 1} of {len(channel_images)}")
            caption = f"Channel: #{selected_channel}"
            if 'author_name' in image_data:
                caption += f" | Author: {image_data['author_name']}"
            if 'timestamp_utc' in image_data:
                caption += f" | Time: {image_data['timestamp_utc']}"
            display_image(image_url, caption, base64_data)
        with col2:
            st.subheader("Metadata")
            # Display metadata in an organized way
            metadata_to_show = {
                'ID': current_image['id'],
                'Channel': image_data.get('channel', 'Unknown'),
                'Author': image_data.get('author_name', 'Unknown'),
                'Nickname': image_data.get('author_nickname', 'Unknown'),
                'Author ID': image_data.get('author_id', 'Unknown'),
                'Message ID': image_data.get('message_id', 'Unknown'),
                'Timestamp': image_data.get('timestamp_utc', 'Unknown'),
                'File Extension': image_data.get('file_extension', 'Unknown'),
                'File Size': f"{image_data.get('file_size', 0):,} bytes" if image_data.get('file_size') else 'Unknown',
                'Message': image_data.get('content', 'No message'),
            }
            for key, value in metadata_to_show.items():
                # Skip only missing values. A plain truthiness test would
                # also hide a legitimate 0, e.g. the ID of the first entry.
                if value is None or value == '' or value == 'Unknown':
                    continue
                st.write(f"**{key}:** {value}")
            # Show all other metadata
            st.subheader("Raw Data")
            with st.expander("Show all metadata"):
                st.json(image_data)
    else:
        # The entry has a 'url'/'base64_data' key but both values are empty.
        st.error("No image URL or base64 data found in this entry")
        st.json(image_data)
 # Entry point: build the page when this file is executed as a script,
 # e.g. via `streamlit run image_viewer.py`.
 if __name__ == "__main__":
    main()
--- a/apps/image_viewer/requirements.txt
+++ b/apps/image_viewer/requirements.txt
@@ -0,0 +1,3 @@
 streamlit>=1.28.0
 requests>=2.31.0
 Pillow>=10.0.0