From 7ca86d77512f97b7a219a3a3f15b39012afe6116 Mon Sep 17 00:00:00 2001 From: Azeem Fidahusein Date: Mon, 11 Aug 2025 01:35:14 +0100 Subject: [PATCH] image viewer app --- apps/image_viewer/README.md | 59 ++++++++ apps/image_viewer/image_viewer.py | 226 +++++++++++++++++++++++++++++ apps/image_viewer/requirements.txt | 3 + 3 files changed, 288 insertions(+) create mode 100644 apps/image_viewer/README.md create mode 100644 apps/image_viewer/image_viewer.py create mode 100644 apps/image_viewer/requirements.txt diff --git a/apps/image_viewer/README.md b/apps/image_viewer/README.md new file mode 100644 index 0000000..b8c0c67 --- /dev/null +++ b/apps/image_viewer/README.md @@ -0,0 +1,59 @@ +# Image Dataset Viewer + +A simple Streamlit application to browse images from your Discord chat dataset. + +## Features + +- 📋 Dropdown to select different channels +- 🖼️ View images with navigation controls +- ⬅️➡️ Previous/Next buttons and slider navigation +- 📊 Display metadata for each image +- 📱 Responsive layout + +## Setup and Usage + +### Option 1: Using the run script (Recommended) +```bash +./run.sh +``` + +### Option 2: Manual setup +1. Create a virtual environment: + ```bash + python3 -m venv venv + source venv/bin/activate + ``` + +2. Install dependencies: + ```bash + pip install -r requirements.txt + ``` + +3. Run the application: + ```bash + streamlit run image_viewer.py + ``` + +## How it works + +The application: +1. Loads `../images_dataset/images_dataset.json`, i.e. the `images_dataset` folder that sits next to this app's directory +2. Extracts unique channel names from the dataset +3. Allows you to select a channel from a dropdown +4. Displays images from that channel with navigation controls +5. 
"""Streamlit app for browsing a chat image dataset one channel at a time.

For the selected image the app shows metadata including author, timestamp,
and message content.

Dataset structure read by this module (a JSON object):

- ``metadata.summary`` (optional): ``total_images``, ``channels``,
  ``authors`` and ``total_size_bytes``; rendered as headline metrics, and
  ``channels`` is used as the channel list when it is non-empty.
- ``images``: a list of entries, each a dict with
    - ``channel``: the channel name used for filtering
    - ``url`` and/or ``base64_data``: the image source (an http(s) URL, a
      local path, or base64-encoded image bytes)
    - ``author_name``, ``author_nickname``, ``author_id``, ``message_id``,
      ``timestamp_utc``, ``file_extension``, ``file_size``, ``content``:
      optional metadata shown next to the image

Troubleshooting:

- If images don't load, check that the URLs in your dataset are accessible.
- Local image paths are resolved relative to the directory that contains
  this app's folder.
- Large datasets may take a moment to load initially.

Runtime dependencies (requirements.txt): streamlit>=1.28.0,
requests>=2.31.0, Pillow>=10.0.0
"""

import base64
import json
import os
from io import BytesIO
from pathlib import Path

import requests
import streamlit as st
from PIL import Image

# Must be the first Streamlit call in the script.
st.set_page_config(
    page_title="Image Dataset Viewer",
    page_icon="🖼️",
    layout="wide"
)

# Dataset location relative to the working directory (the original lookup).
_CWD_DATASET_PATH = "../images_dataset/images_dataset.json"

# Directory that local image paths (and the fallback dataset path) hang off.
_DATA_ROOT = Path(__file__).parent.parent

# Placeholder string some dataset entries carry instead of real base64 data.
_BASE64_PLACEHOLDER = "image datta ..........."


def _resolve_dataset_path():
    """Return the dataset path, preferring the working-directory lookup.

    The working-directory-relative path is tried first so every setup that
    worked before keeps working; the script-relative path is the fallback so
    the app also starts when launched from another directory.
    """
    cwd_relative = Path(_CWD_DATASET_PATH)
    if cwd_relative.exists():
        return cwd_relative
    return _DATA_ROOT / "images_dataset" / "images_dataset.json"


@st.cache_data
def _read_dataset(path_str, modified_at):
    """Parse the JSON file at *path_str*.

    *modified_at* is not used in the body; it is part of the cache key so an
    edited file is re-read. Errors propagate to the caller instead of being
    turned into a value, so a failed load is never cached.
    """
    with open(path_str, 'r', encoding='utf-8') as handle:
        return json.load(handle)


def load_dataset():
    """Load the images dataset JSON file.

    Returns:
        The parsed JSON document, or ``{}`` (after showing an error in the
        app) when the file cannot be read or parsed.
    """
    dataset_path = _resolve_dataset_path()
    try:
        return _read_dataset(str(dataset_path), dataset_path.stat().st_mtime)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file; ValueError: invalid JSON/encoding.
        st.error(f"Error loading dataset: {e}")
        return {}


def _summary_of(data):
    """Return ``data['metadata']['summary']`` when it is a dict, else None."""
    metadata = data.get('metadata') if isinstance(data, dict) else None
    summary = metadata.get('summary') if isinstance(metadata, dict) else None
    return summary if isinstance(summary, dict) else None


def get_channels(data):
    """Extract the sorted list of unique channels from the dataset.

    The channel list in ``metadata.summary`` wins when present and non-empty;
    otherwise channels are collected from the ``images`` entries.

    Deliberately not wrapped in ``st.cache_data``: the cache key would require
    hashing the entire dataset on every rerun, which costs more than this scan.
    """
    summary = _summary_of(data)
    if summary:
        listed = summary.get('channels', [])
        if listed:
            return sorted(listed, key=str)

    images = (data.get('images') or []) if isinstance(data, dict) else []
    found = {
        item['channel']
        for item in images
        if isinstance(item, dict) and 'channel' in item
    }
    # key=str keeps the sort from raising if channel values are not all strings.
    return sorted(found, key=str)


def display_image(image_url, caption="", base64_data=None):
    """Display an image from URL, local path, or base64 data.

    Args:
        image_url: http(s) URL or a path relative to the data root; may be
            None when *base64_data* is given.
        caption: Text shown under the image.
        base64_data: Base64-encoded image bytes; takes precedence over
            *image_url* unless it is the known placeholder string.

    Returns:
        True if the image was rendered, False if an error was shown instead.
    """
    try:
        if base64_data and base64_data != _BASE64_PLACEHOLDER:
            image = Image.open(BytesIO(base64.b64decode(base64_data)))
        elif image_url and image_url.startswith(('http://', 'https://')):
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
        elif image_url:
            image_path = _DATA_ROOT / image_url
            if not image_path.exists():
                st.error(f"Image not found: {image_url}")
                return False
            image = Image.open(image_path)
        else:
            st.error("No valid image source found")
            return False

        # NOTE(review): `use_column_width` is reported as deprecated in newer
        # Streamlit releases; confirm against the pinned version before
        # switching to its replacement.
        st.image(image, caption=caption, use_column_width=True)
        return True
    except Exception as e:
        # UI boundary: decoding, network and rendering errors are all shown
        # to the user rather than aborting the page.
        st.error(f"Error loading image: {e}")
        return False


def _render_summary(data):
    """Show the headline metrics from ``metadata.summary`` if available."""
    summary = _summary_of(data)
    if summary is None:
        return

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Images", summary.get('total_images', 'Unknown'))
    with col2:
        st.metric("Channels", len(summary.get('channels') or []))
    with col3:
        st.metric("Authors", len(summary.get('authors') or []))
    with col4:
        size_mb = (summary.get('total_size_bytes') or 0) / (1024 * 1024)
        st.metric("Total Size", f"{size_mb:.1f} MB")


def _images_for_channel(data, channel):
    """Return ``{'id', 'data'}`` records for displayable entries of *channel*.

    ``id`` is the entry's position in the full ``images`` list; an entry is
    displayable when it has a ``url`` or ``base64_data`` key.
    """
    images = (data.get('images') or []) if isinstance(data, dict) else []
    return [
        {'id': position, 'data': entry}
        for position, entry in enumerate(images)
        if isinstance(entry, dict)
        and entry.get('channel') == channel
        and ('url' in entry or 'base64_data' in entry)
    ]


def _step_image_index(delta, count):
    """Button callback: move the current image index by *delta*, wrapping."""
    current = st.session_state.get('image_index', 0)
    st.session_state.image_index = (current + delta) % count


def _select_image_index(count, channel):
    """Render the navigation controls and return the index to display.

    The index restarts at 0 whenever the channel changes and is clamped to
    ``[0, count)`` so the slider never receives an out-of-range value.
    """
    state = st.session_state

    if state.get('image_channel') != channel:
        state.image_channel = channel
        state.image_index = 0

    current = state.get('image_index', 0)
    if not isinstance(current, int) or not 0 <= current < count:
        current = 0
    state.image_index = current

    if count <= 1:
        return state.image_index

    prev_col, slider_col, next_col = st.columns([1, 2, 1])

    # Buttons use callbacks so the index is already updated when the script
    # reruns; the slider below is then drawn at the new position.
    with prev_col:
        st.button(
            "⬅️ Previous",
            use_container_width=True,
            on_click=_step_image_index,
            args=(-1, count),
        )

    with slider_col:
        state.image_index = st.slider(
            "Image",
            0,
            count - 1,
            state.image_index,
            help=f"Navigate through {count} images"
        )

    with next_col:
        st.button(
            "Next ➡️",
            use_container_width=True,
            on_click=_step_image_index,
            args=(1, count),
        )

    return state.image_index


def _has_display_value(value):
    """True unless *value* is missing; 0 counts as a real value."""
    return value is not None and value != '' and value != 'Unknown'


def _render_entry(record, index, count, channel):
    """Render one image record: the image on the left, metadata on the right."""
    image_data = record['data']
    image_url = image_data.get('url')
    base64_data = image_data.get('base64_data')

    if not (image_url or base64_data):
        st.error("No image URL or base64 data found in this entry")
        st.json(image_data)
        return

    image_col, meta_col = st.columns([2, 1])

    with image_col:
        st.subheader(f"Image {index + 1} of {count}")
        caption = f"Channel: #{channel}"
        if 'author_name' in image_data:
            caption += f" | Author: {image_data['author_name']}"
        if 'timestamp_utc' in image_data:
            caption += f" | Time: {image_data['timestamp_utc']}"

        display_image(image_url, caption, base64_data)

    with meta_col:
        st.subheader("Metadata")

        file_size = image_data.get('file_size')
        metadata_to_show = {
            'ID': record['id'],
            'Channel': image_data.get('channel', 'Unknown'),
            'Author': image_data.get('author_name', 'Unknown'),
            'Nickname': image_data.get('author_nickname', 'Unknown'),
            'Author ID': image_data.get('author_id', 'Unknown'),
            'Message ID': image_data.get('message_id', 'Unknown'),
            'Timestamp': image_data.get('timestamp_utc', 'Unknown'),
            'File Extension': image_data.get('file_extension', 'Unknown'),
            'File Size': f"{file_size:,} bytes" if file_size else 'Unknown',
            'Message': image_data.get('content', 'No message'),
        }

        for key, value in metadata_to_show.items():
            if _has_display_value(value):
                st.write(f"**{key}:** {value}")

        st.subheader("Raw Data")
        with st.expander("Show all metadata"):
            st.json(image_data)


def main():
    """Run the viewer: load data, pick a channel, navigate and show images."""
    st.title("🖼️ Image Dataset Viewer")
    st.markdown("Browse images from your dataset by channel")

    with st.spinner("Loading dataset..."):
        data = load_dataset()

    if not data:
        st.error("No data loaded. Please check your dataset file.")
        return

    _render_summary(data)

    channels = get_channels(data)
    if not channels:
        st.error("No channels found in the dataset.")
        return

    selected_channel = st.selectbox(
        "Select a channel:",
        channels,
        help="Choose a channel to view its images"
    )

    channel_images = _images_for_channel(data, selected_channel)
    if not channel_images:
        st.warning(f"No images found for channel: {selected_channel}")
        return

    st.success(f"Found {len(channel_images)} images in #{selected_channel}")

    index = _select_image_index(len(channel_images), selected_channel)
    _render_entry(
        channel_images[index], index, len(channel_images), selected_channel
    )


if __name__ == "__main__":
    main()