import base64
import json
import os
from io import BytesIO
from pathlib import Path

import requests
import streamlit as st
from PIL import Image

# Set page config (must run before any other Streamlit command)
st.set_page_config(
    page_title="Image Dataset Viewer",
    page_icon="🖼️",
    layout="wide"
)


# Cache the dataset loading
@st.cache_data
def load_dataset():
    """Load the images dataset JSON file.

    Returns:
        The parsed JSON content, or an empty dict when the file cannot be
        opened or parsed (the error is also reported in the UI).
    """
    # NOTE(review): this path is resolved against the current working
    # directory, while display_image() resolves local images against this
    # file's location -- confirm the app is always launched from its own
    # directory.
    dataset_path = "../images_dataset/images_dataset.json"
    try:
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError both
        # derive from ValueError).
        st.error(f"Error loading dataset: {e}")
        return {}


@st.cache_data
def get_channels(data):
    """Extract the sorted list of unique channels from the dataset.

    Args:
        data: The loaded dataset. Anything that is not a dict yields an
            empty list.

    Returns:
        The channels listed under ``data['metadata']['summary']['channels']``
        when that list is non-empty; otherwise the distinct ``'channel'``
        values found in ``data['images']``. Sorted in both cases.
    """
    # First try to get channels from metadata
    if isinstance(data, dict) and 'metadata' in data and 'summary' in data['metadata']:
        channels = data['metadata']['summary'].get('channels', [])
        if channels:
            return sorted(channels)

    # Fallback: extract from images array
    images = data.get('images', []) if isinstance(data, dict) else []
    found = {
        item['channel']
        for item in images
        if isinstance(item, dict) and 'channel' in item
    }
    return sorted(found)


def display_image(image_url, caption="", base64_data=None):
    """Display an image from base64 data, an HTTP(S) URL, or a local path.

    Sources are tried in that order of preference: ``base64_data`` wins over
    ``image_url`` when both are given.

    Args:
        image_url: An ``http://``/``https://`` URL, or a path relative to the
            parent of this file's directory. May be ``None``.
        caption: Caption shown under the image.
        base64_data: Base64-encoded image bytes. May be ``None``.

    Returns:
        True when the image was rendered, False when no usable source was
        found or loading failed (an error is shown in the UI either way).
    """
    try:
        # The literal below is skipped on purpose -- presumably a placeholder
        # the dataset exporter writes instead of real image bytes.
        if base64_data and base64_data != "image datta ...........":
            # Load image from base64 data
            image_data = base64.b64decode(base64_data)
            image = Image.open(BytesIO(image_data))
        elif image_url and image_url.startswith(('http://', 'https://')):
            # Load image from URL
            response = requests.get(image_url, timeout=10)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
        elif image_url:
            # Load local image
            image_path = Path(__file__).parent.parent / image_url
            if image_path.exists():
                image = Image.open(image_path)
            else:
                st.error(f"Image not found: {image_url}")
                return False
        else:
            st.error("No valid image source found")
            return False

        st.image(image, caption=caption, use_column_width=True)
        return True
    except Exception as e:
        # Deliberately broad: decoding, network and image-parsing failures
        # raise unrelated exception types, and one bad entry must not take
        # down the whole page.
        st.error(f"Error loading image: {e}")
        return False


def _render_summary(data):
    """Show the dataset summary metrics when the dataset provides them."""
    if not (isinstance(data, dict) and 'metadata' in data):
        return
    metadata = data['metadata']
    if 'summary' not in metadata:
        return
    summary = metadata['summary']

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Images", summary.get('total_images', 'Unknown'))
    with col2:
        st.metric("Channels", len(summary.get('channels', [])))
    with col3:
        st.metric("Authors", len(summary.get('authors', [])))
    with col4:
        # "or 0" also covers an explicit null in the JSON, which would
        # otherwise make the division raise TypeError.
        size_bytes = summary.get('total_size_bytes', 0) or 0
        size_mb = size_bytes / (1024 * 1024)
        st.metric("Total Size", f"{size_mb:.1f} MB")


def _images_for_channel(data, channel):
    """Return the entries of ``channel`` that carry a 'url' or 'base64_data' key.

    Each result is ``{'id': <position in data['images']>, 'data': <entry>}``.
    """
    images = data.get('images', []) if isinstance(data, dict) else []
    return [
        {'id': i, 'data': item}
        for i, item in enumerate(images)
        if isinstance(item, dict)
        and item.get('channel') == channel
        and ('url' in item or 'base64_data' in item)
    ]


def _select_image_index(total):
    """Render the navigation controls and return the index of the image to show.

    The index is kept in ``st.session_state.image_index`` so it survives
    reruns. With a single image no controls are drawn and the index is 0.
    """
    if total <= 1:
        st.session_state.image_index = 0
        return 0

    # The stored index outlives channel changes; a value left over from a
    # larger channel would be outside the slider's range and make st.slider
    # raise, so start over from the first image in that case.
    if ('image_index' not in st.session_state
            or not 0 <= st.session_state.image_index < total):
        st.session_state.image_index = 0

    col1, col2, col3 = st.columns([1, 2, 1])

    # Both buttons are handled before the slider is drawn so the slider
    # always reflects the index the click produced. Columns are containers,
    # so the on-screen order (previous | slider | next) is unaffected.
    with col1:
        if st.button("⬅️ Previous", use_container_width=True):
            if st.session_state.image_index > 0:
                st.session_state.image_index -= 1
            else:
                # Wrap around to the last image
                st.session_state.image_index = total - 1

    with col3:
        if st.button("Next ➡️", use_container_width=True):
            if st.session_state.image_index < total - 1:
                st.session_state.image_index += 1
            else:
                # Wrap around to the first image
                st.session_state.image_index = 0

    with col2:
        # Image selector
        st.session_state.image_index = st.slider(
            "Image",
            0,
            total - 1,
            st.session_state.image_index,
            help=f"Navigate through {total} images"
        )

    return st.session_state.image_index


def _render_metadata(image_id, image_data):
    """Show the selected entry's key fields followed by its raw JSON."""
    st.subheader("Metadata")

    # Display metadata in an organized way
    metadata_to_show = {
        'ID': image_id,
        'Channel': image_data.get('channel', 'Unknown'),
        'Author': image_data.get('author_name', 'Unknown'),
        'Nickname': image_data.get('author_nickname', 'Unknown'),
        'Author ID': image_data.get('author_id', 'Unknown'),
        'Message ID': image_data.get('message_id', 'Unknown'),
        'Timestamp': image_data.get('timestamp_utc', 'Unknown'),
        'File Extension': image_data.get('file_extension', 'Unknown'),
        'File Size': f"{image_data.get('file_size', 0):,} bytes" if image_data.get('file_size') else 'Unknown',
        'Message': image_data.get('content', 'No message'),
    }

    for key, value in metadata_to_show.items():
        # Skip only genuinely missing values. A plain truthiness test would
        # also hide the ID of the first image, which is 0.
        if value is None or value == '' or value == 'Unknown':
            continue
        st.write(f"**{key}:** {value}")

    # Show all other metadata
    st.subheader("Raw Data")
    with st.expander("Show all metadata"):
        st.json(image_data)


def main():
    """Run the viewer: load the dataset, pick a channel, browse its images."""
    st.title("🖼️ Image Dataset Viewer")
    st.markdown("Browse images from your dataset by channel")

    # Load dataset
    with st.spinner("Loading dataset..."):
        data = load_dataset()

    if not data:
        st.error("No data loaded. Please check your dataset file.")
        return

    # Display dataset summary if available
    _render_summary(data)

    # Get channels
    channels = get_channels(data)
    if not channels:
        st.error("No channels found in the dataset.")
        return

    # Channel selection
    selected_channel = st.selectbox(
        "Select a channel:",
        channels,
        help="Choose a channel to view its images"
    )

    # Filter images by channel
    channel_images = _images_for_channel(data, selected_channel)
    if not channel_images:
        st.warning(f"No images found for channel: {selected_channel}")
        return

    st.success(f"Found {len(channel_images)} images in #{selected_channel}")

    # Image navigation
    index = _select_image_index(len(channel_images))

    # Display current image
    current_image = channel_images[index]
    image_data = current_image['data']

    # Get image URL and base64 data
    image_url = image_data.get('url')
    base64_data = image_data.get('base64_data')

    if not (image_url or base64_data):
        # The key exists (the filter above requires it) but holds no value.
        st.error("No image URL or base64 data found in this entry")
        st.json(image_data)
        return

    # Create two columns for image and metadata
    col1, col2 = st.columns([2, 1])

    with col1:
        st.subheader(f"Image {index + 1} of {len(channel_images)}")
        caption = f"Channel: #{selected_channel}"
        if 'author_name' in image_data:
            caption += f" | Author: {image_data['author_name']}"
        if 'timestamp_utc' in image_data:
            caption += f" | Time: {image_data['timestamp_utc']}"
        display_image(image_url, caption, base64_data)

    with col2:
        _render_metadata(current_image['id'], image_data)


if __name__ == "__main__":
    main()