image viewer app
This commit is contained in:
59
apps/image_viewer/README.md
Normal file
59
apps/image_viewer/README.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Image Dataset Viewer
|
||||
|
||||
A simple Streamlit application to browse images from your Discord chat dataset.
|
||||
|
||||
## Features
|
||||
|
||||
- 📋 Dropdown to select different channels
|
||||
- 🖼️ View images with navigation controls
|
||||
- ⬅️➡️ Previous/Next buttons and slider navigation
|
||||
- 📊 Display metadata for each image
|
||||
- 📱 Responsive layout
|
||||
|
||||
## Setup and Usage
|
||||
|
||||
### Option 1: Using the run script (Recommended)
|
||||
```bash
|
||||
./run.sh
|
||||
```
|
||||
|
||||
### Option 2: Manual setup
|
||||
1. Create a virtual environment:
|
||||
```bash
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Run the application:
|
||||
```bash
|
||||
streamlit run image_viewer.py
|
||||
```
|
||||
|
||||
## How it works
|
||||
|
||||
The application:
|
||||
1. Loads `images_dataset.json` from the sibling `images_dataset` directory (`../images_dataset/images_dataset.json` relative to this app's folder)
|
||||
2. Extracts unique channel names from the dataset
|
||||
3. Allows you to select a channel from a dropdown
|
||||
4. Displays images from that channel with navigation controls
|
||||
5. Shows metadata including author, timestamp, and message content
|
||||
|
||||
## Dataset Structure
|
||||
|
||||
The app expects a JSON object with an `images` list (and, optionally, a `metadata.summary` section) whose entries have:
|
||||
- `channel`: The channel name
|
||||
- `url` or `base64_data`: The image location (an http(s) URL or a local path) or the base64-encoded image bytes
|
||||
- `author_name`: The message author (optional)
|
||||
- `timestamp_utc`: When the message was sent (optional)
|
||||
- `content`: The message text (optional)
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- If images don't load, check that the URLs in your dataset are accessible
|
||||
- For local images, ensure the paths are relative to the directory that contains the `image_viewer/` folder (the app resolves them from there)
|
||||
- Large datasets may take a moment to load initially
|
||||
226
apps/image_viewer/image_viewer.py
Normal file
226
apps/image_viewer/image_viewer.py
Normal file
@@ -0,0 +1,226 @@
|
||||
import streamlit as st
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
|
||||
# Page-level settings: browser-tab title and icon, plus the wide layout.
# Kept at module level so it runs before any other Streamlit call in this file.
st.set_page_config(page_title="Image Dataset Viewer", page_icon="🖼️", layout="wide")
|
||||
|
||||
# Cache the dataset loading
|
||||
@st.cache_data
def load_dataset():
    """Load the images dataset JSON file.

    The file is looked up at ``../images_dataset/images_dataset.json``
    relative to this source file (not the current working directory), so the
    app finds the same file no matter where ``streamlit run`` is invoked from.

    Returns:
        The parsed JSON document, or an empty dict when the file cannot be
        read or parsed (the error is reported in the UI via ``st.error``).
    """
    dataset_path = Path(__file__).parent.parent / "images_dataset" / "images_dataset.json"

    try:
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        # UI boundary: report any failure to the user instead of crashing the
        # page; callers treat the empty dict as "nothing loaded".
        st.error(f"Error loading dataset: {e}")
        return {}
|
||||
|
||||
@st.cache_data
def get_channels(data):
    """Return the sorted channel names available in ``data``.

    The list stored under ``data['metadata']['summary']['channels']`` is used
    when it exists and is non-empty; otherwise the names are collected from
    the ``channel`` field of each dict entry in ``data['images']``. Anything
    that is not a dict yields an empty list.
    """
    is_mapping = isinstance(data, dict)

    # Preferred source: the pre-computed list in the metadata summary.
    if is_mapping and 'metadata' in data and 'summary' in data['metadata']:
        listed = data['metadata']['summary'].get('channels', [])
        if listed:
            return sorted(listed)

    # Fallback: gather the distinct channel names from the image entries.
    entries = data.get('images', []) if is_mapping else []
    found = {
        entry['channel']
        for entry in entries
        if isinstance(entry, dict) and 'channel' in entry
    }
    return sorted(found)
|
||||
|
||||
def display_image(image_url, caption="", base64_data=None):
    """Render one image in the Streamlit page.

    Sources are tried in this order: ``base64_data`` (unless it is empty or
    the dataset's placeholder string), an http(s) ``image_url``, then
    ``image_url`` treated as a path relative to the parent of this file's
    directory.

    Returns:
        True when the image was rendered; False when no usable source was
        given, a local file was missing, or loading raised (an error message
        is shown in the UI in each of those cases).
    """
    remote_prefixes = ('http://', 'https://')

    try:
        if base64_data and base64_data != "image datta ...........":
            # Inline payload: decode the base64 text into image bytes.
            from base64 import b64decode
            picture = Image.open(BytesIO(b64decode(base64_data)))
        elif not image_url:
            st.error("No valid image source found")
            return False
        elif image_url.startswith(remote_prefixes):
            # Remote image: download it, failing on HTTP error statuses.
            reply = requests.get(image_url, timeout=10)
            reply.raise_for_status()
            picture = Image.open(BytesIO(reply.content))
        else:
            # Local image: resolve against the parent of this file's directory.
            local_file = Path(__file__).parent.parent / image_url
            if not local_file.exists():
                st.error(f"Image not found: {image_url}")
                return False
            picture = Image.open(local_file)

        st.image(picture, caption=caption, use_column_width=True)
        return True
    except Exception as e:
        st.error(f"Error loading image: {e}")
        return False
|
||||
|
||||
def _render_dataset_summary(data):
    """Show the headline metrics from ``data['metadata']['summary']`` if present."""
    if not (isinstance(data, dict) and 'metadata' in data):
        return
    metadata = data['metadata']
    if 'summary' not in metadata:
        return
    summary = metadata['summary']

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Images", summary.get('total_images', 'Unknown'))
    with col2:
        st.metric("Channels", len(summary.get('channels', [])))
    with col3:
        st.metric("Authors", len(summary.get('authors', [])))
    with col4:
        size_mb = summary.get('total_size_bytes', 0) / (1024 * 1024)
        st.metric("Total Size", f"{size_mb:.1f} MB")


def _images_for_channel(data, channel):
    """Return ``{'id', 'data'}`` records for the entries of ``channel`` that carry an image source."""
    images = data.get('images', []) if isinstance(data, dict) else []
    return [
        {'id': i, 'data': item}
        for i, item in enumerate(images)
        if isinstance(item, dict)
        and item.get('channel') == channel
        and ('url' in item or 'base64_data' in item)
    ]


def _select_image_index(count, channel):
    """Render the navigation controls and return the index of the image to show.

    The chosen index is kept in ``st.session_state.image_index``. It is reset
    to 0 when the selected channel changes and clamped to ``0..count - 1``,
    because a stale index larger than the new channel's last position would
    be an out-of-range default for ``st.slider``.
    """
    state = st.session_state
    last = count - 1

    if state.get('image_channel') != channel:
        state.image_channel = channel
        state.image_index = 0

    index = min(max(state.get('image_index', 0), 0), last)

    if count > 1:
        col1, col2, col3 = st.columns([1, 2, 1])

        # Both buttons are read before the slider is created so the slider is
        # drawn with the already-updated position; the columns keep the
        # on-screen order Previous | slider | Next.
        with col1:
            go_previous = st.button("⬅️ Previous", use_container_width=True)
        with col3:
            go_next = st.button("Next ➡️", use_container_width=True)

        # Navigation wraps around at both ends.
        if go_previous:
            index = index - 1 if index > 0 else last
        if go_next:
            index = index + 1 if index < last else 0

        with col2:
            index = st.slider(
                "Image",
                0,
                last,
                index,
                help=f"Navigate through {count} images"
            )

    state.image_index = index
    return index


def _render_metadata(current_image):
    """Show the well-known metadata fields of one image record, then the raw entry."""
    image_data = current_image['data']
    st.subheader("Metadata")

    metadata_to_show = {
        'ID': current_image['id'],
        'Channel': image_data.get('channel', 'Unknown'),
        'Author': image_data.get('author_name', 'Unknown'),
        'Nickname': image_data.get('author_nickname', 'Unknown'),
        'Author ID': image_data.get('author_id', 'Unknown'),
        'Message ID': image_data.get('message_id', 'Unknown'),
        'Timestamp': image_data.get('timestamp_utc', 'Unknown'),
        'File Extension': image_data.get('file_extension', 'Unknown'),
        'File Size': f"{image_data.get('file_size', 0):,} bytes" if image_data.get('file_size') else 'Unknown',
        'Message': image_data.get('content', 'No message'),
    }

    for key, value in metadata_to_show.items():
        # Skip only missing values; a plain truthiness test would also hide
        # a legitimate 0, such as the ID of the first image.
        if value is None or value == '' or value == 'Unknown':
            continue
        st.write(f"**{key}:** {value}")

    st.subheader("Raw Data")
    with st.expander("Show all metadata"):
        st.json(image_data)


def main():
    """Run the viewer page.

    Loads the dataset, shows its summary metrics, lets the user pick a
    channel, and displays one image of that channel at a time together with
    its metadata. Returns early with a message in the UI when the dataset is
    empty, has no channels, or the selected channel has no images.
    """
    st.title("🖼️ Image Dataset Viewer")
    st.markdown("Browse images from your dataset by channel")

    with st.spinner("Loading dataset..."):
        data = load_dataset()

    if not data:
        st.error("No data loaded. Please check your dataset file.")
        return

    _render_dataset_summary(data)

    channels = get_channels(data)
    if not channels:
        st.error("No channels found in the dataset.")
        return

    selected_channel = st.selectbox(
        "Select a channel:",
        channels,
        help="Choose a channel to view its images"
    )

    channel_images = _images_for_channel(data, selected_channel)
    if not channel_images:
        st.warning(f"No images found for channel: {selected_channel}")
        return

    st.success(f"Found {len(channel_images)} images in #{selected_channel}")

    index = _select_image_index(len(channel_images), selected_channel)
    current_image = channel_images[index]
    image_data = current_image['data']

    image_url = image_data.get('url')
    base64_data = image_data.get('base64_data')

    if not (image_url or base64_data):
        # The keys exist but both are empty: show the raw entry for debugging.
        st.error("No image URL or base64 data found in this entry")
        st.json(image_data)
        return

    # Image on the left (two thirds of the width), metadata on the right.
    col1, col2 = st.columns([2, 1])

    with col1:
        st.subheader(f"Image {index + 1} of {len(channel_images)}")
        caption = f"Channel: #{selected_channel}"
        if 'author_name' in image_data:
            caption += f" | Author: {image_data['author_name']}"
        if 'timestamp_utc' in image_data:
            caption += f" | Time: {image_data['timestamp_utc']}"

        display_image(image_url, caption, base64_data)

    with col2:
        _render_metadata(current_image)
|
||||
|
||||
# Script entry point: build the page when the file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
3
apps/image_viewer/requirements.txt
Normal file
3
apps/image_viewer/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
streamlit>=1.28.0
|
||||
requests>=2.31.0
|
||||
Pillow>=10.0.0
|
||||
Reference in New Issue
Block a user