image viewer app
This commit is contained in:
59
apps/image_viewer/README.md
Normal file
59
apps/image_viewer/README.md
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
# Image Dataset Viewer
|
||||||
|
|
||||||
|
A simple Streamlit application to browse images from your Discord chat dataset.
|
||||||
|
|
||||||
|
## Features
|
||||||
|
|
||||||
|
- 📋 Dropdown to select different channels
|
||||||
|
- 🖼️ View images with navigation controls
|
||||||
|
- ⬅️➡️ Previous/Next buttons and slider navigation
|
||||||
|
- 📊 Display metadata for each image
|
||||||
|
- 📱 Responsive layout
|
||||||
|
|
||||||
|
## Setup and Usage
|
||||||
|
|
||||||
|
### Option 1: Using the run script (Recommended)
|
||||||
|
```bash
|
||||||
|
./run.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option 2: Manual setup
|
||||||
|
1. Create a virtual environment:
|
||||||
|
```bash
|
||||||
|
python3 -m venv venv
|
||||||
|
source venv/bin/activate
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Install dependencies:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Run the application:
|
||||||
|
```bash
|
||||||
|
streamlit run image_viewer.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## How it works
|
||||||
|
|
||||||
|
The application:
|
||||||
|
1. Loads `images_dataset.json` from the sibling `images_dataset` directory (`../images_dataset/images_dataset.json`)
|
||||||
|
2. Extracts unique channel names from the dataset
|
||||||
|
3. Allows you to select a channel from a dropdown
|
||||||
|
4. Displays images from that channel with navigation controls
|
||||||
|
5. Shows metadata including author, timestamp, and message content
|
||||||
|
|
||||||
|
## Dataset Structure
|
||||||
|
|
||||||
|
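The app expects your dataset to be a JSON object with an `images` list (and, optionally, a `metadata.summary` section). Each entry in `images` should have: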
|
||||||
|
- `channel`: The channel name
|
||||||
|
- `url` or `base64_data`: The image location (an `http(s)` URL or a local path) or the base64-encoded image data
|
||||||
|
- `author_name`: The message author (optional)
|
||||||
|
- `timestamp_utc`: When the message was sent (optional)
|
||||||
|
- `content`: The message text (optional)
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
- If images don't load, check that the URLs in your dataset are accessible
|
||||||
|
- For local images, ensure the paths are relative to the directory that contains `image_viewer/` (i.e. `apps/`)
|
||||||
|
- Large datasets may take a moment to load initially
|
||||||
226
apps/image_viewer/image_viewer.py
Normal file
226
apps/image_viewer/image_viewer.py
Normal file
@@ -0,0 +1,226 @@
|
|||||||
|
import streamlit as st
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import requests
|
||||||
|
from PIL import Image
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
# Configure the browser tab (title and icon) and use the wide page layout.
_PAGE_SETTINGS = {
    "page_title": "Image Dataset Viewer",
    "page_icon": "🖼️",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
|
||||||
|
|
||||||
|
# Cache the dataset loading
|
||||||
|
@st.cache_data
def _read_dataset(dataset_path):
    """Parse the JSON file at ``dataset_path`` and return the decoded object.

    The result is cached by Streamlit, keyed on the path string. Errors are
    deliberately not handled here: an exception propagates to the caller so
    that a failed read is not stored as a cached result.
    """
    with open(dataset_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def load_dataset():
    """Load the images dataset JSON file.

    The file is expected at ``../images_dataset/images_dataset.json`` relative
    to the directory containing this script. Resolving against ``__file__``
    (as ``display_image`` does for local image paths) makes the lookup
    independent of the current working directory.

    Returns:
        The decoded JSON content, or an empty dict if the file could not be
        read or parsed (an error message is shown in the UI in that case).
    """
    dataset_path = (
        Path(__file__).parent.parent / "images_dataset" / "images_dataset.json"
    )

    try:
        # Pass a plain string so the cache key is a simple hashable value.
        return _read_dataset(str(dataset_path))
    except Exception as e:
        # UI boundary: report the problem instead of crashing the app. This
        # handler lives outside the cached helper so the empty fallback is
        # recomputed on the next rerun once the file has been fixed.
        st.error(f"Error loading dataset: {e}")
        return {}
|
||||||
|
|
||||||
|
@st.cache_data
def get_channels(data):
    """Return the sorted channel names found in the dataset.

    The list under ``data['metadata']['summary']['channels']`` is used when it
    is present and non-empty. Otherwise the names are collected from the
    ``channel`` key of every dict entry in ``data['images']``. Anything that
    is not a dict yields an empty list.
    """
    if not isinstance(data, dict):
        return []

    # Preferred source: the channel list recorded in the metadata summary.
    if 'metadata' in data and 'summary' in data['metadata']:
        declared = data['metadata']['summary'].get('channels', [])
        if declared:
            return sorted(declared)

    # Fallback: gather the distinct channel names from the image entries.
    discovered = {
        entry['channel']
        for entry in data.get('images', [])
        if isinstance(entry, dict) and 'channel' in entry
    }
    return sorted(discovered)
|
||||||
|
|
||||||
|
def display_image(image_url, caption="", base64_data=None):
    """Render one image in the page and report whether it succeeded.

    The image source is chosen in this order:
      1. ``base64_data``, unless it is empty or equal to the placeholder
         string ``"image datta ..........."``;
      2. ``image_url`` when it starts with ``http://`` or ``https://``
         (downloaded with a 10 second timeout);
      3. ``image_url`` treated as a path relative to the parent of this
         script's directory.

    Returns:
        True if the image was displayed; False if no source was usable or any
        error occurred (an error message is shown in the UI).
    """
    try:
        if base64_data and base64_data != "image datta ...........":
            # Inline image: decode the base64 payload.
            import base64
            raw_bytes = base64.b64decode(base64_data)
            picture = Image.open(BytesIO(raw_bytes))
        elif not image_url:
            st.error("No valid image source found")
            return False
        elif image_url.startswith(('http://', 'https://')):
            # Remote image: fetch it and fail on HTTP error statuses.
            reply = requests.get(image_url, timeout=10)
            reply.raise_for_status()
            picture = Image.open(BytesIO(reply.content))
        else:
            # Local image: resolve against the parent of the app directory.
            local_file = Path(__file__).parent.parent / image_url
            if not local_file.exists():
                st.error(f"Image not found: {image_url}")
                return False
            picture = Image.open(local_file)

        st.image(picture, caption=caption, use_column_width=True)
        return True
    except Exception as e:
        st.error(f"Error loading image: {e}")
        return False
|
||||||
|
|
||||||
|
def _render_summary(data):
    """Show the dataset-wide metrics row when ``metadata.summary`` exists."""
    if not (isinstance(data, dict) and 'metadata' in data):
        return
    metadata = data['metadata']
    if 'summary' not in metadata:
        return

    summary = metadata['summary']
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Images", summary.get('total_images', 'Unknown'))
    with col2:
        st.metric("Channels", len(summary.get('channels', [])))
    with col3:
        st.metric("Authors", len(summary.get('authors', [])))
    with col4:
        size_mb = summary.get('total_size_bytes', 0) / (1024 * 1024)
        st.metric("Total Size", f"{size_mb:.1f} MB")


def _images_for_channel(data, channel):
    """Return ``{'id', 'data'}`` records for ``channel`` entries with an image source.

    ``id`` is the entry's position in ``data['images']``. Entries without a
    ``url`` or ``base64_data`` key are skipped.
    """
    images = data.get('images', []) if isinstance(data, dict) else []
    return [
        {'id': position, 'data': item}
        for position, item in enumerate(images)
        if isinstance(item, dict)
        and item.get('channel') == channel
        and ('url' in item or 'base64_data' in item)
    ]


def _select_image_index(channel, count):
    """Render the navigation controls and return the index of the image to show.

    The index is kept in ``st.session_state.image_index``. It is reset to 0
    when the selected channel changes or when the stored value is outside
    ``range(count)``, because an out-of-range default makes ``st.slider``
    raise. ``count`` must be at least 1.
    """
    state = st.session_state

    if 'image_channel' not in state or state.image_channel != channel:
        # A different channel has a different image list: start from the top.
        state.image_channel = channel
        state.image_index = 0
    elif 'image_index' not in state or not 0 <= state.image_index < count:
        state.image_index = 0

    if count <= 1:
        state.image_index = 0
        return 0

    col1, col2, col3 = st.columns([1, 2, 1])

    # Handle BOTH buttons before creating the slider. Columns are containers,
    # so on-screen order stays Previous / slider / Next, but the slider now
    # always receives the already-updated index and no longer lags one rerun
    # behind after "Next" is clicked.
    with col1:
        if st.button("⬅️ Previous", use_container_width=True):
            # Wraps from the first image to the last.
            state.image_index = (state.image_index - 1) % count

    with col3:
        if st.button("Next ➡️", use_container_width=True):
            # Wraps from the last image to the first.
            state.image_index = (state.image_index + 1) % count

    with col2:
        state.image_index = st.slider(
            "Image",
            0,
            count - 1,
            state.image_index,
            help=f"Navigate through {count} images"
        )

    return state.image_index


def _render_metadata(image_id, image_data):
    """Show the known metadata fields of one entry plus its raw JSON."""
    st.subheader("Metadata")

    # The ID is a list position and may legitimately be 0, so it is written
    # unconditionally instead of going through the truthiness filter below.
    st.write(f"**ID:** {image_id}")

    metadata_to_show = {
        'Channel': image_data.get('channel', 'Unknown'),
        'Author': image_data.get('author_name', 'Unknown'),
        'Nickname': image_data.get('author_nickname', 'Unknown'),
        'Author ID': image_data.get('author_id', 'Unknown'),
        'Message ID': image_data.get('message_id', 'Unknown'),
        'Timestamp': image_data.get('timestamp_utc', 'Unknown'),
        'File Extension': image_data.get('file_extension', 'Unknown'),
        'File Size': f"{image_data.get('file_size', 0):,} bytes" if image_data.get('file_size') else 'Unknown',
        'Message': image_data.get('content', 'No message'),
    }

    # Hide fields that are empty or missing.
    for key, value in metadata_to_show.items():
        if value and value != 'Unknown':
            st.write(f"**{key}:** {value}")

    st.subheader("Raw Data")
    with st.expander("Show all metadata"):
        st.json(image_data)


def main():
    """Run the viewer page.

    Loads the dataset, shows its summary metrics, lets the user pick a channel
    and an image within it, and displays that image next to its metadata.
    Returns early with a message in the UI when the dataset is empty, has no
    channels, or the selected channel has no images.
    """
    st.title("🖼️ Image Dataset Viewer")
    st.markdown("Browse images from your dataset by channel")

    # Load dataset
    with st.spinner("Loading dataset..."):
        data = load_dataset()

    if not data:
        st.error("No data loaded. Please check your dataset file.")
        return

    # Display dataset summary if available
    _render_summary(data)

    # Get channels
    channels = get_channels(data)
    if not channels:
        st.error("No channels found in the dataset.")
        return

    # Channel selection
    selected_channel = st.selectbox(
        "Select a channel:",
        channels,
        help="Choose a channel to view its images"
    )

    # Filter images by channel
    channel_images = _images_for_channel(data, selected_channel)
    if not channel_images:
        st.warning(f"No images found for channel: {selected_channel}")
        return

    st.success(f"Found {len(channel_images)} images in #{selected_channel}")

    # Image navigation
    index = _select_image_index(selected_channel, len(channel_images))

    # Display current image
    current_image = channel_images[index]
    image_data = current_image['data']

    # Get image URL and base64 data
    image_url = image_data.get('url')
    base64_data = image_data.get('base64_data')

    if not (image_url or base64_data):
        # The key exists but holds an empty value: show the raw entry instead.
        st.error("No image URL or base64 data found in this entry")
        st.json(image_data)
        return

    # Two columns: image on the left, metadata on the right.
    col1, col2 = st.columns([2, 1])

    with col1:
        st.subheader(f"Image {index + 1} of {len(channel_images)}")
        caption = f"Channel: #{selected_channel}"
        if 'author_name' in image_data:
            caption += f" | Author: {image_data['author_name']}"
        if 'timestamp_utc' in image_data:
            caption += f" | Time: {image_data['timestamp_utc']}"

        display_image(image_url, caption, base64_data)

    with col2:
        _render_metadata(current_image['id'], image_data)
|
||||||
|
|
||||||
|
# Start the viewer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||||
3
apps/image_viewer/requirements.txt
Normal file
3
apps/image_viewer/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
streamlit>=1.28.0
|
||||||
|
requests>=2.31.0
|
||||||
|
Pillow>=10.0.0
|
||||||
Reference in New Issue
Block a user