227 lines
7.9 KiB
Python
227 lines
7.9 KiB
Python
import streamlit as st
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
import requests
|
|
from PIL import Image
|
|
from io import BytesIO
|
|
|
|
# Page-level Streamlit settings: browser-tab title, tab icon, and a layout
# that uses the full browser width.
_PAGE_SETTINGS = {
    "page_title": "Image Dataset Viewer",
    "page_icon": "🖼️",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
|
|
|
|
# Cache the dataset loading
|
|
@st.cache_data
def load_dataset():
    """Load the images dataset JSON file.

    The file is looked up at ``images_dataset/images_dataset.json`` under the
    parent of this script's directory, which is the same base directory that
    ``display_image`` uses for local image paths. This keeps the app working
    no matter which directory it is launched from. When no file exists there,
    the original working-directory-relative location
    (``../images_dataset/images_dataset.json``) is tried instead.

    Returns:
        The parsed JSON content, or an empty dict when the file cannot be
        read or parsed (an error message is shown on the page in that case).
    """
    relative_path = Path("images_dataset") / "images_dataset.json"
    dataset_path = Path(__file__).resolve().parent.parent / relative_path
    if not dataset_path.is_file():
        # Legacy location, relative to the current working directory.
        dataset_path = Path("..") / relative_path

    # NOTE(review): because of st.cache_data the empty-dict failure result is
    # presumably cached as well, so a fixed dataset file may only be picked up
    # after the cache is cleared - confirm against the Streamlit caching docs.
    try:
        with open(dataset_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        # UI boundary: report the problem on the page instead of crashing.
        st.error(f"Error loading dataset: {e}")
        return {}
|
|
|
|
@st.cache_data
def get_channels(data):
    """Return the sorted channel names of the dataset.

    The channel list stored under ``data['metadata']['summary']['channels']``
    is preferred when it is present and non-empty. Otherwise the names are
    gathered from the ``channel`` field of every dict entry in
    ``data['images']``. Anything that is not a dict yields an empty list.
    """
    if not isinstance(data, dict):
        return []

    # Preferred source: the pre-computed summary in the metadata section.
    if 'metadata' in data and 'summary' in data['metadata']:
        declared = data['metadata']['summary'].get('channels', [])
        if declared:
            return sorted(declared)

    # Fallback: scan the image entries and de-duplicate their channel names.
    discovered = {
        entry['channel']
        for entry in data.get('images', [])
        if isinstance(entry, dict) and 'channel' in entry
    }
    return sorted(discovered)
|
|
|
|
def display_image(image_url, caption="", base64_data=None):
    """Render one image on the page and report whether it worked.

    Sources are tried in this order:

    1. ``base64_data``, unless it is empty or the dataset's placeholder text.
    2. ``image_url`` as an HTTP(S) URL, fetched with a 10 second timeout.
    3. ``image_url`` as a path relative to the parent of this script's
       directory.

    Returns:
        True when the image was displayed. False when no usable source was
        found or loading failed; an error message is shown on the page.
    """
    try:
        if base64_data and base64_data != "image datta ...........":
            # Inline payload: decode it and open it from memory.
            import base64
            raw_bytes = base64.b64decode(base64_data)
            picture = Image.open(BytesIO(raw_bytes))
        elif not image_url:
            st.error("No valid image source found")
            return False
        elif image_url.startswith(('http://', 'https://')):
            # Remote image: download it, failing on HTTP error statuses.
            reply = requests.get(image_url, timeout=10)
            reply.raise_for_status()
            picture = Image.open(BytesIO(reply.content))
        else:
            # Local image, resolved against the script's parent directory.
            local_file = Path(__file__).parent.parent / image_url
            if not local_file.exists():
                st.error(f"Image not found: {image_url}")
                return False
            picture = Image.open(local_file)

        st.image(picture, caption=caption, use_column_width=True)
        return True
    except Exception as e:
        st.error(f"Error loading image: {e}")
        return False
|
|
|
|
def _render_summary(data):
    """Show the headline dataset metrics when a metadata summary is present."""
    metadata = data.get('metadata') if isinstance(data, dict) else None
    summary = metadata.get('summary') if isinstance(metadata, dict) else None
    if not isinstance(summary, dict):
        return

    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric("Total Images", summary.get('total_images', 'Unknown'))
    with col2:
        # "or []" / "or 0" also cover keys that are present but set to null.
        st.metric("Channels", len(summary.get('channels') or []))
    with col3:
        st.metric("Authors", len(summary.get('authors') or []))
    with col4:
        size_mb = (summary.get('total_size_bytes') or 0) / (1024 * 1024)
        st.metric("Total Size", f"{size_mb:.1f} MB")


def _filter_channel_images(data, selected_channel):
    """Return the displayable entries of one channel as {'id', 'data'} dicts.

    ``id`` is the entry's position in ``data['images']``. Entries that carry
    neither a ``url`` nor a ``base64_data`` key are skipped.
    """
    images = data.get('images', []) if isinstance(data, dict) else []
    return [
        {'id': position, 'data': item}
        for position, item in enumerate(images)
        if isinstance(item, dict)
        and item.get('channel') == selected_channel
        and ('url' in item or 'base64_data' in item)
    ]


def _choose_image_index(total, selected_channel):
    """Render the navigation controls and return the index of the image to show.

    The index is kept in ``st.session_state.image_index``. It restarts at 0
    when the selected channel changes and is clamped to ``0..total - 1``, so a
    position left over from a larger channel can never be handed to the slider
    as an out-of-range value.
    """
    state = st.session_state

    # A position remembered for another channel is meaningless here.
    if state.get('image_channel') != selected_channel:
        state.image_channel = selected_channel
        state.image_index = 0

    if total <= 1:
        state.image_index = 0
        return 0

    try:
        current = int(state.get('image_index', 0))
    except (TypeError, ValueError):
        current = 0
    current = min(max(current, 0), total - 1)

    col1, col2, col3 = st.columns([1, 2, 1])

    # Both buttons are evaluated BEFORE the slider is created so that the
    # slider is always drawn at the position that is actually displayed.
    # The columns were created in left-to-right order, so filling col3
    # before col2 does not change the layout.
    with col1:
        if st.button("⬅️ Previous", use_container_width=True):
            current = (current - 1) % total  # wraps from first to last
    with col3:
        if st.button("Next ➡️", use_container_width=True):
            current = (current + 1) % total  # wraps from last to first
    with col2:
        # NOTE(review): this relies on Streamlit treating an unkeyed slider
        # with a different default value as a new widget that starts at that
        # value (the same mechanism the previous implementation depended on
        # for "Previous") - confirm against the widget-behavior docs.
        current = st.slider(
            "Image",
            0,
            total - 1,
            current,
            help=f"Navigate through {total} images"
        )

    state.image_index = current
    return current


def _format_file_size(file_size):
    """Return a human-readable file size, or 'Unknown' when it is missing."""
    if not file_size:
        return 'Unknown'
    if isinstance(file_size, (int, float)):
        return f"{file_size:,} bytes"
    # Non-numeric values cannot take the "," format; show them verbatim.
    return str(file_size)


def _render_entry(entry, position, total, selected_channel):
    """Render one dataset entry: the image on the left, its metadata on the right."""
    image_data = entry['data']
    image_url = image_data.get('url')
    base64_data = image_data.get('base64_data')

    if not (image_url or base64_data):
        st.error("No image URL or base64 data found in this entry")
        st.json(image_data)
        return

    col1, col2 = st.columns([2, 1])

    with col1:
        st.subheader(f"Image {position + 1} of {total}")
        caption = f"Channel: #{selected_channel}"
        if 'author_name' in image_data:
            caption += f" | Author: {image_data['author_name']}"
        if 'timestamp_utc' in image_data:
            caption += f" | Time: {image_data['timestamp_utc']}"

        display_image(image_url, caption, base64_data)

    with col2:
        st.subheader("Metadata")

        metadata_to_show = {
            'ID': entry['id'],
            'Channel': image_data.get('channel', 'Unknown'),
            'Author': image_data.get('author_name', 'Unknown'),
            'Nickname': image_data.get('author_nickname', 'Unknown'),
            'Author ID': image_data.get('author_id', 'Unknown'),
            'Message ID': image_data.get('message_id', 'Unknown'),
            'Timestamp': image_data.get('timestamp_utc', 'Unknown'),
            'File Extension': image_data.get('file_extension', 'Unknown'),
            'File Size': _format_file_size(image_data.get('file_size')),
            'Message': image_data.get('content', 'No message'),
        }

        for key, value in metadata_to_show.items():
            # Only None, empty text and the 'Unknown' placeholder count as
            # missing; a plain truthiness test would hide the valid ID 0.
            if value is None or value == '' or value == 'Unknown':
                continue
            st.write(f"**{key}:** {value}")

        st.subheader("Raw Data")
        with st.expander("Show all metadata"):
            st.json(image_data)


def main():
    """Run the viewer page.

    Loads the dataset, shows its summary metrics, lets the user pick a
    channel, and displays one image of that channel at a time together with
    its metadata. Navigation is via Previous/Next buttons and a slider; the
    current position is stored in ``st.session_state.image_index``.
    """
    st.title("🖼️ Image Dataset Viewer")
    st.markdown("Browse images from your dataset by channel")

    with st.spinner("Loading dataset..."):
        data = load_dataset()

    if not data:
        st.error("No data loaded. Please check your dataset file.")
        return

    _render_summary(data)

    channels = get_channels(data)
    if not channels:
        st.error("No channels found in the dataset.")
        return

    selected_channel = st.selectbox(
        "Select a channel:",
        channels,
        help="Choose a channel to view its images"
    )

    channel_images = _filter_channel_images(data, selected_channel)
    if not channel_images:
        st.warning(f"No images found for channel: {selected_channel}")
        return

    st.success(f"Found {len(channel_images)} images in #{selected_channel}")

    total = len(channel_images)
    position = _choose_image_index(total, selected_channel)
    _render_entry(channel_images[position], position, total, selected_channel)
|
|
|
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()
|