"""Streamlit app for browsing the CSV exports stored under ``../timeline_csv``.

The app lists every ``*.csv`` file found one level below the export folder,
lets the user pick one, and shows a preview, per-column information and
numeric statistics. Files whose name appears in ``GEOSPATIAL_FILES`` also
get a folium map and coordinate statistics when their latitude/longitude
columns are present.
"""

import os
from pathlib import Path

import folium
import numpy as np
import pandas as pd
import streamlit as st
from streamlit_folium import folium_static

st.set_page_config(page_title="Timeline CSV Viewer", layout="wide")

st.title("Timeline CSV Viewer")

# Path to the timeline_csv folder (relative to the working directory the
# app is launched from).
timeline_csv_path = Path("../timeline_csv")

# Upper bound on the number of markers drawn on the map, for performance.
MAX_MAP_POINTS = 1000
# Fixed seed so the sampled subset does not reshuffle on every Streamlit rerun.
SAMPLE_SEED = 0

# Geospatial datasets and the columns holding their coordinates / timestamps.
# A value of None means the dataset has no such column.
GEOSPATIAL_FILES = {
    'timeline_path_points.csv': {'lat': 'lat', 'lon': 'lon', 'time': 'time'},
    'visits.csv': {'lat': 'top_lat', 'lon': 'top_lon', 'time': 'startTime'},
    'raw_signals.csv': {'lat': 'lat', 'lon': 'lon', 'time': 'timestamp'},
    'frequent_places.csv': {'lat': 'lat', 'lon': 'lon', 'time': None},
    'semantic_segments.csv': {'lat': None, 'lon': None, 'time': 'startTime'},
}

# Marker colour per dataset; anything not listed uses DEFAULT_MARKER_COLOR.
MARKER_COLORS = {
    'timeline_path_points.csv': 'blue',
    'visits.csv': 'red',
    'raw_signals.csv': 'green',
}
DEFAULT_MARKER_COLOR = 'orange'


def find_csv_files(root):
    """Return the ``*.csv`` files located directly inside subdirectories of *root*.

    The result is sorted so the selection list has a stable order. An empty
    list is returned when *root* is not an existing directory.
    """
    if not root.is_dir():
        return []
    found = []
    for subdir in root.iterdir():
        if subdir.is_dir():
            found.extend(subdir.glob("*.csv"))
    return sorted(found)


def file_label(path):
    """Return the selectbox label for *path*.

    The parent folder name is included so that files sharing a name in
    different subdirectories can be told apart; known geospatial datasets
    get a map marker and a "(Geospatial)" suffix.
    """
    label = f"{path.parent.name}/{path.name}"
    if path.name in GEOSPATIAL_FILES:
        return f"🗺️ {label} (Geospatial)"
    return label


def show_map(df, file_name, lat_col, lon_col, time_col):
    """Render the map and coordinate statistics for a geospatial dataset.

    Rows with a missing latitude or longitude are ignored. At most
    ``MAX_MAP_POINTS`` markers are drawn; the map centre and the statistics
    are computed from all valid rows, not only from the plotted sample.
    """
    st.subheader("🗺️ Map Visualization")

    # Filter out null coordinates
    valid_points = df.dropna(subset=[lat_col, lon_col])
    if valid_points.empty:
        st.warning("No valid coordinates found in this dataset")
        return

    total_points = len(valid_points)
    center_lat = valid_points[lat_col].mean()
    center_lon = valid_points[lon_col].mean()

    # Sample data if too large for performance
    plotted_points = valid_points
    if total_points > MAX_MAP_POINTS:
        plotted_points = valid_points.sample(n=MAX_MAP_POINTS, random_state=SAMPLE_SEED)
        st.info(
            f"Showing {MAX_MAP_POINTS} randomly sampled points out of "
            f"{total_points} total points for performance"
        )

    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

    # Both of these are the same for every marker, so resolve them once.
    color = MARKER_COLORS.get(file_name, DEFAULT_MARKER_COLOR)
    show_time = bool(time_col) and time_col in df.columns

    for idx, row in plotted_points.iterrows():
        popup_text = f"Index: {idx}"
        if show_time:
            # Popups are rendered as HTML, so the line break must be <br>.
            popup_text += f"<br>Time: {row[time_col]}"

        folium.CircleMarker(
            location=[row[lat_col], row[lon_col]],
            radius=3,
            popup=popup_text,
            color=color,
            # folium's documented keyword names are snake_case; fill=True is
            # set explicitly so the marker interior is actually drawn.
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
        ).add_to(m)

    folium_static(m)

    # Show coordinate statistics
    st.subheader("📍 Coordinate Statistics")
    coord_stats = pd.DataFrame({
        'Statistic': ['Count', 'Min Lat', 'Max Lat', 'Min Lon', 'Max Lon', 'Center Lat', 'Center Lon'],
        'Value': [
            total_points,
            f"{valid_points[lat_col].min():.6f}",
            f"{valid_points[lat_col].max():.6f}",
            f"{valid_points[lon_col].min():.6f}",
            f"{valid_points[lon_col].max():.6f}",
            f"{center_lat:.6f}",
            f"{center_lon:.6f}",
        ],
    })
    st.dataframe(coord_stats)


def show_dataset(df, file_name):
    """Render every section of the page for the loaded dataset *df*."""
    geo_config = GEOSPATIAL_FILES.get(file_name)

    # Display basic info
    if geo_config is not None:
        st.success(
            f"🗺️ **Geospatial Dataset Detected** - "
            f"{df.shape[0]} rows × {df.shape[1]} columns"
        )
    else:
        st.write(f"**Shape:** {df.shape[0]} rows × {df.shape[1]} columns")

    # Show geospatial visualization if applicable
    if geo_config is not None:
        lat_col = geo_config['lat']
        lon_col = geo_config['lon']
        time_col = geo_config['time']

        if not (lat_col and lon_col):
            # Dataset is configured without coordinate columns (time only).
            st.info("📅 This dataset contains temporal data that links to spatial information in other datasets")
        elif lat_col in df.columns and lon_col in df.columns:
            show_map(df, file_name, lat_col, lon_col, time_col)
        else:
            st.warning(f"Expected coordinate columns ({lat_col}, {lon_col}) not found in this dataset")

    # Show first few rows
    st.subheader("Data Preview")
    st.dataframe(df.head(100))

    # Show column info
    st.subheader("Column Information")
    col_info = pd.DataFrame({
        'Column': df.columns,
        # Plain strings instead of dtype objects, which are awkward to
        # serialise for display.
        'Data Type': df.dtypes.astype(str),
        'Non-Null Count': df.count(),
        'Null Count': df.isnull().sum(),
    })
    st.dataframe(col_info)

    # Show basic statistics for numeric columns
    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        st.subheader("Numeric Column Statistics")
        st.dataframe(df[numeric_cols].describe())


# Get all CSV files from all subdirectories
csv_files = find_csv_files(timeline_csv_path)

if not csv_files:
    st.error("No CSV files found in the timeline_csv folder.")
    st.stop()

# The options are the paths themselves, so two files with the same name in
# different subdirectories stay distinct and each can be opened.
selected_file_path = st.selectbox(
    "Select a CSV file to view:",
    csv_files,
    format_func=file_label,
)

if selected_file_path is not None:
    selected_file_name = selected_file_path.name
    st.write(f"**File:** {selected_file_path}")

    df = None
    try:
        # Read the CSV file
        df = pd.read_csv(selected_file_path)
    except Exception as e:  # UI boundary: report any read/parse failure in the page
        st.error(f"Error reading the CSV file: {str(e)}")

    if df is not None:
        try:
            show_dataset(df, selected_file_name)
        except Exception as e:  # UI boundary: keep the page alive, but label the failure correctly
            st.error(f"Error displaying the CSV file: {str(e)}")