"""Streamlit viewer for the CSV exports stored under ``timeline_csv``.

Reconstructed from the patch that adds ``streamlit_app/app.py`` ("csv
streamlit viewer with geospatial integrations").

The app lists every ``*.csv`` file found one level below the
``timeline_csv`` folder, lets the user pick one, and shows a preview, per
column information and numeric statistics.  Files whose name appears in
``GEOSPATIAL_FILES`` additionally get a Folium map and coordinate statistics.
"""

import os
from pathlib import Path
from typing import List

import folium
import numpy as np
import pandas as pd
import streamlit as st
from streamlit_folium import folium_static

# Path to the timeline_csv folder (relative to the working directory).
TIMELINE_CSV_PATH = Path("../timeline_csv")

# Geospatial datasets and the columns holding their coordinates / timestamps.
# A value of None means the dataset has no such column.
GEOSPATIAL_FILES = {
    'timeline_path_points.csv': {'lat': 'lat', 'lon': 'lon', 'time': 'time'},
    'visits.csv': {'lat': 'top_lat', 'lon': 'top_lon', 'time': 'startTime'},
    'raw_signals.csv': {'lat': 'lat', 'lon': 'lon', 'time': 'timestamp'},
    'frequent_places.csv': {'lat': 'lat', 'lon': 'lon', 'time': None},
    'semantic_segments.csv': {'lat': None, 'lon': None, 'time': 'startTime'},
}

# Marker colour per dataset; anything not listed falls back to the default.
MARKER_COLORS = {
    'timeline_path_points.csv': 'blue',
    'visits.csv': 'red',
    'raw_signals.csv': 'green',
}
DEFAULT_MARKER_COLOR = 'orange'

# Upper bound on plotted markers, to keep the map responsive.
MAX_MAP_POINTS = 1000
# Fixed seed so the sampled markers stay the same across Streamlit reruns.
SAMPLE_SEED = 0


def find_csv_files(root: Path) -> List[Path]:
    """Return every ``*.csv`` file located one level below *root*, sorted.

    An empty list is returned when *root* is missing or is not a directory.
    """
    if not root.is_dir():
        return []
    found = []
    for subdir in root.iterdir():
        if subdir.is_dir():
            found.extend(subdir.glob("*.csv"))
    # Sorting keeps the select box order stable between reruns.
    return sorted(found)


def describe_file(csv_file: Path) -> str:
    """Return the select box label for *csv_file*.

    The parent folder is part of the label so that files sharing a name in
    different subfolders can be told apart.
    """
    label = f"{csv_file.parent.name}/{csv_file.name}"
    if csv_file.name in GEOSPATIAL_FILES:
        return f"🗺️ {label} (Geospatial)"
    return label


def render_map(df: pd.DataFrame, file_name: str) -> None:
    """Draw the map and coordinate statistics for a geospatial dataset.

    *file_name* must be a key of ``GEOSPATIAL_FILES``.  When the configured
    coordinate columns are absent, an explanatory message is shown instead.
    """
    geo_config = GEOSPATIAL_FILES[file_name]
    lat_col = geo_config['lat']
    lon_col = geo_config['lon']
    time_col = geo_config['time']

    has_coords = bool(lat_col and lon_col) and {lat_col, lon_col} <= set(df.columns)
    if not has_coords:
        if file_name == 'semantic_segments.csv':
            st.info(
                "📅 This dataset contains temporal data that links to spatial "
                "information in other datasets"
            )
        else:
            st.warning(
                f"Expected coordinate columns ({lat_col}, {lon_col}) "
                "not found in this dataset"
            )
        return

    st.subheader("🗺️ Map Visualization")

    # Coerce to numbers so stray text cells count as missing values instead
    # of breaking the mean/min/max computations below.
    coords = df[[lat_col, lon_col]].apply(pd.to_numeric, errors="coerce").dropna()
    if coords.empty:
        st.warning("No valid coordinates found in this dataset")
        return

    # Only the plotted markers are sampled; centre and statistics always
    # describe every valid point.
    plotted = coords
    if len(coords) > MAX_MAP_POINTS:
        plotted = coords.sample(n=MAX_MAP_POINTS, random_state=SAMPLE_SEED)
        st.info(
            f"Showing {MAX_MAP_POINTS} randomly sampled points out of "
            f"{len(coords)} total points for performance"
        )

    center_lat = coords[lat_col].mean()
    center_lon = coords[lon_col].mean()
    timeline_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)

    color = MARKER_COLORS.get(file_name, DEFAULT_MARKER_COLOR)
    show_time = bool(time_col) and time_col in df.columns
    for idx, lat, lon in zip(plotted.index, plotted[lat_col], plotted[lon_col]):
        popup_text = f"Index: {idx}"
        if show_time:
            # Folium renders popup strings as HTML, hence the <br> separator.
            popup_text += f"<br>Time: {df.at[idx, time_col]}"

        # snake_case names are folium's documented path options; `fill=True`
        # is what actually turns the fill on.
        folium.CircleMarker(
            location=[lat, lon],
            radius=3,
            popup=popup_text,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
        ).add_to(timeline_map)

    folium_static(timeline_map)

    st.subheader("📍 Coordinate Statistics")
    coord_stats = pd.DataFrame({
        'Statistic': [
            'Count', 'Min Lat', 'Max Lat', 'Min Lon', 'Max Lon',
            'Center Lat', 'Center Lon',
        ],
        # All values are strings so the column has one consistent type.
        'Value': [
            str(len(coords)),
            f"{coords[lat_col].min():.6f}",
            f"{coords[lat_col].max():.6f}",
            f"{coords[lon_col].min():.6f}",
            f"{coords[lon_col].max():.6f}",
            f"{center_lat:.6f}",
            f"{center_lon:.6f}",
        ],
    })
    st.dataframe(coord_stats)


def render_table_summary(df: pd.DataFrame) -> None:
    """Show the data preview, per-column info and numeric statistics."""
    st.subheader("Data Preview")
    st.dataframe(df.head(100))

    st.subheader("Column Information")
    col_info = pd.DataFrame({
        'Column': df.columns,
        # dtype objects are rendered as text so the table serialises cleanly.
        'Data Type': df.dtypes.astype(str),
        'Non-Null Count': df.count(),
        'Null Count': df.isnull().sum(),
    })
    st.dataframe(col_info)

    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        st.subheader("Numeric Column Statistics")
        st.dataframe(df[numeric_cols].describe())


def main() -> None:
    """Run the Timeline CSV Viewer page."""
    st.set_page_config(page_title="Timeline CSV Viewer", layout="wide")
    st.title("Timeline CSV Viewer")

    csv_files = find_csv_files(TIMELINE_CSV_PATH)
    if not csv_files:
        st.error("No CSV files found in the timeline_csv folder.")
        st.stop()

    # The options are the paths themselves, so the selection is unambiguous
    # even when two subfolders contain a file with the same name.
    selected_file_path = st.selectbox(
        "Select a CSV file to view:",
        csv_files,
        format_func=describe_file,
    )
    if selected_file_path is None:
        return

    st.write(f"**File:** {selected_file_path}")

    # Only the read is guarded, so the message below is accurate; pandas'
    # parser and empty-file errors are ValueError subclasses.
    try:
        df = pd.read_csv(selected_file_path)
    except (OSError, ValueError) as exc:
        st.error(f"Error reading the CSV file: {exc}")
        return

    shape_text = f"{df.shape[0]} rows × {df.shape[1]} columns"
    if selected_file_path.name in GEOSPATIAL_FILES:
        st.success(f"🗺️ **Geospatial Dataset Detected** - {shape_text}")
        render_map(df, selected_file_path.name)
    else:
        st.write(f"**Shape:** {shape_text}")

    render_table_summary(df)


if __name__ == "__main__":
    main()