From d9ec1405d7aa3a330466c776a3b279d0cd078be8 Mon Sep 17 00:00:00 2001
From: Azeem Fidahusein <azeem.fidahusein@gmail.com>
Date: Thu, 25 Sep 2025 21:01:28 +0100
Subject: [PATCH] csv streamlit viewer with geospatial integrations

---
 streamlit_app/app.py | 167 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100644 streamlit_app/app.py

diff --git a/streamlit_app/app.py b/streamlit_app/app.py
new file mode 100644
index 0000000..1a3e87c
--- /dev/null
+++ b/streamlit_app/app.py
@@ -0,0 +1,167 @@
+import streamlit as st
+import pandas as pd
+import os
+from pathlib import Path
+import folium
+from streamlit_folium import folium_static
+import numpy as np
+
+st.set_page_config(page_title="Timeline CSV Viewer", layout="wide")
+
+st.title("Timeline CSV Viewer")
+
+# Locate the timeline_csv folder relative to this script (a sibling of the
+# script's directory) instead of the current working directory, so the app
+# finds its data no matter where `streamlit run` is invoked from.
+timeline_csv_path = Path(__file__).resolve().parent.parent / "timeline_csv"
+
+# Collect timeline_csv/<subdir>/*.csv. A missing folder simply yields no
+# matches, and sorting keeps the selector order stable across runs because
+# directory listing order is not guaranteed.
+csv_files = sorted(timeline_csv_path.glob("*/*.csv"))
+
+# Nothing to show: report it and halt this script run.
+if not csv_files:
+    st.error("No CSV files found in the timeline_csv folder.")
+    st.stop()
+
+# Column roles (lat / lon / time) per known dataset file name; None where no column is configured
+GEOSPATIAL_FILES = {
+    "timeline_path_points.csv": {"lat": "lat", "lon": "lon", "time": "time"},
+    "visits.csv": {"lat": "top_lat", "lon": "top_lon", "time": "startTime"},
+    "raw_signals.csv": {"lat": "lat", "lon": "lon", "time": "timestamp"},
+    "frequent_places.csv": {"lat": "lat", "lon": "lon", "time": None},
+    "semantic_segments.csv": {"lat": None, "lon": None, "time": "startTime"},
+}
+
+# Label each file as "<subdir>/<name>" so identically named CSVs that live in
+# different subdirectories stay distinguishable, and flag known geospatial
+# datasets with a map marker.
+def _format_csv_label(csv_path):
+    """Return the selectbox label for *csv_path*."""
+    label = f"{csv_path.parent.name}/{csv_path.name}"
+    if csv_path.name in GEOSPATIAL_FILES:
+        return f"🗺️ {label} (Geospatial)"
+    return label
+
+
+# Use the Path objects themselves as options: the widget then returns the
+# exact file that was picked, with no need to parse the label back into a
+# file name (which would be ambiguous when two files share a name).
+selected_file_path = st.selectbox(
+    "Select a CSV file to view:", csv_files, format_func=_format_csv_label
+)
+
+selected_file_name = selected_file_path.name
+
+# Render the selected CSV: optional map view, then preview, schema and statistics
+if selected_file_path:
+    st.write(f"**File:** {selected_file_path}")
+    try:
+        # Read the CSV file
+        df = pd.read_csv(selected_file_path)
+
+        # Display basic info
+        is_geospatial = selected_file_name in GEOSPATIAL_FILES
+        if is_geospatial:
+            st.success(f"🗺️ **Geospatial Dataset Detected** - {df.shape[0]} rows × {df.shape[1]} columns")
+        else:
+            st.write(f"**Shape:** {df.shape[0]} rows × {df.shape[1]} columns")
+
+        # Show geospatial visualization if applicable
+        if is_geospatial:
+            geo_config = GEOSPATIAL_FILES[selected_file_name]
+            lat_col = geo_config['lat']
+            lon_col = geo_config['lon']
+            time_col = geo_config['time']
+
+            if lat_col and lon_col and lat_col in df.columns and lon_col in df.columns:
+                st.subheader("🗺️ Map Visualization")
+
+                # Rows with both coordinates present; the statistics below use all of them
+                valid_df = df.dropna(subset=[lat_col, lon_col])
+
+                if not valid_df.empty:
+                    # Plot at most 1000 markers for performance (sampling affects the map only)
+                    geo_df = valid_df.sample(n=1000) if len(valid_df) > 1000 else valid_df
+                    if len(geo_df) < len(valid_df):
+                        st.info(f"Showing 1000 randomly sampled points out of {len(valid_df)} total points for performance")
+
+                    # Create map centered on mean coordinates
+                    center_lat = valid_df[lat_col].mean()
+                    center_lon = valid_df[lon_col].mean()
+
+                    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)
+
+                    # Marker color and time popup depend only on the dataset: resolve them once
+                    color = {
+                        'timeline_path_points.csv': 'blue',
+                        'visits.csv': 'red',
+                        'raw_signals.csv': 'green',
+                    }.get(selected_file_name, 'orange')
+                    show_time = bool(time_col) and time_col in df.columns
+
+                    # Add points to map
+                    for idx, row in geo_df.iterrows():
+                        popup_text = f"Index: {idx}"
+                        if show_time:
+                            popup_text += f"<br>Time: {row[time_col]}"
+                        # folium documents these path options in snake_case; camelCase
+                        # keywords may be ignored, which leaves the markers unfilled
+                        folium.CircleMarker(
+                            location=[row[lat_col], row[lon_col]],
+                            radius=3,
+                            popup=popup_text,
+                            color=color,
+                            fill=True,
+                            fill_color=color,
+                            fill_opacity=0.7
+                        ).add_to(m)
+
+                    folium_static(m)
+
+                    # Show coordinate statistics
+                    st.subheader("📍 Coordinate Statistics")
+                    coord_stats = pd.DataFrame({
+                        'Statistic': ['Count', 'Min Lat', 'Max Lat', 'Min Lon', 'Max Lon', 'Center Lat', 'Center Lon'],
+                        'Value': [
+                            len(valid_df),
+                            f"{valid_df[lat_col].min():.6f}",
+                            f"{valid_df[lat_col].max():.6f}",
+                            f"{valid_df[lon_col].min():.6f}",
+                            f"{valid_df[lon_col].max():.6f}",
+                            f"{center_lat:.6f}",
+                            f"{center_lon:.6f}"
+                        ]
+                    })
+                    st.dataframe(coord_stats)
+                else:
+                    st.warning("No valid coordinates found in this dataset")
+            else:
+                if selected_file_name == 'semantic_segments.csv':
+                    st.info("📅 This dataset contains temporal data that links to spatial information in other datasets")
+                else:
+                    st.warning(f"Expected coordinate columns ({lat_col}, {lon_col}) not found in this dataset")
+
+        # Show first few rows
+        st.subheader("Data Preview")
+        st.dataframe(df.head(100))
+
+        # Show column info
+        st.subheader("Column Information")
+        col_info = pd.DataFrame({
+            'Column': df.columns,
+            'Data Type': df.dtypes,
+            'Non-Null Count': df.count(),
+            'Null Count': df.isnull().sum()
+        })
+        st.dataframe(col_info)
+
+        # Show basic statistics for numeric columns
+        numeric_cols = df.select_dtypes(include=['number']).columns
+        if len(numeric_cols) > 0:
+            st.subheader("Numeric Column Statistics")
+            st.dataframe(df[numeric_cols].describe())
+
+    except Exception as e:
+        st.error(f"Error reading the CSV file: {str(e)}")
\ No newline at end of file