csv streamlit viewer with geospatial integrations
This commit is contained in:
167
streamlit_app/app.py
Normal file
167
streamlit_app/app.py
Normal file
@@ -0,0 +1,167 @@
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import os
|
||||
from pathlib import Path
|
||||
import folium
|
||||
from streamlit_folium import folium_static
|
||||
import numpy as np
|
||||
|
||||
# Page-level setup: wide layout, with one shared string used for both the
# page title and the on-page heading.
_PAGE_TITLE = "Timeline CSV Viewer"

st.set_page_config(layout="wide", page_title=_PAGE_TITLE)
st.title(_PAGE_TITLE)
|
||||
|
||||
# Locate the timeline_csv folder. The working-directory-relative location is
# tried first (the historical behaviour); if it is missing, fall back to the
# folder that sits next to this script's parent directory so the app also
# works when launched from another working directory.
timeline_csv_path = Path("../timeline_csv")
if not timeline_csv_path.is_dir():
    timeline_csv_path = Path(__file__).resolve().parent.parent / "timeline_csv"

# Collect the CSV files that live directly inside each immediate subdirectory
# (files at the top level of timeline_csv and deeper nesting are not scanned).
# iterdir()/glob() yield entries in arbitrary order, so the result is sorted
# to keep the dropdown order stable between runs.
csv_files = []
if timeline_csv_path.is_dir():
    csv_files = sorted(
        csv_file
        for subdir in timeline_csv_path.iterdir()
        if subdir.is_dir()
        for csv_file in subdir.glob("*.csv")
    )

# Nothing to show: report it and halt the script here.
if not csv_files:
    st.error("No CSV files found in the timeline_csv folder.")
    st.stop()
|
||||
|
||||
# Known geospatial datasets, keyed by CSV base name. Each entry names the
# latitude, longitude and time columns of that file; None marks a role the
# file has no column for.
GEOSPATIAL_FILES = {
    file_name: {"lat": lat_col, "lon": lon_col, "time": time_col}
    for file_name, lat_col, lon_col, time_col in (
        ("timeline_path_points.csv", "lat", "lon", "time"),
        ("visits.csv", "top_lat", "top_lon", "startTime"),
        ("raw_signals.csv", "lat", "lon", "timestamp"),
        ("frequent_places.csv", "lat", "lon", None),
        ("semantic_segments.csv", None, None, "startTime"),
    )
}
|
||||
|
||||
# Count how often each base name occurs. CSVs are gathered from several
# subfolders, so the same file name can appear more than once and the labels
# must stay distinguishable.
csv_name_counts = {}
for csv_path in csv_files:
    csv_name_counts[csv_path.name] = csv_name_counts.get(csv_path.name, 0) + 1


def _format_csv_option(csv_path):
    """Return the dropdown label for ``csv_path``.

    The label is the file's base name. When several discovered files share
    that name, the parent folder name is prefixed so each entry is unique.
    Files listed in GEOSPATIAL_FILES get the map marker and "(Geospatial)"
    suffix.
    """
    label = csv_path.name
    if csv_name_counts.get(label, 0) > 1:
        label = f"{csv_path.parent.name}/{label}"
    if csv_path.name in GEOSPATIAL_FILES:
        label = f"🗺️ {label} (Geospatial)"
    return label


# The Path objects themselves are the options, so the selection maps straight
# back to the right file; no label parsing or name-based lookup is needed.
selected_file_path = st.selectbox(
    "Select a CSV file to view:",
    csv_files,
    format_func=_format_csv_option,
)

# Base name of the chosen file, used for the GEOSPATIAL_FILES lookup.
selected_file_name = selected_file_path.name if selected_file_path else None
|
||||
|
||||
# Upper bound on the number of markers drawn on the map.
_MAX_MAP_POINTS = 1000

# Marker colour per geospatial dataset; any other file falls back to orange.
_MARKER_COLORS = {
    'timeline_path_points.csv': 'blue',
    'visits.csv': 'red',
    'raw_signals.csv': 'green',
}
_DEFAULT_MARKER_COLOR = 'orange'


def _render_map(df, file_name, lat_col, lon_col, time_col):
    """Draw the point map and the coordinate statistics for a geospatial CSV.

    Args:
        df: DataFrame loaded from the CSV.
        file_name: Base name of the CSV; selects the marker colour.
        lat_col: Latitude column name (must be present in ``df``).
        lon_col: Longitude column name (must be present in ``df``).
        time_col: Column whose value is shown in each popup, or None.
    """
    st.subheader("🗺️ Map Visualization")

    # Rows missing either coordinate cannot be plotted.
    valid_points = df.dropna(subset=[lat_col, lon_col])
    if valid_points.empty:
        st.warning("No valid coordinates found in this dataset")
        return

    # Only the markers are sampled; the centre and the statistics below are
    # computed from every valid point so they describe the whole dataset.
    map_points = valid_points
    if len(valid_points) > _MAX_MAP_POINTS:
        map_points = valid_points.sample(n=_MAX_MAP_POINTS)
        st.info(f"Showing {_MAX_MAP_POINTS} randomly sampled points out of {len(valid_points)} total points for performance")

    lat_values = valid_points[lat_col]
    lon_values = valid_points[lon_col]
    center_lat = lat_values.mean()
    center_lon = lon_values.mean()

    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

    # Both of these are the same for every row, so resolve them once.
    color = _MARKER_COLORS.get(file_name, _DEFAULT_MARKER_COLOR)
    has_time = bool(time_col) and time_col in df.columns
    time_values = map_points[time_col] if has_time else [None] * len(map_points)

    for idx, lat, lon, when in zip(
        map_points.index, map_points[lat_col], map_points[lon_col], time_values
    ):
        popup_text = f"Index: {idx}"
        if has_time:
            popup_text += f"<br>Time: {when}"

        folium.CircleMarker(
            location=[lat, lon],
            radius=3,
            popup=popup_text,
            color=color,
            fillColor=color,
            fillOpacity=0.7,
        ).add_to(m)

    folium_static(m)

    st.subheader("📍 Coordinate Statistics")
    coord_stats = pd.DataFrame({
        'Statistic': ['Count', 'Min Lat', 'Max Lat', 'Min Lon', 'Max Lon', 'Center Lat', 'Center Lon'],
        # Every value is a string so the column has a single type.
        'Value': [
            str(len(valid_points)),
            f"{lat_values.min():.6f}",
            f"{lat_values.max():.6f}",
            f"{lon_values.min():.6f}",
            f"{lon_values.max():.6f}",
            f"{center_lat:.6f}",
            f"{center_lon:.6f}",
        ],
    })
    st.dataframe(coord_stats)


def _render_summary(df):
    """Show the data preview, per-column info and numeric statistics."""
    st.subheader("Data Preview")
    st.dataframe(df.head(100))

    st.subheader("Column Information")
    col_info = pd.DataFrame({
        'Column': df.columns,
        # dtype objects are rendered as their names (plain strings).
        'Data Type': df.dtypes.astype(str),
        'Non-Null Count': df.count(),
        'Null Count': df.isnull().sum(),
    })
    st.dataframe(col_info)

    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        st.subheader("Numeric Column Statistics")
        st.dataframe(df[numeric_cols].describe())


def _render_dataset(df, file_name):
    """Render the header, the optional map section and the generic summary."""
    n_rows, n_cols = df.shape
    geo_config = GEOSPATIAL_FILES.get(file_name)

    if geo_config is None:
        st.write(f"**Shape:** {n_rows} rows × {n_cols} columns")
    else:
        st.success(f"🗺️ **Geospatial Dataset Detected** - {n_rows} rows × {n_cols} columns")

        lat_col = geo_config['lat']
        lon_col = geo_config['lon']
        time_col = geo_config['time']

        if lat_col and lon_col and lat_col in df.columns and lon_col in df.columns:
            _render_map(df, file_name, lat_col, lon_col, time_col)
        elif file_name == 'semantic_segments.csv':
            st.info("📅 This dataset contains temporal data that links to spatial information in other datasets")
        else:
            st.warning(f"Expected coordinate columns ({lat_col}, {lon_col}) not found in this dataset")

    _render_summary(df)


def _show_selected_file(file_path, file_name):
    """Load ``file_path`` and render it, reporting failures in the page.

    Read failures and rendering failures are caught separately so the
    message shown says which stage actually went wrong.
    """
    st.write(f"**File:** {file_path}")

    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        st.error(f"Error reading the CSV file: {str(e)}")
        return

    try:
        _render_dataset(df, file_name)
    except Exception as e:
        # Top-level UI boundary: show the problem instead of aborting the page.
        st.error(f"Error displaying the CSV file: {str(e)}")


if selected_file_path:
    _show_selected_file(selected_file_path, selected_file_name)
|
||||
Reference in New Issue
Block a user