# streamlit_app/app.py — CSV Streamlit viewer with geospatial integrations.
import os
from pathlib import Path

import folium
import numpy as np
import pandas as pd
import streamlit as st
from streamlit_folium import folium_static

# ---------------------------------------------------------------------------
# Page setup and CSV discovery.
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Timeline CSV Viewer", layout="wide")
st.title("Timeline CSV Viewer")

# Folder holding the timeline exports.
# NOTE(review): this path is relative to the process working directory, not
# to this file — assumes the app is launched from streamlit_app/; confirm.
timeline_csv_path = Path("../timeline_csv")

# Collect every *.csv exactly one level deep: timeline_csv/<subdir>/*.csv.
csv_files = []
if timeline_csv_path.exists():
    csv_files = [
        csv_file
        for subdir in timeline_csv_path.iterdir()
        if subdir.is_dir()
        for csv_file in subdir.glob("*.csv")
    ]

if not csv_files:
    st.error("No CSV files found in the timeline_csv folder.")
    st.stop()
# Geospatial metadata per known CSV: the names of its latitude, longitude,
# and timestamp columns. A None entry marks a column kind the file lacks.
GEOSPATIAL_FILES = {
    'timeline_path_points.csv': {'lat': 'lat', 'lon': 'lon', 'time': 'time'},
    'visits.csv': {'lat': 'top_lat', 'lon': 'top_lon', 'time': 'startTime'},
    'raw_signals.csv': {'lat': 'lat', 'lon': 'lon', 'time': 'timestamp'},
    'frequent_places.csv': {'lat': 'lat', 'lon': 'lon', 'time': None},
    'semantic_segments.csv': {'lat': None, 'lon': None, 'time': 'startTime'}
}

# Selectbox labels: geospatial files are decorated with a map emoji + suffix.
enhanced_file_names = [
    f"🗺️ {f.name} (Geospatial)" if f.name in GEOSPATIAL_FILES else f.name
    for f in csv_files
]

selected_enhanced_name = st.selectbox("Select a CSV file to view:", enhanced_file_names)

# Undo the label decoration to recover the plain filename.
selected_file_name = selected_enhanced_name.replace('🗺️ ', '').replace(' (Geospatial)', '')

# Resolve the plain filename back to its full path (first match wins;
# None when nothing matches, handled by the display block below).
selected_file_path = next(
    (path for path in csv_files if path.name == selected_file_name),
    None,
)
if selected_file_path:
    st.write(f"**File:** {selected_file_path}")

    try:
        # Read the CSV file
        df = pd.read_csv(selected_file_path)

        # Basic shape info; geospatial datasets get a highlighted banner.
        is_geospatial = selected_file_name in GEOSPATIAL_FILES
        if is_geospatial:
            st.success(f"🗺️ **Geospatial Dataset Detected** - {df.shape[0]} rows × {df.shape[1]} columns")
        else:
            st.write(f"**Shape:** {df.shape[0]} rows × {df.shape[1]} columns")

        # Map visualization for datasets with known coordinate columns.
        # (A former second test `selected_file_name in GEOSPATIAL_FILES`
        # was redundant: `is_geospatial` is exactly that membership check.)
        if is_geospatial:
            geo_config = GEOSPATIAL_FILES[selected_file_name]
            lat_col = geo_config['lat']
            lon_col = geo_config['lon']
            time_col = geo_config['time']

            if lat_col and lon_col and lat_col in df.columns and lon_col in df.columns:
                st.subheader("🗺️ Map Visualization")

                # Filter out rows with null coordinates.
                geo_df = df.dropna(subset=[lat_col, lon_col])

                if len(geo_df) > 0:
                    # Bug fix: the info message previously reported len(df)
                    # as the total, which also counted the null-coordinate
                    # rows dropped above. Capture the valid count first.
                    total_valid = len(geo_df)
                    if total_valid > 1000:
                        # Cap markers at 1000 for rendering performance.
                        geo_df = geo_df.sample(n=1000)
                        st.info(f"Showing 1000 randomly sampled points out of {total_valid} total points for performance")

                    # Create map centered on mean coordinates.
                    center_lat = geo_df[lat_col].mean()
                    center_lon = geo_df[lon_col].mean()
                    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

                    # Loop invariants hoisted out of the marker loop: the
                    # marker colour is fixed per dataset, and the presence
                    # of the time column never varies per row.
                    color = {
                        'timeline_path_points.csv': 'blue',
                        'visits.csv': 'red',
                        'raw_signals.csv': 'green',
                    }.get(selected_file_name, 'orange')
                    has_time = bool(time_col) and time_col in geo_df.columns

                    for idx, row in geo_df.iterrows():
                        popup_text = f"Index: {idx}"
                        if has_time:
                            popup_text += f"<br>Time: {row[time_col]}"

                        folium.CircleMarker(
                            location=[row[lat_col], row[lon_col]],
                            radius=3,
                            popup=popup_text,
                            color=color,
                            # snake_case keywords per the documented folium
                            # API (the former camelCase fillColor/fillOpacity
                            # relied on undocumented kwarg pass-through).
                            fill_color=color,
                            fill_opacity=0.7
                        ).add_to(m)

                    folium_static(m)

                    # Summary of the coordinate extent and center.
                    st.subheader("📍 Coordinate Statistics")
                    coord_stats = pd.DataFrame({
                        'Statistic': ['Count', 'Min Lat', 'Max Lat', 'Min Lon', 'Max Lon', 'Center Lat', 'Center Lon'],
                        'Value': [
                            len(geo_df),
                            f"{geo_df[lat_col].min():.6f}",
                            f"{geo_df[lat_col].max():.6f}",
                            f"{geo_df[lon_col].min():.6f}",
                            f"{geo_df[lon_col].max():.6f}",
                            f"{center_lat:.6f}",
                            f"{center_lon:.6f}"
                        ]
                    })
                    st.dataframe(coord_stats)
                else:
                    st.warning("No valid coordinates found in this dataset")
            else:
                if selected_file_name == 'semantic_segments.csv':
                    st.info("📅 This dataset contains temporal data that links to spatial information in other datasets")
                else:
                    st.warning(f"Expected coordinate columns ({lat_col}, {lon_col}) not found in this dataset")

        # Show first few rows.
        st.subheader("Data Preview")
        st.dataframe(df.head(100))

        # Per-column dtype and null counts.
        st.subheader("Column Information")
        col_info = pd.DataFrame({
            'Column': df.columns,
            'Data Type': df.dtypes,
            'Non-Null Count': df.count(),
            'Null Count': df.isnull().sum()
        })
        st.dataframe(col_info)

        # Summary statistics, numeric columns only.
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            st.subheader("Numeric Column Statistics")
            st.dataframe(df[numeric_cols].describe())

    except Exception as e:
        # Broad catch is intentional: any parse/IO failure is surfaced in
        # the UI instead of crashing the whole app.
        st.error(f"Error reading the CSV file: {str(e)}")