Files
2025-09-27 14:06:26 +01:00

167 lines
6.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import streamlit as st
import pandas as pd
import os
from pathlib import Path
import folium
from streamlit_folium import folium_static
import numpy as np
st.set_page_config(page_title="Timeline CSV Viewer", layout="wide")
st.title("Timeline CSV Viewer")

# Path to the timeline_csv folder, relative to the app's working directory.
timeline_csv_path = Path("../timeline_csv")

# Collect every CSV sitting one level deep (timeline_csv/<subdir>/*.csv).
# A single glob pattern replaces the manual iterdir()/is_dir() loop, and
# sorted() gives a deterministic selectbox order regardless of the
# filesystem's directory-iteration order.
csv_files = sorted(timeline_csv_path.glob("*/*.csv")) if timeline_csv_path.exists() else []

if not csv_files:
    st.error("No CSV files found in the timeline_csv folder.")
    st.stop()
# Known geospatial datasets, mapped to the column names that carry their
# latitude, longitude and timestamp values. None marks a column kind the
# dataset does not have (e.g. semantic_segments has times but no coordinates).
GEOSPATIAL_FILES = {
    'timeline_path_points.csv': {'lat': 'lat', 'lon': 'lon', 'time': 'time'},
    'visits.csv': {'lat': 'top_lat', 'lon': 'top_lon', 'time': 'startTime'},
    'raw_signals.csv': {'lat': 'lat', 'lon': 'lon', 'time': 'timestamp'},
    'frequent_places.csv': {'lat': 'lat', 'lon': 'lon', 'time': None},
    'semantic_segments.csv': {'lat': None, 'lon': None, 'time': 'startTime'},
}
# Build the selectbox labels, flagging the datasets we know how to map.
enhanced_file_names = [
    f"🗺️ {f.name} (Geospatial)" if f.name in GEOSPATIAL_FILES else f.name
    for f in csv_files
]
selected_enhanced_name = st.selectbox("Select a CSV file to view:", enhanced_file_names)

# Strip the decoration back off to recover the real filename.
selected_file_name = selected_enhanced_name.replace('🗺️ ', '').replace(' (Geospatial)', '')

# Resolve the filename back to its full path (first match wins).
selected_file_path = next(
    (path for path in csv_files if path.name == selected_file_name),
    None,
)
if selected_file_path:
    st.write(f"**File:** {selected_file_path}")
    try:
        # Read the CSV file
        df = pd.read_csv(selected_file_path)

        # Basic shape info; known geospatial datasets get a highlighted banner.
        is_geospatial = selected_file_name in GEOSPATIAL_FILES
        if is_geospatial:
            st.success(f"🗺️ **Geospatial Dataset Detected** - {df.shape[0]} rows × {df.shape[1]} columns")
        else:
            st.write(f"**Shape:** {df.shape[0]} rows × {df.shape[1]} columns")

        # Map visualization for datasets with known coordinate columns.
        # (is_geospatial already means selected_file_name is in GEOSPATIAL_FILES,
        # so no second membership test is needed.)
        if is_geospatial:
            geo_config = GEOSPATIAL_FILES[selected_file_name]
            lat_col = geo_config['lat']
            lon_col = geo_config['lon']
            time_col = geo_config['time']

            if lat_col and lon_col and lat_col in df.columns and lon_col in df.columns:
                st.subheader("🗺️ Map Visualization")

                # Drop rows with missing coordinates before plotting.
                geo_df = df.dropna(subset=[lat_col, lon_col])
                if len(geo_df) > 0:
                    # Sample down large datasets so the map stays responsive.
                    # Record the valid-point count first: the info message should
                    # report how many plottable points exist, not the raw row
                    # count of the whole file (which includes null coordinates).
                    total_points = len(geo_df)
                    if total_points > 1000:
                        geo_df = geo_df.sample(n=1000)
                        st.info(f"Showing 1000 randomly sampled points out of {total_points} total points for performance")

                    # Center the map on the mean of the plotted coordinates.
                    center_lat = geo_df[lat_col].mean()
                    center_lon = geo_df[lon_col].mean()
                    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

                    # Marker color depends only on which dataset is selected and
                    # the time column is fixed per dataset, so resolve both once
                    # instead of re-deciding inside the per-row loop.
                    dataset_colors = {
                        'timeline_path_points.csv': 'blue',
                        'visits.csv': 'red',
                        'raw_signals.csv': 'green',
                    }
                    color = dataset_colors.get(selected_file_name, 'orange')
                    has_time = bool(time_col) and time_col in df.columns

                    # Add one circle marker per (possibly sampled) point.
                    for idx, row in geo_df.iterrows():
                        popup_text = f"Index: {idx}"
                        if has_time:
                            popup_text += f"<br>Time: {row[time_col]}"
                        folium.CircleMarker(
                            location=[row[lat_col], row[lon_col]],
                            radius=3,
                            popup=popup_text,
                            color=color,
                            fillColor=color,
                            fillOpacity=0.7
                        ).add_to(m)
                    folium_static(m)

                    # Summary statistics for the plotted coordinates.
                    st.subheader("📍 Coordinate Statistics")
                    coord_stats = pd.DataFrame({
                        'Statistic': ['Count', 'Min Lat', 'Max Lat', 'Min Lon', 'Max Lon', 'Center Lat', 'Center Lon'],
                        'Value': [
                            len(geo_df),
                            f"{geo_df[lat_col].min():.6f}",
                            f"{geo_df[lat_col].max():.6f}",
                            f"{geo_df[lon_col].min():.6f}",
                            f"{geo_df[lon_col].max():.6f}",
                            f"{center_lat:.6f}",
                            f"{center_lon:.6f}"
                        ]
                    })
                    st.dataframe(coord_stats)
                else:
                    st.warning("No valid coordinates found in this dataset")
            else:
                if selected_file_name == 'semantic_segments.csv':
                    st.info("📅 This dataset contains temporal data that links to spatial information in other datasets")
                else:
                    st.warning(f"Expected coordinate columns ({lat_col}, {lon_col}) not found in this dataset")

        # Show first few rows
        st.subheader("Data Preview")
        st.dataframe(df.head(100))

        # Per-column dtype / null-count summary.
        st.subheader("Column Information")
        col_info = pd.DataFrame({
            'Column': df.columns,
            'Data Type': df.dtypes,
            'Non-Null Count': df.count(),
            'Null Count': df.isnull().sum()
        })
        st.dataframe(col_info)

        # Descriptive statistics, numeric columns only.
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            st.subheader("Numeric Column Statistics")
            st.dataframe(df[numeric_cols].describe())
    except Exception as e:
        # Broad catch is intentional at this UI boundary: surface any
        # read/parse failure in the page instead of crashing the app.
        st.error(f"Error reading the CSV file: {str(e)}")