moved dir

This commit is contained in:
2025-09-27 14:06:26 +01:00
parent a8c85f1b74
commit 19aa910c60

View File

@@ -0,0 +1,167 @@
import streamlit as st
import pandas as pd
import os
from pathlib import Path
import folium
from streamlit_folium import folium_static
import numpy as np
st.set_page_config(page_title="Timeline CSV Viewer", layout="wide")
st.title("Timeline CSV Viewer")

# Path to the timeline_csv folder.
# NOTE: this is a relative path, so it is resolved against the current working
# directory of the running process, not against the location of this script.
timeline_csv_path = Path("../timeline_csv")

# Collect the CSV files found directly inside each immediate subdirectory.
# is_dir() (rather than exists()) keeps iterdir() from raising when the path
# happens to be a regular file.
csv_files = []
if timeline_csv_path.is_dir():
    for subdir in timeline_csv_path.iterdir():
        if subdir.is_dir():
            csv_files.extend(subdir.glob("*.csv"))

# Directory listings come back in arbitrary, filesystem-dependent order; sort
# so the select box shows the files in the same order on every run.
csv_files.sort()

# Nothing to show: report it and halt the script here.
if not csv_files:
    st.error("No CSV files found in the timeline_csv folder.")
    st.stop()
# Known geospatial datasets, keyed by file name. Each entry names the columns
# holding latitude, longitude and time for that file; None is used where no
# such column is configured.
GEOSPATIAL_FILES = {
    file_name: {'lat': lat_column, 'lon': lon_column, 'time': time_column}
    for file_name, lat_column, lon_column, time_column in (
        ('timeline_path_points.csv', 'lat', 'lon', 'time'),
        ('visits.csv', 'top_lat', 'top_lon', 'startTime'),
        ('raw_signals.csv', 'lat', 'lon', 'timestamp'),
        ('frequent_places.csv', 'lat', 'lon', None),
        ('semantic_segments.csv', None, None, 'startTime'),
    )
}
# The same file name can occur in more than one subdirectory. Count the
# occurrences so that colliding names can be told apart in the select box.
name_counts = {}
for f in csv_files:
    name_counts[f.name] = name_counts.get(f.name, 0) + 1


def _display_label(path):
    """Return the select-box label for one CSV path.

    The label is the bare file name, prefixed with the parent directory name
    only when that file name occurs more than once, and decorated with the
    geospatial marker when the file name is a key of GEOSPATIAL_FILES.
    """
    label = path.name
    if name_counts.get(path.name, 0) > 1:
        label = f"{path.parent.name}/{label}"
    if path.name in GEOSPATIAL_FILES:
        label = f"🗺️ {label} (Geospatial)"
    return label


# Create enhanced file names with geospatial indicators
enhanced_file_names = [_display_label(f) for f in csv_files]

# Offer the paths themselves as options and let format_func produce the label.
# The selection is then the exact path the user picked, instead of a label
# that has to be stripped and matched back by name (which always resolved to
# the first file with that name).
selected_file_path = st.selectbox(
    "Select a CSV file to view:",
    csv_files,
    format_func=_display_label,
)

# Names derived from the selection, kept for the code that follows.
selected_file_name = selected_file_path.name if selected_file_path else None
selected_enhanced_name = (
    _display_label(selected_file_path) if selected_file_path else None
)
# Marker colour for each geospatial dataset; any other file uses the default.
_MARKER_COLORS = {
    'timeline_path_points.csv': 'blue',
    'visits.csv': 'red',
    'raw_signals.csv': 'green',
}
_DEFAULT_MARKER_COLOR = 'orange'

# Upper bound on the number of markers drawn, to keep the map responsive.
_MAX_MAP_POINTS = 1000


def _render_geospatial_section(df, file_name):
    """Render the map and coordinate statistics for a geospatial dataset.

    Args:
        df: The loaded CSV contents.
        file_name: Bare file name; must be a key of GEOSPATIAL_FILES.
    """
    geo_config = GEOSPATIAL_FILES[file_name]
    lat_col = geo_config['lat']
    lon_col = geo_config['lon']
    time_col = geo_config['time']

    has_coordinates = (
        lat_col and lon_col and lat_col in df.columns and lon_col in df.columns
    )
    if not has_coordinates:
        if file_name == 'semantic_segments.csv':
            st.info("📅 This dataset contains temporal data that links to spatial information in other datasets")
        else:
            st.warning(f"Expected coordinate columns ({lat_col}, {lon_col}) not found in this dataset")
        return

    st.subheader("🗺️ Map Visualization")

    # Filter out null coordinates
    geo_df = df.dropna(subset=[lat_col, lon_col])
    if geo_df.empty:
        st.warning("No valid coordinates found in this dataset")
        return

    # Centre and statistics are taken from every valid row; only the markers
    # drawn on the map are down-sampled.
    center_lat = geo_df[lat_col].mean()
    center_lon = geo_df[lon_col].mean()

    plot_df = geo_df
    if len(geo_df) > _MAX_MAP_POINTS:
        # A fixed seed keeps the same points on the map when the script is
        # re-executed, rather than drawing a new sample each time.
        plot_df = geo_df.sample(n=_MAX_MAP_POINTS, random_state=0)
        st.info(
            f"Showing {_MAX_MAP_POINTS} randomly sampled points out of "
            f"{len(geo_df)} total points for performance"
        )

    m = folium.Map(location=[center_lat, center_lon], zoom_start=10)

    # Both of these are the same for every row, so resolve them once.
    color = _MARKER_COLORS.get(file_name, _DEFAULT_MARKER_COLOR)
    show_time = bool(time_col) and time_col in plot_df.columns

    for idx, row in plot_df.iterrows():
        popup_text = f"Index: {idx}"
        if show_time:
            popup_text += f"<br>Time: {row[time_col]}"
        # folium documents the fill options in snake_case (fill, fill_color,
        # fill_opacity); those spellings are used here so the fill is applied.
        folium.CircleMarker(
            location=[row[lat_col], row[lon_col]],
            radius=3,
            popup=popup_text,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
        ).add_to(m)

    folium_static(m)

    # Show coordinate statistics. Every value is a string so the column has a
    # single type when the table is displayed.
    st.subheader("📍 Coordinate Statistics")
    coord_stats = pd.DataFrame({
        'Statistic': ['Count', 'Min Lat', 'Max Lat', 'Min Lon', 'Max Lon', 'Center Lat', 'Center Lon'],
        'Value': [
            str(len(geo_df)),
            f"{geo_df[lat_col].min():.6f}",
            f"{geo_df[lat_col].max():.6f}",
            f"{geo_df[lon_col].min():.6f}",
            f"{geo_df[lon_col].max():.6f}",
            f"{center_lat:.6f}",
            f"{center_lon:.6f}",
        ],
    })
    st.dataframe(coord_stats)


def _render_table_overview(df):
    """Render the data preview, per-column summary and numeric statistics."""
    # Show first few rows
    st.subheader("Data Preview")
    st.dataframe(df.head(100))

    # Show column info; dtypes are rendered as text so the column holds plain
    # strings rather than dtype objects.
    st.subheader("Column Information")
    col_info = pd.DataFrame({
        'Column': df.columns,
        'Data Type': df.dtypes.astype(str),
        'Non-Null Count': df.count(),
        'Null Count': df.isnull().sum(),
    })
    st.dataframe(col_info)

    # Show basic statistics for numeric columns
    numeric_cols = df.select_dtypes(include=['number']).columns
    if len(numeric_cols) > 0:
        st.subheader("Numeric Column Statistics")
        st.dataframe(df[numeric_cols].describe())


if selected_file_path:
    st.write(f"**File:** {selected_file_path}")

    # Only the read itself is guarded, so the "Error reading" message is shown
    # solely for read/parse failures; rendering errors are left to surface as
    # themselves instead of being reported as a read error.
    try:
        df = pd.read_csv(selected_file_path)
    except Exception as e:
        st.error(f"Error reading the CSV file: {str(e)}")
    else:
        # Display basic info
        is_geospatial = selected_file_name in GEOSPATIAL_FILES
        if is_geospatial:
            st.success(f"🗺️ **Geospatial Dataset Detected** - {df.shape[0]} rows × {df.shape[1]} columns")
            _render_geospatial_section(df, selected_file_name)
        else:
            st.write(f"**Shape:** {df.shape[0]} rows × {df.shape[1]} columns")

        _render_table_overview(df)