better clusters and QoL

2025-08-11 03:04:50 +01:00
parent 647111e9d3
commit 2b8659fc95
5 changed files with 234 additions and 15 deletions
--- a/apps/cluster_map/main.py
+++ b/apps/cluster_map/main.py
@@ -17,10 +17,10 @@ from data_loader import (
 from dimensionality_reduction import (
    reduce_dimensions, apply_density_based_jittering
 )
-from clustering import apply_clustering
+from clustering import apply_clustering, generate_cluster_names
 from visualization import (
    create_visualization_plot, display_clustering_metrics, display_summary_stats,
-    display_clustering_results, display_data_table
+    display_clustering_results, display_data_table, display_cluster_summary
 )


@@ -51,11 +51,34 @@ def main():
    # Get UI parameters
    params = get_all_ui_parameters(valid_df)
    
+    # Check if any sources are selected before proceeding
+    if not params['selected_sources']:
+        st.info("📂 **Select source files from the sidebar to begin visualization**")
+        st.markdown("### Available Data Sources:")
+        
+        # Show available sources as an informational table
+        source_info = []
+        for source in valid_df['source_file'].unique():
+            source_data = valid_df[valid_df['source_file'] == source]
+            source_info.append({
+                'Source File': source,
+                'Messages': len(source_data),
+                'Unique Authors': source_data['author_name'].nunique(),
+                'Date Range': f"{source_data['timestamp_utc'].min()} to {source_data['timestamp_utc'].max()}"
+            })
+        
+        import pandas as pd
+        source_df = pd.DataFrame(source_info)
+        st.dataframe(source_df, use_container_width=True, hide_index=True)
+        
+        st.markdown("👈 **Use the sidebar to select which sources to visualize**")
+        st.stop()
+    
    # Filter data
    filtered_df = filter_data(valid_df, params['selected_sources'], params['selected_authors'])
    
    if filtered_df.empty:
-        st.warning("No data matches the current filters!")
+        st.warning("No data matches the current filters! Try selecting different sources or authors.")
        st.stop()
    
    # Display performance warnings
@@ -95,12 +118,22 @@ def main():
                jitter_strength=params['jitter_strength']
            )
    
+    # Generate cluster names if clustering was applied
+    cluster_names = None
+    if cluster_labels is not None:
+        with st.spinner("Generating cluster names..."):
+            cluster_names = generate_cluster_names(filtered_df, cluster_labels)
+    
    # Display clustering metrics
    display_clustering_metrics(
        cluster_labels, silhouette_avg, calinski_harabasz, 
        params['show_cluster_metrics']
    )
    
+    # Display cluster summary with names
+    if cluster_names:
+        display_cluster_summary(cluster_names, cluster_labels)
+    
    # Create and display the main plot
    fig = create_visualization_plot(
        reduced_embeddings=reduced_embeddings,
@@ -113,7 +146,8 @@ def main():
        point_opacity=params['point_opacity'],
        density_based_sizing=params['density_based_sizing'],
        size_variation=params['size_variation'],
-        enable_3d=params['enable_3d']
+        enable_3d=params['enable_3d'],
+        cluster_names=cluster_names
    )
    
    st.plotly_chart(fig, use_container_width=True)