better clusters and qol
This commit is contained in:
@@ -17,10 +17,10 @@ from data_loader import (
|
||||
from dimensionality_reduction import (
|
||||
reduce_dimensions, apply_density_based_jittering
|
||||
)
|
||||
from clustering import apply_clustering
|
||||
from clustering import apply_clustering, generate_cluster_names
|
||||
from visualization import (
|
||||
create_visualization_plot, display_clustering_metrics, display_summary_stats,
|
||||
display_clustering_results, display_data_table
|
||||
display_clustering_results, display_data_table, display_cluster_summary
|
||||
)
|
||||
|
||||
|
||||
@@ -51,11 +51,34 @@ def main():
|
||||
# Get UI parameters
|
||||
params = get_all_ui_parameters(valid_df)
|
||||
|
||||
# Check if any sources are selected before proceeding
|
||||
if not params['selected_sources']:
|
||||
st.info("📂 **Select source files from the sidebar to begin visualization**")
|
||||
st.markdown("### Available Data Sources:")
|
||||
|
||||
# Show available sources as an informational table
|
||||
source_info = []
|
||||
for source in valid_df['source_file'].unique():
|
||||
source_data = valid_df[valid_df['source_file'] == source]
|
||||
source_info.append({
|
||||
'Source File': source,
|
||||
'Messages': len(source_data),
|
||||
'Unique Authors': source_data['author_name'].nunique(),
|
||||
'Date Range': f"{source_data['timestamp_utc'].min()} to {source_data['timestamp_utc'].max()}"
|
||||
})
|
||||
|
||||
import pandas as pd
|
||||
source_df = pd.DataFrame(source_info)
|
||||
st.dataframe(source_df, use_container_width=True, hide_index=True)
|
||||
|
||||
st.markdown("👈 **Use the sidebar to select which sources to visualize**")
|
||||
st.stop()
|
||||
|
||||
# Filter data
|
||||
filtered_df = filter_data(valid_df, params['selected_sources'], params['selected_authors'])
|
||||
|
||||
if filtered_df.empty:
|
||||
st.warning("No data matches the current filters!")
|
||||
st.warning("No data matches the current filters! Try selecting different sources or authors.")
|
||||
st.stop()
|
||||
|
||||
# Display performance warnings
|
||||
@@ -95,12 +118,22 @@ def main():
|
||||
jitter_strength=params['jitter_strength']
|
||||
)
|
||||
|
||||
# Generate cluster names if clustering was applied
|
||||
cluster_names = None
|
||||
if cluster_labels is not None:
|
||||
with st.spinner("Generating cluster names..."):
|
||||
cluster_names = generate_cluster_names(filtered_df, cluster_labels)
|
||||
|
||||
# Display clustering metrics
|
||||
display_clustering_metrics(
|
||||
cluster_labels, silhouette_avg, calinski_harabasz,
|
||||
params['show_cluster_metrics']
|
||||
)
|
||||
|
||||
# Display cluster summary with names
|
||||
if cluster_names:
|
||||
display_cluster_summary(cluster_names, cluster_labels)
|
||||
|
||||
# Create and display the main plot
|
||||
fig = create_visualization_plot(
|
||||
reduced_embeddings=reduced_embeddings,
|
||||
@@ -113,7 +146,8 @@ def main():
|
||||
point_opacity=params['point_opacity'],
|
||||
density_based_sizing=params['density_based_sizing'],
|
||||
size_variation=params['size_variation'],
|
||||
enable_3d=params['enable_3d']
|
||||
enable_3d=params['enable_3d'],
|
||||
cluster_names=cluster_names
|
||||
)
|
||||
|
||||
st.plotly_chart(fig, use_container_width=True)
|
||||
|
||||
Reference in New Issue
Block a user