""" Configuration settings and constants for the Discord Chat Embeddings Visualizer. """ # Application settings APP_TITLE = "Discord Chat Embeddings Visualizer" APP_ICON = "🗨️" APP_LAYOUT = "wide" # File paths CHAT_LOGS_PATH = "../../discord_chat_logs" # Algorithm parameters DEFAULT_RANDOM_STATE = 42 DEFAULT_N_COMPONENTS = 2 DEFAULT_N_CLUSTERS = 5 # Visualization settings DEFAULT_POINT_SIZE = 8 DEFAULT_POINT_OPACITY = 0.7 MAX_DISPLAYED_AUTHORS = 10 MESSAGE_CONTENT_PREVIEW_LENGTH = 200 MESSAGE_CONTENT_DISPLAY_LENGTH = 100 # Performance thresholds LARGE_DATASET_WARNING_THRESHOLD = 1000 # Color palettes PRIMARY_COLORS = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"] # Clustering method categories CLUSTERING_METHODS_REQUIRING_N_CLUSTERS = [ "Spectral Clustering", "Gaussian Mixture", "Agglomerative (Ward)", "Agglomerative (Complete)" ] COMPUTATIONALLY_INTENSIVE_METHODS = { "dimension_reduction": ["t-SNE", "Spectral Embedding"], "clustering": ["Spectral Clustering", "OPTICS"] } # Method explanations METHOD_EXPLANATIONS = { "dimension_reduction": { "PCA": "Linear, fast, preserves global variance", "t-SNE": "Non-linear, good for local structure, slower", "UMAP": "Balanced speed/quality, preserves local & global structure", "Spectral Embedding": "Uses graph theory, good for non-convex clusters", "Force-Directed": "Physics-based layout, creates natural spacing" }, "clustering": { "HDBSCAN": "Density-based, finds variable density clusters, handles noise", "Spectral Clustering": "Uses eigenvalues, good for non-convex shapes", "Gaussian Mixture": "Probabilistic, assumes gaussian distributions", "Agglomerative (Ward)": "Hierarchical, minimizes within-cluster variance", "Agglomerative (Complete)": "Hierarchical, minimizes maximum distance", "OPTICS": "Density-based, finds clusters of varying densities" }, "separation": { "Spread Factor": "Applies repulsive forces between nearby points", "Smart Jittering": "Adds intelligent noise to separate overlapping points", "Density-Based Jittering": "Stronger separation in crowded areas", "Perplexity Factor": "Controls t-SNE's focus on local vs global structure", "Min Distance Factor": "Controls UMAP's point packing tightness" }, "metrics": { "Silhouette Score": "Higher is better (range: -1 to 1)", "Calinski-Harabasz": "Higher is better, measures cluster separation" } }