Files
cult-scraper/apps/cluster_map/config.py
2025-08-11 02:37:21 +01:00

74 lines
2.6 KiB
Python

"""
Configuration settings and constants for the Discord Chat Embeddings Visualizer.
"""
# ---------------------------------------------------------------------------
# Application settings
# ---------------------------------------------------------------------------
# Title, icon and layout mode of the app.
# NOTE(review): presumably handed to the UI framework's page configuration;
# confirm against the module that consumes them.
APP_TITLE: str = "Discord Chat Embeddings Visualizer"
APP_ICON: str = "🗨️"
APP_LAYOUT: str = "wide"

# ---------------------------------------------------------------------------
# File paths
# ---------------------------------------------------------------------------
# Location of the chat logs, expressed as a *relative* path.
# NOTE(review): if this is used directly with file APIs it resolves against
# the process working directory, not this file's directory — confirm how the
# app is launched.
CHAT_LOGS_PATH: str = "../../discord_chat_logs"
# ---------------------------------------------------------------------------
# Algorithm parameters
# ---------------------------------------------------------------------------
# Default values for algorithm parameters.
# NOTE(review): names suggest a random seed, a target dimensionality and a
# cluster count; verify against the call sites that read them.
DEFAULT_RANDOM_STATE: int = 42
DEFAULT_N_COMPONENTS: int = 2
DEFAULT_N_CLUSTERS: int = 5
# ---------------------------------------------------------------------------
# Visualization settings
# ---------------------------------------------------------------------------
# Defaults for plotted points.
DEFAULT_POINT_SIZE: int = 8
DEFAULT_POINT_OPACITY: float = 0.7

# Display limits: author count and two message-content lengths.
# NOTE(review): the units of the two lengths (presumably characters) are not
# visible in this file — confirm at the call sites.
MAX_DISPLAYED_AUTHORS: int = 10
MESSAGE_CONTENT_PREVIEW_LENGTH: int = 200
MESSAGE_CONTENT_DISPLAY_LENGTH: int = 100

# ---------------------------------------------------------------------------
# Performance thresholds
# ---------------------------------------------------------------------------
# Threshold associated with a large-dataset warning (see consumers for the
# exact comparison used).
LARGE_DATASET_WARNING_THRESHOLD: int = 1_000
# ---------------------------------------------------------------------------
# Color palettes
# ---------------------------------------------------------------------------
# Ten hex colors, in order. The values match the widely used "category10" /
# "tab10" qualitative palette.
PRIMARY_COLORS = [
    "#1f77b4",
    "#ff7f0e",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
    "#bcbd22",
    "#17becf",
]
# ---------------------------------------------------------------------------
# Clustering method categories
# ---------------------------------------------------------------------------
# Names of the clustering methods that need an explicit cluster count.
CLUSTERING_METHODS_REQUIRING_N_CLUSTERS = [
    "Spectral Clustering",
    "Gaussian Mixture",
    "Agglomerative (Ward)",
    "Agglomerative (Complete)",
]
# Methods flagged as computationally intensive, grouped by stage
# ("dimension_reduction" / "clustering").
COMPUTATIONALLY_INTENSIVE_METHODS = {
    "dimension_reduction": [
        "t-SNE",
        "Spectral Embedding",
    ],
    "clustering": [
        "Spectral Clustering",
        "OPTICS",
    ],
}
# ---------------------------------------------------------------------------
# Method explanations
# ---------------------------------------------------------------------------
# One-line, human-readable descriptions, keyed first by category and then by
# the method / option / metric name.
METHOD_EXPLANATIONS = {
    # Dimensionality-reduction techniques.
    "dimension_reduction": {
        "PCA": "Linear, fast, preserves global variance",
        "t-SNE": "Non-linear, good for local structure, slower",
        "UMAP": "Balanced speed/quality, preserves local & global structure",
        "Spectral Embedding": "Uses graph theory, good for non-convex clusters",
        "Force-Directed": "Physics-based layout, creates natural spacing",
    },
    # Clustering algorithms.
    "clustering": {
        "HDBSCAN": "Density-based, finds variable density clusters, handles noise",
        "Spectral Clustering": "Uses eigenvalues, good for non-convex shapes",
        "Gaussian Mixture": "Probabilistic, assumes gaussian distributions",
        "Agglomerative (Ward)": "Hierarchical, minimizes within-cluster variance",
        "Agglomerative (Complete)": "Hierarchical, minimizes maximum distance",
        "OPTICS": "Density-based, finds clusters of varying densities",
    },
    # Point-separation options.
    "separation": {
        "Spread Factor": "Applies repulsive forces between nearby points",
        "Smart Jittering": "Adds intelligent noise to separate overlapping points",
        "Density-Based Jittering": "Stronger separation in crowded areas",
        "Perplexity Factor": "Controls t-SNE's focus on local vs global structure",
        "Min Distance Factor": "Controls UMAP's point packing tightness",
    },
    # Cluster-quality metrics.
    "metrics": {
        "Silhouette Score": "Higher is better (range: -1 to 1)",
        "Calinski-Harabasz": "Higher is better, measures cluster separation",
    },
}