refactor
This commit is contained in:
73
apps/cluster_map/config.py
Normal file
73
apps/cluster_map/config.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""
Configuration settings and constants for the Discord Chat Embeddings Visualizer.
"""
|
||||
|
||||
# Application settings
# NOTE(review): these read like page-level UI settings (title, icon, layout
# mode); confirm against the code that consumes them.
APP_TITLE = "Discord Chat Embeddings Visualizer"
APP_ICON = "🗨️"
APP_LAYOUT = "wide"
|
||||
|
||||
# File paths
# NOTE(review): this is a relative path. Unless the consumer anchors it to a
# known directory, it resolves against the process's current working
# directory rather than this file's location -- confirm how it is used.
CHAT_LOGS_PATH = "../../discord_chat_logs"
|
||||
|
||||
# Algorithm parameters
# Default values for the analysis algorithms.
# NOTE(review): by name these are the random seed, the number of output
# dimensions, and the number of clusters -- confirm against the callers.
DEFAULT_RANDOM_STATE = 42
DEFAULT_N_COMPONENTS = 2
DEFAULT_N_CLUSTERS = 5
|
||||
|
||||
# Visualization settings
# Defaults for how points are drawn.
DEFAULT_POINT_SIZE = 8
DEFAULT_POINT_OPACITY = 0.7
# NOTE(review): presumably caps how many distinct authors are shown at once;
# confirm against the plotting code.
MAX_DISPLAYED_AUTHORS = 10
# NOTE(review): presumably character limits used when truncating message
# text (a longer preview and a shorter display form) -- confirm units/usage.
MESSAGE_CONTENT_PREVIEW_LENGTH = 200
MESSAGE_CONTENT_DISPLAY_LENGTH = 100
|
||||
|
||||
# Performance thresholds
# NOTE(review): presumably the item count above which the app warns that
# processing may be slow; confirm what is being counted at the call site.
LARGE_DATASET_WARNING_THRESHOLD = 1000
|
||||
|
||||
# Color palettes
# Ten hex color strings, in order.
# NOTE(review): the values appear to match matplotlib's "tab10" cycle.
PRIMARY_COLORS = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
                  "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]
|
||||
|
||||
# Clustering method categories
# Display names of the clustering methods that need an explicit cluster
# count. The strings are the same names used as keys under
# METHOD_EXPLANATIONS["clustering"], so they must be kept in sync.
CLUSTERING_METHODS_REQUIRING_N_CLUSTERS = [
    "Spectral Clustering",
    "Gaussian Mixture",
    "Agglomerative (Ward)",
    "Agglomerative (Complete)",
]
|
||||
|
||||
# Methods flagged as computationally intensive, grouped by category.
# Keys are category names; values are lists of method display names.
COMPUTATIONALLY_INTENSIVE_METHODS = {
    "dimension_reduction": ["t-SNE", "Spectral Embedding"],
    "clustering": ["Spectral Clustering", "OPTICS"],
}
|
||||
|
||||
# Method explanations
# One-line, human-readable descriptions keyed first by category
# ("dimension_reduction", "clustering", "separation", "metrics") and then by
# the method/option display name.
METHOD_EXPLANATIONS = {
    "dimension_reduction": {
        "PCA": "Linear, fast, preserves global variance",
        "t-SNE": "Non-linear, good for local structure, slower",
        "UMAP": "Balanced speed/quality, preserves local & global structure",
        "Spectral Embedding": "Uses graph theory, good for non-convex clusters",
        "Force-Directed": "Physics-based layout, creates natural spacing",
    },
    "clustering": {
        "HDBSCAN": "Density-based, finds variable density clusters, handles noise",
        "Spectral Clustering": "Uses eigenvalues, good for non-convex shapes",
        "Gaussian Mixture": "Probabilistic, assumes gaussian distributions",
        "Agglomerative (Ward)": "Hierarchical, minimizes within-cluster variance",
        "Agglomerative (Complete)": "Hierarchical, minimizes maximum distance",
        "OPTICS": "Density-based, finds clusters of varying densities",
    },
    "separation": {
        "Spread Factor": "Applies repulsive forces between nearby points",
        "Smart Jittering": "Adds intelligent noise to separate overlapping points",
        "Density-Based Jittering": "Stronger separation in crowded areas",
        "Perplexity Factor": "Controls t-SNE's focus on local vs global structure",
        "Min Distance Factor": "Controls UMAP's point packing tightness",
    },
    "metrics": {
        "Silhouette Score": "Higher is better (range: -1 to 1)",
        "Calinski-Harabasz": "Higher is better, measures cluster separation",
    },
}
|
||||
Reference in New Issue
Block a user