"""
Configuration settings and constants for the Discord Chat Embeddings Visualizer.
"""

# Application settings
# Page-level presentation values: the title text, the icon glyph and the
# layout mode.
# NOTE(review): these look like arguments for a page-config call
# (e.g. Streamlit's set_page_config) -- confirm against the caller.
APP_TITLE = "Discord Chat Embeddings Visualizer"
APP_ICON = "🗨️"
APP_LAYOUT = "wide"

# File paths
# Relative location of the chat-log directory (two levels up).
# NOTE(review): a relative path is resolved against whatever base the
# consumer uses (typically the working directory) -- verify where it is read.
CHAT_LOGS_PATH = "../../discord_chat_logs"

# Algorithm parameters
# Default values for the dimension-reduction / clustering algorithms.
# NOTE(review): presumably the random seed, the number of output dimensions
# and the number of clusters respectively -- confirm against the call sites.
DEFAULT_RANDOM_STATE = 42
DEFAULT_N_COMPONENTS = 2
DEFAULT_N_CLUSTERS = 5

# Visualization settings
# Marker appearance defaults for the plot.
DEFAULT_POINT_SIZE = 8
DEFAULT_POINT_OPACITY = 0.7

# Upper bound on how many authors are shown.
MAX_DISPLAYED_AUTHORS = 10

# Length limits for message text in its "preview" and "display" forms.
# NOTE(review): presumably measured in characters -- confirm where the
# truncation is performed.
MESSAGE_CONTENT_PREVIEW_LENGTH = 200
MESSAGE_CONTENT_DISPLAY_LENGTH = 100

# Performance thresholds
# Dataset size at which a "large dataset" warning applies.
# NOTE(review): unit (messages / points) and whether the comparison is
# inclusive are decided by the caller -- verify there.
LARGE_DATASET_WARNING_THRESHOLD = 1000

# Color palettes
# Ten "#rrggbb" hex colours, in a fixed order. The values coincide with the
# widely used ten-colour categorical palette (matplotlib "tab10").
PRIMARY_COLORS = [
    "#1f77b4",
    "#ff7f0e",
    "#2ca02c",
    "#d62728",
    "#9467bd",
    "#8c564b",
    "#e377c2",
    "#7f7f7f",
    "#bcbd22",
    "#17becf",
]

# Clustering method categories
# Display names of the clustering methods that need an explicit cluster
# count. The strings are matched verbatim, so they must stay identical to
# the method names used elsewhere (e.g. the keys of
# METHOD_EXPLANATIONS["clustering"]).
CLUSTERING_METHODS_REQUIRING_N_CLUSTERS = [
    "Spectral Clustering",
    "Gaussian Mixture",
    "Agglomerative (Ward)",
    "Agglomerative (Complete)",
]

# Methods flagged as computationally intensive, grouped by category
# ("dimension_reduction" / "clustering"). Values are lists of method
# display names.
COMPUTATIONALLY_INTENSIVE_METHODS = {
    "dimension_reduction": ["t-SNE", "Spectral Embedding"],
    "clustering": ["Spectral Clustering", "OPTICS"],
}

# Method explanations
# One-line, human-readable descriptions keyed first by category
# ("dimension_reduction", "clustering", "separation", "metrics") and then by
# the method / option display name.
METHOD_EXPLANATIONS = {
    "dimension_reduction": {
        "PCA": "Linear, fast, preserves global variance",
        "t-SNE": "Non-linear, good for local structure, slower",
        "UMAP": "Balanced speed/quality, preserves local & global structure",
        "Spectral Embedding": "Uses graph theory, good for non-convex clusters",
        "Force-Directed": "Physics-based layout, creates natural spacing",
    },
    "clustering": {
        "HDBSCAN": "Density-based, finds variable density clusters, handles noise",
        "Spectral Clustering": "Uses eigenvalues, good for non-convex shapes",
        "Gaussian Mixture": "Probabilistic, assumes gaussian distributions",
        "Agglomerative (Ward)": "Hierarchical, minimizes within-cluster variance",
        "Agglomerative (Complete)": "Hierarchical, minimizes maximum distance",
        "OPTICS": "Density-based, finds clusters of varying densities",
    },
    "separation": {
        "Spread Factor": "Applies repulsive forces between nearby points",
        "Smart Jittering": "Adds intelligent noise to separate overlapping points",
        "Density-Based Jittering": "Stronger separation in crowded areas",
        "Perplexity Factor": "Controls t-SNE's focus on local vs global structure",
        "Min Distance Factor": "Controls UMAP's point packing tightness",
    },
    "metrics": {
        "Silhouette Score": "Higher is better (range: -1 to 1)",
        "Calinski-Harabasz": "Higher is better, measures cluster separation",
    },
}