moved overview script

This commit is contained in:
2025-08-31 16:36:04 +01:00
parent 2bda93cca2
commit a7f526be8a

View File

@@ -178,6 +178,134 @@ def analyze_timeline_data(timeline_path: str) -> Dict[str, Any]:
return stats
def export_statistics_to_file(stats: Dict[str, Any], output_path: str):
    """Export comprehensive statistics to a text file.

    Reuses print_statistics() by temporarily redirecting stdout into the
    output file, so console and file output stay identical.

    Args:
        stats: Statistics dictionary produced by analyze_timeline_data().
        output_path: Destination text file (overwritten if it exists).
    """
    from contextlib import redirect_stdout

    with open(output_path, 'w', encoding='utf-8') as f:
        # redirect_stdout guarantees sys.stdout is restored even if
        # print_statistics() raises; the previous manual swap of
        # sys.stdout would have left it pointing at a closed file.
        with redirect_stdout(f):
            print_statistics(stats)
def analyze_json_structure(timeline_path: str, output_path: str):
    """Analyze the structure of a Timeline JSON file and export it to text.

    Loads the JSON document, then writes three sections to *output_path*:
    a root-level summary (one line per top-level key), a depth-limited
    recursive structural breakdown, and — when present — a summary of the
    ``semanticSegments`` list including one sample "visit" segment and one
    sample "timelinePath" segment.

    Args:
        timeline_path: Path to the Timeline.json file to inspect.
        output_path: Destination text file (overwritten if it exists).

    Raises:
        FileNotFoundError: If *timeline_path* does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print(f"Analyzing JSON structure from: {timeline_path}")
    with open(timeline_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    def explore_structure(obj, path="", depth=0, max_depth=4):
        """Recursively describe *obj*, returning one report line per node."""
        indent = "  " * depth
        structure_info = []
        if depth > max_depth:
            return [f"{indent}... (max depth reached)"]
        if isinstance(obj, dict):
            structure_info.append(f"{indent}{path} (dict) - {len(obj)} keys:")
            for key, value in list(obj.items())[:10]:  # limit to first 10 keys
                key_path = f"{path}.{key}" if path else key
                if isinstance(value, (dict, list)):
                    structure_info.extend(
                        explore_structure(value, key_path, depth + 1, max_depth))
                else:
                    value_type = type(value).__name__
                    # Truncate long strings so one value cannot flood the report.
                    if isinstance(value, str) and len(value) > 50:
                        sample = value[:50] + "..."
                    else:
                        sample = str(value)
                    structure_info.append(
                        f"{indent}  {key}: {value_type} = {sample}")
            if len(obj) > 10:
                structure_info.append(
                    f"{indent}  ... and {len(obj) - 10} more keys")
        elif isinstance(obj, list):
            structure_info.append(f"{indent}{path} (list) - {len(obj)} items:")
            if obj:
                # Lists are assumed homogeneous: describe only the first item.
                structure_info.append(f"{indent}  Sample item structure:")
                structure_info.extend(
                    explore_structure(obj[0], f"{path}[0]", depth + 1, max_depth))
                if len(obj) > 1:
                    structure_info.append(
                        f"{indent}  ... and {len(obj) - 1} more items")
        else:
            value_type = type(obj).__name__
            structure_info.append(f"{indent}{path}: {value_type} = {obj}")
        return structure_info

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("=" * 80 + "\n")
        f.write("TIMELINE JSON STRUCTURE ANALYSIS\n")
        f.write("=" * 80 + "\n\n")
        f.write(f"File: {timeline_path}\n")
        f.write(f"Analysis Date: "
                f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

        # Timeline exports are expected to be a JSON object; bail out with a
        # clear message instead of crashing on .items() for other roots.
        if not isinstance(data, dict):
            f.write(f"Root element is {type(data).__name__}, not an object; "
                    "no structure analysis performed.\n")
            return

        # Section 1: root-level overview, one line per top-level key.
        f.write("ROOT LEVEL STRUCTURE:\n")
        f.write("-" * 40 + "\n")
        for key, value in data.items():
            if isinstance(value, list):
                f.write(f"{key}: list with {len(value)} items\n")
            elif isinstance(value, dict):
                f.write(f"{key}: dict with {len(value)} keys\n")
            else:
                f.write(f"{key}: {type(value).__name__} = {value}\n")

        # Section 2: depth-limited recursive breakdown of the whole document.
        f.write("\n" + "=" * 80 + "\n")
        f.write("DETAILED STRUCTURE:\n")
        f.write("=" * 80 + "\n\n")
        for line in explore_structure(data):
            f.write(line + "\n")

        # Section 3: semantic-segment summary (Google Timeline exports keep
        # the interesting data under this key).
        semantic_segments = data.get('semanticSegments', [])
        if semantic_segments:
            f.write("\n" + "=" * 80 + "\n")
            f.write("SEMANTIC SEGMENTS ANALYSIS:\n")
            f.write("=" * 80 + "\n\n")
            f.write(f"Total semantic segments: {len(semantic_segments)}\n\n")

            # Segments are classified by which payload key they carry.
            visit_count = sum(1 for seg in semantic_segments if 'visit' in seg)
            path_count = sum(
                1 for seg in semantic_segments if 'timelinePath' in seg)
            f.write(f"Segments with visits: {visit_count}\n")
            f.write(f"Segments with timeline paths: {path_count}\n\n")

            # Find one example of each segment flavour; scan only a bounded
            # prefix so huge exports stay cheap.
            sample_visit = None
            sample_path = None
            for segment in semantic_segments[:100]:  # check first 100 segments
                if 'visit' in segment and sample_visit is None:
                    sample_visit = segment
                if 'timelinePath' in segment and sample_path is None:
                    sample_path = segment
                if sample_visit and sample_path:
                    break

            if sample_visit:
                f.write("SAMPLE VISIT STRUCTURE:\n")
                f.write("-" * 40 + "\n")
                for line in explore_structure(sample_visit, "sample_visit"):
                    f.write(line + "\n")
                f.write("\n")
            if sample_path:
                f.write("SAMPLE TIMELINE PATH STRUCTURE:\n")
                f.write("-" * 40 + "\n")
                for line in explore_structure(sample_path,
                                              "sample_timelinePath"):
                    f.write(line + "\n")
def print_statistics(stats: Dict[str, Any]):
"""Print comprehensive statistics in a readable format."""
print("\n" + "="*80)
@@ -277,14 +405,32 @@ def main():
print(f"Searched in: {repo_root}")
sys.exit(1)
# Generate output file names with timestamp
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
stats_output_path = os.path.join(repo_root, f"timeline_statistics_{timestamp}.txt")
structure_output_path = os.path.join(repo_root, f"timeline_structure_{timestamp}.txt")
try:
# Analyze the JSON structure first
print("📋 Analyzing JSON structure...")
analyze_json_structure(timeline_path, structure_output_path)
print(f"✅ JSON structure exported to: {structure_output_path}")
# Analyze the data
stats = analyze_timeline_data(timeline_path)
# Print the results
# Print the results to console
print_statistics(stats)
print(f"\n✅ Analysis complete! File analyzed: {timeline_path}")
# Export statistics to file
print(f"\n📄 Exporting statistics to file...")
export_statistics_to_file(stats, stats_output_path)
print(f"✅ Statistics exported to: {stats_output_path}")
print(f"\n🎉 Analysis complete!")
print(f"📊 Statistics file: {stats_output_path}")
print(f"🏗️ Structure file: {structure_output_path}")
print(f"📁 Source file: {timeline_path}")
except FileNotFoundError:
print(f"❌ Error: Could not find Timeline.json at {timeline_path}")