scripts/comment_fs/create_comment_tree.py

38 lines
1.0 KiB
Python

#!/usr/bin/env python3
import json
from collections import defaultdict
def build_comment_tree(comments):
    """Arrange a flat list of comments into nested reply threads.

    Each comment is a mapping with the keys ``'id'``, ``'parent'`` and
    ``'text'``. A comment whose ``'parent'`` equals the string ``"root"``
    starts a thread; any other comment is filed as a reply under the
    comment whose ``'id'`` equals its ``'parent'``. Replies that cannot be
    reached from a root comment are left out of the result.

    The tree is assembled with an explicit stack instead of recursion, so
    reply chains deeper than the interpreter's recursion limit do not raise
    ``RecursionError``.

    Args:
        comments: Sized iterable of comment mappings (e.g. a list of dicts).

    Returns:
        A list with one ``{"text": ..., "replies": [...]}`` dict per root
        comment, in input order; ``"replies"`` holds the same kind of dict
        for each direct reply, also in input order.

    Raises:
        KeyError: If a comment lacks ``'parent'``, or a comment that ends up
            in the tree lacks ``'text'`` or ``'id'``.
        RecursionError: If duplicated ids make a reply chain loop back on
            itself, which would otherwise expand forever.
    """
    replies_by_parent = defaultdict(list)
    root_comments = []
    total = len(comments)
    print(f"Total comments: {total}")  # Debug info
    for comment in comments:
        if comment['parent'] == "root":
            root_comments.append(comment)
        else:
            replies_by_parent[comment['parent']].append(comment)
    print(f"Root comments: {len(root_comments)}")  # Debug info

    forest = []
    # Work list of (source comment, output node to fill, depth of that node).
    pending = []
    for comment in root_comments:
        node = {"text": comment['text'], "replies": []}
        forest.append(node)
        pending.append((comment, node, 1))

    while pending:
        comment, node, depth = pending.pop()
        # A path longer than the number of comments must revisit a comment,
        # which only happens when duplicated ids form a cycle.
        if depth > total:
            raise RecursionError(
                "cyclic reply chain detected while building the comment tree"
            )
        # .get() avoids inserting an empty list for every leaf comment.
        for reply in replies_by_parent.get(comment['id'], ()):
            child = {"text": reply['text'], "replies": []}
            node["replies"].append(child)
            pending.append((reply, child, depth + 1))
    return forest
# Load one JSON-encoded comment per line; blank lines are skipped because
# json.loads rejects an empty document.
with open('comments.jsonl', 'r', encoding='utf-8') as f:
    comments = [json.loads(line) for line in f if line.strip()]
comment_tree = build_comment_tree(comments)
print(f"Final tree length: {len(comment_tree)}")  # Debug info
# ensure_ascii=False writes non-ASCII text verbatim, so the output encoding is
# pinned to UTF-8 instead of depending on the platform default.
with open('comment_tree.json', 'w', encoding='utf-8') as f:
    json.dump(comment_tree, f, ensure_ascii=False, indent=2)