experimental: youtube comment fs
This commit is contained in:
@@ -0,0 +1,37 @@
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
from collections import defaultdict
|
||||
|
||||
def build_comment_tree(comments):
    """Nest a flat list of comment records into a tree of replies.

    Each record is a mapping that is read through the keys ``'id'``,
    ``'parent'`` and ``'text'``. A record whose ``'parent'`` equals the
    string ``"root"`` is a top-level comment; any other record is filed as a
    reply under its ``'parent'`` value.

    Returns a list with one node per top-level comment, in input order. A
    node is ``{"text": ..., "replies": [...]}`` where ``replies`` holds the
    nodes of the records whose ``'parent'`` equals this record's ``'id'``,
    nested recursively. The comment id itself is not copied into the node.
    Records that are not reachable from a top-level comment do not appear in
    the result.

    Two progress lines (total and top-level counts) are printed to stdout.
    """
    children_of = defaultdict(list)
    top_level = []

    print(f"Total comments: {len(comments)}")  # Debug info

    for entry in comments:
        parent_id = entry['parent']
        # "root" marks a top-level comment; everything else is grouped
        # under the id of the comment it replies to.
        bucket = top_level if parent_id == "root" else children_of[parent_id]
        bucket.append(entry)

    print(f"Root comments: {len(top_level)}")  # Debug info

    def to_node(entry):
        # Read the text first, then descend into the replies of this entry.
        body = entry['text']
        nested = []
        for child in children_of[entry['id']]:
            nested.append(to_node(child))
        return {"text": body, "replies": nested}

    forest = []
    for entry in top_level:
        forest.append(to_node(entry))
    return forest
|
||||
|
||||
# Load the flat comment dump: one JSON object per line (JSON Lines).
with open('comments.jsonl', 'r', encoding='utf-8') as f:
    # Skip blank lines so a stray empty line does not abort the whole load
    # with a JSONDecodeError.
    comments = [json.loads(line) for line in f if line.strip()]

# Turn the flat records into nested {"text", "replies"} nodes.
comment_tree = build_comment_tree(comments)

print(f"Final tree length: {len(comment_tree)}")  # Debug info

# ensure_ascii=False writes non-ASCII characters verbatim, so the output file
# is opened explicitly as UTF-8 instead of relying on the locale's default
# encoding, which may be unable to encode them.
with open('comment_tree.json', 'w', encoding='utf-8') as f:
    json.dump(comment_tree, f, ensure_ascii=False, indent=2)
|
||||
|
||||
Reference in New Issue
Block a user