transcode.py: add caching mechanism

This commit is contained in:
2025-12-06 19:41:15 +01:00
parent ecba7c7bb0
commit 2a1aa37ae9
+104 -3
View File
@@ -8,6 +8,7 @@
import argparse
import csv
import datetime
import hashlib
import json
import logging
import os
@@ -34,6 +35,10 @@ VIDEO_EXTENSIONS = {
".ts",
}
# Global in-memory codec cache; load_cache() fills it and save_cache() writes it to disk.
# Structure: { "<file size in bytes>-<MD5 hex digest of the first 32 KB>": "codec_name" }
# The keys are the strings produced by get_file_signature().
TRANSCODE_CACHE = {}
def get_config_dir():
"""Returns the main configuration directory path."""
@@ -41,6 +46,66 @@ def get_config_dir():
return Path(xdg_config) / "transcoder"
def get_cache_file():
    """Return the location of the JSON cache file inside the configuration directory."""
    config_dir = get_config_dir()
    return config_dir / "cache.json"
def load_cache():
    """
    Loads the codec cache from disk into the global TRANSCODE_CACHE.

    The cache is reset to an empty dict when the cache file is missing,
    cannot be read, is not valid JSON, or does not contain a JSON object.
    Problems are logged; this function never raises for a bad cache file.
    """
    global TRANSCODE_CACHE
    cache_path = get_cache_file()
    loaded = {}
    if cache_path.exists():
        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logging.error(f"Failed to load cache: {e}")
        else:
            if isinstance(data, dict):
                loaded = data
                logging.debug(f"Loaded {len(loaded)} entries from cache.")
            else:
                # Valid JSON of the wrong shape (e.g. a list) would break the
                # later `TRANSCODE_CACHE[signature] = codec` writes.
                logging.error(
                    "Failed to load cache: expected a JSON object, "
                    f"got {type(data).__name__}"
                )
    TRANSCODE_CACHE = loaded
def save_cache():
    """
    Saves the global cache to disk.

    The data is written to a temporary sibling file first and then moved over
    the real cache file, so an interrupted write cannot leave a truncated
    cache.json behind. The parent directory is created if it does not exist.
    Failures are logged as warnings and never raised: caching is best-effort.
    """
    cache_path = get_cache_file()
    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
    try:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(TRANSCODE_CACHE, f, indent=2)
        # Path.replace() renames over the destination in a single step.
        tmp_path.replace(cache_path)
    except Exception as e:
        # Deliberately broad: a cache write must never abort the caller.
        logging.warning(f"Failed to save cache: {e}")
        try:
            # Do not leave a half-written temporary file behind.
            tmp_path.unlink()
        except OSError:
            pass
def get_file_signature(file_path, header_bytes=32 * 1024):
    """
    Generates a signature for the file based on its size and a header hash.

    The signature does not depend on the file name or modification date, so a
    renamed or touched file keeps the same signature.

    Args:
        file_path: Path to the file (a str or any path-like object).
        header_bytes: Number of leading bytes to hash. Defaults to 32 KB.
            A negative value hashes the whole file.

    Returns:
        A string of the form "<size in bytes>-<md5 hex digest of the header>",
        or None if the file could not be opened or read.
    """
    try:
        # Take size and header from the same open handle so both describe the
        # same file even if the path is replaced while this runs.
        with open(file_path, "rb") as f:
            file_size = os.fstat(f.fileno()).st_size
            header = f.read(header_bytes)
    except (OSError, ValueError) as e:
        logging.debug(f"Could not generate signature for {file_path}: {e}")
        return None
    # Only a prefix is hashed, which keeps this cheap for large video files.
    header_hash = hashlib.md5(header).hexdigest()
    return f"{file_size}-{header_hash}"
def update_cache_entry(file_path, codec):
    """Store `codec` in the cache under the file's signature and save the cache to disk."""
    sig = get_file_signature(file_path)
    if not sig:
        # No signature could be generated, so there is no key to store under.
        return
    TRANSCODE_CACHE[sig] = codec
    save_cache()
def setup_logging():
"""
Sets up logging to both console (INFO) and file (DEBUG/Verbose).
@@ -124,16 +189,32 @@ def _run_ffprobe(cmd):
def get_video_codec(file_path):
"""
Uses ffprobe to determine the codec of the first video stream.
Checks memory cache first using robust file signatures.
Returns the codec name (e.g., 'av1', 'h264', 'hevc') or None if detection fails.
"""
try:
# We don't log this subprocess call to avoid spamming debug logs,
# unless it fails.
# 1. Check Cache using Signature
signature = get_file_signature(file_path)
if signature and signature in TRANSCODE_CACHE:
codec = TRANSCODE_CACHE[signature]
logging.debug(
f"Cache hit for {file_path} (sig: {signature[:10]}...): {codec}"
)
return codec
# 2. Run FFprobe (Cache Miss)
ffprobe_data = _run_ffprobe([str(file_path)])
if ffprobe_data:
codec = ffprobe_data["streams"][0]["codec_name"]
logging.debug(f"Detected codec for {file_path}: {codec}")
# 3. Update Cache
if signature:
TRANSCODE_CACHE[signature] = codec
save_cache()
return codec
return None
except Exception as e:
logging.warning(f"Failed to probe codec for {file_path}: {e}")
@@ -307,6 +388,7 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
regex_elapsed = re.compile(r"elapsed=([0-9:.]+)")
transcoding_duration = None
# Write FFmpeg output with timestamps as it's generated
try:
while True:
@@ -360,12 +442,28 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
leftalign(f"DONE: Successfully transcoded to {transcode_output_path}")
)
transcoding_duration = transcode_output_path
logging.debug(f"Transcoding duration was {transcoding_duration}.")
original_codec = get_video_codec(input_path)
# Update cache for the NEW file (which is now AV1)
# This ensures if we restart, we know this new file is already done
if replace_mode and use_temp_file:
# We replaced the input file with the temp file.
# Update signature for the new file content
update_cache_entry(input_path, "av1")
transcoded_codec = get_video_codec(transcode_output_path)
video_duration = get_video_duration(input_path)
original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
# Safe size check
try:
original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
except FileNotFoundError:
original_size = "0MB"
transcoded_size = (
f"{transcode_output_path.stat().st_size / (1024 * 1024):.0f}MB"
)
@@ -412,6 +510,8 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
try:
transcode_output_path.replace(input_path)
logging.info(leftalign("REPLACE: Overwrote original file."))
# IMPORTANT: Update cache again because file content at input_path changed
update_cache_entry(input_path, "av1")
except OSError as e:
logging.error(
leftalign(f"Failed to replace original file: {e}")
@@ -494,6 +594,7 @@ class NewFileHandler(FileSystemEventHandler):
def main():
setup_logging()
load_cache() # Load the cache at startup
config = load_config()
parser = argparse.ArgumentParser(description="Nvidia AV1 Transcoder & Watcher")