transcode.py: add caching mechanism
This commit is contained in:
+104
-3
@@ -8,6 +8,7 @@
|
||||
import argparse
|
||||
import csv
|
||||
import datetime
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -34,6 +35,10 @@ VIDEO_EXTENSIONS = {
|
||||
".ts",
|
||||
}
|
||||
|
||||
# Process-wide codec cache, filled by load_cache() and written by save_cache().
# Keys are file signatures as built by get_file_signature()
# ("<size in bytes>-<md5 of the first 32KB>"); values are codec names.
# Structure: { "filesize-md5hash": "codec_name" }
TRANSCODE_CACHE = dict()
|
||||
|
||||
|
||||
def get_config_dir():
|
||||
"""Returns the main configuration directory path."""
|
||||
@@ -41,6 +46,66 @@ def get_config_dir():
|
||||
return Path(xdg_config) / "transcoder"
|
||||
|
||||
|
||||
def get_cache_file():
    """Return the location of ``cache.json`` inside the configuration directory."""
    config_dir = get_config_dir()
    return config_dir / "cache.json"
|
||||
|
||||
|
||||
def load_cache():
    """Load the codec cache from disk into the global ``TRANSCODE_CACHE``.

    The cache file is expected to contain a JSON object mapping file
    signatures to codec names. Loading is best-effort and never raises for
    I/O or parse problems:

    * a missing cache file silently yields an empty cache;
    * an unreadable or malformed file is logged as an error and yields an
      empty cache;
    * a file whose top-level JSON value is not an object is likewise logged
      and discarded, so later ``TRANSCODE_CACHE[signature] = codec`` writes
      cannot fail on a list or scalar.
    """
    global TRANSCODE_CACHE
    cache_path = get_cache_file()

    try:
        # Open directly instead of checking exists() first: avoids the race
        # where the file disappears between the check and the open.
        with open(cache_path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # No cache has been written yet; this is not an error.
        TRANSCODE_CACHE = {}
        return
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logging.error(f"Failed to load cache: {e}")
        TRANSCODE_CACHE = {}
        return

    if not isinstance(loaded, dict):
        # Valid JSON but the wrong shape (e.g. a list): treat as corrupt.
        logging.error(
            "Failed to load cache: expected a JSON object, "
            f"got {type(loaded).__name__}"
        )
        TRANSCODE_CACHE = {}
        return

    TRANSCODE_CACHE = loaded
    logging.debug(f"Loaded {len(TRANSCODE_CACHE)} entries from cache.")
|
||||
|
||||
|
||||
def save_cache():
    """Persist the global ``TRANSCODE_CACHE`` to the cache file as JSON.

    The data is first written to a sibling ``<name>.tmp`` file and then
    renamed over the real cache file, so an interruption in the middle of a
    write cannot leave a truncated ``cache.json`` behind. The parent
    directory is created if it does not exist yet.

    Saving is best-effort: failures are logged as a warning and swallowed so
    that a cache problem never aborts the caller.
    """
    cache_path = get_cache_file()
    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
    try:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(TRANSCODE_CACHE, f, indent=2)
        # Rename within the same directory replaces the old cache in one step.
        tmp_path.replace(cache_path)
    except (OSError, TypeError, ValueError) as e:
        # TypeError/ValueError: the cache holds something json cannot encode.
        logging.warning(f"Failed to save cache: {e}")
        try:
            # Best-effort cleanup of a partially written temp file.
            tmp_path.unlink()
        except OSError:
            # Nothing to clean up (or it cannot be removed); already warned.
            pass
|
||||
|
||||
|
||||
def get_file_signature(file_path):
    """
    Build a content-based signature string for a file.

    The signature is "<size in bytes>-<md5 hex digest of the first 32KB>".
    It does not involve the file name or modification time, so a renamed or
    touched file keeps the same signature.

    Returns the signature string, or None if the file could not be stat'ed
    or read (the failure is logged at debug level).
    """
    head_len = 32 * 1024  # only the leading bytes are hashed, to keep this cheap
    try:
        size_in_bytes = file_path.stat().st_size
        with open(file_path, "rb") as handle:
            digest = hashlib.md5(handle.read(head_len)).hexdigest()
    except Exception as e:
        logging.debug(f"Could not generate signature for {file_path}: {e}")
        return None
    return f"{size_in_bytes}-{digest}"
|
||||
|
||||
|
||||
def update_cache_entry(file_path, codec):
    """Record ``codec`` for the file's current signature and persist the cache.

    Does nothing when no signature can be computed for ``file_path``.
    """
    sig = get_file_signature(file_path)
    if not sig:
        # No signature means there is no key to store the codec under.
        return
    TRANSCODE_CACHE[sig] = codec
    save_cache()
|
||||
|
||||
|
||||
def setup_logging():
|
||||
"""
|
||||
Sets up logging to both console (INFO) and file (DEBUG/Verbose).
|
||||
@@ -124,16 +189,32 @@ def _run_ffprobe(cmd):
|
||||
def get_video_codec(file_path):
|
||||
"""
|
||||
Uses ffprobe to determine the codec of the first video stream.
|
||||
Checks memory cache first using robust file signatures.
|
||||
Returns the codec name (e.g., 'av1', 'h264', 'hevc') or None if detection fails.
|
||||
"""
|
||||
try:
|
||||
# We don't log this subprocess call to avoid spamming debug logs,
|
||||
# unless it fails.
|
||||
# 1. Check Cache using Signature
|
||||
signature = get_file_signature(file_path)
|
||||
|
||||
if signature and signature in TRANSCODE_CACHE:
|
||||
codec = TRANSCODE_CACHE[signature]
|
||||
logging.debug(
|
||||
f"Cache hit for {file_path} (sig: {signature[:10]}...): {codec}"
|
||||
)
|
||||
return codec
|
||||
|
||||
# 2. Run FFprobe (Cache Miss)
|
||||
ffprobe_data = _run_ffprobe([str(file_path)])
|
||||
if ffprobe_data:
|
||||
codec = ffprobe_data["streams"][0]["codec_name"]
|
||||
logging.debug(f"Detected codec for {file_path}: {codec}")
|
||||
|
||||
# 3. Update Cache
|
||||
if signature:
|
||||
TRANSCODE_CACHE[signature] = codec
|
||||
save_cache()
|
||||
return codec
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.warning(f"Failed to probe codec for {file_path}: {e}")
|
||||
@@ -307,6 +388,7 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
|
||||
|
||||
regex_elapsed = re.compile(r"elapsed=([0-9:.]+)")
|
||||
transcoding_duration = None
|
||||
|
||||
# Write FFmpeg output with timestamps as it's generated
|
||||
try:
|
||||
while True:
|
||||
@@ -360,12 +442,28 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
|
||||
leftalign(f"DONE: Successfully transcoded to {transcode_output_path}")
|
||||
)
|
||||
|
||||
transcoding_duration = transcode_output_path
|
||||
logging.debug(f"Transcoding duration was {transcoding_duration}.")
|
||||
|
||||
original_codec = get_video_codec(input_path)
|
||||
|
||||
# Update cache for the NEW file (which is now AV1)
|
||||
# This ensures if we restart, we know this new file is already done
|
||||
if replace_mode and use_temp_file:
|
||||
# We replaced the input file with the temp file.
|
||||
# Update signature for the new file content
|
||||
update_cache_entry(input_path, "av1")
|
||||
|
||||
transcoded_codec = get_video_codec(transcode_output_path)
|
||||
|
||||
video_duration = get_video_duration(input_path)
|
||||
original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
|
||||
|
||||
# Safe size check
|
||||
try:
|
||||
original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
|
||||
except FileNotFoundError:
|
||||
original_size = "0MB"
|
||||
|
||||
transcoded_size = (
|
||||
f"{transcode_output_path.stat().st_size / (1024 * 1024):.0f}MB"
|
||||
)
|
||||
@@ -412,6 +510,8 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
|
||||
try:
|
||||
transcode_output_path.replace(input_path)
|
||||
logging.info(leftalign("REPLACE: Overwrote original file."))
|
||||
# IMPORTANT: Update cache again because file content at input_path changed
|
||||
update_cache_entry(input_path, "av1")
|
||||
except OSError as e:
|
||||
logging.error(
|
||||
leftalign(f"Failed to replace original file: {e}")
|
||||
@@ -494,6 +594,7 @@ class NewFileHandler(FileSystemEventHandler):
|
||||
|
||||
def main():
|
||||
setup_logging()
|
||||
load_cache() # Load the cache at startup
|
||||
config = load_config()
|
||||
|
||||
parser = argparse.ArgumentParser(description="Nvidia AV1 Transcoder & Watcher")
|
||||
|
||||
Reference in New Issue
Block a user