transcode.py: add caching mechanism
This commit is contained in:
+103
-2
@@ -8,6 +8,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import csv
|
import csv
|
||||||
import datetime
|
import datetime
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -34,6 +35,10 @@ VIDEO_EXTENSIONS = {
|
|||||||
".ts",
|
".ts",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Global cache dictionary
|
||||||
|
# Structure: { "<file size in bytes>-<md5 hex digest of the first 32 KiB>": "codec_name" }
|
||||||
|
TRANSCODE_CACHE = {}
|
||||||
|
|
||||||
|
|
||||||
def get_config_dir():
|
def get_config_dir():
|
||||||
"""Returns the main configuration directory path."""
|
"""Returns the main configuration directory path."""
|
||||||
@@ -41,6 +46,66 @@ def get_config_dir():
|
|||||||
return Path(xdg_config) / "transcoder"
|
return Path(xdg_config) / "transcoder"
|
||||||
|
|
||||||
|
|
||||||
|
def get_cache_file():
    """Return the path of ``cache.json`` inside the configuration directory."""
    config_dir = get_config_dir()
    return config_dir / "cache.json"
|
||||||
|
|
||||||
|
|
||||||
|
def load_cache():
    """Load the codec cache from disk into the global ``TRANSCODE_CACHE``.

    The cache file is expected to hold a JSON object mapping file
    signatures to codec names. ``TRANSCODE_CACHE`` is always rebound by
    this function:

    * missing cache file -> empty dict, nothing logged;
    * unreadable or unparsable file -> empty dict, error logged;
    * valid JSON that is not an object (list, string, number, ...) ->
      empty dict, error logged, because the rest of the module assigns
      into the cache by key;
    * otherwise -> the parsed dict.

    Loading never raises for a bad cache file; the cache is only an
    optimisation and is simply rebuilt.
    """
    global TRANSCODE_CACHE
    cache_path = get_cache_file()

    # Open directly instead of checking exists() first: avoids the race
    # where the file disappears between the check and the open.
    try:
        with open(cache_path, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # No cache yet (first run): start empty without logging an error.
        TRANSCODE_CACHE = {}
        return
    except Exception as e:
        # Deliberately broad: a corrupt cache must never stop startup.
        logging.error(f"Failed to load cache: {e}")
        TRANSCODE_CACHE = {}
        return

    if not isinstance(loaded, dict):
        # Syntactically valid JSON of the wrong shape is still unusable.
        logging.error(
            "Failed to load cache: expected a JSON object, "
            f"got {type(loaded).__name__}"
        )
        TRANSCODE_CACHE = {}
        return

    TRANSCODE_CACHE = loaded
    logging.debug(f"Loaded {len(TRANSCODE_CACHE)} entries from cache.")
|
||||||
|
|
||||||
|
|
||||||
|
def save_cache():
    """Persist the global ``TRANSCODE_CACHE`` to the cache file as JSON.

    The data is written to a sibling ``<name>.tmp`` file and then moved
    over the real cache file with ``os.replace``. Writing in place would
    truncate the existing file first, so an interrupted write would leave
    invalid JSON behind and the next ``load_cache()`` would throw the
    whole cache away. The parent directory is created when missing.

    Saving is best-effort: any failure is logged as a warning and
    swallowed, so a cache problem never aborts the caller.
    """
    cache_path = get_cache_file()
    # Same directory as the target so os.replace stays on one filesystem.
    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
    try:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(TRANSCODE_CACHE, f, indent=2)
        os.replace(tmp_path, cache_path)
    except Exception as e:
        logging.warning(f"Failed to save cache: {e}")
        # Do not leave a half-written temporary file lying around.
        try:
            tmp_path.unlink()
        except OSError:
            pass
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_signature(file_path):
    """
    Build a content-based signature string for ``file_path``.

    The signature has the form ``"<size in bytes>-<md5 hex digest>"``,
    where the digest covers only the first 32 KiB of the file. Neither
    the file name nor its modification time is part of the signature.

    Returns None (after a debug log entry) if the file cannot be
    stat'ed or read.
    """
    header_length = 32 * 1024
    try:
        size_in_bytes = file_path.stat().st_size
        # Hash just the leading chunk so large videos stay cheap to fingerprint.
        with open(file_path, "rb") as stream:
            digest = hashlib.md5(stream.read(header_length))
    except Exception as e:
        logging.debug(f"Could not generate signature for {file_path}: {e}")
        return None
    return f"{size_in_bytes}-{digest.hexdigest()}"
|
||||||
|
|
||||||
|
|
||||||
|
def update_cache_entry(file_path, codec):
    """Record ``codec`` under the file's current signature and save the cache.

    Does nothing when no signature can be computed for ``file_path``.
    """
    sig = get_file_signature(file_path)
    if not sig:
        return
    TRANSCODE_CACHE[sig] = codec
    save_cache()
|
||||||
|
|
||||||
|
|
||||||
def setup_logging():
|
def setup_logging():
|
||||||
"""
|
"""
|
||||||
Sets up logging to both console (INFO) and file (DEBUG/Verbose).
|
Sets up logging to both console (INFO) and file (DEBUG/Verbose).
|
||||||
@@ -124,16 +189,32 @@ def _run_ffprobe(cmd):
|
|||||||
def get_video_codec(file_path):
|
def get_video_codec(file_path):
|
||||||
"""
|
"""
|
||||||
Uses ffprobe to determine the codec of the first video stream.
|
Uses ffprobe to determine the codec of the first video stream.
|
||||||
|
Checks memory cache first using robust file signatures.
|
||||||
Returns the codec name (e.g., 'av1', 'h264', 'hevc') or None if detection fails.
|
Returns the codec name (e.g., 'av1', 'h264', 'hevc') or None if detection fails.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# We don't log this subprocess call to avoid spamming debug logs,
|
# 1. Check Cache using Signature
|
||||||
# unless it fails.
|
signature = get_file_signature(file_path)
|
||||||
|
|
||||||
|
if signature and signature in TRANSCODE_CACHE:
|
||||||
|
codec = TRANSCODE_CACHE[signature]
|
||||||
|
logging.debug(
|
||||||
|
f"Cache hit for {file_path} (sig: {signature[:10]}...): {codec}"
|
||||||
|
)
|
||||||
|
return codec
|
||||||
|
|
||||||
|
# 2. Run FFprobe (Cache Miss)
|
||||||
ffprobe_data = _run_ffprobe([str(file_path)])
|
ffprobe_data = _run_ffprobe([str(file_path)])
|
||||||
if ffprobe_data:
|
if ffprobe_data:
|
||||||
codec = ffprobe_data["streams"][0]["codec_name"]
|
codec = ffprobe_data["streams"][0]["codec_name"]
|
||||||
logging.debug(f"Detected codec for {file_path}: {codec}")
|
logging.debug(f"Detected codec for {file_path}: {codec}")
|
||||||
|
|
||||||
|
# 3. Update Cache
|
||||||
|
if signature:
|
||||||
|
TRANSCODE_CACHE[signature] = codec
|
||||||
|
save_cache()
|
||||||
return codec
|
return codec
|
||||||
|
|
||||||
return None
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"Failed to probe codec for {file_path}: {e}")
|
logging.warning(f"Failed to probe codec for {file_path}: {e}")
|
||||||
@@ -307,6 +388,7 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
|
|||||||
|
|
||||||
regex_elapsed = re.compile(r"elapsed=([0-9:.]+)")
|
regex_elapsed = re.compile(r"elapsed=([0-9:.]+)")
|
||||||
transcoding_duration = None
|
transcoding_duration = None
|
||||||
|
|
||||||
# Write FFmpeg output with timestamps as it's generated
|
# Write FFmpeg output with timestamps as it's generated
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
@@ -360,12 +442,28 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
|
|||||||
leftalign(f"DONE: Successfully transcoded to {transcode_output_path}")
|
leftalign(f"DONE: Successfully transcoded to {transcode_output_path}")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
transcoding_duration = transcode_output_path
|
||||||
logging.debug(f"Transcoding duration was {transcoding_duration}.")
|
logging.debug(f"Transcoding duration was {transcoding_duration}.")
|
||||||
|
|
||||||
original_codec = get_video_codec(input_path)
|
original_codec = get_video_codec(input_path)
|
||||||
|
|
||||||
|
# Update cache for the NEW file (which is now AV1)
|
||||||
|
# This ensures if we restart, we know this new file is already done
|
||||||
|
if replace_mode and use_temp_file:
|
||||||
|
# We replaced the input file with the temp file.
|
||||||
|
# Update signature for the new file content
|
||||||
|
update_cache_entry(input_path, "av1")
|
||||||
|
|
||||||
transcoded_codec = get_video_codec(transcode_output_path)
|
transcoded_codec = get_video_codec(transcode_output_path)
|
||||||
|
|
||||||
video_duration = get_video_duration(input_path)
|
video_duration = get_video_duration(input_path)
|
||||||
|
|
||||||
|
# Safe size check
|
||||||
|
try:
|
||||||
original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
|
original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
|
||||||
|
except FileNotFoundError:
|
||||||
|
original_size = "0MB"
|
||||||
|
|
||||||
transcoded_size = (
|
transcoded_size = (
|
||||||
f"{transcode_output_path.stat().st_size / (1024 * 1024):.0f}MB"
|
f"{transcode_output_path.stat().st_size / (1024 * 1024):.0f}MB"
|
||||||
)
|
)
|
||||||
@@ -412,6 +510,8 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
|
|||||||
try:
|
try:
|
||||||
transcode_output_path.replace(input_path)
|
transcode_output_path.replace(input_path)
|
||||||
logging.info(leftalign("REPLACE: Overwrote original file."))
|
logging.info(leftalign("REPLACE: Overwrote original file."))
|
||||||
|
# IMPORTANT: Update cache again because file content at input_path changed
|
||||||
|
update_cache_entry(input_path, "av1")
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
logging.error(
|
logging.error(
|
||||||
leftalign(f"Failed to replace original file: {e}")
|
leftalign(f"Failed to replace original file: {e}")
|
||||||
@@ -494,6 +594,7 @@ class NewFileHandler(FileSystemEventHandler):
|
|||||||
|
|
||||||
def main():
|
def main():
|
||||||
setup_logging()
|
setup_logging()
|
||||||
|
load_cache() # Load the cache at startup
|
||||||
config = load_config()
|
config = load_config()
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="Nvidia AV1 Transcoder & Watcher")
|
parser = argparse.ArgumentParser(description="Nvidia AV1 Transcoder & Watcher")
|
||||||
|
|||||||
Reference in New Issue
Block a user