diff --git a/transcode.py b/transcode.py
index 0d6e8e0..1ec87e2 100644
--- a/transcode.py
+++ b/transcode.py
@@ -8,6 +8,7 @@
 import argparse
 import csv
 import datetime
+import hashlib
 import json
 import logging
 import os
@@ -34,6 +35,10 @@ VIDEO_EXTENSIONS = {
     ".ts",
 }
 
+# Global cache dictionary
+# Structure: { "filesize-md5hash": "codec_name" }
+TRANSCODE_CACHE = {}
+
 
 def get_config_dir():
     """Returns the main configuration directory path."""
@@ -41,6 +46,92 @@ def get_config_dir():
     return Path(xdg_config) / "transcoder"
 
 
+def get_cache_file():
+    """Returns the path to the cache file."""
+    return get_config_dir() / "cache.json"
+
+
+def load_cache():
+    """
+    Loads the codec cache from disk into the global variable.
+
+    The cache is only an optimization, so a missing, unreadable or malformed
+    file never aborts startup: the cache is simply left empty.
+    """
+    global TRANSCODE_CACHE
+    TRANSCODE_CACHE = {}
+    cache_path = get_cache_file()
+    if not cache_path.exists():
+        return
+    try:
+        with open(cache_path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    except (OSError, ValueError) as e:
+        logging.error(f"Failed to load cache: {e}")
+        return
+    # json.load() accepts any JSON document, but only an object can serve as
+    # the signature-to-codec mapping that the lookups elsewhere rely on.
+    if not isinstance(data, dict):
+        logging.error(f"Failed to load cache: {cache_path} is not a JSON object")
+        return
+    TRANSCODE_CACHE = data
+    logging.debug(f"Loaded {len(TRANSCODE_CACHE)} entries from cache.")
+
+
+def save_cache():
+    """
+    Saves the global cache to disk.
+
+    The JSON is written to a temporary sibling file that is then renamed over
+    cache.json, so an interrupted write does not leave a truncated cache
+    behind. Failures are logged and otherwise ignored.
+    """
+    cache_path = get_cache_file()
+    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
+    try:
+        # The config directory may not exist yet (e.g. on a first run).
+        cache_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(tmp_path, "w", encoding="utf-8") as f:
+            json.dump(TRANSCODE_CACHE, f, indent=2)
+        tmp_path.replace(cache_path)
+    except Exception as e:
+        logging.warning(f"Failed to save cache: {e}")
+
+
+def get_file_signature(file_path):
+    """
+    Generates a signature for the file from its size and the MD5 hash of its
+    first 32KB.
+
+    The signature ignores the filename and modification date, so a renamed or
+    touched file is still recognized. It is a fast heuristic, not a full
+    content hash: files of equal size that share the same first 32KB get the
+    same signature.
+
+    Returns the signature string, or None if the file cannot be read.
+    """
+    try:
+        file_size = file_path.stat().st_size
+
+        # Hash only the header so that large files stay cheap to fingerprint.
+        with open(file_path, "rb") as f:
+            header = f.read(32 * 1024)
+        header_hash = hashlib.md5(header).hexdigest()
+
+        return f"{file_size}-{header_hash}"
+    except Exception as e:
+        logging.debug(f"Could not generate signature for {file_path}: {e}")
+        return None
+
+
+def update_cache_entry(file_path, codec):
+    """Updates the cache for a specific file using its signature."""
+    signature = get_file_signature(file_path)
+    if signature:
+        TRANSCODE_CACHE[signature] = codec
+        save_cache()
+
+
 def setup_logging():
     """
     Sets up logging to both console (INFO) and file (DEBUG/Verbose).
@@ -124,16 +215,32 @@ def _run_ffprobe(cmd):
 def get_video_codec(file_path):
     """
     Uses ffprobe to determine the codec of the first video stream.
+    Checks memory cache first using robust file signatures.
     Returns the codec name (e.g., 'av1', 'h264', 'hevc') or None if detection fails.
     """
     try:
-        # We don't log this subprocess call to avoid spamming debug logs,
-        # unless it fails.
+        # 1. Check Cache using Signature
+        signature = get_file_signature(file_path)
+
+        if signature and signature in TRANSCODE_CACHE:
+            codec = TRANSCODE_CACHE[signature]
+            logging.debug(
+                f"Cache hit for {file_path} (sig: {signature[:10]}...): {codec}"
+            )
+            return codec
+
+        # 2. Run FFprobe (Cache Miss)
         ffprobe_data = _run_ffprobe([str(file_path)])
         if ffprobe_data:
             codec = ffprobe_data["streams"][0]["codec_name"]
             logging.debug(f"Detected codec for {file_path}: {codec}")
+
+            # 3. Update Cache
+            if signature:
+                TRANSCODE_CACHE[signature] = codec
+                save_cache()
             return codec
+
         return None
     except Exception as e:
         logging.warning(f"Failed to probe codec for {file_path}: {e}")
@@ -307,6 +414,7 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
         regex_elapsed = re.compile(r"elapsed=([0-9:.]+)")
         transcoding_duration = None
 
+
         # Write FFmpeg output with timestamps as it's generated
         try:
             while True:
@@ -360,12 +468,18 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
                     leftalign(f"DONE: Successfully transcoded to {transcode_output_path}")
                 )
 
-                transcoding_duration = transcode_output_path
                 logging.debug(f"Transcoding duration was {transcoding_duration}.")
                 original_codec = get_video_codec(input_path)
                 transcoded_codec = get_video_codec(transcode_output_path)
+
                 video_duration = get_video_duration(input_path)
-                original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
+
+                # Safe size check
+                try:
+                    original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
+                except FileNotFoundError:
+                    original_size = "0MB"
+
                 transcoded_size = (
                     f"{transcode_output_path.stat().st_size / (1024 * 1024):.0f}MB"
                 )
@@ -412,6 +526,8 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
                     try:
                         transcode_output_path.replace(input_path)
                         logging.info(leftalign("REPLACE: Overwrote original file."))
+                        # IMPORTANT: Update cache now that the file content at input_path changed
+                        update_cache_entry(input_path, "av1")
                     except OSError as e:
                         logging.error(
                             leftalign(f"Failed to replace original file: {e}")
@@ -494,6 +610,7 @@ class NewFileHandler(FileSystemEventHandler):
 
 def main():
     setup_logging()
+    load_cache()  # Load the cache at startup
     config = load_config()
 
     parser = argparse.ArgumentParser(description="Nvidia AV1 Transcoder & Watcher")