transcode.py: add logging, write stats, recursive mode, simplify arguments
This commit is contained in:
2025-12-06 18:22:46 +01:00
parent 0df2e757ab
commit a3dea4eaba
+375 -91
View File
@@ -6,24 +6,40 @@
# ]
# ///
import argparse
import time
import sys
import subprocess
import os
import logging
import csv
import datetime
import json
import logging
import os
import re
import subprocess
import sys
import time
from pathlib import Path
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
# Supported video extensions to monitor
VIDEO_EXTENSIONS = {'.mkv', '.mp4', '.avi', '.mov', '.wmv', '.flv', '.webm', '.m4v', '.ts'}
VIDEO_EXTENSIONS = {
".mkv",
".mp4",
".avi",
".mov",
".wmv",
".flv",
".webm",
".m4v",
".ts",
}
def get_config_dir():
    """Return the transcoder's configuration directory.

    The base directory is taken from the ``XDG_CONFIG_HOME`` environment
    variable when it is set, otherwise ``~/.config`` (expanded for the current
    user) is used. ``transcoder`` is appended to that base.
    """
    fallback_base = os.path.expanduser("~/.config")
    base_dir = os.environ.get("XDG_CONFIG_HOME", fallback_base)
    return Path(base_dir, "transcoder")
def setup_logging():
"""
Sets up logging to both console (INFO) and file (DEBUG/Verbose).
@@ -35,10 +51,10 @@ def setup_logging():
# Create logger
logger = logging.getLogger()
logger.setLevel(logging.DEBUG) # Capture everything
logger.setLevel(logging.DEBUG) # Capture everything
# formatter
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
# File Handler (Verbose)
file_handler = logging.FileHandler(log_file)
@@ -49,11 +65,12 @@ def setup_logging():
# Console Handler (Concise)
console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(logging.Formatter('[%(levelname)s] %(message)s'))
console_handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
logger.addHandler(console_handler)
logging.info(f"Logging started. Verbose logs at: {log_file}")
def load_config():
"""
Loads configuration from config.json in the config directory.
@@ -64,7 +81,7 @@ def load_config():
return {}
try:
with open(config_path, 'r') as f:
with open(config_path, "r") as f:
config = json.load(f)
logging.debug(f"Loaded config from {config_path}: {config}")
return config
@@ -72,58 +89,118 @@ def load_config():
logging.error(f"Failed to load config file: {e}")
return {}
def _run_ffprobe(cmd):
    """
    Run ffprobe with a fixed base command and return its parsed JSON output.

    ``cmd`` is a list of extra arguments (callers pass the input path) that is
    appended to a base command asking for quiet, JSON-formatted stream
    information restricted to stream ``v:0``.

    Returns the decoded JSON object, an empty dict when ffprobe printed
    nothing, or None when ffprobe exited with an error or could not be found.
    """
    full_cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_streams",
        "-select_streams",
        "v:0",
    ] + cmd

    try:
        completed = subprocess.run(
            full_cmd, capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as e:
        logging.warning(f"ffprobe command failed: {e}")
        return None
    except FileNotFoundError:
        logging.error("ffprobe not found. Please ensure ffmpeg/ffprobe is installed.")
        return None

    # Empty output is reported as an empty JSON object rather than an error.
    payload = completed.stdout.strip()
    return json.loads(payload) if payload else {}
def get_video_codec(file_path):
    """
    Report the codec of the file's first video stream as seen by ffprobe.

    Returns the codec name (e.g., 'av1', 'h264', 'hevc'), or None when ffprobe
    yields no data or anything goes wrong while probing.
    """
    try:
        # The probe call itself is intentionally not logged to keep the debug
        # log readable; only its outcome (or a failure) is recorded.
        probe = _run_ffprobe([str(file_path)])
        if not probe:
            return None
        codec = probe["streams"][0]["codec_name"]
        logging.debug(f"Detected codec for {file_path}: {codec}")
        return codec
    except Exception as e:
        logging.warning(f"Failed to probe codec for {file_path}: {e}")
        return None
def get_video_duration(file_path):
    """
    Get the video duration using ffprobe, formatted as whole minutes.

    The duration of the first video stream is preferred. When the stream
    carries no usable duration (some containers only record it at container
    level), the container ("format") duration is used instead.

    Returns a string such as "42m", or "0m" when the duration cannot be
    determined. This function never raises: probing is best-effort because
    the value is only used for statistics.
    """
    try:
        # "-show_format" adds the container section to ffprobe's JSON output
        # so a container-level duration is available as a fallback.
        ffprobe_data = _run_ffprobe(["-show_format", str(file_path)])
        if not ffprobe_data:
            return "0m"

        streams = ffprobe_data.get("streams") or []
        candidates = [
            streams[0].get("duration") if streams else None,
            (ffprobe_data.get("format") or {}).get("duration"),
        ]
        for raw_duration in candidates:
            try:
                duration = float(raw_duration)
            except (TypeError, ValueError):
                # Missing or non-numeric value: try the next source.
                continue
            # format to minutes with "m" suffix
            duration_formatted = f"{duration / 60:.0f}m"
            logging.debug(f"Detected duration for {file_path}: {duration_formatted}")
            return duration_formatted

        logging.warning(
            f"Could not get duration for {file_path}: no duration reported"
        )
        return "0m"
    except Exception as e:
        logging.warning(f"Could not get duration for {file_path}: {e}")
        return "0m"
def leftalign(str):
    """Return the given text prefixed with five spaces of indentation."""
    # NOTE: the parameter name shadows the builtin ``str``; it is kept because
    # it is part of the function's existing signature.
    indent = " " * 5
    return indent + str
def get_ffmpeg_command(input_path, output_path):
    """
    Build the FFmpeg argument list implementing the Handbrake preset.

    Returns a list of strings suitable for ``subprocess``; ``input_path`` and
    ``output_path`` are converted with ``str()``.
    """
    video_options = [
        "-c:v", "av1_nvenc",  # Video Encoder
        "-pix_fmt", "p010le",  # 10-bit color
        "-preset", "p4",  # Medium preset
        "-rc", "vbr",  # Variable Bit Rate control
        "-cq", "35",  # Constant Quality factor
        # Filter chain: Deinterlace -> Scale down if >1080p -> Cap FPS at 30
        "-vf", "yadif,scale='min(1920,iw)':-2,fps=30",
    ]
    audio_options = [
        "-c:a", "aac",  # Audio Encoder
        "-b:a", "160k",  # Audio Bitrate
        "-ac", "2",  # Audio Channels (Stereo)
    ]
    container_options = [
        "-color_range", "tv",  # Limited color range
        "-movflags", "+faststart",  # Web optimization
    ]
    return [
        "ffmpeg",
        "-n",  # Never overwrite output files
        "-i", str(input_path),  # Input file
        *video_options,
        *audio_options,
        *container_options,
        str(output_path),  # Output file
    ]
def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=False):
exit_after_next = False
input_path = Path(input_file)
logging.debug(f"Processing request for: {input_path}")
@@ -132,14 +209,15 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
use_temp_file = False
if replace_mode:
logging.warning("Policy: --replace is enabled. --output-dir is ignored. Transcoding in-place.")
target_path = input_path.with_suffix('.mp4')
target_path = input_path.with_suffix(".mp4")
# If the target is the same as input (e.g. input is already .mp4), use a temp file
if target_path == input_path:
use_temp_file = True
transcode_output_path = input_path.with_suffix('.tmp.mp4')
logging.debug(f"Input and output filenames match. Using temp file: {transcode_output_path}")
transcode_output_path = input_path.with_suffix(".tmp.mp4")
logging.debug(
f"Input and output filenames match. Using temp file: {transcode_output_path}"
)
else:
transcode_output_path = target_path
else:
@@ -155,16 +233,20 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
# If replace_mode is True and extensions differ, we still want to respect -n (no overwrite)
# for the destination file if it already exists.
if transcode_output_path.exists():
logging.info(f"SKIP: Output file already exists: {transcode_output_path.name}")
logging.info(
leftalign(f"SKIP: Output file already exists: {transcode_output_path}")
)
return
# 2. Check if file is ready (simple size stability check)
logging.info(f"WAIT: Ensuring file is ready: {input_path.name}...")
logging.info(leftalign("WAIT: Ensuring file is ready..."))
try:
historical_size = -1
while True:
current_size = input_path.stat().st_size
logging.debug(f"File stability check - Current: {current_size}, Previous: {historical_size}")
logging.debug(
f"File stability check - Current: {current_size}, Previous: {historical_size}"
)
if current_size == historical_size and current_size > 0:
logging.debug("File size stable. Proceeding.")
@@ -173,27 +255,32 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
historical_size = current_size
time.sleep(2)
except FileNotFoundError:
logging.warning(f"File vanished during checks: {input_path.name}")
logging.warning(f"File vanished during checks: {input_path}")
return
# 3. Check Codec (Optional Skip)
if skip_av1:
codec = get_video_codec(input_path)
if codec == 'av1':
logging.info(f"SKIP: Input file is already AV1: {input_path.name}")
if codec == "av1":
logging.info(leftalign(f"SKIP: Input file is already AV1: {input_path}"))
return
else:
logging.info(leftalign(f"Codec is {codec}"))
# Prepare individual FFmpeg log file
ffmpeg_logs_dir = get_config_dir() / "ffmpeg_logs"
ffmpeg_logs_dir.mkdir(parents=True, exist_ok=True)
# We append .log to the full filename to avoid collisions (e.g. video.mp4.log)
ffmpeg_log_file = ffmpeg_logs_dir / f"{input_path.name}.log"
logfile_timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
# We also append .log to the full filename to avoid collisions (e.g. video.mp4.log)
ffmpeg_log_file = ffmpeg_logs_dir / f"{logfile_timestamp}_{input_path.name}.log"
logging.info(f"START: Transcoding {input_path.name}")
logging.info(leftalign("START: Transcoding..."))
if replace_mode:
logging.info(f" Outputting to temporary file: {input_path.name}")
logging.info(f" FFmpeg details logging to: {ffmpeg_log_file}")
logging.info(
leftalign(f"Outputting to temporary file: {transcode_output_path}")
)
logging.info(leftalign(f"FFmpeg details logging to: {ffmpeg_log_file}"))
cmd = get_ffmpeg_command(input_path, transcode_output_path)
logging.debug(f"Executing FFmpeg command: {' '.join(cmd)}")
@@ -204,36 +291,139 @@ def transcode_file(input_file, output_file=None, skip_av1=True, replace_mode=Fal
f_log.write(f"COMMAND: {' '.join(cmd)}\n\n")
f_log.flush()
# Run FFmpeg, redirecting both stdout and stderr to the individual log file
result = subprocess.run(cmd, stdout=f_log, stderr=subprocess.STDOUT, text=True)
# Run FFmpeg and process output in real-time with timestamps
process = subprocess.Popen(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
bufsize=1,
)
if result.returncode == 0:
logging.info(f"DONE: Successfully created {transcode_output_path.name}")
regex_elapsed = re.compile(r"elapsed=([0-9:.]+)")
transcoding_duration = None
# Write FFmpeg output with timestamps as it's generated
try:
while True:
output = process.stdout.readline() # pyright: ignore[reportOptionalMemberAccess]
if output == "" and process.poll() is not None:
break
if output:
# capture the elapsed time for later use, only keep the last occurence
if regex_result := re.findall(regex_elapsed, output):
transcoding_duration = regex_result[0]
timestamp = datetime.datetime.now().strftime(
"%Y-%m-%d %H:%M:%S"
)
f_log.write(f"[{timestamp}] {output.rstrip()}\n")
f_log.flush()
# Wait for process to complete and get return code
result = process.wait()
except Exception as e:
# Ensure we clean up temporary files on error
if replace_mode and use_temp_file and transcode_output_path.exists():
try:
transcode_output_path.unlink()
except Exception:
pass
raise e
if result == 0:
logging.info(
leftalign(f"DONE: Successfully transcoded to {transcode_output_path}")
)
transcoding_duration = transcode_output_path
logging.debug(f"Transcoding duration was {transcoding_duration}.")
original_codec = get_video_codec(input_path)
transcoded_codec = get_video_codec(transcode_output_path)
video_duration = get_video_duration(input_path)
original_size = f"{input_path.stat().st_size / (1024 * 1024):.0f}MB"
transcoded_size = (
f"{transcode_output_path.stat().st_size / (1024 * 1024):.0f}MB"
)
logging.debug(f"Original file size: {original_size}")
logging.debug(f"Transcoded file size: {transcoded_size}")
# Write to stats CSV file
stats_file = get_config_dir() / "stats.csv"
file_exists = stats_file.exists()
with open(stats_file, "a", newline="", encoding="utf-8") as csvfile:
writer = csv.writer(csvfile)
# Write header if file doesn't exist
if not file_exists:
writer.writerow(
[
"filename",
"original_size",
"transcoded_size",
"original_codec",
"transcoded_codec",
"video_duration",
"transcoding_duration",
]
)
writer.writerow(
[
input_path.name,
original_size,
transcoded_size,
original_codec,
transcoded_codec,
video_duration,
transcoding_duration,
]
)
if replace_mode:
if use_temp_file:
# Rename temp file to overwrite original
try:
transcode_output_path.replace(input_path)
logging.info(f"REPLACE: Overwrote original file {input_path.name} with new version.")
logging.info(leftalign("REPLACE: Overwrote original file."))
except OSError as e:
logging.error(f"Failed to replace original file: {e}")
logging.error(
leftalign(f"Failed to replace original file: {e}")
)
else:
# Different extensions (e.g. mkv -> mp4). Delete original.
try:
input_path.unlink()
logging.info(f"DELETE: Removed original file {input_path.name}")
logging.info(leftalign("DELETE: Removed original file."))
except OSError as e:
logging.error(f"Failed to delete original file: {e}")
logging.error(leftalign(f"Failed to delete original file: {e}"))
else:
logging.error(f"FFmpeg failed for {input_path.name}. See log at {ffmpeg_log_file}")
logging.error(
f"FFmpeg failed for {input_path}. See log at {ffmpeg_log_file}"
)
if replace_mode and use_temp_file and transcode_output_path.exists():
try:
transcode_output_path.unlink()
except Exception:
pass
if exit_after_next:
logging.info("Quitting early due to user interrupt.")
sys.exit(0)
except Exception as e:
logging.exception(f"Unexpected error during transcoding of {input_path.name}")
logging.exception(f"Unexpected error during transcoding of {input_path}: {e}")
except KeyboardInterrupt:
logging.warning("Interrupted by user during transcoding.")
sys.exit(0)
logging.info(
"Will quit after the current file is transcoded. Press Ctrl+C again to force quit."
)
if exit_after_next:
sys.exit(0)
exit_after_next = True
class NewFileHandler(FileSystemEventHandler):
def __init__(self, output_dir=None, skip_av1=True, replace_mode=False):
@@ -258,7 +448,7 @@ class NewFileHandler(FileSystemEventHandler):
# Filter for video extensions
if input_path.suffix.lower() not in VIDEO_EXTENSIONS:
logging.debug(f"Ignored non-video file: {input_path.name}")
logging.debug(f"Ignored non-video file: {input_path}")
return
output_path = None
@@ -266,7 +456,13 @@ class NewFileHandler(FileSystemEventHandler):
new_filename = input_path.stem + ".mp4"
output_path = self.output_dir / new_filename
transcode_file(input_path, output_path, skip_av1=self.skip_av1, replace_mode=self.replace_mode)
transcode_file(
input_path,
output_path,
skip_av1=self.skip_av1,
replace_mode=self.replace_mode,
)
def main():
setup_logging()
@@ -274,24 +470,40 @@ def main():
parser = argparse.ArgumentParser(description="Nvidia AV1 Transcoder & Watcher")
parser.add_argument("--input", type=str, help="Single file to transcode (Overrides watch mode)")
parser.add_argument("--watch-dir", type=str, help="Directory to monitor for new files")
parser.add_argument("--input", type=str, help="File or directory to transcode")
parser.add_argument(
"--watch", action="store_true", help="Enable filesystem watching mode"
)
parser.add_argument("--output-dir", type=str, help="Output directory")
parser.add_argument("--no-skip-av1", action="store_true", help="Force transcoding even if input is already AV1")
parser.add_argument("--replace", action="store_true", help="Replace original files with transcoded versions (Ignores --output-dir)")
parser.add_argument(
"--no-skip-av1",
action="store_true",
help="Force transcoding even if input is already AV1",
)
parser.add_argument(
"--replace",
action="store_true",
help="Replace original files with transcoded versions (Ignores --output-dir)",
)
parser.add_argument(
"--recursive",
action="store_true",
help="Recursively scan directories for input files",
)
# Set defaults from config
# We accept both hyphenated and underscore keys from JSON for user convenience
default_watch = config.get("watch-dir") or config.get("watch_dir")
default_watch = config.get("watch") or False
default_output = config.get("output-dir") or config.get("output_dir")
default_no_skip = config.get("no-skip-av1") or config.get("no_skip_av1") or False
default_replace = config.get("replace") or False
default_recursive = config.get("recursive") or False
parser.set_defaults(
watch_dir=default_watch,
watch=default_watch,
output_dir=default_output,
no_skip_av1=default_no_skip,
replace=default_replace
replace=default_replace,
recursive=default_recursive,
)
args = parser.parse_args()
@@ -300,15 +512,16 @@ def main():
# --- Initial Policy Logging ---
if args.replace:
logging.info("Policy: REPLACE mode enabled. Original files will be overwritten/deleted.")
logging.info("POLICY: --replace: Original files will be overwritten/deleted.")
if args.output_dir:
logging.warning("Warning: --output-dir is specified but will be IGNORED due to --replace mode.")
logging.warning(
"--output-dir is specified but will be IGNORED due to --replace mode."
)
if args.no_skip_av1:
logging.info("Policy: Force transcoding all files (including AV1).")
logging.info("POLICY: --no-skip-av1: Do not skip AV1 files.")
else:
logging.info("Policy: Skipping files that are already AV1.")
logging.info("POLICY: Skip AV1 files. Change with --no-skip-av1.")
# --- Mode 1: Single File ---
if args.input:
@@ -317,6 +530,39 @@ def main():
logging.critical(f"Input file '{args.input}' not found.")
sys.exit(1)
# If input is a directory, process accordingly
if input_path.is_dir():
if args.recursive:
# Process directory recursively
process_recursive_directory(input_path, args, skip_av1)
return
else:
# Process directory non-recursively (all files in this directory only)
logging.info(f"Processing directory non-recursively: {input_path}")
for video_file in input_path.iterdir():
if (
video_file.is_file()
and video_file.suffix.lower() in VIDEO_EXTENSIONS
):
logging.info(f"FILE: {video_file}")
output_path = None
if not args.replace:
if args.output_dir:
out_dir = Path(args.output_dir)
out_dir.mkdir(parents=True, exist_ok=True)
output_path = out_dir / (video_file.stem + ".mp4")
else:
output_path = video_file.parent / (
video_file.stem + "_av1.mp4"
)
transcode_file(
video_file,
output_path,
skip_av1=skip_av1,
replace_mode=args.replace,
)
return
output_path = None
if not args.replace:
if args.output_dir:
@@ -326,18 +572,27 @@ def main():
else:
output_path = input_path.parent / (input_path.stem + "_av1.mp4")
transcode_file(input_path, output_path, skip_av1=skip_av1, replace_mode=args.replace)
transcode_file(
input_path, output_path, skip_av1=skip_av1, replace_mode=args.replace
)
return
# --- Mode 2: Watch Directory ---
if args.watch_dir:
if args.watch:
# If replace mode is OFF, output-dir is required.
if not args.replace and not args.output_dir:
logging.critical("Output directory is not specified in CLI (--output-dir) or Config.")
logging.critical(
"Output directory is not specified in CLI (--output-dir) or Config."
)
logging.critical("Either specify --output-dir OR enable --replace mode.")
sys.exit(1)
watch_dir = Path(args.watch_dir)
# Use --input as the watch directory if provided, otherwise default to current directory
if args.input:
watch_dir = Path(args.input)
else:
watch_dir = Path(".")
logging.info("No watch directory specified, monitoring current directory.")
if not watch_dir.exists():
logging.critical(f"Watch directory '{watch_dir}' does not exist.")
@@ -355,9 +610,13 @@ def main():
logging.info("Press Ctrl+C to stop.")
event_handler = NewFileHandler(output_dir_path, skip_av1=skip_av1, replace_mode=args.replace)
event_handler = NewFileHandler(
output_dir_path, skip_av1=skip_av1, replace_mode=args.replace
)
observer = Observer()
observer.schedule(event_handler, str(watch_dir), recursive=False)
# Use recursive monitoring if --recursive is specified
recursive_watch = args.recursive
observer.schedule(event_handler, str(watch_dir), recursive=recursive_watch)
observer.start()
try:
@@ -369,8 +628,33 @@ def main():
observer.join()
else:
logging.critical("No operation mode selected.")
logging.critical("Please provide --input for single file OR configure watch-dir via CLI or Config.")
logging.critical(
"Please provide --input for single file OR enable --watch mode."
)
sys.exit(1)
def process_recursive_directory(input_path, args, skip_av1):
    """Process all video files in a directory recursively.

    Args:
        input_path: Root directory (a ``Path``) to scan.
        args: Parsed command-line namespace; ``replace`` and ``output_dir``
            are read from it.
        skip_av1: Forwarded to ``transcode_file`` for every file found.

    Output location per file: none in replace mode (``transcode_file``
    decides), ``<output_dir>/<stem>.mp4`` when an output directory is set,
    otherwise ``<stem>_av1.mp4`` next to the source file.
    """
    logging.info(f"Scanning directory recursively for video files: {input_path}")

    # Take a snapshot of the tree BEFORE transcoding anything. Transcoding
    # creates new .mp4 files inside the tree being scanned; iterating the lazy
    # rglob() generator while doing so could pick those outputs up and process
    # them again. Sorting makes the processing order deterministic.
    video_files = sorted(
        candidate
        for candidate in input_path.rglob("*")
        if candidate.is_file() and candidate.suffix.lower() in VIDEO_EXTENSIONS
    )

    # Create the shared output directory once, and only if there is work to do.
    out_dir = None
    if video_files and not args.replace and args.output_dir:
        out_dir = Path(args.output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)

    for video_file in video_files:
        logging.info(f"FILE: {video_file}")

        if args.replace:
            output_path = None
        elif out_dir is not None:
            output_path = out_dir / (video_file.stem + ".mp4")
        else:
            output_path = video_file.parent / (video_file.stem + "_av1.mp4")

        transcode_file(
            video_file,
            output_path,
            skip_av1=skip_av1,
            replace_mode=args.replace,
        )
if __name__ == "__main__":
main()