From 94963ee47d0454189c1490379ea51a327d3b5807 Mon Sep 17 00:00:00 2001 From: henryruhs Date: Mon, 14 Oct 2024 12:22:04 +0200 Subject: [PATCH] Remove shortest and use fixed video duration --- facefusion/ffmpeg.py | 8 +++++--- facefusion/typing.py | 1 + facefusion/vision.py | 10 +++++++++- tests/test_vision.py | 7 ++++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index 2be95d1a..d7f7c480 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -10,7 +10,7 @@ from facefusion import logger, process_manager, state_manager from facefusion.filesystem import remove_file from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern from facefusion.typing import AudioBuffer, Fps, OutputVideoPreset -from facefusion.vision import restrict_video_fps +from facefusion.vision import detect_video_duration, restrict_video_fps def run_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]: @@ -138,6 +138,7 @@ def restore_audio(target_path : str, output_path : str, output_video_fps : Fps) trim_frame_start = state_manager.get_item('trim_frame_start') trim_frame_end = state_manager.get_item('trim_frame_end') temp_file_path = get_temp_file_path(target_path) + temp_video_duration = detect_video_duration(temp_file_path) commands = [ '-i', temp_file_path ] if isinstance(trim_frame_start, int): @@ -146,13 +147,14 @@ def restore_audio(target_path : str, output_path : str, output_video_fps : Fps) if isinstance(trim_frame_end, int): end_time = trim_frame_end / output_video_fps commands.extend([ '-to', str(end_time) ]) - commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', state_manager.get_item('output_audio_encoder'), '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-y', output_path ]) + commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', state_manager.get_item('output_audio_encoder'), '-map', '0:v:0', '-map', '1:a:0', '-t', str(temp_video_duration), '-y', output_path ]) return run_ffmpeg(commands).returncode == 0 def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool: temp_file_path = get_temp_file_path(target_path) - commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', state_manager.get_item('output_audio_encoder'), '-shortest', '-y', output_path ] + temp_video_duration = detect_video_duration(temp_file_path) + commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', state_manager.get_item('output_audio_encoder'), '-t', str(temp_video_duration), '-y', output_path ] return run_ffmpeg(commands).returncode == 0 diff --git a/facefusion/typing.py b/facefusion/typing.py index c051b407..4bd16ec6 100755 --- a/facefusion/typing.py +++ b/facefusion/typing.py @@ -67,6 +67,7 @@ Mel = NDArray[Any] MelFilterBank = NDArray[Any] Fps = float +Duration = float Padding = Tuple[int, int, int, int] Orientation = Literal['landscape', 'portrait'] Resolution = Tuple[int, int] diff --git a/facefusion/vision.py b/facefusion/vision.py index d5c925db..215dcf77 100644 --- a/facefusion/vision.py +++ b/facefusion/vision.py @@ -8,7 +8,7 @@ from cv2.typing import Size from facefusion.choices import image_template_sizes, video_template_sizes from facefusion.common_helper import is_windows from facefusion.filesystem import is_image, is_video, sanitize_path_for_windows -from facefusion.typing import Fps, Orientation, Resolution, VisionFrame +from facefusion.typing import Duration, Fps, Orientation, Resolution, VisionFrame @lru_cache(maxsize = 128) @@ -119,6 +119,14 @@ def restrict_video_fps(video_path : str, fps : Fps) -> Fps: return fps +def detect_video_duration(video_path : str) -> Duration: + video_frame_total = count_video_frame_total(video_path) + video_fps = detect_video_fps(video_path) + if video_frame_total and video_fps: + return video_frame_total / video_fps + return 0 + + def detect_video_resolution(video_path : str) -> Optional[Resolution]: if is_video(video_path): if is_windows(): diff --git a/tests/test_vision.py b/tests/test_vision.py index 7cb69860..dc02f4ea 100644 --- a/tests/test_vision.py +++ b/tests/test_vision.py @@ -3,7 +3,7 @@ import subprocess import pytest from facefusion.download import conditional_download -from facefusion.vision import count_video_frame_total, create_image_resolutions, create_video_resolutions, detect_image_resolution, detect_video_fps, detect_video_resolution, get_video_frame, normalize_resolution, pack_resolution, restrict_image_resolution, restrict_video_fps, restrict_video_resolution, unpack_resolution +from facefusion.vision import count_video_frame_total, create_image_resolutions, create_video_resolutions, detect_image_resolution, detect_video_fps, detect_video_duration, detect_video_resolution, get_video_frame, normalize_resolution, pack_resolution, restrict_image_resolution, restrict_video_fps, restrict_video_resolution, unpack_resolution from .helper import get_test_example_file, get_test_examples_directory @@ -73,6 +73,11 @@ def test_restrict_video_fps() -> None: assert restrict_video_fps(get_test_example_file('target-1080p.mp4'), 60.0) == 25.0 +def test_detect_video_duration() -> None: + assert detect_video_duration(get_test_example_file('target-240p.mp4')) == 10.8 + assert detect_video_duration('invalid') == 0 + + def test_detect_video_resolution() -> None: assert detect_video_resolution(get_test_example_file('target-240p.mp4')) == (426, 226) assert detect_video_resolution(get_test_example_file('target-240p-90deg.mp4')) == (226, 426)