From 94963ee47d0454189c1490379ea51a327d3b5807 Mon Sep 17 00:00:00 2001
From: henryruhs <info@henryruhs.com>
Date: Mon, 14 Oct 2024 12:22:04 +0200
Subject: [PATCH] Remove shortest and use fixed video duration

---
 facefusion/ffmpeg.py |  8 +++++---
 facefusion/typing.py |  1 +
 facefusion/vision.py | 10 +++++++++-
 tests/test_vision.py |  7 ++++++-
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py
index 2be95d1a..d7f7c480 100644
--- a/facefusion/ffmpeg.py
+++ b/facefusion/ffmpeg.py
@@ -10,7 +10,7 @@ from facefusion import logger, process_manager, state_manager
 from facefusion.filesystem import remove_file
 from facefusion.temp_helper import get_temp_file_path, get_temp_frames_pattern
 from facefusion.typing import AudioBuffer, Fps, OutputVideoPreset
-from facefusion.vision import restrict_video_fps
+from facefusion.vision import detect_video_duration, restrict_video_fps
 
 
 def run_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]:
@@ -138,6 +138,7 @@ def restore_audio(target_path : str, output_path : str, output_video_fps : Fps)
 	trim_frame_start = state_manager.get_item('trim_frame_start')
 	trim_frame_end = state_manager.get_item('trim_frame_end')
 	temp_file_path = get_temp_file_path(target_path)
+	temp_video_duration = detect_video_duration(temp_file_path)
 	commands = [ '-i', temp_file_path ]
 
 	if isinstance(trim_frame_start, int):
@@ -146,13 +147,22 @@ def restore_audio(target_path : str, output_path : str, output_video_fps : Fps)
 	if isinstance(trim_frame_end, int):
 		end_time = trim_frame_end / output_video_fps
 		commands.extend([ '-to', str(end_time) ])
-	commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', state_manager.get_item('output_audio_encoder'), '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-y', output_path ])
+	commands.extend([ '-i', target_path, '-c:v', 'copy', '-c:a', state_manager.get_item('output_audio_encoder'), '-map', '0:v:0', '-map', '1:a:0', '-t', str(temp_video_duration), '-y', output_path ])
 	return run_ffmpeg(commands).returncode == 0
 
 
 def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool:
+	"""
+	Run ffmpeg with the temp file of target_path and audio_path as inputs, copying the video stream and encoding the audio with the 'output_audio_encoder' state item.
+
+	Returns True when the ffmpeg process finishes with return code 0.
+	"""
 	temp_file_path = get_temp_file_path(target_path)
-	commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', state_manager.get_item('output_audio_encoder'), '-shortest', '-y', output_path ]
+	temp_video_duration = detect_video_duration(temp_file_path)
+	# detect_video_duration() returns 0 when the frame total or fps is unavailable and '-t 0' would
+	# tell ffmpeg to stop writing at zero duration, so only a positive duration replaces '-shortest'
+	commands_limit = [ '-t', str(temp_video_duration) ] if temp_video_duration > 0 else [ '-shortest' ]
+	commands = [ '-i', temp_file_path, '-i', audio_path, '-c:v', 'copy', '-c:a', state_manager.get_item('output_audio_encoder') ] + commands_limit + [ '-y', output_path ]
 	return run_ffmpeg(commands).returncode == 0
 
 
diff --git a/facefusion/typing.py b/facefusion/typing.py
index c051b407..4bd16ec6 100755
--- a/facefusion/typing.py
+++ b/facefusion/typing.py
@@ -67,6 +67,7 @@ Mel = NDArray[Any]
 MelFilterBank = NDArray[Any]
 
 Fps = float
+Duration = float
 Padding = Tuple[int, int, int, int]
 Orientation = Literal['landscape', 'portrait']
 Resolution = Tuple[int, int]
diff --git a/facefusion/vision.py b/facefusion/vision.py
index d5c925db..215dcf77 100644
--- a/facefusion/vision.py
+++ b/facefusion/vision.py
@@ -8,7 +8,7 @@ from cv2.typing import Size
 from facefusion.choices import image_template_sizes, video_template_sizes
 from facefusion.common_helper import is_windows
 from facefusion.filesystem import is_image, is_video, sanitize_path_for_windows
-from facefusion.typing import Fps, Orientation, Resolution, VisionFrame
+from facefusion.typing import Duration, Fps, Orientation, Resolution, VisionFrame
 
 
 @lru_cache(maxsize = 128)
@@ -119,6 +119,15 @@ def restrict_video_fps(video_path : str, fps : Fps) -> Fps:
 	return fps
 
 
+def detect_video_duration(video_path : str) -> Duration:
+	"""Return the frame total of video_path divided by its fps, or 0 when either value is missing or zero."""
+	frame_total = count_video_frame_total(video_path)
+	fps = detect_video_fps(video_path)
+	if not frame_total or not fps:
+		return 0
+	return frame_total / fps
+
+
 def detect_video_resolution(video_path : str) -> Optional[Resolution]:
 	if is_video(video_path):
 		if is_windows():
diff --git a/tests/test_vision.py b/tests/test_vision.py
index 7cb69860..dc02f4ea 100644
--- a/tests/test_vision.py
+++ b/tests/test_vision.py
@@ -3,7 +3,7 @@ import subprocess
 import pytest
 
 from facefusion.download import conditional_download
-from facefusion.vision import count_video_frame_total, create_image_resolutions, create_video_resolutions, detect_image_resolution, detect_video_fps, detect_video_resolution, get_video_frame, normalize_resolution, pack_resolution, restrict_image_resolution, restrict_video_fps, restrict_video_resolution, unpack_resolution
+from facefusion.vision import count_video_frame_total, create_image_resolutions, create_video_resolutions, detect_image_resolution, detect_video_fps, detect_video_duration, detect_video_resolution, get_video_frame, normalize_resolution, pack_resolution, restrict_image_resolution, restrict_video_fps, restrict_video_resolution, unpack_resolution
 from .helper import get_test_example_file, get_test_examples_directory
 
 
@@ -73,6 +73,13 @@ def test_restrict_video_fps() -> None:
 	assert restrict_video_fps(get_test_example_file('target-1080p.mp4'), 60.0) == 25.0
 
 
+def test_detect_video_duration() -> None:
+	# the 240p example must report 10.8 while a path that is not a video must report 0
+	target_path = get_test_example_file('target-240p.mp4')
+	assert detect_video_duration(target_path) == 10.8
+	assert detect_video_duration('invalid') == 0
+
+
 def test_detect_video_resolution() -> None:
 	assert detect_video_resolution(get_test_example_file('target-240p.mp4')) == (426, 226)
 	assert detect_video_resolution(get_test_example_file('target-240p-90deg.mp4')) == (226, 426)