diff --git a/.github/preview.png b/.github/preview.png index 1287db0f..ad94832d 100644 Binary files a/.github/preview.png and b/.github/preview.png differ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e31ce521..51fa80e5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,6 +30,6 @@ jobs: uses: actions/setup-python@v2 with: python-version: '3.10' - - run: python install.py --torch cpu --onnxruntime default --skip-venv + - run: python install.py --onnxruntime default --skip-venv - run: pip install pytest - run: pytest diff --git a/README.md b/README.md index 7568f9cc..6d14f688 100644 --- a/README.md +++ b/README.md @@ -54,12 +54,13 @@ face analyser: --face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order in which the face analyser detects faces. --face-analyser-age {child,teen,adult,senior} filter the detected faces based on their age --face-analyser-gender {female,male} filter the detected faces based on their gender - --face-detector-model {retinaface,yoloface,yunet} choose the model responsible for detecting the face + --face-detector-model {many,retinaface,scrfd,yoloface,yunet} choose the model responsible for detecting the face --face-detector-size FACE_DETECTOR_SIZE specify the size of the frame provided to the face detector --face-detector-score [0.0-1.0] filter the detected faces base on the confidence score + --face-landmarker-score [0.0-1.0] filter the detected landmarks base on the confidence score face selector: - --face-selector-mode {reference,one,many} use reference based tracking with simple matching + --face-selector-mode {many,one,reference} use reference based tracking or simple matching --reference-face-position REFERENCE_FACE_POSITION specify the position used to create the reference face --reference-face-distance [0.0-1.5] specify the desired similarity between the reference face and target face --reference-frame-number REFERENCE_FRAME_NUMBER specify the frame used to create the reference face @@ -74,12 +75,12 @@ frame extraction: --trim-frame-start TRIM_FRAME_START specify the the start frame of the target video --trim-frame-end TRIM_FRAME_END specify the the end frame of the target video --temp-frame-format {bmp,jpg,png} specify the temporary resources format - --temp-frame-quality [0-100] specify the temporary resources quality --keep-temp keep the temporary resources after processing output creation: --output-image-quality [0-100] specify the image quality which translates to the compression factor - --output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc} specify the encoder use for the video compression + --output-image-resolution OUTPUT_IMAGE_RESOLUTION specify the image output resolution based on the target image + --output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc,h264_amf,hevc_amf} specify the encoder use for the video compression --output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} balance fast video processing and video file size --output-video-quality [0-100] specify the video quality which translates to the compression factor --output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the video output resolution based on the target video @@ -88,11 +89,11 @@ output creation: frame processors: --frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] load a single or multiple frame processors. 
(choices: face_debugger, face_enhancer, face_swapper, frame_enhancer, lip_syncer, ...) - --face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] load a single or multiple frame processors (choices: bounding-box, landmark-5, landmark-68, face-mask, score, age, gender) + --face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] load a single or multiple frame processors (choices: bounding-box, face-landmark-5, face-landmark-5/68, face-landmark-68, face-mask, face-detector-score, face-landmarker-score, age, gender) --face-enhancer-model {codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,restoreformer_plus_plus} choose the model responsible for enhancing the face --face-enhancer-blend [0-100] blend the enhanced into the previous face --face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial,uniface_256} choose the model responsible for swapping the face - --frame-enhancer-model {real_esrgan_x2plus,real_esrgan_x4plus,real_esrnet_x4plus} choose the model responsible for enhancing the frame + --frame-enhancer-model {lsdir_x4,nomos8k_sc_x4,real_esrgan_x4,span_kendata_x4} choose the model responsible for enhancing the frame --frame-enhancer-blend [0-100] blend the enhanced into the previous frame --lip-syncer-model {wav2lip_gan} choose the model responsible for syncing the lips diff --git a/facefusion.ini b/facefusion.ini index b2932ad4..1021c803 100644 --- a/facefusion.ini +++ b/facefusion.ini @@ -24,6 +24,7 @@ face_analyser_gender = face_detector_model = face_detector_size = face_detector_score = +face_landmarker_score = [face_selector] face_selector_mode = @@ -41,11 +42,11 @@ face_mask_regions = trim_frame_start = trim_frame_end = temp_frame_format = -temp_frame_quality = keep_temp = [output_creation] output_image_quality = +output_image_resolution = output_video_encoder = output_video_preset = output_video_quality = diff --git a/facefusion/audio.py b/facefusion/audio.py index cd4a08b1..a77448f3 100644 --- a/facefusion/audio.py +++ b/facefusion/audio.py @@ -11,11 +11,16 @@ from facefusion.typing import Fps, Audio, Spectrogram, AudioFrame def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]: if is_audio(audio_path): audio_frames = read_static_audio(audio_path, fps) - if frame_number < len(audio_frames): + if frame_number in range(len(audio_frames)): return audio_frames[frame_number] return None +def create_empty_audio_frame() -> AudioFrame: + audio_frame = numpy.zeros((80, 16), dtype = numpy.int16) + return audio_frame + + @lru_cache(maxsize = None) def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: if is_audio(audio_path): diff --git a/facefusion/choices.py b/facefusion/choices.py index d1dc4fce..49d47add 100755 --- a/facefusion/choices.py +++ b/facefusion/choices.py @@ -9,26 +9,29 @@ face_analyser_ages : List[FaceAnalyserAge] = [ 'child', 'teen', 'adult', 'senior face_analyser_genders : List[FaceAnalyserGender] = [ 'female', 'male' ] face_detector_set : Dict[FaceDetectorModel, List[str]] =\ { - 'retinaface': [ '160x160', '320x320', '480x480', '512x512', '640x640' ], + 'many': [ '640x640' ], + 'retinaface': [ '160x160', '320x320', '480x480', '512x512', '640x640' ], + 'scrfd': [ '160x160', '320x320', '480x480', '512x512', '640x640' ], 'yoloface': [ '640x640' ], 'yunet': [ '160x160', '320x320', '480x480', '512x512', '640x640', '768x768', '960x960', '1024x1024' ] } -face_selector_modes : List[FaceSelectorMode] = [ 'reference', 'one', 
'many' ] +face_selector_modes : List[FaceSelectorMode] = [ 'many', 'one', 'reference' ] face_mask_types : List[FaceMaskType] = [ 'box', 'occlusion', 'region' ] face_mask_regions : List[FaceMaskRegion] = [ 'skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'eye-glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip' ] temp_frame_formats : List[TempFrameFormat] = [ 'bmp', 'jpg', 'png' ] -output_video_encoders : List[OutputVideoEncoder] = [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc' ] +output_video_encoders : List[OutputVideoEncoder] = [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf' ] output_video_presets : List[OutputVideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ] -video_template_sizes : List[int] = [ 240, 360, 480, 540, 720, 1080, 1440, 2160 ] +image_template_sizes : List[float] = [ 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5, 3, 3.5, 4 ] +video_template_sizes : List[int] = [ 240, 360, 480, 540, 720, 1080, 1440, 2160, 4320 ] execution_thread_count_range : List[int] = create_int_range(1, 128, 1) execution_queue_count_range : List[int] = create_int_range(1, 32, 1) system_memory_limit_range : List[int] = create_int_range(0, 128, 1) face_detector_score_range : List[float] = create_float_range(0.0, 1.0, 0.05) +face_landmarker_score_range : List[float] = create_float_range(0.0, 1.0, 0.05) face_mask_blur_range : List[float] = create_float_range(0.0, 1.0, 0.05) face_mask_padding_range : List[int] = create_int_range(0, 100, 1) reference_face_distance_range : List[float] = create_float_range(0.0, 1.5, 0.05) -temp_frame_quality_range : List[int] = create_int_range(0, 100, 1) output_image_quality_range : List[int] = create_int_range(0, 100, 1) output_video_quality_range : List[int] = create_int_range(0, 100, 1) diff --git a/facefusion/common_helper.py b/facefusion/common_helper.py index ea1ec830..f5d8b68d 100644 --- a/facefusion/common_helper.py +++ b/facefusion/common_helper.py @@ -1,4 +1,4 @@ -from typing import List, Any +from typing import List, Any, Tuple import numpy @@ -16,3 +16,12 @@ def create_float_range(start : float, stop : float, step : float) -> List[float] def get_first(__list__ : Any) -> Any: return next(iter(__list__), None) + + +def extract_major_version(version : str) -> Tuple[int, int]: + versions = version.split('.') + if len(versions) > 1: + return int(versions[0]), int(versions[1]) + if len(versions) == 1: + return int(versions[0]), 0 + return 0, 0 diff --git a/facefusion/config.py b/facefusion/config.py index f6aaabae..675ef3f1 100644 --- a/facefusion/config.py +++ b/facefusion/config.py @@ -12,7 +12,7 @@ def get_config() -> ConfigParser: if CONFIG is None: config_path = resolve_relative_path('../facefusion.ini') CONFIG = ConfigParser() - CONFIG.read(config_path) + CONFIG.read(config_path, encoding = 'utf-8') return CONFIG diff --git a/facefusion/content_analyser.py b/facefusion/content_analyser.py index 5f6133d2..d7f5b375 100644 --- a/facefusion/content_analyser.py +++ b/facefusion/content_analyser.py @@ -9,7 +9,7 @@ from tqdm import tqdm import facefusion.globals from facefusion import wording from facefusion.typing import VisionFrame, ModelValue, Fps -from facefusion.execution_helper import apply_execution_provider_options +from facefusion.execution import apply_execution_provider_options from facefusion.vision import get_video_frame, count_video_frame_total, read_image, detect_video_fps from facefusion.filesystem import resolve_relative_path from 
facefusion.download import conditional_download @@ -62,23 +62,23 @@ def analyse_stream(vision_frame : VisionFrame, video_fps : Fps) -> bool: return False -def prepare_frame(vision_frame : VisionFrame) -> VisionFrame: - vision_frame = cv2.resize(vision_frame, (224, 224)).astype(numpy.float32) - vision_frame -= numpy.array([ 104, 117, 123 ]).astype(numpy.float32) - vision_frame = numpy.expand_dims(vision_frame, axis = 0) - return vision_frame - - def analyse_frame(vision_frame : VisionFrame) -> bool: content_analyser = get_content_analyser() vision_frame = prepare_frame(vision_frame) probability = content_analyser.run(None, { - 'input:0': vision_frame + content_analyser.get_inputs()[0].name: vision_frame })[0][0][1] return probability > PROBABILITY_LIMIT +def prepare_frame(vision_frame : VisionFrame) -> VisionFrame: + vision_frame = cv2.resize(vision_frame, (224, 224)).astype(numpy.float32) + vision_frame -= numpy.array([ 104, 117, 123 ]).astype(numpy.float32) + vision_frame = numpy.expand_dims(vision_frame, axis = 0) + return vision_frame + + @lru_cache(maxsize = None) def analyse_image(image_path : str) -> bool: frame = read_image(image_path) diff --git a/facefusion/core.py b/facefusion/core.py index 4d06e902..505e59f1 100755 --- a/facefusion/core.py +++ b/facefusion/core.py @@ -4,32 +4,31 @@ os.environ['OMP_NUM_THREADS'] = '1' import signal import sys -import time import warnings import shutil import numpy import onnxruntime -from time import sleep +from time import sleep, time from argparse import ArgumentParser, HelpFormatter import facefusion.choices import facefusion.globals from facefusion.face_analyser import get_one_face, get_average_face from facefusion.face_store import get_reference_faces, append_reference_face -from facefusion import face_analyser, face_masker, content_analyser, config, metadata, logger, wording +from facefusion import face_analyser, face_masker, content_analyser, config, process_manager, metadata, logger, wording from facefusion.content_analyser import analyse_image, analyse_video from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module from facefusion.common_helper import create_metavar, get_first -from facefusion.execution_helper import encode_execution_providers, decode_execution_providers +from facefusion.execution import encode_execution_providers, decode_execution_providers from facefusion.normalizer import normalize_output_path, normalize_padding, normalize_fps from facefusion.memory import limit_system_memory +from facefusion.statistics import conditional_log_statistics from facefusion.filesystem import list_directory, get_temp_frame_paths, create_temp, move_temp, clear_temp, is_image, is_video, filter_audio_paths -from facefusion.ffmpeg import extract_frames, compress_image, merge_video, restore_audio, replace_audio -from facefusion.vision import get_video_frame, read_image, read_static_images, pack_resolution, detect_video_resolution, detect_video_fps, create_video_resolutions +from facefusion.ffmpeg import extract_frames, merge_video, copy_image, finalize_image, restore_audio, replace_audio +from facefusion.vision import read_image, read_static_images, detect_image_resolution, restrict_video_fps, create_image_resolutions, get_video_frame, detect_video_resolution, detect_video_fps, restrict_video_resolution, restrict_image_resolution, create_video_resolutions, pack_resolution, unpack_resolution onnxruntime.set_default_logger_severity(3) warnings.filterwarnings('ignore', category = UserWarning, module = 
'gradio') -warnings.filterwarnings('ignore', category = UserWarning, module = 'torchvision') def cli() -> None: @@ -63,6 +62,7 @@ def cli() -> None: group_face_analyser.add_argument('--face-detector-model', help = wording.get('help.face_detector_model'), default = config.get_str_value('face_analyser.face_detector_model', 'yoloface'), choices = facefusion.choices.face_detector_set.keys()) group_face_analyser.add_argument('--face-detector-size', help = wording.get('help.face_detector_size'), default = config.get_str_value('face_analyser.face_detector_size', '640x640')) group_face_analyser.add_argument('--face-detector-score', help = wording.get('help.face_detector_score'), type = float, default = config.get_float_value('face_analyser.face_detector_score', '0.5'), choices = facefusion.choices.face_detector_score_range, metavar = create_metavar(facefusion.choices.face_detector_score_range)) + group_face_analyser.add_argument('--face-landmarker-score', help = wording.get('help.face_landmarker_score'), type = float, default = config.get_float_value('face_analyser.face_landmarker_score', '0.5'), choices = facefusion.choices.face_landmarker_score_range, metavar = create_metavar(facefusion.choices.face_landmarker_score_range)) # face selector group_face_selector = program.add_argument_group('face selector') group_face_selector.add_argument('--face-selector-mode', help = wording.get('help.face_selector_mode'), default = config.get_str_value('face_selector.face_selector_mode', 'reference'), choices = facefusion.choices.face_selector_modes) @@ -79,12 +79,12 @@ def cli() -> None: group_frame_extraction = program.add_argument_group('frame extraction') group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('help.trim_frame_start'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_start')) group_frame_extraction.add_argument('--trim-frame-end', help = wording.get('help.trim_frame_end'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_end')) - group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('help.temp_frame_format'), default = config.get_str_value('frame_extraction.temp_frame_format', 'jpg'), choices = facefusion.choices.temp_frame_formats) - group_frame_extraction.add_argument('--temp-frame-quality', help = wording.get('help.temp_frame_quality'), type = int, default = config.get_int_value('frame_extraction.temp_frame_quality', '100'), choices = facefusion.choices.temp_frame_quality_range, metavar = create_metavar(facefusion.choices.temp_frame_quality_range)) + group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('help.temp_frame_format'), default = config.get_str_value('frame_extraction.temp_frame_format', 'png'), choices = facefusion.choices.temp_frame_formats) group_frame_extraction.add_argument('--keep-temp', help = wording.get('help.keep_temp'), action = 'store_true', default = config.get_bool_value('frame_extraction.keep_temp')) # output creation group_output_creation = program.add_argument_group('output creation') group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_metavar(facefusion.choices.output_image_quality_range)) + group_output_creation.add_argument('--output-image-resolution', help = wording.get('help.output_image_resolution'), 
default = config.get_str_value('output_creation.output_image_resolution')) group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders) group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_metavar(facefusion.choices.output_video_quality_range)) @@ -111,7 +111,7 @@ def apply_args(program : ArgumentParser) -> None: # general facefusion.globals.source_paths = args.source_paths facefusion.globals.target_path = args.target_path - facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_paths, facefusion.globals.target_path, args.output_path) + facefusion.globals.output_path = args.output_path # misc facefusion.globals.skip_download = args.skip_download facefusion.globals.headless = args.headless @@ -133,6 +133,7 @@ def apply_args(program : ArgumentParser) -> None: else: facefusion.globals.face_detector_size = '640x640' facefusion.globals.face_detector_score = args.face_detector_score + facefusion.globals.face_landmarker_score = args.face_landmarker_score # face selector facefusion.globals.face_selector_mode = args.face_selector_mode facefusion.globals.reference_face_position = args.reference_face_position @@ -147,20 +148,26 @@ def apply_args(program : ArgumentParser) -> None: facefusion.globals.trim_frame_start = args.trim_frame_start facefusion.globals.trim_frame_end = args.trim_frame_end facefusion.globals.temp_frame_format = args.temp_frame_format - facefusion.globals.temp_frame_quality = args.temp_frame_quality facefusion.globals.keep_temp = args.keep_temp # output creation facefusion.globals.output_image_quality = args.output_image_quality + if is_image(args.target_path): + output_image_resolution = detect_image_resolution(args.target_path) + output_image_resolutions = create_image_resolutions(output_image_resolution) + if args.output_image_resolution in output_image_resolutions: + facefusion.globals.output_image_resolution = args.output_image_resolution + else: + facefusion.globals.output_image_resolution = pack_resolution(output_image_resolution) facefusion.globals.output_video_encoder = args.output_video_encoder facefusion.globals.output_video_preset = args.output_video_preset facefusion.globals.output_video_quality = args.output_video_quality if is_video(args.target_path): - target_video_resolutions = create_video_resolutions(args.target_path) - if args.output_video_resolution in target_video_resolutions: + output_video_resolution = detect_video_resolution(args.target_path) + output_video_resolutions = create_video_resolutions(output_video_resolution) + if args.output_video_resolution in output_video_resolutions: facefusion.globals.output_video_resolution = args.output_video_resolution else: - target_video_resolution = detect_video_resolution(args.target_path) - facefusion.globals.output_video_resolution = pack_resolution(target_video_resolution) + facefusion.globals.output_video_resolution = 
pack_resolution(output_video_resolution) if args.output_video_fps or is_video(args.target_path): facefusion.globals.output_video_fps = normalize_fps(args.output_video_fps) or detect_video_fps(args.target_path) facefusion.globals.skip_audio = args.skip_audio @@ -196,6 +203,9 @@ def run(program : ArgumentParser) -> None: def destroy() -> None: + process_manager.stop() + while process_manager.is_processing(): + sleep(0.5) if facefusion.globals.target_path: clear_temp(facefusion.globals.target_path) sys.exit(0) @@ -212,7 +222,7 @@ def pre_check() -> bool: def conditional_process() -> None: - start_time = time.time() + start_time = time() for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): while not frame_processor_module.post_check(): logger.disable() @@ -247,28 +257,43 @@ def conditional_append_reference_faces() -> None: def process_image(start_time : float) -> None: + normed_output_path = normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path) if analyse_image(facefusion.globals.target_path): return - shutil.copy2(facefusion.globals.target_path, facefusion.globals.output_path) - # process frame + # copy image + process_manager.start() + temp_image_resolution = pack_resolution(restrict_image_resolution(facefusion.globals.target_path, unpack_resolution(facefusion.globals.output_image_resolution))) + logger.info(wording.get('copying_image').format(resolution = temp_image_resolution), __name__.upper()) + if copy_image(facefusion.globals.target_path, normed_output_path, temp_image_resolution): + logger.debug(wording.get('copying_image_succeed'), __name__.upper()) + else: + logger.error(wording.get('copying_image_failed'), __name__.upper()) + return + # process image for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): logger.info(wording.get('processing'), frame_processor_module.NAME) - frame_processor_module.process_image(facefusion.globals.source_paths, facefusion.globals.output_path, facefusion.globals.output_path) + frame_processor_module.process_image(facefusion.globals.source_paths, normed_output_path, normed_output_path) frame_processor_module.post_process() - # compress image - if compress_image(facefusion.globals.output_path): - logger.info(wording.get('compressing_image_succeed'), __name__.upper()) + if is_process_stopping(): + return + # finalize image + logger.info(wording.get('finalizing_image').format(resolution = facefusion.globals.output_image_resolution), __name__.upper()) + if finalize_image(normed_output_path, facefusion.globals.output_image_resolution): + logger.debug(wording.get('finalizing_image_succeed'), __name__.upper()) else: - logger.warn(wording.get('compressing_image_skipped'), __name__.upper()) + logger.warn(wording.get('finalizing_image_skipped'), __name__.upper()) # validate image - if is_image(facefusion.globals.output_path): - seconds = '{:.2f}'.format((time.time() - start_time) % 60) + if is_image(normed_output_path): + seconds = '{:.2f}'.format((time() - start_time) % 60) logger.info(wording.get('processing_image_succeed').format(seconds = seconds), __name__.upper()) + conditional_log_statistics() else: logger.error(wording.get('processing_image_failed'), __name__.upper()) + process_manager.end() def process_video(start_time : float) -> None: + normed_output_path = normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path) if analyse_video(facefusion.globals.target_path, facefusion.globals.trim_frame_start, 
facefusion.globals.trim_frame_end): return # clear temp @@ -278,47 +303,75 @@ def process_video(start_time : float) -> None: logger.debug(wording.get('creating_temp'), __name__.upper()) create_temp(facefusion.globals.target_path) # extract frames - logger.info(wording.get('extracting_frames_fps').format(video_fps = facefusion.globals.output_video_fps), __name__.upper()) - extract_frames(facefusion.globals.target_path, facefusion.globals.output_video_resolution, facefusion.globals.output_video_fps) - # process frame + process_manager.start() + temp_video_resolution = pack_resolution(restrict_video_resolution(facefusion.globals.target_path, unpack_resolution(facefusion.globals.output_video_resolution))) + temp_video_fps = restrict_video_fps(facefusion.globals.target_path, facefusion.globals.output_video_fps) + logger.info(wording.get('extracting_frames').format(resolution = temp_video_resolution, fps = temp_video_fps), __name__.upper()) + if extract_frames(facefusion.globals.target_path, temp_video_resolution, temp_video_fps): + logger.debug(wording.get('extracting_frames_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return + logger.error(wording.get('extracting_frames_failed'), __name__.upper()) + return + # process frames temp_frame_paths = get_temp_frame_paths(facefusion.globals.target_path) if temp_frame_paths: for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): logger.info(wording.get('processing'), frame_processor_module.NAME) frame_processor_module.process_video(facefusion.globals.source_paths, temp_frame_paths) frame_processor_module.post_process() + if is_process_stopping(): + return else: logger.error(wording.get('temp_frames_not_found'), __name__.upper()) return # merge video - logger.info(wording.get('merging_video_fps').format(video_fps = facefusion.globals.output_video_fps), __name__.upper()) - if not merge_video(facefusion.globals.target_path, facefusion.globals.output_video_resolution, facefusion.globals.output_video_fps): + logger.info(wording.get('merging_video').format(resolution = facefusion.globals.output_video_resolution, fps = facefusion.globals.output_video_fps), __name__.upper()) + if merge_video(facefusion.globals.target_path, facefusion.globals.output_video_resolution, facefusion.globals.output_video_fps): + logger.debug(wording.get('merging_video_succeed'), __name__.upper()) + else: + if is_process_stopping(): + return logger.error(wording.get('merging_video_failed'), __name__.upper()) return # handle audio if facefusion.globals.skip_audio: logger.info(wording.get('skipping_audio'), __name__.upper()) - move_temp(facefusion.globals.target_path, facefusion.globals.output_path) + move_temp(facefusion.globals.target_path, normed_output_path) else: if 'lip_syncer' in facefusion.globals.frame_processors: source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) - if source_audio_path and replace_audio(facefusion.globals.target_path, source_audio_path, facefusion.globals.output_path): - logger.info(wording.get('restoring_audio_succeed'), __name__.upper()) + if source_audio_path and replace_audio(facefusion.globals.target_path, source_audio_path, normed_output_path): + logger.debug(wording.get('restoring_audio_succeed'), __name__.upper()) else: + if is_process_stopping(): + return logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) - move_temp(facefusion.globals.target_path, facefusion.globals.output_path) + move_temp(facefusion.globals.target_path, 
normed_output_path) else: - if restore_audio(facefusion.globals.target_path, facefusion.globals.output_path, facefusion.globals.output_video_fps): - logger.info(wording.get('restoring_audio_succeed'), __name__.upper()) + if restore_audio(facefusion.globals.target_path, normed_output_path, facefusion.globals.output_video_fps): + logger.debug(wording.get('restoring_audio_succeed'), __name__.upper()) else: + if is_process_stopping(): + return logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) - move_temp(facefusion.globals.target_path, facefusion.globals.output_path) + move_temp(facefusion.globals.target_path, normed_output_path) # clear temp logger.debug(wording.get('clearing_temp'), __name__.upper()) clear_temp(facefusion.globals.target_path) # validate video - if is_video(facefusion.globals.output_path): - seconds = '{:.2f}'.format((time.time() - start_time)) + if is_video(normed_output_path): + seconds = '{:.2f}'.format((time() - start_time)) logger.info(wording.get('processing_video_succeed').format(seconds = seconds), __name__.upper()) + conditional_log_statistics() else: logger.error(wording.get('processing_video_failed'), __name__.upper()) + process_manager.end() + + +def is_process_stopping() -> bool: + if process_manager.is_stopping(): + process_manager.end() + logger.info(wording.get('processing_stopped'), __name__.upper()) + return process_manager.is_pending() diff --git a/facefusion/download.py b/facefusion/download.py index c26764f8..01d883b9 100644 --- a/facefusion/download.py +++ b/facefusion/download.py @@ -32,6 +32,9 @@ def conditional_download(download_directory_path : str, urls : List[str]) -> Non if is_file(download_file_path): current = os.path.getsize(download_file_path) progress.update(current - progress.n) + if not is_download_done(url, download_file_path): + os.remove(download_file_path) + conditional_download(download_directory_path, [ url] ) @lru_cache(maxsize = None) diff --git a/facefusion/execution.py b/facefusion/execution.py new file mode 100644 index 00000000..8a4f7a58 --- /dev/null +++ b/facefusion/execution.py @@ -0,0 +1,97 @@ +from typing import List, Any +from functools import lru_cache +import subprocess +import xml.etree.ElementTree as ElementTree +import onnxruntime + +from facefusion.typing import ExecutionDevice, ValueAndUnit + + +def encode_execution_providers(execution_providers : List[str]) -> List[str]: + return [ execution_provider.replace('ExecutionProvider', '').lower() for execution_provider in execution_providers ] + + +def decode_execution_providers(execution_providers: List[str]) -> List[str]: + available_execution_providers = onnxruntime.get_available_providers() + encoded_execution_providers = encode_execution_providers(available_execution_providers) + + return [ execution_provider for execution_provider, encoded_execution_provider in zip(available_execution_providers, encoded_execution_providers) if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers) ] + + +def apply_execution_provider_options(execution_providers: List[str]) -> List[Any]: + execution_providers_with_options : List[Any] = [] + + for execution_provider in execution_providers: + if execution_provider == 'CUDAExecutionProvider': + execution_providers_with_options.append((execution_provider, + { + 'cudnn_conv_algo_search': 'EXHAUSTIVE' if use_exhaustive() else 'DEFAULT' + })) + else: + execution_providers_with_options.append(execution_provider) + return execution_providers_with_options + + +def 
use_exhaustive() -> bool: + execution_devices = detect_static_execution_devices() + product_names = ('GeForce GTX 1630', 'GeForce GTX 1650', 'GeForce GTX 1660') + + return any(execution_device.get('product').get('name').startswith(product_names) for execution_device in execution_devices) + + +def run_nvidia_smi() -> subprocess.Popen[bytes]: + commands = [ 'nvidia-smi', '--query', '--xml-format' ] + return subprocess.Popen(commands, stdout = subprocess.PIPE) + + +@lru_cache(maxsize = None) +def detect_static_execution_devices() -> List[ExecutionDevice]: + return detect_execution_devices() + + +def detect_execution_devices() -> List[ExecutionDevice]: + execution_devices : List[ExecutionDevice] = [] + try: + output, _ = run_nvidia_smi().communicate() + root_element = ElementTree.fromstring(output) + except Exception: + root_element = ElementTree.Element('xml') + + for gpu_element in root_element.findall('gpu'): + execution_devices.append( + { + 'driver_version': root_element.find('driver_version').text, + 'framework': + { + 'name': 'CUDA', + 'version': root_element.find('cuda_version').text, + }, + 'product': + { + 'vendor': 'NVIDIA', + 'name': gpu_element.find('product_name').text.replace('NVIDIA ', ''), + 'architecture': gpu_element.find('product_architecture').text, + }, + 'video_memory': + { + 'total': create_value_and_unit(gpu_element.find('fb_memory_usage/total').text), + 'free': create_value_and_unit(gpu_element.find('fb_memory_usage/free').text) + }, + 'utilization': + { + 'gpu': create_value_and_unit(gpu_element.find('utilization/gpu_util').text), + 'memory': create_value_and_unit(gpu_element.find('utilization/memory_util').text) + } + }) + return execution_devices + + +def create_value_and_unit(text : str) -> ValueAndUnit: + value, unit = text.split() + value_and_unit : ValueAndUnit =\ + { + 'value': value, + 'unit': unit + } + + return value_and_unit diff --git a/facefusion/execution_helper.py b/facefusion/execution_helper.py deleted file mode 100644 index 9bdea337..00000000 --- a/facefusion/execution_helper.py +++ /dev/null @@ -1,37 +0,0 @@ -from typing import Any, List -import onnxruntime - - -def encode_execution_providers(execution_providers : List[str]) -> List[str]: - return [ execution_provider.replace('ExecutionProvider', '').lower() for execution_provider in execution_providers ] - - -def decode_execution_providers(execution_providers: List[str]) -> List[str]: - available_execution_providers = onnxruntime.get_available_providers() - encoded_execution_providers = encode_execution_providers(available_execution_providers) - - return [ execution_provider for execution_provider, encoded_execution_provider in zip(available_execution_providers, encoded_execution_providers) if any(execution_provider in encoded_execution_provider for execution_provider in execution_providers) ] - - -def apply_execution_provider_options(execution_providers: List[str]) -> List[Any]: - execution_providers_with_options : List[Any] = [] - - for execution_provider in execution_providers: - if execution_provider == 'CUDAExecutionProvider': - execution_providers_with_options.append((execution_provider, - { - 'cudnn_conv_algo_search': 'DEFAULT' - })) - else: - execution_providers_with_options.append(execution_provider) - return execution_providers_with_options - - -def map_torch_backend(execution_providers : List[str]) -> str: - if 'CoreMLExecutionProvider' in execution_providers: - return 'mps' - if 'CUDAExecutionProvider' in execution_providers or 'ROCMExecutionProvider' in execution_providers : - return 
'cuda' - if 'OpenVINOExecutionProvider' in execution_providers: - return 'mkl' - return 'cpu' diff --git a/facefusion/face_analyser.py b/facefusion/face_analyser.py index 3df55012..6c1811db 100644 --- a/facefusion/face_analyser.py +++ b/facefusion/face_analyser.py @@ -8,10 +8,10 @@ import facefusion.globals from facefusion.common_helper import get_first from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_translation, create_static_anchors, distance_to_face_landmark_5, distance_to_bounding_box, convert_face_landmark_68_to_5, apply_nms, categorize_age, categorize_gender from facefusion.face_store import get_static_faces, set_static_faces -from facefusion.execution_helper import apply_execution_provider_options +from facefusion.execution import apply_execution_provider_options from facefusion.download import conditional_download from facefusion.filesystem import resolve_relative_path -from facefusion.typing import VisionFrame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, BoundingBox, FaceLandmarkSet, FaceLandmark5, FaceLandmark68, Score, Embedding +from facefusion.typing import VisionFrame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, BoundingBox, FaceLandmarkSet, FaceLandmark5, FaceLandmark68, Score, FaceScoreSet, Embedding from facefusion.vision import resize_frame_resolution, unpack_resolution FACE_ANALYSER = None @@ -24,6 +24,11 @@ MODELS : ModelSet =\ 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/retinaface_10g.onnx', 'path': resolve_relative_path('../.assets/models/retinaface_10g.onnx') }, + 'face_detector_scrfd': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/scrfd_2.5g.onnx', + 'path': resolve_relative_path('../.assets/models/scrfd_2.5g.onnx') + }, 'face_detector_yoloface': { 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yoloface_8n.onnx', @@ -70,14 +75,21 @@ MODELS : ModelSet =\ def get_face_analyser() -> Any: global FACE_ANALYSER + face_detectors = {} with THREAD_LOCK: if FACE_ANALYSER is None: - if facefusion.globals.face_detector_model == 'retinaface': + if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]: face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) - if facefusion.globals.face_detector_model == 'yoloface': + face_detectors['retinaface'] = face_detector + if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]: + face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_scrfd').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + face_detectors['scrfd'] = face_detector + if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]: face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) - if facefusion.globals.face_detector_model == 'yunet': + face_detectors['yoloface'] = face_detector + if facefusion.globals.face_detector_model in [ 'yunet' ]: face_detector = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0)) + face_detectors['yunet'] = face_detector if facefusion.globals.face_recognizer_model == 'arcface_blendswap': face_recognizer = 
onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_blendswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_recognizer_model == 'arcface_inswapper': @@ -90,7 +102,7 @@ def get_face_analyser() -> Any: gender_age = onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) FACE_ANALYSER =\ { - 'face_detector': face_detector, + 'face_detectors': face_detectors, 'face_recognizer': face_recognizer, 'face_landmarker': face_landmarker, 'gender_age': gender_age @@ -110,6 +122,7 @@ def pre_check() -> bool: model_urls =\ [ MODELS.get('face_detector_retinaface').get('url'), + MODELS.get('face_detector_scrfd').get('url'), MODELS.get('face_detector_yoloface').get('url'), MODELS.get('face_detector_yunet').get('url'), MODELS.get('face_recognizer_arcface_blendswap').get('url'), @@ -124,22 +137,23 @@ def pre_check() -> bool: def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: - face_detector = get_face_analyser().get('face_detector') + face_detector = get_face_analyser().get('face_detectors').get('retinaface') face_detector_width, face_detector_height = unpack_resolution(face_detector_size) - temp_vision_frame = resize_frame_resolution(vision_frame, face_detector_width, face_detector_height) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] feature_strides = [ 8, 16, 32 ] feature_map_channel = 3 anchor_total = 2 bounding_box_list = [] - face_landmark5_list = [] + face_landmark_5_list = [] score_list = [] + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) with THREAD_SEMAPHORE: detections = face_detector.run(None, { - face_detector.get_inputs()[0].name: prepare_detect_frame(temp_vision_frame, face_detector_size) + face_detector.get_inputs()[0].name: detect_vision_frame }) for index, feature_stride in enumerate(feature_strides): keep_indices = numpy.where(detections[index] >= facefusion.globals.face_detector_score)[0] @@ -157,27 +171,70 @@ def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) bounding_box[2] * ratio_width, bounding_box[3] * ratio_height ])) - for face_landmark5 in distance_to_face_landmark_5(anchors, face_landmark_5_raw)[keep_indices]: - face_landmark5_list.append(face_landmark5 * [ ratio_width, ratio_height ]) + for face_landmark_5 in distance_to_face_landmark_5(anchors, face_landmark_5_raw)[keep_indices]: + face_landmark_5_list.append(face_landmark_5 * [ ratio_width, ratio_height ]) for score in detections[index][keep_indices]: score_list.append(score[0]) - return bounding_box_list, face_landmark5_list, score_list + return bounding_box_list, face_landmark_5_list, score_list -def detect_with_yoloface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: - face_detector = get_face_analyser().get('face_detector') +def detect_with_scrfd(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('scrfd') face_detector_width, face_detector_height = unpack_resolution(face_detector_size) - 
temp_vision_frame = resize_frame_resolution(vision_frame, face_detector_width, face_detector_height) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + feature_strides = [ 8, 16, 32 ] + feature_map_channel = 3 + anchor_total = 2 bounding_box_list = [] - face_landmark5_list = [] + face_landmark_5_list = [] score_list = [] + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) with THREAD_SEMAPHORE: detections = face_detector.run(None, { - face_detector.get_inputs()[0].name: prepare_detect_frame(temp_vision_frame, face_detector_size) + face_detector.get_inputs()[0].name: detect_vision_frame + }) + for index, feature_stride in enumerate(feature_strides): + keep_indices = numpy.where(detections[index] >= facefusion.globals.face_detector_score)[0] + if keep_indices.any(): + stride_height = face_detector_height // feature_stride + stride_width = face_detector_width // feature_stride + anchors = create_static_anchors(feature_stride, anchor_total, stride_height, stride_width) + bounding_box_raw = detections[index + feature_map_channel] * feature_stride + face_landmark_5_raw = detections[index + feature_map_channel * 2] * feature_stride + for bounding_box in distance_to_bounding_box(anchors, bounding_box_raw)[keep_indices]: + bounding_box_list.append(numpy.array( + [ + bounding_box[0] * ratio_width, + bounding_box[1] * ratio_height, + bounding_box[2] * ratio_width, + bounding_box[3] * ratio_height + ])) + for face_landmark_5 in distance_to_face_landmark_5(anchors, face_landmark_5_raw)[keep_indices]: + face_landmark_5_list.append(face_landmark_5 * [ ratio_width, ratio_height ]) + for score in detections[index][keep_indices]: + score_list.append(score[0]) + return bounding_box_list, face_landmark_5_list, score_list + + +def detect_with_yoloface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detectors').get('yoloface') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + detect_vision_frame = prepare_detect_frame(temp_vision_frame, face_detector_size) + with THREAD_SEMAPHORE: + detections = face_detector.run(None, + { + face_detector.get_inputs()[0].name: detect_vision_frame }) detections = numpy.squeeze(detections).T bounding_box_raw, score_raw, face_landmark_5_raw = numpy.split(detections, [ 4, 5 ], axis = 1) @@ -195,26 +252,26 @@ def detect_with_yoloface(vision_frame : VisionFrame, face_detector_size : str) - face_landmark_5_raw[:, 0::3] = (face_landmark_5_raw[:, 0::3]) * ratio_width face_landmark_5_raw[:, 1::3] = (face_landmark_5_raw[:, 1::3]) * ratio_height for face_landmark_5 in face_landmark_5_raw: - face_landmark5_list.append(numpy.array(face_landmark_5.reshape(-1, 3)[:, :2])) + face_landmark_5_list.append(numpy.array(face_landmark_5.reshape(-1, 3)[:, :2])) score_list = score_raw.ravel().tolist() - return bounding_box_list, face_landmark5_list, score_list + return bounding_box_list, face_landmark_5_list, score_list def 
detect_with_yunet(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: - face_detector = get_face_analyser().get('face_detector') + face_detector = get_face_analyser().get('face_detectors').get('yunet') face_detector_width, face_detector_height = unpack_resolution(face_detector_size) - temp_vision_frame = resize_frame_resolution(vision_frame, face_detector_width, face_detector_height) + temp_vision_frame = resize_frame_resolution(vision_frame, (face_detector_width, face_detector_height)) ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] bounding_box_list = [] - face_landmark5_list = [] + face_landmark_5_list = [] score_list = [] face_detector.setInputSize((temp_vision_frame.shape[1], temp_vision_frame.shape[0])) face_detector.setScoreThreshold(facefusion.globals.face_detector_score) with THREAD_SEMAPHORE: _, detections = face_detector.detect(temp_vision_frame) - if detections.any(): + if numpy.any(detections): for detection in detections: bounding_box_list.append(numpy.array( [ @@ -223,9 +280,9 @@ def detect_with_yunet(vision_frame : VisionFrame, face_detector_size : str) -> T (detection[0] + detection[2]) * ratio_width, (detection[1] + detection[3]) * ratio_height ])) - face_landmark5_list.append(detection[4:14].reshape((5, 2)) * [ ratio_width, ratio_height ]) + face_landmark_5_list.append(detection[4:14].reshape((5, 2)) * [ ratio_width, ratio_height ]) score_list.append(detection[14]) - return bounding_box_list, face_landmark5_list, score_list + return bounding_box_list, face_landmark_5_list, score_list def prepare_detect_frame(temp_vision_frame : VisionFrame, face_detector_size : str) -> VisionFrame: @@ -237,30 +294,41 @@ def prepare_detect_frame(temp_vision_frame : VisionFrame, face_detector_size : s return detect_vision_frame -def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBox], face_landmark5_list : List[FaceLandmark5], score_list : List[Score]) -> List[Face]: +def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBox], face_landmark_5_list : List[FaceLandmark5], score_list : List[Score]) -> List[Face]: faces = [] if facefusion.globals.face_detector_score > 0: sort_indices = numpy.argsort(-numpy.array(score_list)) bounding_box_list = [ bounding_box_list[index] for index in sort_indices ] - face_landmark5_list = [ face_landmark5_list[index] for index in sort_indices ] + face_landmark_5_list = [face_landmark_5_list[index] for index in sort_indices] score_list = [ score_list[index] for index in sort_indices ] - keep_indices = apply_nms(bounding_box_list, 0.4) + iou_threshold = 0.1 if facefusion.globals.face_detector_model == 'many' else 0.4 + keep_indices = apply_nms(bounding_box_list, iou_threshold) for index in keep_indices: bounding_box = bounding_box_list[index] - face_landmark_68 = detect_face_landmark_68(vision_frame, bounding_box) - landmark : FaceLandmarkSet =\ + face_landmark_5_68 = face_landmark_5_list[index] + face_landmark_68 = None + face_landmark_68_score = 0.0 + if facefusion.globals.face_landmarker_score > 0: + face_landmark_68, face_landmark_68_score = detect_face_landmark_68(vision_frame, bounding_box) + if face_landmark_68_score > facefusion.globals.face_landmarker_score: + face_landmark_5_68 = convert_face_landmark_68_to_5(face_landmark_68) + landmarks : FaceLandmarkSet =\ { - '5': face_landmark5_list[index], - '5/68': convert_face_landmark_68_to_5(face_landmark_68), + 
'5': face_landmark_5_list[index], + '5/68': face_landmark_5_68, '68': face_landmark_68 } - score = score_list[index] - embedding, normed_embedding = calc_embedding(vision_frame, landmark['5/68']) + scores : FaceScoreSet = \ + { + 'detector': score_list[index], + 'landmarker': face_landmark_68_score + } + embedding, normed_embedding = calc_embedding(vision_frame, landmarks.get('5/68')) gender, age = detect_gender_age(vision_frame, bounding_box) faces.append(Face( bounding_box = bounding_box, - landmark = landmark, - score = score, + landmarks = landmarks, + scores = scores, embedding = embedding, normed_embedding = normed_embedding, gender = gender, @@ -284,21 +352,27 @@ def calc_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandma return embedding, normed_embedding -def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> FaceLandmark68: +def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[FaceLandmark68, Score]: face_landmarker = get_face_analyser().get('face_landmarker') scale = 195 / numpy.subtract(bounding_box[2:], bounding_box[:2]).max() translation = (256 - numpy.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5 crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (256, 256)) + crop_vision_frame = cv2.cvtColor(crop_vision_frame, cv2.COLOR_RGB2Lab) + if numpy.mean(crop_vision_frame[:, :, 0]) < 30: + crop_vision_frame[:, :, 0] = cv2.createCLAHE(clipLimit = 2).apply(crop_vision_frame[:, :, 0]) + crop_vision_frame = cv2.cvtColor(crop_vision_frame, cv2.COLOR_Lab2RGB) crop_vision_frame = crop_vision_frame.transpose(2, 0, 1).astype(numpy.float32) / 255.0 - face_landmark_68 = face_landmarker.run(None, + face_landmark_68, face_heatmap = face_landmarker.run(None, { face_landmarker.get_inputs()[0].name: [ crop_vision_frame ] - })[0] + }) face_landmark_68 = face_landmark_68[:, :, :2][0] / 64 face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256 face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix)) face_landmark_68 = face_landmark_68.reshape(-1, 2) - return face_landmark_68 + face_landmark_68_score = numpy.amax(face_heatmap, axis = (2, 3)) + face_landmark_68_score = numpy.mean(face_landmark_68_score) + return face_landmark_68, face_landmark_68_score def detect_gender_age(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[int, int]: @@ -344,8 +418,8 @@ def get_average_face(vision_frames : List[VisionFrame], position : int = 0) -> O first_face = get_first(faces) average_face = Face( bounding_box = first_face.bounding_box, - landmark = first_face.landmark, - score = first_face.score, + landmarks = first_face.landmarks, + scores = first_face.scores, embedding = numpy.mean(embedding_list, axis = 0), normed_embedding = numpy.mean(normed_embedding_list, axis = 0), gender = first_face.gender, @@ -361,15 +435,32 @@ def get_many_faces(vision_frame : VisionFrame) -> List[Face]: if faces_cache: faces = faces_cache else: - if facefusion.globals.face_detector_model == 'retinaface': - bounding_box_list, face_landmark5_list, score_list = detect_with_retinaface(vision_frame, facefusion.globals.face_detector_size) - faces = create_faces(vision_frame, bounding_box_list, face_landmark5_list, score_list) - if facefusion.globals.face_detector_model == 'yoloface': - bounding_box_list, face_landmark5_list, score_list = detect_with_yoloface(vision_frame, facefusion.globals.face_detector_size) - faces = 
create_faces(vision_frame, bounding_box_list, face_landmark5_list, score_list) - if facefusion.globals.face_detector_model == 'yunet': - bounding_box_list, face_landmark5_list, score_list = detect_with_yunet(vision_frame, facefusion.globals.face_detector_size) - faces = create_faces(vision_frame, bounding_box_list, face_landmark5_list, score_list) + bounding_box_list = [] + face_landmark_5_list = [] + score_list = [] + + if facefusion.globals.face_detector_model in [ 'many', 'retinaface']: + bounding_box_list_retinaface, face_landmark_5_list_retinaface, score_list_retinaface = detect_with_retinaface(vision_frame, facefusion.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_retinaface) + face_landmark_5_list.extend(face_landmark_5_list_retinaface) + score_list.extend(score_list_retinaface) + if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]: + bounding_box_list_scrfd, face_landmark_5_list_scrfd, score_list_scrfd = detect_with_scrfd(vision_frame, facefusion.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_scrfd) + face_landmark_5_list.extend(face_landmark_5_list_scrfd) + score_list.extend(score_list_scrfd) + if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]: + bounding_box_list_yoloface, face_landmark_5_list_yoloface, score_list_yoloface = detect_with_yoloface(vision_frame, facefusion.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_yoloface) + face_landmark_5_list.extend(face_landmark_5_list_yoloface) + score_list.extend(score_list_yoloface) + if facefusion.globals.face_detector_model in [ 'yunet' ]: + bounding_box_list_yunet, face_landmark_5_list_yunet, score_list_yunet = detect_with_yunet(vision_frame, facefusion.globals.face_detector_size) + bounding_box_list.extend(bounding_box_list_yunet) + face_landmark_5_list.extend(face_landmark_5_list_yunet) + score_list.extend(score_list_yunet) + if bounding_box_list and face_landmark_5_list and score_list: + faces = create_faces(vision_frame, bounding_box_list, face_landmark_5_list, score_list) if faces: set_static_faces(vision_frame, faces) if facefusion.globals.face_analyser_order: @@ -422,9 +513,9 @@ def sort_by_order(faces : List[Face], order : FaceAnalyserOrder) -> List[Face]: if order == 'large-small': return sorted(faces, key = lambda face: (face.bounding_box[2] - face.bounding_box[0]) * (face.bounding_box[3] - face.bounding_box[1]), reverse = True) if order == 'best-worst': - return sorted(faces, key = lambda face: face.score, reverse = True) + return sorted(faces, key = lambda face: face.scores.get('detector'), reverse = True) if order == 'worst-best': - return sorted(faces, key = lambda face: face.score) + return sorted(faces, key = lambda face: face.scores.get('detector')) return faces diff --git a/facefusion/face_helper.py b/facefusion/face_helper.py index 2391eedb..b74c9e45 100644 --- a/facefusion/face_helper.py +++ b/facefusion/face_helper.py @@ -1,12 +1,12 @@ -from typing import Any, Dict, Tuple, List +from typing import Any, Tuple, List from cv2.typing import Size from functools import lru_cache import cv2 import numpy -from facefusion.typing import BoundingBox, FaceLandmark5, FaceLandmark68, VisionFrame, Mask, Matrix, Translation, Template, FaceAnalyserAge, FaceAnalyserGender +from facefusion.typing import BoundingBox, FaceLandmark5, FaceLandmark68, VisionFrame, Mask, Matrix, Translation, WarpTemplate, WarpTemplateSet, FaceAnalyserAge, FaceAnalyserGender -TEMPLATES : Dict[Template, numpy.ndarray[Any, Any]] =\ +WARP_TEMPLATES : 
WarpTemplateSet =\ { 'arcface_112_v1': numpy.array( [ @@ -43,16 +43,16 @@ TEMPLATES : Dict[Template, numpy.ndarray[Any, Any]] =\ } -def warp_face_by_face_landmark_5(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5, template : Template, crop_size : Size) -> Tuple[VisionFrame, Matrix]: - normed_template = TEMPLATES.get(template) * crop_size - affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] +def warp_face_by_face_landmark_5(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + normed_warp_template = WARP_TEMPLATES.get(warp_template) * crop_size + affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_warp_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA) return crop_vision_frame, affine_matrix def warp_face_by_bounding_box(temp_vision_frame : VisionFrame, bounding_box : BoundingBox, crop_size : Size) -> Tuple[VisionFrame, Matrix]: - source_points = numpy.array([[bounding_box[0], bounding_box[1]], [bounding_box[2], bounding_box[1]], [bounding_box[0], bounding_box[3]]], dtype = numpy.float32) - target_points = numpy.array([[ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ]], dtype = numpy.float32) + source_points = numpy.array([ [ bounding_box[0], bounding_box[1] ], [bounding_box[2], bounding_box[1] ], [ bounding_box[0], bounding_box[3] ] ], dtype = numpy.float32) + target_points = numpy.array([ [ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ] ], dtype = numpy.float32) affine_matrix = cv2.getAffineTransform(source_points, target_points) if bounding_box[2] - bounding_box[0] > crop_size[0] or bounding_box[3] - bounding_box[1] > crop_size[1]: interpolation_method = cv2.INTER_AREA @@ -63,7 +63,7 @@ def warp_face_by_bounding_box(temp_vision_frame : VisionFrame, bounding_box : Bo def warp_face_by_translation(temp_vision_frame : VisionFrame, translation : Translation, scale : float, crop_size : Size) -> Tuple[VisionFrame, Matrix]: - affine_matrix = numpy.array([[ scale, 0, translation[0] ], [ 0, scale, translation[1] ]]) + affine_matrix = numpy.array([ [ scale, 0, translation[0] ], [ 0, scale, translation[1] ] ]) crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size) return crop_vision_frame, affine_matrix @@ -89,7 +89,7 @@ def create_static_anchors(feature_stride : int, anchor_total : int, stride_heigh return anchors -def create_bounding_box_from_landmark(face_landmark_68 : FaceLandmark68) -> BoundingBox: +def create_bounding_box_from_face_landmark_68(face_landmark_68 : FaceLandmark68) -> BoundingBox: min_x, min_y = numpy.min(face_landmark_68, axis = 0) max_x, max_y = numpy.max(face_landmark_68, axis = 0) bounding_box = numpy.array([ min_x, min_y, max_x, max_y ]).astype(numpy.int16) @@ -113,12 +113,14 @@ def distance_to_face_landmark_5(points : numpy.ndarray[Any, Any], distance : num def convert_face_landmark_68_to_5(landmark_68 : FaceLandmark68) -> FaceLandmark5: - left_eye = numpy.mean(landmark_68[36:42], axis = 0) - right_eye = numpy.mean(landmark_68[42:48], axis = 0) - nose = landmark_68[30] - left_mouth_end = landmark_68[48] - right_mouth_end = landmark_68[54] - face_landmark_5 = numpy.array([ left_eye, right_eye, nose, left_mouth_end, right_mouth_end ]) + face_landmark_5 = numpy.array( + [ + 
numpy.mean(landmark_68[36:42], axis = 0), + numpy.mean(landmark_68[42:48], axis = 0), + landmark_68[30], + landmark_68[48], + landmark_68[54] + ]) return face_landmark_5 diff --git a/facefusion/face_masker.py b/facefusion/face_masker.py index 0e76fd8d..e71b2b09 100755 --- a/facefusion/face_masker.py +++ b/facefusion/face_masker.py @@ -8,7 +8,7 @@ import onnxruntime import facefusion.globals from facefusion.typing import FaceLandmark68, VisionFrame, Mask, Padding, FaceMaskRegion, ModelSet -from facefusion.execution_helper import apply_execution_provider_options +from facefusion.execution import apply_execution_provider_options from facefusion.filesystem import resolve_relative_path from facefusion.download import conditional_download diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index f6cf4071..55b9e4a0 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -1,57 +1,55 @@ from typing import List, Optional import subprocess +import filetype import facefusion.globals -from facefusion import logger +from facefusion import process_manager from facefusion.typing import OutputVideoPreset, Fps, AudioBuffer from facefusion.filesystem import get_temp_frames_pattern, get_temp_output_video_path def run_ffmpeg(args : List[str]) -> bool: - commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error' ] + commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'quiet' ] commands.extend(args) - try: - subprocess.run(commands, stderr = subprocess.PIPE, check = True) - return True - except subprocess.CalledProcessError as exception: - logger.debug(exception.stderr.decode().strip(), __name__.upper()) - return False + process = subprocess.Popen(commands, stdout = subprocess.PIPE) + + while process_manager.is_processing(): + try: + return process.wait(timeout = 0.5) == 0 + except subprocess.TimeoutExpired: + continue + return process.returncode == 0 def open_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]: - commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error' ] + commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'quiet' ] commands.extend(args) - return subprocess.Popen(commands, stdin = subprocess.PIPE, stdout = subprocess.PIPE) + return subprocess.Popen(commands, stdout = subprocess.PIPE) -def extract_frames(target_path : str, video_resolution : str, video_fps : Fps) -> bool: - temp_frame_compression = round(31 - (facefusion.globals.temp_frame_quality * 0.31)) +def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps) -> bool: trim_frame_start = facefusion.globals.trim_frame_start trim_frame_end = facefusion.globals.trim_frame_end temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') - commands = [ '-hwaccel', 'auto', '-i', target_path, '-q:v', str(temp_frame_compression), '-pix_fmt', 'rgb24' ] + commands = [ '-hwaccel', 'auto', '-i', target_path, '-q:v', '0' ] + if trim_frame_start is not None and trim_frame_end is not None: - commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',scale=' + str(video_resolution) + ',fps=' + str(video_fps) ]) + commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ':end_frame=' + str(trim_frame_end) + ',scale=' + str(temp_video_resolution) + ',fps=' + str(temp_video_fps) ]) elif trim_frame_start is not None: - commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',scale=' + str(video_resolution) + ',fps=' + str(video_fps) ]) + commands.extend([ '-vf', 'trim=start_frame=' + str(trim_frame_start) + ',scale=' + 
str(temp_video_resolution) + ',fps=' + str(temp_video_fps) ]) elif trim_frame_end is not None: - commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',scale=' + str(video_resolution) + ',fps=' + str(video_fps) ]) + commands.extend([ '-vf', 'trim=end_frame=' + str(trim_frame_end) + ',scale=' + str(temp_video_resolution) + ',fps=' + str(temp_video_fps) ]) else: - commands.extend([ '-vf', 'scale=' + str(video_resolution) + ',fps=' + str(video_fps) ]) + commands.extend([ '-vf', 'scale=' + str(temp_video_resolution) + ',fps=' + str(temp_video_fps) ]) commands.extend([ '-vsync', '0', temp_frames_pattern ]) return run_ffmpeg(commands) -def compress_image(output_path : str) -> bool: - output_image_compression = round(31 - (facefusion.globals.output_image_quality * 0.31)) - commands = [ '-hwaccel', 'auto', '-i', output_path, '-q:v', str(output_image_compression), '-y', output_path ] - return run_ffmpeg(commands) - - -def merge_video(target_path : str, video_resolution : str, video_fps : Fps) -> bool: +def merge_video(target_path : str, output_video_resolution : str, output_video_fps : Fps) -> bool: temp_output_video_path = get_temp_output_video_path(target_path) temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') - commands = [ '-hwaccel', 'auto', '-s', str(video_resolution), '-r', str(video_fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ] + commands = [ '-hwaccel', 'auto', '-s', str(output_video_resolution), '-r', str(output_video_fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ] + if facefusion.globals.output_video_encoder in [ 'libx264', 'libx265' ]: output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) commands.extend([ '-crf', str(output_video_compression), '-preset', facefusion.globals.output_video_preset ]) @@ -61,29 +59,46 @@ def merge_video(target_path : str, video_resolution : str, video_fps : Fps) -> b if facefusion.globals.output_video_encoder in [ 'h264_nvenc', 'hevc_nvenc' ]: output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) commands.extend([ '-cq', str(output_video_compression), '-preset', map_nvenc_preset(facefusion.globals.output_video_preset) ]) + if facefusion.globals.output_video_encoder in [ 'h264_amf', 'hevc_amf' ]: + output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) + commands.extend([ '-qp_i', str(output_video_compression), '-qp_p', str(output_video_compression), '-quality', map_amf_preset(facefusion.globals.output_video_preset) ]) commands.extend([ '-pix_fmt', 'yuv420p', '-colorspace', 'bt709', '-y', temp_output_video_path ]) return run_ffmpeg(commands) -def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]: - commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ] +def copy_image(target_path : str, output_path : str, temp_image_resolution : str) -> bool: + is_webp = filetype.guess_mime(target_path) == 'image/webp' + temp_image_compression = 100 if is_webp else 0 + commands = [ '-i', target_path, '-q:v', str(temp_image_compression), '-vf', 'scale=' + str(temp_image_resolution), '-y', output_path ] + return run_ffmpeg(commands) + + +def finalize_image(output_path : str, output_image_resolution : str) -> bool: + output_image_compression = round(31 - (facefusion.globals.output_image_quality * 0.31)) + commands = [ '-i', output_path, '-q:v', 
str(output_image_compression), '-vf', 'scale=' + str(output_image_resolution), '-y', output_path ] + return run_ffmpeg(commands) + + +def read_audio_buffer(target_path : str, sample_rate : int, total_channel : int) -> Optional[AudioBuffer]: + commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(total_channel), '-' ] process = open_ffmpeg(commands) - audio_buffer, error = process.communicate() + audio_buffer, _ = process.communicate() if process.returncode == 0: return audio_buffer return None -def restore_audio(target_path : str, output_path : str, video_fps : Fps) -> bool: +def restore_audio(target_path : str, output_path : str, output_video_fps : Fps) -> bool: trim_frame_start = facefusion.globals.trim_frame_start trim_frame_end = facefusion.globals.trim_frame_end temp_output_video_path = get_temp_output_video_path(target_path) commands = [ '-hwaccel', 'auto', '-i', temp_output_video_path ] + if trim_frame_start is not None: - start_time = trim_frame_start / video_fps + start_time = trim_frame_start / output_video_fps commands.extend([ '-ss', str(start_time) ]) if trim_frame_end is not None: - end_time = trim_frame_end / video_fps + end_time = trim_frame_end / output_video_fps commands.extend([ '-to', str(end_time) ]) commands.extend([ '-i', target_path, '-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest', '-y', output_path ]) return run_ffmpeg(commands) @@ -111,3 +126,13 @@ def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: if output_video_preset == 'veryslow': return 'p7' return None + + +def map_amf_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: + if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast' ]: + return 'speed' + if output_video_preset in [ 'faster', 'fast', 'medium' ]: + return 'balanced' + if output_video_preset in [ 'slow', 'slower', 'veryslow' ]: + return 'quality' + return None diff --git a/facefusion/filesystem.py b/facefusion/filesystem.py index a3320fd9..61009838 100644 --- a/facefusion/filesystem.py +++ b/facefusion/filesystem.py @@ -49,7 +49,7 @@ def clear_temp(target_path : str) -> None: temp_directory_path = get_temp_directory_path(target_path) parent_directory_path = os.path.dirname(temp_directory_path) if not facefusion.globals.keep_temp and is_directory(temp_directory_path): - shutil.rmtree(temp_directory_path) + shutil.rmtree(temp_directory_path, ignore_errors = True) if os.path.exists(parent_directory_path) and not os.listdir(parent_directory_path): os.rmdir(parent_directory_path) diff --git a/facefusion/globals.py b/facefusion/globals.py index b6324415..10b0d14b 100755 --- a/facefusion/globals.py +++ b/facefusion/globals.py @@ -24,6 +24,7 @@ face_analyser_gender : Optional[FaceAnalyserGender] = None face_detector_model : Optional[FaceDetectorModel] = None face_detector_size : Optional[str] = None face_detector_score : Optional[float] = None +face_landmarker_score : Optional[float] = None face_recognizer_model : Optional[FaceRecognizerModel] = None # face selector face_selector_mode : Optional[FaceSelectorMode] = None @@ -39,10 +40,10 @@ face_mask_regions : Optional[List[FaceMaskRegion]] = None trim_frame_start : Optional[int] = None trim_frame_end : Optional[int] = None temp_frame_format : Optional[TempFrameFormat] = None -temp_frame_quality : Optional[int] = None keep_temp : Optional[bool] = None # output creation output_image_quality : Optional[int] = None +output_image_resolution : Optional[str] = None output_video_encoder : 
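All of the encoder branches above derive their compression parameter from the same linear mapping of the 0-100 quality settings, and the new map_amf_preset collapses the nine x264-style presets into three AMF quality modes. A small sketch of that arithmetic follows; the helper names are illustrative and not part of the codebase.

```python
from typing import Optional

def video_quality_to_compression(output_video_quality : int) -> int:
    # used for -crf (libx264/libx265), -cq (nvenc) and -qp_i/-qp_p (amf): 100 -> 0, 0 -> 51
    return round(51 - (output_video_quality * 0.51))

def image_quality_to_compression(output_image_quality : int) -> int:
    # used for the -q:v scale of image output: 100 -> 0, 0 -> 31
    return round(31 - (output_image_quality * 0.31))

def amf_quality_mode(output_video_preset : str) -> Optional[str]:
    # same bucketing as map_amf_preset() in the diff above
    if output_video_preset in ('ultrafast', 'superfast', 'veryfast'):
        return 'speed'
    if output_video_preset in ('faster', 'fast', 'medium'):
        return 'balanced'
    if output_video_preset in ('slow', 'slower', 'veryslow'):
        return 'quality'
    return None

print(video_quality_to_compression(80), image_quality_to_compression(80), amf_quality_mode('slow'))
# 10 6 quality
```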
Optional[OutputVideoEncoder] = None output_video_preset : Optional[OutputVideoPreset] = None output_video_quality : Optional[int] = None diff --git a/facefusion/installer.py b/facefusion/installer.py index 326f1a62..b2904b87 100644 --- a/facefusion/installer.py +++ b/facefusion/installer.py @@ -9,26 +9,17 @@ from argparse import ArgumentParser, HelpFormatter from facefusion import metadata, wording -TORCH : Dict[str, str] =\ -{ - 'default': 'default', - 'cpu': 'cpu' -} ONNXRUNTIMES : Dict[str, Tuple[str, str]] = {} if platform.system().lower() == 'darwin': - ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.0') + ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.1') else: ONNXRUNTIMES['default'] = ('onnxruntime', '1.16.3') if platform.system().lower() == 'linux' or platform.system().lower() == 'windows': - TORCH['cuda-12.1'] = 'cu121' - TORCH['cuda-11.8'] = 'cu118' - ONNXRUNTIMES['cuda-12.1'] = ('onnxruntime-gpu', '1.17.0') + ONNXRUNTIMES['cuda-12.2'] = ('onnxruntime-gpu', '1.17.1') ONNXRUNTIMES['cuda-11.8'] = ('onnxruntime-gpu', '1.16.3') ONNXRUNTIMES['openvino'] = ('onnxruntime-openvino', '1.16.0') if platform.system().lower() == 'linux': - TORCH['rocm-5.4.2'] = 'rocm5.4.2' - TORCH['rocm-5.6'] = 'rocm5.6' ONNXRUNTIMES['rocm-5.4.2'] = ('onnxruntime-rocm', '1.16.3') ONNXRUNTIMES['rocm-5.6'] = ('onnxruntime-rocm', '1.16.3') if platform.system().lower() == 'windows': @@ -37,7 +28,6 @@ if platform.system().lower() == 'windows': def cli() -> None: program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 130)) - program.add_argument('--torch', help = wording.get('help.install_dependency').format(dependency = 'torch'), choices = TORCH.keys()) program.add_argument('--onnxruntime', help = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = ONNXRUNTIMES.keys()) program.add_argument('--skip-venv', help = wording.get('help.skip_venv'), action = 'store_true') program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') @@ -52,29 +42,21 @@ def run(program : ArgumentParser) -> None: os.environ['SYSTEM_VERSION_COMPAT'] = '0' if not args.skip_venv: os.environ['PIP_REQUIRE_VIRTUALENV'] = '1' - if args.torch and args.onnxruntime: + if args.onnxruntime: answers =\ { - 'torch': args.torch, 'onnxruntime': args.onnxruntime } else: answers = inquirer.prompt( [ - inquirer.List('torch', message = wording.get('help.install_dependency').format(dependency = 'torch'), choices = list(TORCH.keys())), inquirer.List('onnxruntime', message = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = list(ONNXRUNTIMES.keys())) ]) if answers: - torch = answers['torch'] - torch_wheel = TORCH[torch] onnxruntime = answers['onnxruntime'] onnxruntime_name, onnxruntime_version = ONNXRUNTIMES[onnxruntime] - subprocess.call([ 'pip', 'uninstall', 'torch', '-y', '-q' ]) - if torch_wheel == 'default': - subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--force-reinstall' ]) - else: - subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--extra-index-url', 'https://download.pytorch.org/whl/' + torch_wheel, '--force-reinstall' ]) + subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--force-reinstall' ]) if onnxruntime == 'rocm-5.4.2' or onnxruntime == 'rocm-5.6': if python_id in [ 'cp39', 'cp310', 'cp311' ]: rocm_version = onnxruntime.replace('-', '') diff --git a/facefusion/metadata.py b/facefusion/metadata.py index 9d2d88aa..d204858c 100644 --- 
a/facefusion/metadata.py
+++ b/facefusion/metadata.py
@@ -2,7 +2,7 @@ METADATA =\
 {
     'name': 'FaceFusion',
     'description': 'Next generation face swapper and enhancer',
-    'version': '2.3.0',
+    'version': '2.4.0',
     'license': 'MIT',
     'author': 'Henry Ruhs',
     'url': 'https://facefusion.io'
diff --git a/facefusion/normalizer.py b/facefusion/normalizer.py
index 354e5670..5324f7ec 100644
--- a/facefusion/normalizer.py
+++ b/facefusion/normalizer.py
@@ -1,25 +1,24 @@
 from typing import List, Optional
+import hashlib
 import os

-from facefusion.filesystem import is_file, is_directory
+import facefusion.globals
+from facefusion.filesystem import is_directory
 from facefusion.typing import Padding, Fps


-def normalize_output_path(source_paths : List[str], target_path : str, output_path : str) -> Optional[str]:
-    if is_file(target_path) and is_directory(output_path):
+def normalize_output_path(target_path : Optional[str], output_path : Optional[str]) -> Optional[str]:
+    if target_path and output_path:
         target_name, target_extension = os.path.splitext(os.path.basename(target_path))
-        if source_paths and is_file(source_paths[0]):
-            source_name, _ = os.path.splitext(os.path.basename(source_paths[0]))
-            return os.path.join(output_path, source_name + '-' + target_name + target_extension)
-        return os.path.join(output_path, target_name + target_extension)
-    if is_file(target_path) and output_path:
-        _, target_extension = os.path.splitext(os.path.basename(target_path))
+        if is_directory(output_path):
+            output_hash = hashlib.sha1(str(facefusion.globals.__dict__).encode('utf-8')).hexdigest()[:8]
+            output_name = target_name + '-' + output_hash
+            return os.path.join(output_path, output_name + target_extension)
         output_name, output_extension = os.path.splitext(os.path.basename(output_path))
         output_directory_path = os.path.dirname(output_path)
         if is_directory(output_directory_path) and output_extension:
             return os.path.join(output_directory_path, output_name + target_extension)
-        return None
-    return output_path
+    return None


 def normalize_padding(padding : Optional[List[int]]) -> Optional[Padding]:
diff --git a/facefusion/process_manager.py b/facefusion/process_manager.py
new file mode 100644
index 00000000..1983528a
--- /dev/null
+++ b/facefusion/process_manager.py
@@ -0,0 +1,45 @@
+from typing import Generator, List
+
+from facefusion.typing import QueuePayload, ProcessState
+
+PROCESS_STATE : ProcessState = 'pending'
+
+
+def get_process_state() -> ProcessState:
+    return PROCESS_STATE
+
+
+def set_process_state(process_state : ProcessState) -> None:
+    global PROCESS_STATE
+
+    PROCESS_STATE = process_state
+
+
+def is_processing() -> bool:
+    return get_process_state() == 'processing'
+
+
+def is_stopping() -> bool:
+    return get_process_state() == 'stopping'
+
+
+def is_pending() -> bool:
+    return get_process_state() == 'pending'
+
+
+def start() -> None:
+    set_process_state('processing')
+
+
+def stop() -> None:
+    set_process_state('stopping')
+
+
+def end() -> None:
+    set_process_state('pending')
+
+
+def manage(queue_payloads : List[QueuePayload]) -> Generator[QueuePayload, None, None]:
+    for query_payload in queue_payloads:
+        if is_processing():
+            yield query_payload
diff --git a/facefusion/processors/frame/choices.py b/facefusion/processors/frame/choices.py
index f0daeccf..28f511d8 100755
--- a/facefusion/processors/frame/choices.py
+++ b/facefusion/processors/frame/choices.py
@@ -3,10 +3,10 @@ from typing import List
 from facefusion.common_helper import create_int_range
 from facefusion.processors.frame.typings import
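The new facefusion/process_manager.py above is a minimal cooperative-cancellation switch: run_ffmpeg polls it while waiting on the subprocess, and the frame-processor loops pull their payloads through manage(), which stops yielding once the state leaves 'processing'. Below is a self-contained sketch of that pattern; it redefines the state and manage() locally instead of importing facefusion, so every name in it is illustrative.

```python
import threading
import time
from typing import Generator, List

PROCESS_STATE = 'pending'  # 'pending' | 'processing' | 'stopping'

def is_processing() -> bool:
    return PROCESS_STATE == 'processing'

def manage(queue_payloads : List[dict]) -> Generator[dict, None, None]:
    # stop yielding as soon as the shared state leaves 'processing'
    for queue_payload in queue_payloads:
        if is_processing():
            yield queue_payload

def worker(queue_payloads : List[dict]) -> None:
    for queue_payload in manage(queue_payloads):
        time.sleep(0.1)  # stand-in for processing one frame
        print('processed frame', queue_payload['frame_number'])

queue_payloads = [ { 'frame_number': frame_number, 'frame_path': f'/tmp/{frame_number:04d}.png' } for frame_number in range(100) ]
PROCESS_STATE = 'processing'
thread = threading.Thread(target = worker, args = (queue_payloads,))
thread.start()
time.sleep(0.35)
PROCESS_STATE = 'stopping'  # the worker drains after the current frame instead of finishing all 100
thread.join()
```

The generator checks the state per payload, so a stop request takes effect at the next frame boundary rather than killing work mid-frame.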
FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameEnhancerModel, LipSyncerModel -face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'landmark-5', 'landmark-68', 'face-mask', 'score', 'age', 'gender' ] +face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender' ] face_enhancer_models : List[FaceEnhancerModel] = [ 'codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'restoreformer_plus_plus' ] face_swapper_models : List[FaceSwapperModel] = [ 'blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256' ] -frame_enhancer_models : List[FrameEnhancerModel] = [ 'real_esrgan_x2plus', 'real_esrgan_x4plus', 'real_esrnet_x4plus' ] +frame_enhancer_models : List[FrameEnhancerModel] = [ 'lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'span_kendata_x4' ] lip_syncer_models : List[LipSyncerModel] = [ 'wav2lip_gan' ] face_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) diff --git a/facefusion/processors/frame/core.py b/facefusion/processors/frame/core.py index f09ba2ad..8b3f12b6 100644 --- a/facefusion/processors/frame/core.py +++ b/facefusion/processors/frame/core.py @@ -8,8 +8,8 @@ from typing import Any, List from tqdm import tqdm import facefusion.globals -from facefusion.typing import Process_Frames, QueuePayload -from facefusion.execution_helper import encode_execution_providers +from facefusion.typing import ProcessFrames, QueuePayload +from facefusion.execution import encode_execution_providers from facefusion import logger, wording FRAME_PROCESSORS_MODULES : List[ModuleType] = [] @@ -67,7 +67,7 @@ def clear_frame_processors_modules() -> None: FRAME_PROCESSORS_MODULES = [] -def multi_process_frames(source_paths : List[str], temp_frame_paths : List[str], process_frames : Process_Frames) -> None: +def multi_process_frames(source_paths : List[str], temp_frame_paths : List[str], process_frames : ProcessFrames) -> None: queue_payloads = create_queue_payloads(temp_frame_paths) with tqdm(total = len(queue_payloads), desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: progress.set_postfix( @@ -109,8 +109,8 @@ def create_queue_payloads(temp_frame_paths : List[str]) -> List[QueuePayload]: for frame_number, frame_path in enumerate(temp_frame_paths): frame_payload : QueuePayload =\ { - 'frame_number' : frame_number, - 'frame_path' : frame_path + 'frame_number': frame_number, + 'frame_path': frame_path } queue_payloads.append(frame_payload) return queue_payloads diff --git a/facefusion/processors/frame/modules/face_debugger.py b/facefusion/processors/frame/modules/face_debugger.py index de7c136a..2025c761 100755 --- a/facefusion/processors/frame/modules/face_debugger.py +++ b/facefusion/processors/frame/modules/face_debugger.py @@ -5,13 +5,13 @@ import numpy import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import config, wording +from facefusion import config, process_manager, wording from facefusion.face_analyser import get_one_face, get_many_faces, find_similar_faces, clear_face_analyser from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser from facefusion.face_helper import 
warp_face_by_face_landmark_5, categorize_age, categorize_gender from facefusion.face_store import get_reference_faces from facefusion.content_analyser import clear_content_analyser -from facefusion.typing import Face, VisionFrame, Update_Process, ProcessMode, QueuePayload +from facefusion.typing import Face, VisionFrame, UpdateProcess, ProcessMode, QueuePayload from facefusion.vision import read_image, read_static_image, write_image from facefusion.processors.frame.typings import FaceDebuggerInputs from facefusion.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices @@ -36,7 +36,7 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - program.add_argument('--face-debugger-items', help = wording.get('help.face_debugger_items').format(choices = ', '.join(frame_processors_choices.face_debugger_items)), default = config.get_str_list('frame_processors.face_debugger_items', 'landmark-5 face-mask'), choices = frame_processors_choices.face_debugger_items, nargs = '+', metavar = 'FACE_DEBUGGER_ITEMS') + program.add_argument('--face-debugger-items', help = wording.get('help.face_debugger_items').format(choices = ', '.join(frame_processors_choices.face_debugger_items)), default = config.get_str_list('frame_processors.face_debugger_items', 'face-landmark-5 face-mask'), choices = frame_processors_choices.face_debugger_items, nargs = '+', metavar = 'FACE_DEBUGGER_ITEMS') def apply_args(program : ArgumentParser) -> None: @@ -70,13 +70,15 @@ def post_process() -> None: def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: primary_color = (0, 0, 255) secondary_color = (0, 255, 0) + tertiary_color = (255, 255, 0) bounding_box = target_face.bounding_box.astype(numpy.int32) temp_vision_frame = temp_vision_frame.copy() + has_face_landmark_5_fallback = numpy.array_equal(target_face.landmarks.get('5'), target_face.landmarks.get('5/68')) if 'bounding-box' in frame_processors_globals.face_debugger_items: - cv2.rectangle(temp_vision_frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], bounding_box[3]), secondary_color, 2) + cv2.rectangle(temp_vision_frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], bounding_box[3]), primary_color, 2) if 'face-mask' in frame_processors_globals.face_debugger_items: - crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark['5/68'], 'arcface_128_v2', (512, 512)) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), 'arcface_128_v2', (512, 512)) inverse_matrix = cv2.invertAffineTransform(affine_matrix) temp_size = temp_vision_frame.shape[:2][::-1] crop_mask_list = [] @@ -95,30 +97,38 @@ def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFra inverse_vision_frame = cv2.threshold(inverse_vision_frame, 100, 255, cv2.THRESH_BINARY)[1] inverse_vision_frame[inverse_vision_frame > 0] = 255 inverse_contours = cv2.findContours(inverse_vision_frame, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0] - cv2.drawContours(temp_vision_frame, inverse_contours, -1, primary_color, 2) - if bounding_box[3] - bounding_box[1] > 60 and bounding_box[2] - bounding_box[0] > 60: + cv2.drawContours(temp_vision_frame, inverse_contours, -1, tertiary_color if has_face_landmark_5_fallback else secondary_color, 2) + if 'face-landmark-5' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('5')): + 
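The reworked debugger distinguishes the detector's raw 5-point landmarks (landmarks.get('5')) from the 68-to-5 projection (landmarks.get('5/68')); when the two arrays are equal, the 68-point landmarker appears to have been skipped or rejected, and the overlay switches to the tertiary colour. A self-contained sketch of that check and the circle overlay, using dummy arrays rather than the project's Face object:

```python
import cv2
import numpy

# dummy frame and landmarks standing in for temp_vision_frame and target_face.landmarks
temp_vision_frame = numpy.zeros((256, 256, 3), dtype = numpy.uint8)
face_landmark_5 = numpy.array([ [ 80, 100 ], [ 160, 100 ], [ 120, 140 ], [ 90, 180 ], [ 150, 180 ] ])
face_landmark_5_68 = face_landmark_5.copy()  # identical arrays -> the landmarker fell back to the detector points

has_face_landmark_5_fallback = numpy.array_equal(face_landmark_5, face_landmark_5_68)
secondary_color = (0, 255, 0)
tertiary_color = (255, 255, 0)
color = tertiary_color if has_face_landmark_5_fallback else secondary_color

for point in face_landmark_5_68.astype(numpy.int32):
    cv2.circle(temp_vision_frame, (int(point[0]), int(point[1])), 3, color, -1)

print('fallback detected:', has_face_landmark_5_fallback)
```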
face_landmark_5 = target_face.landmarks.get('5').astype(numpy.int32) + for index in range(face_landmark_5.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_5[index][0], face_landmark_5[index][1]), 3, primary_color, -1) + if 'face-landmark-5/68' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('5/68')): + face_landmark_5_68 = target_face.landmarks.get('5/68').astype(numpy.int32) + for index in range(face_landmark_5_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_5_68[index][0], face_landmark_5_68[index][1]), 3, tertiary_color if has_face_landmark_5_fallback else secondary_color, -1) + if 'face-landmark-68' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('68')): + face_landmark_68 = target_face.landmarks.get('68').astype(numpy.int32) + for index in range(face_landmark_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, secondary_color, -1) + if bounding_box[3] - bounding_box[1] > 50 and bounding_box[2] - bounding_box[0] > 50: top = bounding_box[1] - left = bounding_box[0] + 20 - if 'landmark-5' in frame_processors_globals.face_debugger_items: - face_landmark_5 = target_face.landmark['5/68'].astype(numpy.int32) - for index in range(face_landmark_5.shape[0]): - cv2.circle(temp_vision_frame, (face_landmark_5[index][0], face_landmark_5[index][1]), 3, primary_color, -1) - if 'landmark-68' in frame_processors_globals.face_debugger_items: - face_landmark_68 = target_face.landmark['68'].astype(numpy.int32) - for index in range(face_landmark_68.shape[0]): - cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, secondary_color, -1) - if 'score' in frame_processors_globals.face_debugger_items: - face_score_text = str(round(target_face.score, 2)) + left = bounding_box[0] - 20 + if 'face-detector-score' in frame_processors_globals.face_debugger_items: + face_score_text = str(round(target_face.scores.get('detector'), 2)) top = top + 20 - cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) + cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) + if 'face-landmarker-score' in frame_processors_globals.face_debugger_items: + face_score_text = str(round(target_face.scores.get('landmarker'), 2)) + top = top + 20 + cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, tertiary_color if has_face_landmark_5_fallback else secondary_color, 2) if 'age' in frame_processors_globals.face_debugger_items: face_age_text = categorize_age(target_face.age) top = top + 20 - cv2.putText(temp_vision_frame, face_age_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) + cv2.putText(temp_vision_frame, face_age_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) if 'gender' in frame_processors_globals.face_debugger_items: face_gender_text = categorize_gender(target_face.gender) top = top + 20 - cv2.putText(temp_vision_frame, face_gender_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) + cv2.putText(temp_vision_frame, face_gender_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, primary_color, 2) return temp_vision_frame @@ -127,50 +137,50 @@ def get_reference_frame(source_face : Face, target_face : Face, temp_vision_fram def process_frame(inputs : FaceDebuggerInputs) -> VisionFrame: - reference_faces = inputs['reference_faces'] - 
target_vision_frame = inputs['target_vision_frame'] + reference_faces = inputs.get('reference_faces') + target_vision_frame = inputs.get('target_vision_frame') - if 'reference' in facefusion.globals.face_selector_mode: - similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) - if similar_faces: - for similar_face in similar_faces: - target_vision_frame = debug_face(similar_face, target_vision_frame) - if 'one' in facefusion.globals.face_selector_mode: - target_face = get_one_face(target_vision_frame) - if target_face: - target_vision_frame = debug_face(target_face, target_vision_frame) - if 'many' in facefusion.globals.face_selector_mode: + if facefusion.globals.face_selector_mode == 'many': many_faces = get_many_faces(target_vision_frame) if many_faces: for target_face in many_faces: target_vision_frame = debug_face(target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = debug_face(target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = debug_face(similar_face, target_vision_frame) return target_vision_frame -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None - for queue_payload in queue_payloads: + for queue_payload in process_manager.manage(queue_payloads): target_vision_path = queue_payload['frame_path'] target_vision_frame = read_image(target_vision_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'reference_faces': reference_faces, 'target_vision_frame': target_vision_frame }) - write_image(target_vision_path, result_frame) + write_image(target_vision_path, output_vision_frame) update_progress() def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None target_vision_frame = read_static_image(target_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'reference_faces': reference_faces, 'target_vision_frame': target_vision_frame }) - write_image(output_path, result_frame) + write_image(output_path, output_vision_frame) def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: diff --git a/facefusion/processors/frame/modules/face_enhancer.py b/facefusion/processors/frame/modules/face_enhancer.py index c4ea5bb9..a469a4bf 100755 --- a/facefusion/processors/frame/modules/face_enhancer.py +++ b/facefusion/processors/frame/modules/face_enhancer.py @@ -7,14 +7,15 @@ import onnxruntime import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import config, logger, wording +from facefusion import config, process_manager, logger, wording from facefusion.face_analyser import get_many_faces, clear_face_analyser, find_similar_faces, get_one_face from facefusion.face_masker import create_static_box_mask, 
create_occlusion_mask, clear_face_occluder from facefusion.face_helper import warp_face_by_face_landmark_5, paste_back -from facefusion.execution_helper import apply_execution_provider_options +from facefusion.execution import apply_execution_provider_options from facefusion.content_analyser import clear_content_analyser from facefusion.face_store import get_reference_faces -from facefusion.typing import Face, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.normalizer import normalize_output_path +from facefusion.typing import Face, VisionFrame, UpdateProcess, ProcessMode, ModelSet, OptionsWithModel, QueuePayload from facefusion.common_helper import create_metavar from facefusion.filesystem import is_file, is_image, is_video, resolve_relative_path from facefusion.download import conditional_download, is_download_done @@ -150,7 +151,7 @@ def pre_process(mode : ProcessMode) -> bool: if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) return False - if mode == 'output' and not facefusion.globals.output_path: + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) return False return True @@ -169,7 +170,7 @@ def post_process() -> None: def enhance_face(target_face: Face, temp_vision_frame : VisionFrame) -> VisionFrame: model_template = get_options('model').get('template') model_size = get_options('model').get('size') - crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark['5/68'], model_template, model_size) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), model_template, model_size) box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, (0, 0, 0, 0)) crop_mask_list =\ [ @@ -230,50 +231,50 @@ def get_reference_frame(source_face : Face, target_face : Face, temp_vision_fram def process_frame(inputs : FaceEnhancerInputs) -> VisionFrame: - reference_faces = inputs['reference_faces'] - target_vision_frame = inputs['target_vision_frame'] + reference_faces = inputs.get('reference_faces') + target_vision_frame = inputs.get('target_vision_frame') - if 'reference' in facefusion.globals.face_selector_mode: - similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) - if similar_faces: - for similar_face in similar_faces: - target_vision_frame = enhance_face(similar_face, target_vision_frame) - if 'one' in facefusion.globals.face_selector_mode: - target_face = get_one_face(target_vision_frame) - if target_face: - target_vision_frame = enhance_face(target_face, target_vision_frame) - if 'many' in facefusion.globals.face_selector_mode: + if facefusion.globals.face_selector_mode == 'many': many_faces = get_many_faces(target_vision_frame) if many_faces: for target_face in many_faces: target_vision_frame = enhance_face(target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = enhance_face(target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'reference': 
+ similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = enhance_face(similar_face, target_vision_frame) return target_vision_frame -def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: +def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None - for queue_payload in queue_payloads: + for queue_payload in process_manager.manage(queue_payloads): target_vision_path = queue_payload['frame_path'] target_vision_frame = read_image(target_vision_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'reference_faces': reference_faces, 'target_vision_frame': target_vision_frame }) - write_image(target_vision_path, result_frame) + write_image(target_vision_path, output_vision_frame) update_progress() def process_image(source_path : str, target_path : str, output_path : str) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None target_vision_frame = read_static_image(target_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'reference_faces': reference_faces, 'target_vision_frame': target_vision_frame }) - write_image(output_path, result_frame) + write_image(output_path, output_vision_frame) def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: diff --git a/facefusion/processors/frame/modules/face_swapper.py b/facefusion/processors/frame/modules/face_swapper.py index dd974f81..a890cdde 100755 --- a/facefusion/processors/frame/modules/face_swapper.py +++ b/facefusion/processors/frame/modules/face_swapper.py @@ -8,14 +8,16 @@ from onnx import numpy_helper import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import config, logger, wording -from facefusion.execution_helper import apply_execution_provider_options +from facefusion import config, process_manager, logger, wording +from facefusion.execution import apply_execution_provider_options from facefusion.face_analyser import get_one_face, get_average_face, get_many_faces, find_similar_faces, clear_face_analyser from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser from facefusion.face_helper import warp_face_by_face_landmark_5, paste_back from facefusion.face_store import get_reference_faces +from facefusion.common_helper import extract_major_version from facefusion.content_analyser import clear_content_analyser -from facefusion.typing import Face, Embedding, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.normalizer import normalize_output_path +from facefusion.typing import Face, Embedding, VisionFrame, UpdateProcess, ProcessMode, ModelSet, OptionsWithModel, QueuePayload from facefusion.filesystem import is_file, is_image, has_image, is_video, filter_image_paths, resolve_relative_path from facefusion.download import conditional_download, is_download_done from facefusion.vision import read_image, read_static_image, read_static_images, write_image @@ -144,7 +146,8 @@ def set_options(key : Literal['model'], value : Any) -> None: def 
register_args(program : ArgumentParser) -> None: - if onnxruntime.__version__ == '1.17.0': + onnxruntime_version = extract_major_version(onnxruntime.__version__) + if onnxruntime_version > (1, 16): face_swapper_model_fallback = 'inswapper_128' else: face_swapper_model_fallback = 'inswapper_128_fp16' @@ -197,7 +200,7 @@ def pre_process(mode : ProcessMode) -> bool: if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) return False - if mode == 'output' and not facefusion.globals.output_path: + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) return False return True @@ -218,7 +221,7 @@ def post_process() -> None: def swap_face(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: model_template = get_options('model').get('template') model_size = get_options('model').get('size') - crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark['5/68'], model_template, model_size) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), model_template, model_size) crop_mask_list = [] if 'box' in facefusion.globals.face_mask_types: @@ -259,9 +262,9 @@ def prepare_source_frame(source_face : Face) -> VisionFrame: model_type = get_options('model').get('type') source_vision_frame = read_static_image(facefusion.globals.source_paths[0]) if model_type == 'blendswap': - source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmark['5/68'], 'arcface_112_v2', (112, 112)) + source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmarks.get('5/68'), 'arcface_112_v2', (112, 112)) if model_type == 'uniface': - source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmark['5/68'], 'ffhq_512', (256, 256)) + source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmarks.get('5/68'), 'ffhq_512', (256, 256)) source_vision_frame = source_vision_frame[:, :, ::-1] / 255.0 source_vision_frame = source_vision_frame.transpose(2, 0, 1) source_vision_frame = numpy.expand_dims(source_vision_frame, axis = 0).astype(numpy.float32) @@ -301,42 +304,42 @@ def get_reference_frame(source_face : Face, target_face : Face, temp_vision_fram def process_frame(inputs : FaceSwapperInputs) -> VisionFrame: - reference_faces = inputs['reference_faces'] - source_face = inputs['source_face'] - target_vision_frame = inputs['target_vision_frame'] + reference_faces = inputs.get('reference_faces') + source_face = inputs.get('source_face') + target_vision_frame = inputs.get('target_vision_frame') - if 'reference' in facefusion.globals.face_selector_mode: - similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) - if similar_faces: - for similar_face in similar_faces: - target_vision_frame = swap_face(source_face, similar_face, target_vision_frame) - if 'one' in facefusion.globals.face_selector_mode: - target_face = get_one_face(target_vision_frame) - if target_face: - target_vision_frame = swap_face(source_face, target_face, target_vision_frame) - if 'many' in 
facefusion.globals.face_selector_mode: + if facefusion.globals.face_selector_mode == 'many': many_faces = get_many_faces(target_vision_frame) if many_faces: for target_face in many_faces: target_vision_frame = swap_face(source_face, target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = swap_face(source_face, target_face, target_vision_frame) + if facefusion.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = swap_face(source_face, similar_face, target_vision_frame) return target_vision_frame -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None source_frames = read_static_images(source_paths) source_face = get_average_face(source_frames) - for queue_payload in queue_payloads: + for queue_payload in process_manager.manage(queue_payloads): target_vision_path = queue_payload['frame_path'] target_vision_frame = read_image(target_vision_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'reference_faces': reference_faces, 'source_face': source_face, 'target_vision_frame': target_vision_frame }) - write_image(target_vision_path, result_frame) + write_image(target_vision_path, output_vision_frame) update_progress() @@ -345,13 +348,13 @@ def process_image(source_paths : List[str], target_path : str, output_path : str source_frames = read_static_images(source_paths) source_face = get_average_face(source_frames) target_vision_frame = read_static_image(target_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'reference_faces': reference_faces, 'source_face': source_face, 'target_vision_frame': target_vision_frame }) - write_image(output_path, result_frame) + write_image(output_path, output_vision_frame) def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: diff --git a/facefusion/processors/frame/modules/frame_enhancer.py b/facefusion/processors/frame/modules/frame_enhancer.py index bf85b61d..7209207e 100644 --- a/facefusion/processors/frame/modules/frame_enhancer.py +++ b/facefusion/processors/frame/modules/frame_enhancer.py @@ -2,46 +2,63 @@ from typing import Any, List, Literal, Optional from argparse import ArgumentParser import threading import cv2 -from basicsr.archs.rrdbnet_arch import RRDBNet -from realesrgan import RealESRGANer +import numpy +import onnxruntime import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import config, logger, wording +from facefusion import config, process_manager, logger, wording from facefusion.face_analyser import clear_face_analyser from facefusion.content_analyser import clear_content_analyser -from facefusion.typing import Face, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.execution import apply_execution_provider_options +from facefusion.normalizer import normalize_output_path +from facefusion.typing import Face, VisionFrame, UpdateProcess, ProcessMode, ModelSet, 
OptionsWithModel, QueuePayload from facefusion.common_helper import create_metavar -from facefusion.execution_helper import map_torch_backend -from facefusion.filesystem import is_file, resolve_relative_path +from facefusion.filesystem import is_file, resolve_relative_path, is_image, is_video from facefusion.download import conditional_download, is_download_done -from facefusion.vision import read_image, read_static_image, write_image +from facefusion.vision import read_image, read_static_image, write_image, merge_tile_frames, create_tile_frames from facefusion.processors.frame.typings import FrameEnhancerInputs from facefusion.processors.frame import globals as frame_processors_globals from facefusion.processors.frame import choices as frame_processors_choices FRAME_PROCESSOR = None -THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore() THREAD_LOCK : threading.Lock = threading.Lock() NAME = __name__.upper() MODELS : ModelSet =\ { - 'real_esrgan_x2plus': + 'lsdir_x4': { - 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x2plus.pth', - 'path': resolve_relative_path('../.assets/models/real_esrgan_x2plus.pth'), - 'scale': 2 - }, - 'real_esrgan_x4plus': - { - 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x4plus.pth', - 'path': resolve_relative_path('../.assets/models/real_esrgan_x4plus.pth'), + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/lsdir_x4.onnx', + 'path': resolve_relative_path('../.assets/models/lsdir_x4.onnx'), + 'size': (128, 8, 2), 'scale': 4 }, - 'real_esrnet_x4plus': + 'nomos8k_sc_x4': { - 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrnet_x4plus.pth', - 'path': resolve_relative_path('../.assets/models/real_esrnet_x4plus.pth'), + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/nomos8k_sc_x4.onnx', + 'path': resolve_relative_path('../.assets/models/nomos8k_sc_x4.onnx'), + 'size': (128, 8, 2), + 'scale': 4 + }, + 'real_esrgan_x4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x4.onnx', + 'path': resolve_relative_path('../.assets/models/real_esrgan_x4.onnx'), + 'size': (128, 8, 2), + 'scale': 4 + }, + 'real_esrgan_x4_fp16': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x4_fp16.onnx', + 'path': resolve_relative_path('../.assets/models/real_esrgan_x4_fp16.onnx'), + 'size': (128, 8, 2), + 'scale': 4 + }, + 'span_kendata_x4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/span_kendata_x4.onnx', + 'path': resolve_relative_path('../.assets/models/span_kendata_x4.onnx'), + 'size': (128, 8, 2), 'scale': 4 } } @@ -54,17 +71,7 @@ def get_frame_processor() -> Any: with THREAD_LOCK: if FRAME_PROCESSOR is None: model_path = get_options('model').get('path') - model_scale = get_options('model').get('scale') - FRAME_PROCESSOR = RealESRGANer( - model_path = model_path, - model = RRDBNet( - num_in_ch = 3, - num_out_ch = 3, - scale = model_scale - ), - device = map_torch_backend(facefusion.globals.execution_providers), - scale = model_scale - ) + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) return FRAME_PROCESSOR @@ -92,7 +99,7 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - 
program.add_argument('--frame-enhancer-model', help = wording.get('help.frame_enhancer_model'), default = config.get_str_value('frame_processors.frame_enhancer_model', 'real_esrgan_x2plus'), choices = frame_processors_choices.frame_enhancer_models) + program.add_argument('--frame-enhancer-model', help = wording.get('help.frame_enhancer_model'), default = config.get_str_value('frame_processors.frame_enhancer_model', 'span_kendata_x4'), choices = frame_processors_choices.frame_enhancer_models) program.add_argument('--frame-enhancer-blend', help = wording.get('help.frame_enhancer_blend'), type = int, default = config.get_int_value('frame_processors.frame_enhancer_blend', '80'), choices = frame_processors_choices.frame_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.frame_enhancer_blend_range)) @@ -123,7 +130,10 @@ def post_check() -> bool: def pre_process(mode : ProcessMode) -> bool: - if mode == 'output' and not facefusion.globals.output_path: + if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) return False return True @@ -139,12 +149,36 @@ def post_process() -> None: def enhance_frame(temp_vision_frame : VisionFrame) -> VisionFrame: - with THREAD_SEMAPHORE: - paste_vision_frame, _ = get_frame_processor().enhance(temp_vision_frame) - temp_vision_frame = blend_frame(temp_vision_frame, paste_vision_frame) + frame_processor = get_frame_processor() + size = get_options('model').get('size') + scale = get_options('model').get('scale') + temp_height, temp_width = temp_vision_frame.shape[:2] + tile_vision_frames, pad_width, pad_height = create_tile_frames(temp_vision_frame, size) + + for index, tile_vision_frame in enumerate(tile_vision_frames): + tile_vision_frame = frame_processor.run(None, + { + frame_processor.get_inputs()[0].name : prepare_tile_frame(tile_vision_frame) + })[0] + tile_vision_frames[index] = normalize_tile_frame(tile_vision_frame) + merge_vision_frame = merge_tile_frames(tile_vision_frames, temp_width * scale, temp_height * scale, pad_width * scale, pad_height * scale, (size[0] * scale, size[1] * scale, size[2] * scale)) + temp_vision_frame = blend_frame(temp_vision_frame, merge_vision_frame) return temp_vision_frame +def prepare_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: + vision_tile_frame = numpy.expand_dims(vision_tile_frame[:,:,::-1], axis = 0) + vision_tile_frame = vision_tile_frame.transpose(0, 3, 1, 2) + vision_tile_frame = vision_tile_frame.astype(numpy.float32) / 255 + return vision_tile_frame + + +def normalize_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: + vision_tile_frame = vision_tile_frame.transpose(0, 2, 3, 1).squeeze(0) * 255 + vision_tile_frame = vision_tile_frame.clip(0, 255).astype(numpy.uint8)[:,:,::-1] + return vision_tile_frame + + def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: frame_enhancer_blend = 1 - (frame_processors_globals.frame_enhancer_blend / 100) temp_vision_frame = cv2.resize(temp_vision_frame, (paste_vision_frame.shape[1], paste_vision_frame.shape[0])) @@ -157,29 +191,29 @@ def get_reference_frame(source_face : Face, target_face 
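The rewritten frame enhancer upscales through fixed-size tiles: each tile is converted BGR to RGB, moved to NCHW float32 in [0, 1], run through the ONNX model, then mapped back. The round trip below substitutes a nearest-neighbour 4x "model" for onnxruntime, just to show that the layout transforms invert cleanly; create_tile_frames and merge_tile_frames live in facefusion.vision and are not reproduced here.

```python
import numpy

def prepare_tile_frame(vision_tile_frame):
    # HWC uint8 BGR -> NCHW float32 RGB in [0, 1], matching prepare_tile_frame in the diff above
    vision_tile_frame = numpy.expand_dims(vision_tile_frame[:, :, ::-1], axis = 0)
    vision_tile_frame = vision_tile_frame.transpose(0, 3, 1, 2)
    return vision_tile_frame.astype(numpy.float32) / 255

def normalize_tile_frame(vision_tile_frame):
    # NCHW float32 -> HWC uint8 BGR, matching normalize_tile_frame in the diff above
    vision_tile_frame = vision_tile_frame.transpose(0, 2, 3, 1).squeeze(0) * 255
    return vision_tile_frame.clip(0, 255).astype(numpy.uint8)[:, :, ::-1]

def dummy_x4_model(input_tensor):
    # stand-in for frame_processor.run(): nearest-neighbour 4x upscale in NCHW layout
    return input_tensor.repeat(4, axis = 2).repeat(4, axis = 3)

tile = numpy.random.randint(0, 255, (128, 128, 3), dtype = numpy.uint8)
output_tile = normalize_tile_frame(dummy_x4_model(prepare_tile_frame(tile)))
print(tile.shape, '->', output_tile.shape)  # (128, 128, 3) -> (512, 512, 3)
```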
: Face, temp_vision_fram def process_frame(inputs : FrameEnhancerInputs) -> VisionFrame: - target_vision_frame = inputs['target_vision_frame'] + target_vision_frame = inputs.get('target_vision_frame') return enhance_frame(target_vision_frame) -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: - for queue_payload in queue_payloads: +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: + for queue_payload in process_manager.manage(queue_payloads): target_vision_path = queue_payload['frame_path'] target_vision_frame = read_image(target_vision_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'target_vision_frame': target_vision_frame }) - write_image(target_vision_path, result_frame) + write_image(target_vision_path, output_vision_frame) update_progress() def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: target_vision_frame = read_static_image(target_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'target_vision_frame': target_vision_frame }) - write_image(output_path, result_frame) + write_image(output_path, output_vision_frame) def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: diff --git a/facefusion/processors/frame/modules/lip_syncer.py b/facefusion/processors/frame/modules/lip_syncer.py index 39fb371c..33c27af8 100755 --- a/facefusion/processors/frame/modules/lip_syncer.py +++ b/facefusion/processors/frame/modules/lip_syncer.py @@ -7,17 +7,18 @@ import onnxruntime import facefusion.globals import facefusion.processors.frame.core as frame_processors -from facefusion import config, logger, wording -from facefusion.execution_helper import apply_execution_provider_options +from facefusion import config, process_manager, logger, wording +from facefusion.execution import apply_execution_provider_options from facefusion.face_analyser import get_one_face, get_many_faces, find_similar_faces, clear_face_analyser from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_mouth_mask, clear_face_occluder, clear_face_parser -from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_bounding_box, paste_back, create_bounding_box_from_landmark +from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_bounding_box, paste_back, create_bounding_box_from_face_landmark_68 from facefusion.face_store import get_reference_faces from facefusion.content_analyser import clear_content_analyser -from facefusion.typing import Face, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, AudioFrame, QueuePayload +from facefusion.normalizer import normalize_output_path +from facefusion.typing import Face, VisionFrame, UpdateProcess, ProcessMode, ModelSet, OptionsWithModel, AudioFrame, QueuePayload from facefusion.filesystem import is_file, has_audio, resolve_relative_path from facefusion.download import conditional_download, is_download_done -from facefusion.audio import read_static_audio, get_audio_frame +from facefusion.audio import read_static_audio, get_audio_frame, create_empty_audio_frame from facefusion.filesystem import is_image, is_video, filter_audio_paths from facefusion.common_helper import get_first from facefusion.vision import read_image, write_image, read_static_image @@ -109,7 +110,7 @@ def pre_process(mode : ProcessMode) -> bool: if mode in [ 
'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) return False - if mode == 'output' and not facefusion.globals.output_path: + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) return False return True @@ -129,23 +130,24 @@ def post_process() -> None: def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_frame : VisionFrame) -> VisionFrame: frame_processor = get_frame_processor() + crop_mask_list = [] temp_audio_frame = prepare_audio_frame(temp_audio_frame) - crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark['5/68'], 'ffhq_512', (512, 512)) - face_landmark_68 = cv2.transform(target_face.landmark['68'].reshape(1, -1, 2), affine_matrix).reshape(-1, 2) - bounding_box = create_bounding_box_from_landmark(face_landmark_68) - bounding_box[1] -= numpy.abs(bounding_box[3] - bounding_box[1]) * 0.125 - mouth_mask = create_mouth_mask(face_landmark_68) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), 'ffhq_512', (512, 512)) + if numpy.any(target_face.landmarks.get('68')): + face_landmark_68 = cv2.transform(target_face.landmarks.get('68').reshape(1, -1, 2), affine_matrix).reshape(-1, 2) + bounding_box = create_bounding_box_from_face_landmark_68(face_landmark_68) + bounding_box[1] -= numpy.abs(bounding_box[3] - bounding_box[1]) * 0.125 + mouth_mask = create_mouth_mask(face_landmark_68) + crop_mask_list.append(mouth_mask) + else: + bounding_box = target_face.bounding_box box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, facefusion.globals.face_mask_padding) - crop_mask_list =\ - [ - mouth_mask, - box_mask - ] + crop_mask_list.append(box_mask) if 'occlusion' in facefusion.globals.face_mask_types: occlusion_mask = create_occlusion_mask(crop_vision_frame) crop_mask_list.append(occlusion_mask) - close_vision_frame, closeup_matrix = warp_face_by_bounding_box(crop_vision_frame, bounding_box, (96, 96)) + close_vision_frame, close_matrix = warp_face_by_bounding_box(crop_vision_frame, bounding_box, (96, 96)) close_vision_frame = prepare_crop_frame(close_vision_frame) close_vision_frame = frame_processor.run(None, { @@ -153,7 +155,7 @@ def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_fram 'target': close_vision_frame })[0] crop_vision_frame = normalize_crop_frame(close_vision_frame) - crop_vision_frame = cv2.warpAffine(crop_vision_frame, cv2.invertAffineTransform(closeup_matrix), (512, 512), borderMode = cv2.BORDER_REPLICATE) + crop_vision_frame = cv2.warpAffine(crop_vision_frame, cv2.invertAffineTransform(close_matrix), (512, 512), borderMode = cv2.BORDER_REPLICATE) crop_mask = numpy.minimum.reduce(crop_mask_list) paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) return paste_vision_frame @@ -188,60 +190,59 @@ def get_reference_frame(source_face : Face, target_face : Face, temp_vision_fram def process_frame(inputs : LipSyncerInputs) -> VisionFrame: - reference_faces = inputs['reference_faces'] - source_audio_frame = inputs['source_audio_frame'] - target_vision_frame = inputs['target_vision_frame'] - 
is_source_audio_frame = isinstance(source_audio_frame, numpy.ndarray) and source_audio_frame.any() + reference_faces = inputs.get('reference_faces') + source_audio_frame = inputs.get('source_audio_frame') + target_vision_frame = inputs.get('target_vision_frame') - if 'reference' in facefusion.globals.face_selector_mode: - similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) - if similar_faces and is_source_audio_frame: - for similar_face in similar_faces: - target_vision_frame = sync_lip(similar_face, source_audio_frame, target_vision_frame) - if 'one' in facefusion.globals.face_selector_mode: - target_face = get_one_face(target_vision_frame) - if target_face and is_source_audio_frame: - target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) - if 'many' in facefusion.globals.face_selector_mode: + if facefusion.globals.face_selector_mode == 'many': many_faces = get_many_faces(target_vision_frame) - if many_faces and is_source_audio_frame: + if many_faces: for target_face in many_faces: target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) + if facefusion.globals.face_selector_mode == 'one': + target_face = get_one_face(target_vision_frame) + if target_face: + target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) + if facefusion.globals.face_selector_mode == 'reference': + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces: + for similar_face in similar_faces: + target_vision_frame = sync_lip(similar_face, source_audio_frame, target_vision_frame) return target_vision_frame -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None source_audio_path = get_first(filter_audio_paths(source_paths)) - target_video_fps = facefusion.globals.output_video_fps - for queue_payload in queue_payloads: + for queue_payload in process_manager.manage(queue_payloads): frame_number = queue_payload['frame_number'] target_vision_path = queue_payload['frame_path'] - source_audio_frame = get_audio_frame(source_audio_path, target_video_fps, frame_number) + source_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, frame_number) + if not numpy.any(source_audio_frame): + source_audio_frame = create_empty_audio_frame() target_vision_frame = read_image(target_vision_path) - result_frame = process_frame( + output_vision_frame = process_frame( { 'reference_faces': reference_faces, 'source_audio_frame': source_audio_frame, 'target_vision_frame': target_vision_frame }) - write_image(target_vision_path, result_frame) + write_image(target_vision_path, output_vision_frame) update_progress() def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None - source_audio_path = get_first(filter_audio_paths(source_paths)) - source_audio_frame = get_audio_frame(source_audio_path, 25) + source_audio_frame = create_empty_audio_frame() target_vision_frame = read_static_image(target_path) - result_frame = process_frame( + output_vision_frame = 
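Aside: the silent-frame fallback that process_frames now applies before calling process_frame boils down to the pattern below. This is a sketch for illustration only; the array shape and dtype are assumptions, not taken from the audio module.

# Sketch (not the project's helper): substitute a silent audio frame when none
# is available for a frame number, so sync_lip() always receives a valid array.
from typing import Optional
import numpy

def pick_audio_frame(audio_frame : Optional[numpy.ndarray], shape : tuple = (80, 16)) -> numpy.ndarray:
    # an all-zero or missing frame counts as "no audio" for this frame number
    if audio_frame is not None and numpy.any(audio_frame):
        return audio_frame
    # assumed spectrogram slice shape; the real helper defines its own
    return numpy.zeros(shape, dtype = numpy.int16)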
process_frame( { 'reference_faces': reference_faces, 'source_audio_frame': source_audio_frame, 'target_vision_frame': target_vision_frame }) - write_image(output_path, result_frame) + write_image(output_path, output_vision_frame) def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: diff --git a/facefusion/processors/frame/typings.py b/facefusion/processors/frame/typings.py index e7a93fd3..c7de5ef1 100644 --- a/facefusion/processors/frame/typings.py +++ b/facefusion/processors/frame/typings.py @@ -2,10 +2,10 @@ from typing import Literal, TypedDict from facefusion.typing import Face, FaceSet, AudioFrame, VisionFrame -FaceDebuggerItem = Literal['bounding-box', 'landmark-5', 'landmark-68', 'face-mask', 'score', 'age', 'gender'] +FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender'] FaceEnhancerModel = Literal['codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'restoreformer_plus_plus'] FaceSwapperModel = Literal['blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256'] -FrameEnhancerModel = Literal['real_esrgan_x2plus', 'real_esrgan_x4plus', 'real_esrnet_x4plus'] +FrameEnhancerModel = Literal['lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'span_kendata_x4'] LipSyncerModel = Literal['wav2lip_gan'] FaceDebuggerInputs = TypedDict('FaceDebuggerInputs', diff --git a/facefusion/statistics.py b/facefusion/statistics.py new file mode 100644 index 00000000..f67c32b9 --- /dev/null +++ b/facefusion/statistics.py @@ -0,0 +1,51 @@ +from typing import Any, Dict +import numpy + +import facefusion.globals +from facefusion.face_store import FACE_STORE +from facefusion.typing import FaceSet +from facefusion import logger + + +def create_statistics(static_faces : FaceSet) -> Dict[str, Any]: + face_detector_score_list = [] + face_landmarker_score_list = [] + statistics =\ + { + 'min_face_detector_score': 0, + 'min_face_landmarker_score': 0, + 'max_face_detector_score': 0, + 'max_face_landmarker_score': 0, + 'average_face_detector_score': 0, + 'average_face_landmarker_score': 0, + 'total_face_landmark_5_fallbacks': 0, + 'total_frames_with_faces': 0, + 'total_faces': 0 + } + + for faces in static_faces.values(): + statistics['total_frames_with_faces'] = statistics.get('total_frames_with_faces') + 1 + for face in faces: + statistics['total_faces'] = statistics.get('total_faces') + 1 + face_detector_score_list.append(face.scores.get('detector')) + face_landmarker_score_list.append(face.scores.get('landmarker')) + if numpy.array_equal(face.landmarks.get('5'), face.landmarks.get('5/68')): + statistics['total_face_landmark_5_fallbacks'] = statistics.get('total_face_landmark_5_fallbacks') + 1 + + if face_detector_score_list: + statistics['min_face_detector_score'] = round(min(face_detector_score_list), 2) + statistics['max_face_detector_score'] = round(max(face_detector_score_list), 2) + statistics['average_face_detector_score'] = round(numpy.mean(face_detector_score_list), 2) + if face_landmarker_score_list: + statistics['min_face_landmarker_score'] = round(min(face_landmarker_score_list), 2) + statistics['max_face_landmarker_score'] = round(max(face_landmarker_score_list), 2) + statistics['average_face_landmarker_score'] = round(numpy.mean(face_landmarker_score_list), 2) + return statistics + + +def conditional_log_statistics() -> None: + if 
facefusion.globals.log_level == 'debug': + statistics = create_statistics(FACE_STORE.get('static_faces')) + + for name, value in statistics.items(): + logger.debug(str(name) + ': ' + str(value), __name__.upper()) diff --git a/facefusion/typing.py b/facefusion/typing.py index cd302420..e6aefc22 100755 --- a/facefusion/typing.py +++ b/facefusion/typing.py @@ -12,12 +12,17 @@ FaceLandmarkSet = TypedDict('FaceLandmarkSet', '68' : FaceLandmark68 # type: ignore[valid-type] }) Score = float +FaceScoreSet = TypedDict('FaceScoreSet', +{ + 'detector' : Score, + 'landmarker' : Score +}) Embedding = numpy.ndarray[Any, Any] Face = namedtuple('Face', [ 'bounding_box', - 'landmark', - 'score', + 'landmarks', + 'scores', 'embedding', 'normed_embedding', 'gender', @@ -29,6 +34,7 @@ FaceStore = TypedDict('FaceStore', 'static_faces' : FaceSet, 'reference_faces': FaceSet }) + VisionFrame = numpy.ndarray[Any, Any] Mask = numpy.ndarray[Any, Any] Matrix = numpy.ndarray[Any, Any] @@ -43,29 +49,32 @@ Fps = float Padding = Tuple[int, int, int, int] Resolution = Tuple[int, int] +ProcessState = Literal['processing', 'stopping', 'pending'] QueuePayload = TypedDict('QueuePayload', { 'frame_number' : int, 'frame_path' : str }) -Update_Process = Callable[[], None] -Process_Frames = Callable[[List[str], List[QueuePayload], Update_Process], None] +UpdateProcess = Callable[[], None] +ProcessFrames = Callable[[List[str], List[QueuePayload], UpdateProcess], None] -Template = Literal['arcface_112_v1', 'arcface_112_v2', 'arcface_128_v2', 'ffhq_512'] +WarpTemplate = Literal['arcface_112_v1', 'arcface_112_v2', 'arcface_128_v2', 'ffhq_512'] +WarpTemplateSet = Dict[WarpTemplate, numpy.ndarray[Any, Any]] ProcessMode = Literal['output', 'preview', 'stream'] LogLevel = Literal['error', 'warn', 'info', 'debug'] VideoMemoryStrategy = Literal['strict', 'moderate', 'tolerant'] -FaceSelectorMode = Literal['reference', 'one', 'many'] +FaceSelectorMode = Literal['many', 'one', 'reference'] FaceAnalyserOrder = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best'] FaceAnalyserAge = Literal['child', 'teen', 'adult', 'senior'] FaceAnalyserGender = Literal['female', 'male'] -FaceDetectorModel = Literal['retinaface', 'yoloface', 'yunet'] +FaceDetectorModel = Literal['many', 'retinaface', 'scrfd', 'yoloface', 'yunet'] +FaceDetectorTweak = Literal['low-luminance', 'high-luminance'] FaceRecognizerModel = Literal['arcface_blendswap', 'arcface_inswapper', 'arcface_simswap', 'arcface_uniface'] FaceMaskType = Literal['box', 'occlusion', 'region'] FaceMaskRegion = Literal['skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'eye-glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip'] TempFrameFormat = Literal['jpg', 'png', 'bmp'] -OutputVideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc'] +OutputVideoEncoder = Literal['libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 'hevc_nvenc', 'h264_amf', 'hevc_amf'] OutputVideoPreset = Literal['ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow'] ModelValue = Dict[str, Any] @@ -74,3 +83,38 @@ OptionsWithModel = TypedDict('OptionsWithModel', { 'model' : ModelValue }) + +ValueAndUnit = TypedDict('ValueAndUnit', +{ + 'value' : str, + 'unit' : str +}) +ExecutionDeviceFramework = TypedDict('ExecutionDeviceFramework', +{ + 'name' : str, + 'version' : str +}) +ExecutionDeviceProduct = TypedDict('ExecutionDeviceProduct', +{ + 'vendor' : str, + 'name' : str, + 'architecture' 
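A quick usage sketch for the new statistics module: create_statistics() takes the static face store (as conditional_log_statistics does above) and returns a flat dict of counters and score aggregates. The empty store below is a stand-in for illustration.

# Illustrative only: inspect the aggregates collected during a run.
from facefusion.statistics import create_statistics

static_faces = {}   # stand-in for FACE_STORE.get('static_faces')
statistics = create_statistics(static_faces)
for name, value in statistics.items():
    print(name, value)   # all zeros for an empty store; real runs report min/max/average scores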
: str, +}) +ExecutionDeviceVideoMemory = TypedDict('ExecutionDeviceVideoMemory', +{ + 'total' : ValueAndUnit, + 'free' : ValueAndUnit +}) +ExecutionDeviceUtilization = TypedDict('ExecutionDeviceUtilization', +{ + 'gpu' : ValueAndUnit, + 'memory' : ValueAndUnit +}) +ExecutionDevice = TypedDict('ExecutionDevice', +{ + 'driver_version' : str, + 'framework' : ExecutionDeviceFramework, + 'product' : ExecutionDeviceProduct, + 'video_memory' : ExecutionDeviceVideoMemory, + 'utilization' : ExecutionDeviceUtilization +}) diff --git a/facefusion/uis/assets/overrides.css b/facefusion/uis/assets/overrides.css index 86ca371d..744ed3ba 100644 --- a/facefusion/uis/assets/overrides.css +++ b/facefusion/uis/assets/overrides.css @@ -42,3 +42,17 @@ grid-template-columns: repeat(var(--grid-cols), minmax(5em, 1fr)); grid-template-rows: repeat(var(--grid-rows), minmax(5em, 1fr)); } + +:root:root:root .tab-nav > button +{ + border: unset; + border-bottom: 0.125rem solid transparent; + font-size: 1.125em; + margin: 0.5rem 1rem; + padding: 0; +} + +:root:root:root .tab-nav > button.selected +{ + border-bottom: 0.125rem solid; +} diff --git a/facefusion/uis/components/benchmark.py b/facefusion/uis/components/benchmark.py index 572fcc3e..2160d09d 100644 --- a/facefusion/uis/components/benchmark.py +++ b/facefusion/uis/components/benchmark.py @@ -1,17 +1,16 @@ from typing import Any, Optional, List, Dict, Generator -import time +from time import sleep, perf_counter import tempfile import statistics import gradio import facefusion.globals -from facefusion import wording +from facefusion import process_manager, wording from facefusion.face_store import clear_static_faces from facefusion.processors.frame.core import get_frame_processors_modules from facefusion.vision import count_video_frame_total, detect_video_resolution, detect_video_fps, pack_resolution from facefusion.core import conditional_process from facefusion.memory import limit_system_memory -from facefusion.normalizer import normalize_output_path from facefusion.filesystem import clear_temp from facefusion.uis.core import get_ui_component @@ -70,6 +69,7 @@ def render() -> None: def listen() -> None: benchmark_runs_checkbox_group = get_ui_component('benchmark_runs_checkbox_group') benchmark_cycles_slider = get_ui_component('benchmark_cycles_slider') + if benchmark_runs_checkbox_group and benchmark_cycles_slider: BENCHMARK_START_BUTTON.click(start, inputs = [ benchmark_runs_checkbox_group, benchmark_cycles_slider ], outputs = BENCHMARK_RESULTS_DATAFRAME) BENCHMARK_CLEAR_BUTTON.click(clear, outputs = BENCHMARK_RESULTS_DATAFRAME) @@ -77,10 +77,13 @@ def listen() -> None: def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[Any], None, None]: facefusion.globals.source_paths = [ '.assets/examples/source.jpg' ] + facefusion.globals.output_path = tempfile.gettempdir() + facefusion.globals.face_landmarker_score = 0 facefusion.globals.temp_frame_format = 'bmp' facefusion.globals.output_video_preset = 'ultrafast' - target_paths = [ BENCHMARKS[benchmark_run] for benchmark_run in benchmark_runs if benchmark_run in BENCHMARKS ] benchmark_results = [] + target_paths = [ BENCHMARKS[benchmark_run] for benchmark_run in benchmark_runs if benchmark_run in BENCHMARKS ] + if target_paths: pre_process() for target_path in target_paths: @@ -103,16 +106,16 @@ def post_process() -> None: def benchmark(target_path : str, benchmark_cycles : int) -> List[Any]: process_times = [] total_fps = 0.0 + facefusion.globals.target_path = target_path + video_frame_total 
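For orientation, the nested execution-device typed dicts added to facefusion/typing.py above compose into a record like the one below; every concrete value here is invented for illustration.

# Example shape of an ExecutionDevice record (values are made up, not real query output).
execution_device =\
{
    'driver_version': '545.29',
    'framework': { 'name': 'CUDA', 'version': '12.3' },
    'product': { 'vendor': 'NVIDIA', 'name': 'GeForce RTX 3090', 'architecture': 'Ampere' },
    'video_memory':
    {
        'total': { 'value': '24576', 'unit': 'MiB' },
        'free': { 'value': '20480', 'unit': 'MiB' }
    },
    'utilization':
    {
        'gpu': { 'value': '35', 'unit': '%' },
        'memory': { 'value': '12', 'unit': '%' }
    }
}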
= count_video_frame_total(facefusion.globals.target_path) + output_video_resolution = detect_video_resolution(facefusion.globals.target_path) + facefusion.globals.output_video_resolution = pack_resolution(output_video_resolution) + facefusion.globals.output_video_fps = detect_video_fps(facefusion.globals.target_path) + for index in range(benchmark_cycles): - facefusion.globals.target_path = target_path - facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_paths, facefusion.globals.target_path, tempfile.gettempdir()) - target_video_resolution = detect_video_resolution(facefusion.globals.target_path) - facefusion.globals.output_video_resolution = pack_resolution(target_video_resolution) - facefusion.globals.output_video_fps = detect_video_fps(facefusion.globals.target_path) - video_frame_total = count_video_frame_total(facefusion.globals.target_path) - start_time = time.perf_counter() + start_time = perf_counter() conditional_process() - end_time = time.perf_counter() + end_time = perf_counter() process_time = end_time - start_time total_fps += video_frame_total / process_time process_times.append(process_time) @@ -132,6 +135,8 @@ def benchmark(target_path : str, benchmark_cycles : int) -> List[Any]: def clear() -> gradio.Dataframe: + while process_manager.is_processing(): + sleep(0.5) if facefusion.globals.target_path: clear_temp(facefusion.globals.target_path) return gradio.Dataframe(value = None) diff --git a/facefusion/uis/components/execution.py b/facefusion/uis/components/execution.py index b3b6ec0e..083727de 100644 --- a/facefusion/uis/components/execution.py +++ b/facefusion/uis/components/execution.py @@ -6,7 +6,7 @@ import facefusion.globals from facefusion import wording from facefusion.face_analyser import clear_face_analyser from facefusion.processors.frame.core import clear_frame_processors_modules -from facefusion.execution_helper import encode_execution_providers, decode_execution_providers +from facefusion.execution import encode_execution_providers, decode_execution_providers EXECUTION_PROVIDERS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None @@ -28,7 +28,6 @@ def listen() -> None: def update_execution_providers(execution_providers : List[str]) -> gradio.CheckboxGroup: clear_face_analyser() clear_frame_processors_modules() - if not execution_providers: - execution_providers = encode_execution_providers(onnxruntime.get_available_providers()) + execution_providers = execution_providers or encode_execution_providers(onnxruntime.get_available_providers()) facefusion.globals.execution_providers = decode_execution_providers(execution_providers) return gradio.CheckboxGroup(value = execution_providers) diff --git a/facefusion/uis/components/face_analyser.py b/facefusion/uis/components/face_analyser.py index eb7b68f7..b6f17b37 100644 --- a/facefusion/uis/components/face_analyser.py +++ b/facefusion/uis/components/face_analyser.py @@ -11,18 +11,20 @@ from facefusion.uis.core import register_ui_component FACE_ANALYSER_ORDER_DROPDOWN : Optional[gradio.Dropdown] = None FACE_ANALYSER_AGE_DROPDOWN : Optional[gradio.Dropdown] = None FACE_ANALYSER_GENDER_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_DETECTOR_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None FACE_DETECTOR_SIZE_DROPDOWN : Optional[gradio.Dropdown] = None FACE_DETECTOR_SCORE_SLIDER : Optional[gradio.Slider] = None -FACE_DETECTOR_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_LANDMARKER_SCORE_SLIDER : Optional[gradio.Slider] = None def render() -> None: global 
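The benchmark timing in the hunk above reduces to simple per-cycle arithmetic; how the totals are reported afterwards is outside this section, so the averaging in this sketch is an assumption.

# Sketch of the per-cycle fps bookkeeping while benchmarking a target video.
from time import perf_counter

def run_cycles(benchmark_cycles : int, video_frame_total : int) -> float:
    total_fps = 0.0
    for index in range(benchmark_cycles):
        start_time = perf_counter()
        # conditional_process() would run the full pipeline here
        process_time = max(perf_counter() - start_time, 1e-6)   # guard only needed because this sketch does no work
        total_fps += video_frame_total / process_time
    return total_fps / benchmark_cycles   # assumed: an average fps is reported per target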
FACE_ANALYSER_ORDER_DROPDOWN global FACE_ANALYSER_AGE_DROPDOWN global FACE_ANALYSER_GENDER_DROPDOWN + global FACE_DETECTOR_MODEL_DROPDOWN global FACE_DETECTOR_SIZE_DROPDOWN global FACE_DETECTOR_SCORE_SLIDER - global FACE_DETECTOR_MODEL_DROPDOWN + global FACE_LANDMARKER_SCORE_SLIDER face_detector_size_dropdown_args : Dict[str, Any] =\ { @@ -53,19 +55,28 @@ def render() -> None: value = facefusion.globals.face_detector_model ) FACE_DETECTOR_SIZE_DROPDOWN = gradio.Dropdown(**face_detector_size_dropdown_args) - FACE_DETECTOR_SCORE_SLIDER = gradio.Slider( - label = wording.get('uis.face_detector_score_slider'), - value = facefusion.globals.face_detector_score, - step = facefusion.choices.face_detector_score_range[1] - facefusion.choices.face_detector_score_range[0], - minimum = facefusion.choices.face_detector_score_range[0], - maximum = facefusion.choices.face_detector_score_range[-1] - ) + with gradio.Row(): + FACE_DETECTOR_SCORE_SLIDER = gradio.Slider( + label = wording.get('uis.face_detector_score_slider'), + value = facefusion.globals.face_detector_score, + step = facefusion.choices.face_detector_score_range[1] - facefusion.choices.face_detector_score_range[0], + minimum = facefusion.choices.face_detector_score_range[0], + maximum = facefusion.choices.face_detector_score_range[-1] + ) + FACE_LANDMARKER_SCORE_SLIDER = gradio.Slider( + label = wording.get('uis.face_landmarker_score_slider'), + value = facefusion.globals.face_landmarker_score, + step = facefusion.choices.face_landmarker_score_range[1] - facefusion.choices.face_landmarker_score_range[0], + minimum = facefusion.choices.face_landmarker_score_range[0], + maximum = facefusion.choices.face_landmarker_score_range[-1] + ) register_ui_component('face_analyser_order_dropdown', FACE_ANALYSER_ORDER_DROPDOWN) register_ui_component('face_analyser_age_dropdown', FACE_ANALYSER_AGE_DROPDOWN) register_ui_component('face_analyser_gender_dropdown', FACE_ANALYSER_GENDER_DROPDOWN) register_ui_component('face_detector_model_dropdown', FACE_DETECTOR_MODEL_DROPDOWN) register_ui_component('face_detector_size_dropdown', FACE_DETECTOR_SIZE_DROPDOWN) register_ui_component('face_detector_score_slider', FACE_DETECTOR_SCORE_SLIDER) + register_ui_component('face_landmarker_score_slider', FACE_LANDMARKER_SCORE_SLIDER) def listen() -> None: @@ -74,7 +85,8 @@ def listen() -> None: FACE_ANALYSER_GENDER_DROPDOWN.change(update_face_analyser_gender, inputs = FACE_ANALYSER_GENDER_DROPDOWN) FACE_DETECTOR_MODEL_DROPDOWN.change(update_face_detector_model, inputs = FACE_DETECTOR_MODEL_DROPDOWN, outputs = FACE_DETECTOR_SIZE_DROPDOWN) FACE_DETECTOR_SIZE_DROPDOWN.change(update_face_detector_size, inputs = FACE_DETECTOR_SIZE_DROPDOWN) - FACE_DETECTOR_SCORE_SLIDER.change(update_face_detector_score, inputs = FACE_DETECTOR_SCORE_SLIDER) + FACE_DETECTOR_SCORE_SLIDER.release(update_face_detector_score, inputs = FACE_DETECTOR_SCORE_SLIDER) + FACE_LANDMARKER_SCORE_SLIDER.release(update_face_landmarker_score, inputs = FACE_LANDMARKER_SCORE_SLIDER) def update_face_analyser_order(face_analyser_order : FaceAnalyserOrder) -> None: @@ -91,9 +103,10 @@ def update_face_analyser_gender(face_analyser_gender : FaceAnalyserGender) -> No def update_face_detector_model(face_detector_model : FaceDetectorModel) -> gradio.Dropdown: facefusion.globals.face_detector_model = face_detector_model + facefusion.globals.face_detector_size = '640x640' if facefusion.globals.face_detector_size in facefusion.choices.face_detector_set[face_detector_model]: - return gradio.Dropdown(value = '640x640', choices = 
facefusion.choices.face_detector_set[face_detector_model]) - return gradio.Dropdown(value = '640x640', choices = [ '640x640' ]) + return gradio.Dropdown(value = facefusion.globals.face_detector_size, choices = facefusion.choices.face_detector_set[face_detector_model]) + return gradio.Dropdown(value = facefusion.globals.face_detector_size, choices = [ facefusion.globals.face_detector_size ]) def update_face_detector_size(face_detector_size : str) -> None: @@ -102,3 +115,7 @@ def update_face_detector_size(face_detector_size : str) -> None: def update_face_detector_score(face_detector_score : float) -> None: facefusion.globals.face_detector_score = face_detector_score + + +def update_face_landmarker_score(face_landmarker_score : float) -> None: + facefusion.globals.face_landmarker_score = face_landmarker_score diff --git a/facefusion/uis/components/face_masker.py b/facefusion/uis/components/face_masker.py index 51289973..fb111e03 100755 --- a/facefusion/uis/components/face_masker.py +++ b/facefusion/uis/components/face_masker.py @@ -100,12 +100,10 @@ def listen() -> None: def update_face_mask_type(face_mask_types : List[FaceMaskType]) -> Tuple[gradio.CheckboxGroup, gradio.Group, gradio.CheckboxGroup]: - if not face_mask_types: - face_mask_types = facefusion.choices.face_mask_types - facefusion.globals.face_mask_types = face_mask_types + facefusion.globals.face_mask_types = face_mask_types or facefusion.choices.face_mask_types has_box_mask = 'box' in face_mask_types has_region_mask = 'region' in face_mask_types - return gradio.CheckboxGroup(value = face_mask_types), gradio.Group(visible = has_box_mask), gradio.CheckboxGroup(visible = has_region_mask) + return gradio.CheckboxGroup(value = facefusion.globals.face_mask_types), gradio.Group(visible = has_box_mask), gradio.CheckboxGroup(visible = has_region_mask) def update_face_mask_blur(face_mask_blur : float) -> None: @@ -117,7 +115,5 @@ def update_face_mask_padding(face_mask_padding_top : int, face_mask_padding_righ def update_face_mask_regions(face_mask_regions : List[FaceMaskRegion]) -> gradio.CheckboxGroup: - if not face_mask_regions: - face_mask_regions = facefusion.choices.face_mask_regions - facefusion.globals.face_mask_regions = face_mask_regions - return gradio.CheckboxGroup(value = face_mask_regions) + facefusion.globals.face_mask_regions = face_mask_regions or facefusion.choices.face_mask_regions + return gradio.CheckboxGroup(value = facefusion.globals.face_mask_regions) diff --git a/facefusion/uis/components/face_selector.py b/facefusion/uis/components/face_selector.py index d429d5a3..df8c3037 100644 --- a/facefusion/uis/components/face_selector.py +++ b/facefusion/uis/components/face_selector.py @@ -23,7 +23,7 @@ def render() -> None: global REFERENCE_FACE_POSITION_GALLERY global REFERENCE_FACE_DISTANCE_SLIDER - reference_face_gallery_args: Dict[str, Any] =\ + reference_face_gallery_args : Dict[str, Any] =\ { 'label': wording.get('uis.reference_face_gallery'), 'object_fit': 'cover', @@ -85,7 +85,8 @@ def listen() -> None: [ 'face_detector_model_dropdown', 'face_detector_size_dropdown', - 'face_detector_score_slider' + 'face_detector_score_slider', + 'face_landmarker_score_slider' ] for component_name in change_two_component_names: component = get_ui_component(component_name) @@ -98,15 +99,15 @@ def listen() -> None: def update_face_selector_mode(face_selector_mode : FaceSelectorMode) -> Tuple[gradio.Gallery, gradio.Slider]: - if face_selector_mode == 'reference': - facefusion.globals.face_selector_mode = face_selector_mode - return 
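Stripped of the Gradio objects, the size fallback in update_face_detector_model above amounts to the following sketch (not the component code itself).

# Sketch: dropdown choices offered after switching the face detector model.
from typing import Dict, List

def detector_size_choices(face_detector_model : str, face_detector_set : Dict[str, List[str]], face_detector_size : str = '640x640') -> List[str]:
    sizes = face_detector_set.get(face_detector_model, [])
    # keep the model's own sizes when the default is listed, otherwise pin the default alone
    return sizes if face_detector_size in sizes else [ face_detector_size ]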
gradio.Gallery(visible = True), gradio.Slider(visible = True) - if face_selector_mode == 'one': - facefusion.globals.face_selector_mode = face_selector_mode - return gradio.Gallery(visible = False), gradio.Slider(visible = False) if face_selector_mode == 'many': facefusion.globals.face_selector_mode = face_selector_mode return gradio.Gallery(visible = False), gradio.Slider(visible = False) + if face_selector_mode == 'one': + facefusion.globals.face_selector_mode = face_selector_mode + return gradio.Gallery(visible = False), gradio.Slider(visible = False) + if face_selector_mode == 'reference': + facefusion.globals.face_selector_mode = face_selector_mode + return gradio.Gallery(visible = True), gradio.Slider(visible = True) def clear_and_update_reference_face_position(event : gradio.SelectData) -> gradio.Gallery: diff --git a/facefusion/uis/components/frame_processors.py b/facefusion/uis/components/frame_processors.py index 65ef5759..4195c63b 100644 --- a/facefusion/uis/components/frame_processors.py +++ b/facefusion/uis/components/frame_processors.py @@ -32,7 +32,7 @@ def update_frame_processors(frame_processors : List[str]) -> gradio.CheckboxGrou frame_processor_module = load_frame_processor_module(frame_processor) if not frame_processor_module.pre_check(): return gradio.CheckboxGroup() - return gradio.CheckboxGroup(value = frame_processors, choices = sort_frame_processors(frame_processors)) + return gradio.CheckboxGroup(value = facefusion.globals.frame_processors, choices = sort_frame_processors(facefusion.globals.frame_processors)) def sort_frame_processors(frame_processors : List[str]) -> list[str]: diff --git a/facefusion/uis/components/frame_processors_options.py b/facefusion/uis/components/frame_processors_options.py index d7172f2a..85e0e8c7 100755 --- a/facefusion/uis/components/frame_processors_options.py +++ b/facefusion/uis/components/frame_processors_options.py @@ -113,7 +113,7 @@ def update_face_enhancer_model(face_enhancer_model : FaceEnhancerModel) -> gradi face_enhancer_module.clear_frame_processor() face_enhancer_module.set_options('model', face_enhancer_module.MODELS[face_enhancer_model]) if face_enhancer_module.pre_check(): - return gradio.Dropdown(value = face_enhancer_model) + return gradio.Dropdown(value = frame_processors_globals.face_enhancer_model) return gradio.Dropdown() @@ -135,7 +135,7 @@ def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> gradio.D face_swapper_module.clear_frame_processor() face_swapper_module.set_options('model', face_swapper_module.MODELS[face_swapper_model]) if face_swapper_module.pre_check(): - return gradio.Dropdown(value = face_swapper_model) + return gradio.Dropdown(value = frame_processors_globals.face_swapper_model) return gradio.Dropdown() @@ -145,7 +145,7 @@ def update_frame_enhancer_model(frame_enhancer_model : FrameEnhancerModel) -> gr frame_enhancer_module.clear_frame_processor() frame_enhancer_module.set_options('model', frame_enhancer_module.MODELS[frame_enhancer_model]) if frame_enhancer_module.pre_check(): - return gradio.Dropdown(value = frame_enhancer_model) + return gradio.Dropdown(value = frame_processors_globals.frame_enhancer_model) return gradio.Dropdown() @@ -159,5 +159,5 @@ def update_lip_syncer_model(lip_syncer_model : LipSyncerModel) -> gradio.Dropdow lip_syncer_module.clear_frame_processor() lip_syncer_module.set_options('model', lip_syncer_module.MODELS[lip_syncer_model]) if lip_syncer_module.pre_check(): - return gradio.Dropdown(value = lip_syncer_model) + return gradio.Dropdown(value = 
frame_processors_globals.lip_syncer_model) return gradio.Dropdown() diff --git a/facefusion/uis/components/output.py b/facefusion/uis/components/output.py index cbbd3972..cfba2a6d 100644 --- a/facefusion/uis/components/output.py +++ b/facefusion/uis/components/output.py @@ -1,24 +1,27 @@ from typing import Tuple, Optional +from time import sleep import gradio import facefusion.globals -from facefusion import wording +from facefusion import process_manager, wording from facefusion.core import conditional_process from facefusion.memory import limit_system_memory -from facefusion.uis.core import get_ui_component from facefusion.normalizer import normalize_output_path +from facefusion.uis.core import get_ui_component from facefusion.filesystem import clear_temp, is_image, is_video OUTPUT_IMAGE : Optional[gradio.Image] = None OUTPUT_VIDEO : Optional[gradio.Video] = None OUTPUT_START_BUTTON : Optional[gradio.Button] = None OUTPUT_CLEAR_BUTTON : Optional[gradio.Button] = None +OUTPUT_STOP_BUTTON : Optional[gradio.Button] = None def render() -> None: global OUTPUT_IMAGE global OUTPUT_VIDEO global OUTPUT_START_BUTTON + global OUTPUT_STOP_BUTTON global OUTPUT_CLEAR_BUTTON OUTPUT_IMAGE = gradio.Image( @@ -33,6 +36,12 @@ def render() -> None: variant = 'primary', size = 'sm' ) + OUTPUT_STOP_BUTTON = gradio.Button( + value = wording.get('uis.stop_button'), + variant = 'primary', + size = 'sm', + visible = False + ) OUTPUT_CLEAR_BUTTON = gradio.Button( value = wording.get('uis.clear_button'), size = 'sm' @@ -42,23 +51,38 @@ def render() -> None: def listen() -> None: output_path_textbox = get_ui_component('output_path_textbox') if output_path_textbox: - OUTPUT_START_BUTTON.click(start, inputs = output_path_textbox, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO ]) + OUTPUT_START_BUTTON.click(start, outputs = [ OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ]) + OUTPUT_START_BUTTON.click(process, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO, OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ]) + OUTPUT_STOP_BUTTON.click(stop, outputs = [ OUTPUT_START_BUTTON, OUTPUT_STOP_BUTTON ]) OUTPUT_CLEAR_BUTTON.click(clear, outputs = [ OUTPUT_IMAGE, OUTPUT_VIDEO ]) -def start(output_path : str) -> Tuple[gradio.Image, gradio.Video]: - facefusion.globals.output_path = normalize_output_path(facefusion.globals.source_paths, facefusion.globals.target_path, output_path) +def start() -> Tuple[gradio.Button, gradio.Button]: + while not process_manager.is_processing(): + sleep(0.5) + return gradio.Button(visible = False), gradio.Button(visible = True) + + +def process() -> Tuple[gradio.Image, gradio.Video, gradio.Button, gradio.Button]: + normed_output_path = normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path) if facefusion.globals.system_memory_limit > 0: limit_system_memory(facefusion.globals.system_memory_limit) conditional_process() - if is_image(facefusion.globals.output_path): - return gradio.Image(value = facefusion.globals.output_path, visible = True), gradio.Video(value = None, visible = False) - if is_video(facefusion.globals.output_path): - return gradio.Image(value = None, visible = False), gradio.Video(value = facefusion.globals.output_path, visible = True) - return gradio.Image(), gradio.Video() + if is_image(normed_output_path): + return gradio.Image(value = normed_output_path, visible = True), gradio.Video(value = None, visible = False), gradio.Button(visible = True), gradio.Button(visible = False) + if is_video(normed_output_path): + return gradio.Image(value = None, visible = False), gradio.Video(value = 
normed_output_path, visible = True), gradio.Button(visible = True), gradio.Button(visible = False) + return gradio.Image(value = None), gradio.Video(value = None), gradio.Button(visible = True), gradio.Button(visible = False) + + +def stop() -> Tuple[gradio.Button, gradio.Button]: + process_manager.stop() + return gradio.Button(visible = True), gradio.Button(visible = False) def clear() -> Tuple[gradio.Image, gradio.Video]: + while process_manager.is_processing(): + sleep(0.5) if facefusion.globals.target_path: clear_temp(facefusion.globals.target_path) return gradio.Image(value = None), gradio.Video(value = None) diff --git a/facefusion/uis/components/output_options.py b/facefusion/uis/components/output_options.py index b828c715..757316d4 100644 --- a/facefusion/uis/components/output_options.py +++ b/facefusion/uis/components/output_options.py @@ -1,5 +1,4 @@ from typing import Optional, Tuple, List -import tempfile import gradio import facefusion.globals @@ -9,10 +8,11 @@ from facefusion.typing import OutputVideoEncoder, OutputVideoPreset, Fps from facefusion.filesystem import is_image, is_video from facefusion.uis.typing import ComponentName from facefusion.uis.core import get_ui_component, register_ui_component -from facefusion.vision import detect_video_fps, create_video_resolutions, detect_video_resolution, pack_resolution +from facefusion.vision import detect_image_resolution, create_image_resolutions, detect_video_fps, detect_video_resolution, create_video_resolutions, pack_resolution OUTPUT_PATH_TEXTBOX : Optional[gradio.Textbox] = None OUTPUT_IMAGE_QUALITY_SLIDER : Optional[gradio.Slider] = None +OUTPUT_IMAGE_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_ENCODER_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_PRESET_DROPDOWN : Optional[gradio.Dropdown] = None OUTPUT_VIDEO_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None @@ -23,15 +23,25 @@ OUTPUT_VIDEO_FPS_SLIDER : Optional[gradio.Slider] = None def render() -> None: global OUTPUT_PATH_TEXTBOX global OUTPUT_IMAGE_QUALITY_SLIDER + global OUTPUT_IMAGE_RESOLUTION_DROPDOWN global OUTPUT_VIDEO_ENCODER_DROPDOWN global OUTPUT_VIDEO_PRESET_DROPDOWN global OUTPUT_VIDEO_RESOLUTION_DROPDOWN global OUTPUT_VIDEO_QUALITY_SLIDER global OUTPUT_VIDEO_FPS_SLIDER + output_image_resolutions = [] + output_video_resolutions = [] + if is_image(facefusion.globals.target_path): + output_image_resolution = detect_image_resolution(facefusion.globals.target_path) + output_image_resolutions = create_image_resolutions(output_image_resolution) + if is_video(facefusion.globals.target_path): + output_video_resolution = detect_video_resolution(facefusion.globals.target_path) + output_video_resolutions = create_video_resolutions(output_video_resolution) + facefusion.globals.output_path = facefusion.globals.output_path or '.' 
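The start/stop buttons and the busy-waits above lean on facefusion.process_manager, which is not shown in this section. Judging only from the calls used here (manage, is_processing, stop), a minimal stand-in could look like the sketch below; every name beyond those three, and the exact state handling, is an assumption.

# Minimal sketch of a process manager compatible with the calls above; the real
# module may differ. The state values mirror the ProcessState literal from typing.py.
from typing import Generator, List

PROCESS_STATE = 'pending'

def set_process_state(process_state : str) -> None:
    global PROCESS_STATE
    PROCESS_STATE = process_state

def is_processing() -> bool:
    return PROCESS_STATE == 'processing'

def is_stopping() -> bool:
    return PROCESS_STATE == 'stopping'

def stop() -> None:
    set_process_state('stopping')

def manage(queue_payloads : List[dict]) -> Generator[dict, None, None]:
    # hand out payloads only while no stop was requested, so workers drain early on stop
    for queue_payload in queue_payloads:
        if is_stopping():
            break
        yield queue_payload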
OUTPUT_PATH_TEXTBOX = gradio.Textbox( label = wording.get('uis.output_path_textbox'), - value = facefusion.globals.output_path or tempfile.gettempdir(), + value = facefusion.globals.output_path, max_lines = 1 ) OUTPUT_IMAGE_QUALITY_SLIDER = gradio.Slider( @@ -42,6 +52,12 @@ def render() -> None: maximum = facefusion.choices.output_image_quality_range[-1], visible = is_image(facefusion.globals.target_path) ) + OUTPUT_IMAGE_RESOLUTION_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.output_image_resolution_dropdown'), + choices = output_image_resolutions, + value = facefusion.globals.output_image_resolution, + visible = is_image(facefusion.globals.target_path) + ) OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown( label = wording.get('uis.output_video_encoder_dropdown'), choices = facefusion.choices.output_video_encoders, @@ -64,7 +80,7 @@ def render() -> None: ) OUTPUT_VIDEO_RESOLUTION_DROPDOWN = gradio.Dropdown( label = wording.get('uis.output_video_resolution_dropdown'), - choices = create_video_resolutions(facefusion.globals.target_path), + choices = output_video_resolutions, value = facefusion.globals.output_video_resolution, visible = is_video(facefusion.globals.target_path) ) @@ -83,6 +99,7 @@ def render() -> None: def listen() -> None: OUTPUT_PATH_TEXTBOX.change(update_output_path, inputs = OUTPUT_PATH_TEXTBOX) OUTPUT_IMAGE_QUALITY_SLIDER.change(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER) + OUTPUT_IMAGE_RESOLUTION_DROPDOWN.change(update_output_image_resolution, inputs = OUTPUT_IMAGE_RESOLUTION_DROPDOWN) OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN) OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = OUTPUT_VIDEO_PRESET_DROPDOWN) OUTPUT_VIDEO_QUALITY_SLIDER.change(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER) @@ -97,19 +114,22 @@ def listen() -> None: component = get_ui_component(component_name) if component: for method in [ 'upload', 'change', 'clear' ]: - getattr(component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) + getattr(component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) -def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: +def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: if is_image(facefusion.globals.target_path): - return gradio.Slider(visible = True), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None) + output_image_resolution = detect_image_resolution(facefusion.globals.target_path) + output_image_resolutions = create_image_resolutions(output_image_resolution) + facefusion.globals.output_image_resolution = pack_resolution(output_image_resolution) + return gradio.Slider(visible = True), gradio.Dropdown(visible = True, value = facefusion.globals.output_image_resolution, choices = output_image_resolutions), gradio.Dropdown(visible = False), 
gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None) if is_video(facefusion.globals.target_path): - target_video_resolution = detect_video_resolution(facefusion.globals.target_path) - output_video_resolution = pack_resolution(target_video_resolution) - output_video_resolutions = create_video_resolutions(facefusion.globals.target_path) - output_video_fps = detect_video_fps(facefusion.globals.target_path) - return gradio.Slider(visible = False), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True, value = output_video_resolution, choices = output_video_resolutions), gradio.Slider(visible = True, value = output_video_fps) - return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None) + output_video_resolution = detect_video_resolution(facefusion.globals.target_path) + output_video_resolutions = create_video_resolutions(output_video_resolution) + facefusion.globals.output_video_resolution = pack_resolution(output_video_resolution) + facefusion.globals.output_video_fps = detect_video_fps(facefusion.globals.target_path) + return gradio.Slider(visible = False), gradio.Dropdown(visible = False), gradio.Dropdown(visible = True), gradio.Dropdown(visible = True), gradio.Slider(visible = True), gradio.Dropdown(visible = True, value = facefusion.globals.output_video_resolution, choices = output_video_resolutions), gradio.Slider(visible = True, value = facefusion.globals.output_video_fps) + return gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Dropdown(visible = False), gradio.Dropdown(visible = False), gradio.Slider(visible = False), gradio.Dropdown(visible = False, value = None, choices = None), gradio.Slider(visible = False, value = None) def update_output_path(output_path : str) -> None: @@ -120,6 +140,10 @@ def update_output_image_quality(output_image_quality : int) -> None: facefusion.globals.output_image_quality = output_image_quality +def update_output_image_resolution(output_image_resolution : str) -> None: + facefusion.globals.output_image_resolution = output_image_resolution + + def update_output_video_encoder(output_video_encoder: OutputVideoEncoder) -> None: facefusion.globals.output_video_encoder = output_video_encoder diff --git a/facefusion/uis/components/preview.py b/facefusion/uis/components/preview.py index 07b8b01d..7b0d8e73 100755 --- a/facefusion/uis/components/preview.py +++ b/facefusion/uis/components/preview.py @@ -2,10 +2,11 @@ from typing import Any, Dict, List, Optional from time import sleep import cv2 import gradio +import numpy import facefusion.globals from facefusion import wording, logger -from facefusion.audio import get_audio_frame +from facefusion.audio import get_audio_frame, create_empty_audio_frame from facefusion.common_helper import get_first from facefusion.core import conditional_append_reference_faces from facefusion.face_analyser import get_average_face, clear_face_analyser @@ -26,12 +27,12 @@ def render() -> None: global PREVIEW_IMAGE global PREVIEW_FRAME_SLIDER - preview_image_args: Dict[str, Any] =\ + preview_image_args : Dict[str, Any] =\ { 'label': wording.get('uis.preview_image'), 'interactive': False } - 
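Taken together, the dropdown population in output_options.py above reduces to a few vision helpers. A condensed sketch with a hypothetical target path:

# Sketch: derive the image resolution choices and the default for a hypothetical target.
from facefusion.vision import detect_image_resolution, create_image_resolutions, pack_resolution

target_path = '.assets/examples/target.jpg'   # hypothetical path
output_image_resolution = detect_image_resolution(target_path)                 # e.g. (1280, 720)
output_image_resolutions = create_image_resolutions(output_image_resolution)   # scaled variants from image_template_sizes
default_resolution = pack_resolution(output_image_resolution)                  # '1280x720'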
preview_frame_slider_args: Dict[str, Any] =\ + preview_frame_slider_args : Dict[str, Any] =\ { 'label': wording.get('uis.preview_frame_slider'), 'step': 1, @@ -46,6 +47,8 @@ def render() -> None: source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) if source_audio_path and facefusion.globals.output_video_fps: source_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, facefusion.globals.reference_frame_number) + if not numpy.any(source_audio_frame): + source_audio_frame = create_empty_audio_frame() else: source_audio_frame = None if is_image(facefusion.globals.target_path): @@ -97,6 +100,8 @@ def listen() -> None: 'face_debugger_items_checkbox_group', 'face_enhancer_blend_slider', 'frame_enhancer_blend_slider', + 'trim_frame_start_slider', + 'trim_frame_end_slider', 'face_selector_mode_dropdown', 'reference_face_distance_slider', 'face_mask_types_checkbox_group', @@ -124,7 +129,8 @@ def listen() -> None: 'lip_syncer_model_dropdown', 'face_detector_model_dropdown', 'face_detector_size_dropdown', - 'face_detector_score_slider' + 'face_detector_score_slider', + 'face_landmarker_score_slider' ] for component_name in change_two_component_names: component = get_ui_component(component_name) @@ -153,10 +159,14 @@ def update_preview_image(frame_number : int = 0) -> gradio.Image: source_face = get_average_face(source_frames) source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) if source_audio_path and facefusion.globals.output_video_fps: - source_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, facefusion.globals.reference_frame_number) + reference_audio_frame_number = facefusion.globals.reference_frame_number + if facefusion.globals.trim_frame_start: + reference_audio_frame_number -= facefusion.globals.trim_frame_start + source_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, reference_audio_frame_number) + if not numpy.any(source_audio_frame): + source_audio_frame = create_empty_audio_frame() else: source_audio_frame = None - if is_image(facefusion.globals.target_path): target_vision_frame = read_static_image(facefusion.globals.target_path) preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) @@ -178,7 +188,7 @@ def update_preview_frame_slider() -> gradio.Slider: def process_preview_frame(reference_faces : FaceSet, source_face : Face, source_audio_frame : AudioFrame, target_vision_frame : VisionFrame) -> VisionFrame: - target_vision_frame = resize_frame_resolution(target_vision_frame, 640, 640) + target_vision_frame = resize_frame_resolution(target_vision_frame, (640, 640)) if analyse_frame(target_vision_frame): return cv2.GaussianBlur(target_vision_frame, (99, 99), 0) for frame_processor in facefusion.globals.frame_processors: diff --git a/facefusion/uis/components/temp_frame.py b/facefusion/uis/components/temp_frame.py index d9d851dd..a601653f 100644 --- a/facefusion/uis/components/temp_frame.py +++ b/facefusion/uis/components/temp_frame.py @@ -1,4 +1,4 @@ -from typing import Optional, Tuple +from typing import Optional import gradio import facefusion.globals @@ -9,12 +9,10 @@ from facefusion.filesystem import is_video from facefusion.uis.core import get_ui_component TEMP_FRAME_FORMAT_DROPDOWN : Optional[gradio.Dropdown] = None -TEMP_FRAME_QUALITY_SLIDER : Optional[gradio.Slider] = None def render() -> None: global TEMP_FRAME_FORMAT_DROPDOWN - global 
TEMP_FRAME_QUALITY_SLIDER TEMP_FRAME_FORMAT_DROPDOWN = gradio.Dropdown( label = wording.get('uis.temp_frame_format_dropdown'), @@ -22,34 +20,22 @@ def render() -> None: value = facefusion.globals.temp_frame_format, visible = is_video(facefusion.globals.target_path) ) - TEMP_FRAME_QUALITY_SLIDER = gradio.Slider( - label = wording.get('uis.temp_frame_quality_slider'), - value = facefusion.globals.temp_frame_quality, - step = facefusion.choices.temp_frame_quality_range[1] - facefusion.choices.temp_frame_quality_range[0], - minimum = facefusion.choices.temp_frame_quality_range[0], - maximum = facefusion.choices.temp_frame_quality_range[-1], - visible = is_video(facefusion.globals.target_path) - ) def listen() -> None: TEMP_FRAME_FORMAT_DROPDOWN.change(update_temp_frame_format, inputs = TEMP_FRAME_FORMAT_DROPDOWN) - TEMP_FRAME_QUALITY_SLIDER.change(update_temp_frame_quality, inputs = TEMP_FRAME_QUALITY_SLIDER) target_video = get_ui_component('target_video') if target_video: for method in [ 'upload', 'change', 'clear' ]: - getattr(target_video, method)(remote_update, outputs = [ TEMP_FRAME_FORMAT_DROPDOWN, TEMP_FRAME_QUALITY_SLIDER ]) + getattr(target_video, method)(remote_update, outputs = TEMP_FRAME_FORMAT_DROPDOWN) -def remote_update() -> Tuple[gradio.Dropdown, gradio.Slider]: +def remote_update() -> gradio.Dropdown: if is_video(facefusion.globals.target_path): - return gradio.Dropdown(visible = True), gradio.Slider(visible = True) - return gradio.Dropdown(visible = False), gradio.Slider(visible = False) + return gradio.Dropdown(visible = True) + return gradio.Dropdown(visible = False) def update_temp_frame_format(temp_frame_format : TempFrameFormat) -> None: facefusion.globals.temp_frame_format = temp_frame_format - -def update_temp_frame_quality(temp_frame_quality : int) -> None: - facefusion.globals.temp_frame_quality = temp_frame_quality diff --git a/facefusion/uis/components/trim_frame.py b/facefusion/uis/components/trim_frame.py index fdfb240c..bce70e53 100644 --- a/facefusion/uis/components/trim_frame.py +++ b/facefusion/uis/components/trim_frame.py @@ -5,7 +5,7 @@ import facefusion.globals from facefusion import wording from facefusion.vision import count_video_frame_total from facefusion.filesystem import is_video -from facefusion.uis.core import get_ui_component +from facefusion.uis.core import get_ui_component, register_ui_component TRIM_FRAME_START_SLIDER : Optional[gradio.Slider] = None TRIM_FRAME_END_SLIDER : Optional[gradio.Slider] = None @@ -42,6 +42,8 @@ def render() -> None: with gradio.Row(): TRIM_FRAME_START_SLIDER = gradio.Slider(**trim_frame_start_slider_args) TRIM_FRAME_END_SLIDER = gradio.Slider(**trim_frame_end_slider_args) + register_ui_component('trim_frame_start_slider', TRIM_FRAME_START_SLIDER) + register_ui_component('trim_frame_end_slider', TRIM_FRAME_END_SLIDER) def listen() -> None: diff --git a/facefusion/uis/components/webcam.py b/facefusion/uis/components/webcam.py index 6534bf4b..9f9b967a 100644 --- a/facefusion/uis/components/webcam.py +++ b/facefusion/uis/components/webcam.py @@ -11,7 +11,9 @@ from tqdm import tqdm import facefusion.globals from facefusion import logger, wording +from facefusion.audio import create_empty_audio_frame from facefusion.content_analyser import analyse_stream +from facefusion.filesystem import filter_image_paths from facefusion.typing import VisionFrame, Face, Fps from facefusion.face_analyser import get_average_face from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module @@ -92,9 
+94,11 @@ def listen() -> None: def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: facefusion.globals.face_selector_mode = 'one' facefusion.globals.face_analyser_order = 'large-small' - source_frames = read_static_images(facefusion.globals.source_paths) + source_image_paths = filter_image_paths(facefusion.globals.source_paths) + source_frames = read_static_images(source_image_paths) source_face = get_average_face(source_frames) stream = None + if webcam_mode in [ 'udp', 'v4l2' ]: stream = open_stream(webcam_mode, webcam_resolution, webcam_fps) # type: ignore[arg-type] webcam_width, webcam_height = unpack_resolution(webcam_resolution) @@ -150,6 +154,7 @@ def stop() -> gradio.Image: def process_stream_frame(source_face : Face, target_vision_frame : VisionFrame) -> VisionFrame: + source_audio_frame = create_empty_audio_frame() for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): logger.disable() if frame_processor_module.pre_process('stream'): @@ -157,8 +162,7 @@ def process_stream_frame(source_face : Face, target_vision_frame : VisionFrame) target_vision_frame = frame_processor_module.process_frame( { 'source_face': source_face, - 'reference_faces': None, - 'source_audio_frame': None, + 'source_audio_frame': source_audio_frame, 'target_vision_frame': target_vision_frame }) return target_vision_frame diff --git a/facefusion/uis/core.py b/facefusion/uis/core.py index 3af6a561..9ce0ee53 100644 --- a/facefusion/uis/core.py +++ b/facefusion/uis/core.py @@ -28,7 +28,7 @@ def load_ui_layout_module(ui_layout : str) -> Any: if not hasattr(ui_layout_module, method_name): raise NotImplementedError except ModuleNotFoundError as exception: - logger.error(wording.get('ui_layout_not_loaded').format(ui_layout=ui_layout), __name__.upper()) + logger.error(wording.get('ui_layout_not_loaded').format(ui_layout = ui_layout), __name__.upper()) logger.debug(exception.msg, __name__.upper()) sys.exit(1) except NotImplementedError: @@ -58,12 +58,18 @@ def register_ui_component(name : ComponentName, component: Component) -> None: def launch() -> None: + ui_layouts_total = len(facefusion.globals.ui_layouts) with gradio.Blocks(theme = get_theme(), css = get_css(), title = metadata.get('name') + ' ' + metadata.get('version')) as ui: for ui_layout in facefusion.globals.ui_layouts: ui_layout_module = load_ui_layout_module(ui_layout) if ui_layout_module.pre_render(): - ui_layout_module.render() - ui_layout_module.listen() + if ui_layouts_total > 1: + with gradio.Tab(ui_layout): + ui_layout_module.render() + ui_layout_module.listen() + else: + ui_layout_module.render() + ui_layout_module.listen() for ui_layout in facefusion.globals.ui_layouts: ui_layout_module = load_ui_layout_module(ui_layout) diff --git a/facefusion/uis/layouts/default.py b/facefusion/uis/layouts/default.py index 06d7d4de..3c6fd99d 100755 --- a/facefusion/uis/layouts/default.py +++ b/facefusion/uis/layouts/default.py @@ -75,4 +75,4 @@ def listen() -> None: def run(ui : gradio.Blocks) -> None: - ui.launch(show_api = False, quiet = True) + ui.queue(concurrency_count = 4).launch(show_api = False, quiet = True) diff --git a/facefusion/uis/typing.py b/facefusion/uis/typing.py index 2e7a1afe..0dc2b13e 100644 --- a/facefusion/uis/typing.py +++ b/facefusion/uis/typing.py @@ -10,6 +10,8 @@ ComponentName = Literal\ 'target_image', 'target_video', 'preview_frame_slider', + 'trim_frame_start_slider', + 'trim_frame_end_slider', 'face_selector_mode_dropdown', 
'reference_face_position_gallery', 'reference_face_distance_slider', @@ -19,6 +21,7 @@ ComponentName = Literal\ 'face_detector_model_dropdown', 'face_detector_size_dropdown', 'face_detector_score_slider', + 'face_landmarker_score_slider', 'face_mask_types_checkbox_group', 'face_mask_blur_slider', 'face_mask_padding_top_slider', diff --git a/facefusion/vision.py b/facefusion/vision.py index d0a2a219..6f9dc3cd 100644 --- a/facefusion/vision.py +++ b/facefusion/vision.py @@ -1,12 +1,55 @@ from typing import Optional, List, Tuple from functools import lru_cache import cv2 +import numpy +from cv2.typing import Size -from facefusion.typing import VisionFrame, Resolution -from facefusion.choices import video_template_sizes +from facefusion.typing import VisionFrame, Resolution, Fps +from facefusion.choices import image_template_sizes, video_template_sizes from facefusion.filesystem import is_image, is_video +@lru_cache(maxsize = 128) +def read_static_image(image_path : str) -> Optional[VisionFrame]: + return read_image(image_path) + + +def read_static_images(image_paths : List[str]) -> Optional[List[VisionFrame]]: + frames = [] + if image_paths: + for image_path in image_paths: + frames.append(read_static_image(image_path)) + return frames + + +def read_image(image_path : str) -> Optional[VisionFrame]: + if is_image(image_path): + return cv2.imread(image_path) + return None + + +def write_image(image_path : str, vision_frame : VisionFrame) -> bool: + if image_path: + return cv2.imwrite(image_path, vision_frame) + return False + + +def detect_image_resolution(image_path : str) -> Optional[Resolution]: + if is_image(image_path): + image = read_image(image_path) + height, width = image.shape[:2] + return width, height + return None + + +def restrict_image_resolution(image_path : str, resolution : Resolution) -> Resolution: + if is_image(image_path): + image_resolution = detect_image_resolution(image_path) + if image_resolution < resolution: + return image_resolution + return resolution + + def get_video_frame(video_path : str, frame_number : int = 0) -> Optional[VisionFrame]: if is_video(video_path): video_capture = cv2.VideoCapture(video_path) @@ -20,6 +63,21 @@ def get_video_frame(video_path : str, frame_number : int = 0) -> Optional[Vision return None +def create_image_resolutions(resolution : Resolution) -> List[str]: + resolutions = [] + temp_resolutions = [] + + if resolution: + width, height = resolution + temp_resolutions.append(normalize_resolution(resolution)) + for template_size in image_template_sizes: + temp_resolutions.append(normalize_resolution((width * template_size, height * template_size))) + temp_resolutions = sorted(set(temp_resolutions)) + for temp_resolution in temp_resolutions: + resolutions.append(pack_resolution(temp_resolution)) + return resolutions + + def count_video_frame_total(video_path : str) -> int: if is_video(video_path): video_capture = cv2.VideoCapture(video_path) @@ -40,35 +98,49 @@ def detect_video_fps(video_path : str) -> Optional[float]: return None -def detect_video_resolution(video_path : str) -> Optional[Tuple[float, float]]: +def restrict_video_fps(video_path : str, fps : Fps) -> Fps: + if is_video(video_path): + video_fps = detect_video_fps(video_path) + if video_fps < fps: + return video_fps + return fps + + +def detect_video_resolution(video_path : str) -> Optional[Resolution]: if is_video(video_path): video_capture = cv2.VideoCapture(video_path) if video_capture.isOpened(): width = video_capture.get(cv2.CAP_PROP_FRAME_WIDTH) height = 
video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT) video_capture.release() - return width, height + return int(width), int(height) return None -def create_video_resolutions(video_path : str) -> Optional[List[str]]: - temp_resolutions = [] - video_resolutions = [] - video_resolution = detect_video_resolution(video_path) +def restrict_video_resolution(video_path : str, resolution : Resolution) -> Resolution: + if is_video(video_path): + video_resolution = detect_video_resolution(video_path) + if video_resolution < resolution: + return video_resolution + return resolution - if video_resolution: - width, height = video_resolution - temp_resolutions.append(normalize_resolution(video_resolution)) + +def create_video_resolutions(resolution : Resolution) -> List[str]: + resolutions = [] + temp_resolutions = [] + + if resolution: + width, height = resolution + temp_resolutions.append(normalize_resolution(resolution)) for template_size in video_template_sizes: if width > height: temp_resolutions.append(normalize_resolution((template_size * width / height, template_size))) else: temp_resolutions.append(normalize_resolution((template_size, template_size * height / width))) temp_resolutions = sorted(set(temp_resolutions)) - for temp in temp_resolutions: - video_resolutions.append(pack_resolution(temp)) - return video_resolutions - return None + for temp_resolution in temp_resolutions: + resolutions.append(pack_resolution(temp_resolution)) + return resolutions def normalize_resolution(resolution : Tuple[float, float]) -> Resolution: @@ -81,7 +153,7 @@ def normalize_resolution(resolution : Tuple[float, float]) -> Resolution: return 0, 0 -def pack_resolution(resolution : Tuple[float, float]) -> str: +def pack_resolution(resolution : Resolution) -> str: width, height = normalize_resolution(resolution) return str(width) + 'x' + str(height) @@ -91,8 +163,9 @@ def unpack_resolution(resolution : str) -> Resolution: return width, height -def resize_frame_resolution(vision_frame : VisionFrame, max_width : int, max_height : int) -> VisionFrame: +def resize_frame_resolution(vision_frame : VisionFrame, max_resolution : Resolution) -> VisionFrame: height, width = vision_frame.shape[:2] + max_width, max_height = max_resolution if height > max_height or width > max_width: scale = min(max_height / height, max_width / width) @@ -106,26 +179,40 @@ def normalize_frame_color(vision_frame : VisionFrame) -> VisionFrame: return cv2.cvtColor(vision_frame, cv2.COLOR_BGR2RGB) -@lru_cache(maxsize = 128) -def read_static_image(image_path : str) -> Optional[VisionFrame]: - return read_image(image_path) +def create_tile_frames(vision_frame : VisionFrame, size : Size) -> Tuple[List[VisionFrame], int, int]: + vision_frame = numpy.pad(vision_frame, ((size[1], size[1]), (size[1], size[1]), (0, 0))) + tile_width = size[0] - 2 * size[2] + pad_size_bottom = size[2] + tile_width - vision_frame.shape[0] % tile_width + pad_size_right = size[2] + tile_width - vision_frame.shape[1] % tile_width + pad_vision_frame = numpy.pad(vision_frame, ((size[2], pad_size_bottom), (size[2], pad_size_right), (0, 0))) + pad_height, pad_width = pad_vision_frame.shape[:2] + row_range = range(size[2], pad_height - size[2], tile_width) + col_range = range(size[2], pad_width - size[2], tile_width) + tile_vision_frames = [] + + for row_vision_frame in row_range: + top = row_vision_frame - size[2] + bottom = row_vision_frame + size[2] + tile_width + for column_vision_frame in col_range: + left = column_vision_frame - size[2] + right = column_vision_frame + size[2] + 
tile_width + tile_vision_frames.append(pad_vision_frame[top:bottom, left:right, :]) + return tile_vision_frames, pad_width, pad_height -def read_static_images(image_paths : List[str]) -> Optional[List[VisionFrame]]: - frames = [] - if image_paths: - for image_path in image_paths: - frames.append(read_static_image(image_path)) - return frames +def merge_tile_frames(tile_vision_frames : List[VisionFrame], temp_width : int, temp_height : int, pad_width : int, pad_height : int, size : Size) -> VisionFrame: + merge_vision_frame = numpy.zeros((pad_height, pad_width, 3)).astype(numpy.uint8) + tile_width = tile_vision_frames[0].shape[1] - 2 * size[2] + tiles_per_row = min(pad_width // tile_width, len(tile_vision_frames)) - -def read_image(image_path : str) -> Optional[VisionFrame]: - if is_image(image_path): - return cv2.imread(image_path) - return None - - -def write_image(image_path : str, frame : VisionFrame) -> bool: - if image_path: - return cv2.imwrite(image_path, frame) - return False + for index, tile_vision_frame in enumerate(tile_vision_frames): + tile_vision_frame = tile_vision_frame[size[2]:-size[2], size[2]:-size[2]] + row_index = index // tiles_per_row + col_index = index % tiles_per_row + top = row_index * tile_vision_frame.shape[0] + bottom = top + tile_vision_frame.shape[0] + left = col_index * tile_vision_frame.shape[1] + right = left + tile_vision_frame.shape[1] + merge_vision_frame[top:bottom, left:right, :] = tile_vision_frame + merge_vision_frame = merge_vision_frame[size[1] : size[1] + temp_height, size[1]: size[1] + temp_width, :] + return merge_vision_frame diff --git a/facefusion/wording.py b/facefusion/wording.py index ce7016f8..39befc0d 100755 --- a/facefusion/wording.py +++ b/facefusion/wording.py @@ -5,19 +5,27 @@ WORDING : Dict[str, Any] =\ 'python_not_supported': 'Python version is not supported, upgrade to {version} or higher', 'ffmpeg_not_installed': 'FFMpeg is not installed', 'creating_temp': 'Creating temporary resources', - 'extracting_frames_fps': 'Extracting frames with {video_fps} FPS', + 'extracting_frames': 'Extracting frames with a resolution of {resolution} and {fps} frames per second', + 'extracting_frames_succeed': 'Extracting frames succeed', + 'extracting_frames_failed': 'Extracting frames failed', 'analysing': 'Analysing', 'processing': 'Processing', 'downloading': 'Downloading', 'temp_frames_not_found': 'Temporary frames not found', - 'compressing_image_succeed': 'Compressing image succeed', - 'compressing_image_skipped': 'Compressing image skipped', - 'merging_video_fps': 'Merging video with {video_fps} FPS', + 'copying_image': 'Copying image with a resolution of {resolution}', + 'copying_image_succeed': 'Copying image succeed', + 'copying_image_failed': 'Copying image failed', + 'finalizing_image': 'Finalizing image with a resolution of {resolution}', + 'finalizing_image_succeed': 'Finalizing image succeed', + 'finalizing_image_skipped': 'Finalizing image skipped', + 'merging_video': 'Merging video with a resolution of {resolution} and {fps} frames per second', + 'merging_video_succeed': 'Merging video succeed', 'merging_video_failed': 'Merging video failed', 'skipping_audio': 'Skipping audio', 'restoring_audio_succeed': 'Restoring audio succeed', 'restoring_audio_skipped': 'Restoring audio skipped', 'clearing_temp': 'Clearing temporary resources', + 'processing_stopped': 'Processing stopped', 'processing_image_succeed': 'Processing to image succeed in {seconds} seconds', 'processing_image_failed': 'Processing to image failed', 
'processing_video_succeed': 'Processing to video succeed in {seconds} seconds', @@ -67,8 +75,9 @@ WORDING : Dict[str, Any] =\ 'face_detector_model': 'choose the model responsible for detecting the face', 'face_detector_size': 'specify the size of the frame provided to the face detector', 'face_detector_score': 'filter the detected faces base on the confidence score', + 'face_landmarker_score': 'filter the detected landmarks base on the confidence score', # face selector - 'face_selector_mode': 'use reference based tracking with simple matching', + 'face_selector_mode': 'use reference based tracking or simple matching', 'reference_face_position': 'specify the position used to create the reference face', 'reference_face_distance': 'specify the desired similarity between the reference face and target face', 'reference_frame_number': 'specify the frame used to create the reference face', @@ -81,10 +90,10 @@ WORDING : Dict[str, Any] =\ 'trim_frame_start': 'specify the the start frame of the target video', 'trim_frame_end': 'specify the the end frame of the target video', 'temp_frame_format': 'specify the temporary resources format', - 'temp_frame_quality': 'specify the temporary resources quality', 'keep_temp': 'keep the temporary resources after processing', # output creation 'output_image_quality': 'specify the image quality which translates to the compression factor', + 'output_image_resolution': 'specify the image output resolution based on the target image', 'output_video_encoder': 'specify the encoder use for the video compression', 'output_video_preset': 'balance fast video processing and video file size', 'output_video_quality': 'specify the video quality which translates to the compression factor', @@ -131,6 +140,7 @@ WORDING : Dict[str, Any] =\ 'face_detector_model_dropdown': 'FACE DETECTOR MODEL', 'face_detector_size_dropdown': 'FACE DETECTOR SIZE', 'face_detector_score_slider': 'FACE DETECTOR SCORE', + 'face_landmarker_score_slider': 'FACE LANDMARKER SCORE', # face masker 'face_mask_types_checkbox_group': 'FACE MASK TYPES', 'face_mask_blur_slider': 'FACE MASK BLUR', @@ -161,6 +171,7 @@ WORDING : Dict[str, Any] =\ # output options 'output_path_textbox': 'OUTPUT PATH', 'output_image_quality_slider': 'OUTPUT IMAGE QUALITY', + 'output_image_resolution_dropdown': 'OUTPUT IMAGE RESOLUTION', 'output_video_encoder_dropdown': 'OUTPUT VIDEO ENCODER', 'output_video_preset_dropdown': 'OUTPUT VIDEO PRESET', 'output_video_quality_slider': 'OUTPUT VIDEO QUALITY', @@ -175,7 +186,6 @@ WORDING : Dict[str, Any] =\ 'target_file': 'TARGET', # temp frame 'temp_frame_format_dropdown': 'TEMP FRAME FORMAT', - 'temp_frame_quality_slider': 'TEMP FRAME QUALITY', # trim frame 'trim_frame_start_slider': 'TRIM FRAME START', 'trim_frame_end_slider': 'TRIM FRAME END', diff --git a/requirements.txt b/requirements.txt index d4f0bc51..7f72f340 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,11 +1,9 @@ -basicsr==1.4.2 filetype==1.2.0 gradio==3.50.2 -numpy==1.26.2 +numpy==1.26.4 onnx==1.15.0 onnxruntime==1.16.3 opencv-python==4.8.1.78 -psutil==5.9.6 -realesrgan==0.3.0 -torch==2.1.2 -tqdm==4.66.1 +psutil==5.9.8 +tqdm==4.66.2 +scipy==1.12.0 diff --git a/tests/test_common_helper.py b/tests/test_common_helper.py index 339e9a1e..eaafbeef 100644 --- a/tests/test_common_helper.py +++ b/tests/test_common_helper.py @@ -1,4 +1,4 @@ -from facefusion.common_helper import create_metavar, create_int_range, create_float_range +from facefusion.common_helper import create_metavar, create_int_range, create_float_range, 
extract_major_version def test_create_metavar() -> None: @@ -13,3 +13,9 @@ def test_create_int_range() -> None: def test_create_float_range() -> None: assert create_float_range(0.0, 1.0, 0.5) == [ 0.0, 0.5, 1.0 ] assert create_float_range(0.0, 0.2, 0.05) == [ 0.0, 0.05, 0.10, 0.15, 0.20 ] + + +def test_extract_major_version() -> None: + assert extract_major_version('1') == (1, 0) + assert extract_major_version('1.1') == (1, 1) + assert extract_major_version('1.2.0') == (1, 2) diff --git a/tests/test_execution_helper.py b/tests/test_execution.py similarity index 63% rename from tests/test_execution_helper.py rename to tests/test_execution.py index bbb69ae7..a847f424 100644 --- a/tests/test_execution_helper.py +++ b/tests/test_execution.py @@ -1,4 +1,4 @@ -from facefusion.execution_helper import encode_execution_providers, decode_execution_providers, apply_execution_provider_options, map_torch_backend +from facefusion.execution import encode_execution_providers, decode_execution_providers, apply_execution_provider_options def test_encode_execution_providers() -> None: @@ -19,8 +19,3 @@ def test_multiple_execution_providers() -> None: }) ] assert apply_execution_provider_options([ 'CPUExecutionProvider', 'CUDAExecutionProvider' ]) == execution_provider_with_options - - -def test_map_device() -> None: - assert map_torch_backend([ 'CPUExecutionProvider' ]) == 'cpu' - assert map_torch_backend([ 'CPUExecutionProvider', 'CUDAExecutionProvider' ]) == 'cuda' diff --git a/tests/test_face_analyser.py b/tests/test_face_analyser.py index 6ec1f7f1..f4fa0dae 100644 --- a/tests/test_face_analyser.py +++ b/tests/test_face_analyser.py @@ -21,14 +21,33 @@ def before_all() -> None: @pytest.fixture(autouse = True) def before_each() -> None: + facefusion.globals.face_detector_score = 0.5 + facefusion.globals.face_landmarker_score = 0.5 + facefusion.globals.face_recognizer_model = 'arcface_inswapper' clear_face_analyser() def test_get_one_face_with_retinaface() -> None: facefusion.globals.face_detector_model = 'retinaface' facefusion.globals.face_detector_size = '320x320' - facefusion.globals.face_detector_score = 0.5 - facefusion.globals.face_recognizer_model = 'arcface_inswapper' + + source_paths =\ + [ + '.assets/examples/source.jpg', + '.assets/examples/source-80crop.jpg', + '.assets/examples/source-70crop.jpg', + '.assets/examples/source-60crop.jpg' + ] + for source_path in source_paths: + source_frame = read_static_image(source_path) + face = get_one_face(source_frame) + + assert isinstance(face, Face) + + +def test_get_one_face_with_scrfd() -> None: + facefusion.globals.face_detector_model = 'scrfd' + facefusion.globals.face_detector_size = '640x640' source_paths =\ [ @@ -47,8 +66,6 @@ def test_get_one_face_with_retinaface() -> None: def test_get_one_face_with_yoloface() -> None: facefusion.globals.face_detector_model = 'yoloface' facefusion.globals.face_detector_size = '640x640' - facefusion.globals.face_detector_score = 0.5 - facefusion.globals.face_recognizer_model = 'arcface_inswapper' source_paths =\ [ @@ -67,8 +84,6 @@ def test_get_one_face_with_yoloface() -> None: def test_get_one_face_with_yunet() -> None: facefusion.globals.face_detector_model = 'yunet' facefusion.globals.face_detector_size = '640x640' - facefusion.globals.face_detector_score = 0.5 - facefusion.globals.face_recognizer_model = 'arcface_inswapper' source_paths =\ [ diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py index f46467c0..5c4bd2d0 100644 --- a/tests/test_ffmpeg.py +++ b/tests/test_ffmpeg.py @@ -3,6 +3,7 @@ import 
subprocess import pytest import facefusion.globals +from facefusion import process_manager from facefusion.filesystem import get_temp_directory_path, create_temp, clear_temp from facefusion.download import conditional_download from facefusion.ffmpeg import extract_frames, read_audio_buffer @@ -10,6 +11,7 @@ from facefusion.ffmpeg import extract_frames, read_audio_buffer @pytest.fixture(scope = 'module', autouse = True) def before_all() -> None: + process_manager.start() conditional_download('.assets/examples', [ 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', @@ -26,7 +28,6 @@ def before_all() -> None: def before_each() -> None: facefusion.globals.trim_frame_start = None facefusion.globals.trim_frame_end = None - facefusion.globals.temp_frame_quality = 80 facefusion.globals.temp_frame_format = 'jpg' @@ -37,6 +38,7 @@ def test_extract_frames() -> None: '.assets/examples/target-240p-30fps.mp4', '.assets/examples/target-240p-60fps.mp4' ] + for target_path in target_paths: temp_directory_path = get_temp_directory_path(target_path) create_temp(target_path) @@ -55,6 +57,7 @@ def test_extract_frames_with_trim_start() -> None: ('.assets/examples/target-240p-30fps.mp4', 100), ('.assets/examples/target-240p-60fps.mp4', 212) ] + for target_path, frame_total in data_provider: temp_directory_path = get_temp_directory_path(target_path) create_temp(target_path) @@ -74,6 +77,7 @@ def test_extract_frames_with_trim_start_and_trim_end() -> None: ('.assets/examples/target-240p-30fps.mp4', 100), ('.assets/examples/target-240p-60fps.mp4', 50) ] + for target_path, frame_total in data_provider: temp_directory_path = get_temp_directory_path(target_path) create_temp(target_path) @@ -92,6 +96,7 @@ def test_extract_frames_with_trim_end() -> None: ('.assets/examples/target-240p-30fps.mp4', 100), ('.assets/examples/target-240p-60fps.mp4', 50) ] + for target_path, frame_total in data_provider: temp_directory_path = get_temp_directory_path(target_path) create_temp(target_path) diff --git a/tests/test_normalizer.py b/tests/test_normalizer.py index 0d0512a8..28b79149 100644 --- a/tests/test_normalizer.py +++ b/tests/test_normalizer.py @@ -4,17 +4,16 @@ from facefusion.normalizer import normalize_output_path, normalize_padding, norm def test_normalize_output_path() -> None: - if platform.system().lower() != 'windows': - assert normalize_output_path([ '.assets/examples/source.jpg' ], None, '.assets/examples/target-240p.mp4') == '.assets/examples/target-240p.mp4' - assert normalize_output_path(None, '.assets/examples/target-240p.mp4', '.assets/examples/target-240p.mp4') == '.assets/examples/target-240p.mp4' - assert normalize_output_path(None, '.assets/examples/target-240p.mp4', '.assets/examples') == '.assets/examples/target-240p.mp4' - assert normalize_output_path([ '.assets/examples/source.jpg' ], '.assets/examples/target-240p.mp4', '.assets/examples') == '.assets/examples/source-target-240p.mp4' - assert normalize_output_path(None, '.assets/examples/target-240p.mp4', '.assets/examples/output.mp4') == '.assets/examples/output.mp4' - assert normalize_output_path(None, '.assets/examples/target-240p.mp4', '.assets/output.mov') == '.assets/output.mp4' - assert normalize_output_path(None, '.assets/examples/target-240p.mp4', '.assets/examples/invalid') is None - assert normalize_output_path(None, '.assets/examples/target-240p.mp4', '.assets/invalid/output.mp4') is None - assert normalize_output_path(None, '.assets/examples/target-240p.mp4', 'invalid') is None - assert 
normalize_output_path([ '.assets/examples/source.jpg' ], '.assets/examples/target-240p.mp4', None) is None + if platform.system().lower() == 'linux' or platform.system().lower() == 'darwin': + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples/target-240p.mp4') == '.assets/examples/target-240p.mp4' + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples').startswith('.assets/examples/target-240p') + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples').endswith('.mp4') + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples/output.mp4') == '.assets/examples/output.mp4' + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/examples/invalid') is None + assert normalize_output_path('.assets/examples/target-240p.mp4', '.assets/invalid/output.mp4') is None + assert normalize_output_path('.assets/examples/target-240p.mp4', 'invalid') is None + assert normalize_output_path('.assets/examples/target-240p.mp4', None) is None + assert normalize_output_path(None, '.assets/examples/output.mp4') is None def test_normalize_padding() -> None: diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py new file mode 100644 index 00000000..1fbe74bb --- /dev/null +++ b/tests/test_process_manager.py @@ -0,0 +1,22 @@ +from facefusion.process_manager import set_process_state, is_processing, is_stopping, is_pending, start, stop, end + + +def test_start() -> None: + set_process_state('pending') + start() + + assert is_processing() + + +def test_stop() -> None: + set_process_state('processing') + stop() + + assert is_stopping() + + +def test_end() -> None: + set_process_state('processing') + end() + + assert is_pending() diff --git a/tests/test_vision.py b/tests/test_vision.py index 3368eef3..6cf48647 100644 --- a/tests/test_vision.py +++ b/tests/test_vision.py @@ -2,7 +2,7 @@ import subprocess import pytest from facefusion.download import conditional_download -from facefusion.vision import get_video_frame, count_video_frame_total, detect_video_fps, detect_video_resolution, pack_resolution, unpack_resolution, create_video_resolutions +from facefusion.vision import detect_image_resolution, restrict_image_resolution, create_image_resolutions, get_video_frame, count_video_frame_total, detect_video_fps, restrict_video_fps, detect_video_resolution, restrict_video_resolution, create_video_resolutions, normalize_resolution, pack_resolution, unpack_resolution @pytest.fixture(scope = 'module', autouse = True) @@ -13,6 +13,10 @@ def before_all() -> None: 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1080p.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '.assets/examples/target-240p.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-1080p.mp4', '-vframes', '1', '.assets/examples/target-1080p.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '-vf', 'transpose=0', '.assets/examples/target-240p-90deg.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-1080p.mp4', '-vframes', '1', '-vf', 'transpose=0', '.assets/examples/target-1080p-90deg.jpg' ]) subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=25', '.assets/examples/target-240p-25fps.mp4' ]) subprocess.run([ 'ffmpeg', '-i', 
'.assets/examples/target-240p.mp4', '-vf', 'fps=30', '.assets/examples/target-240p-30fps.mp4' ]) subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=60', '.assets/examples/target-240p-60fps.mp4' ]) @@ -20,6 +24,28 @@ def before_all() -> None: subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-1080p.mp4', '-vf', 'transpose=0', '.assets/examples/target-1080p-90deg.mp4' ]) +def test_detect_image_resolution() -> None: + assert detect_image_resolution('.assets/examples/target-240p.jpg') == (426, 226) + assert detect_image_resolution('.assets/examples/target-240p-90deg.jpg') == (226, 426) + assert detect_image_resolution('.assets/examples/target-1080p.jpg') == (2048, 1080) + assert detect_image_resolution('.assets/examples/target-1080p-90deg.jpg') == (1080, 2048) + assert detect_image_resolution('invalid') is None + + +def test_restrict_image_resolution() -> None: + assert restrict_image_resolution('.assets/examples/target-1080p.jpg', (426, 226)) == (426, 226) + assert restrict_image_resolution('.assets/examples/target-1080p.jpg', (2048, 1080)) == (2048, 1080) + assert restrict_image_resolution('.assets/examples/target-1080p.jpg', (4096, 2160)) == (2048, 1080) + + +def test_create_image_resolutions() -> None: + assert create_image_resolutions((426, 226)) == [ '106x56', '212x112', '320x170', '426x226', '640x340', '852x452', '1064x564', '1278x678', '1492x792', '1704x904' ] + assert create_image_resolutions((226, 426)) == [ '56x106', '112x212', '170x320', '226x426', '340x640', '452x852', '564x1064', '678x1278', '792x1492', '904x1704' ] + assert create_image_resolutions((2048, 1080)) == [ '512x270', '1024x540', '1536x810', '2048x1080', '3072x1620', '4096x2160', '5120x2700', '6144x3240', '7168x3780', '8192x4320' ] + assert create_image_resolutions((1080, 2048)) == [ '270x512', '540x1024', '810x1536', '1080x2048', '1620x3072', '2160x4096', '2700x5120', '3240x6144', '3780x7168', '4320x8192' ] + assert create_image_resolutions(None) == [] + + def test_get_video_frame() -> None: assert get_video_frame('.assets/examples/target-240p-25fps.mp4') is not None assert get_video_frame('invalid') is None @@ -39,25 +65,45 @@ def test_detect_video_fps() -> None: assert detect_video_fps('invalid') is None +def test_restrict_video_fps() -> None: + assert restrict_video_fps('.assets/examples/target-1080p.mp4', 20.0) == 20.0 + assert restrict_video_fps('.assets/examples/target-1080p.mp4', 25.0) == 25.0 + assert restrict_video_fps('.assets/examples/target-1080p.mp4', 60.0) == 25.0 + + def test_detect_video_resolution() -> None: - assert detect_video_resolution('.assets/examples/target-240p.mp4') == (426.0, 226.0) - assert detect_video_resolution('.assets/examples/target-1080p.mp4') == (2048.0, 1080.0) + assert detect_video_resolution('.assets/examples/target-240p.mp4') == (426, 226) + assert detect_video_resolution('.assets/examples/target-240p-90deg.mp4') == (226, 426) + assert detect_video_resolution('.assets/examples/target-1080p.mp4') == (2048, 1080) + assert detect_video_resolution('.assets/examples/target-1080p-90deg.mp4') == (1080, 2048) assert detect_video_resolution('invalid') is None +def test_restrict_video_resolution() -> None: + assert restrict_video_resolution('.assets/examples/target-1080p.mp4', (426, 226)) == (426, 226) + assert restrict_video_resolution('.assets/examples/target-1080p.mp4', (2048, 1080)) == (2048, 1080) + assert restrict_video_resolution('.assets/examples/target-1080p.mp4', (4096, 2160)) == (2048, 1080) + + +def test_create_video_resolutions() -> None: 
+ assert create_video_resolutions((426, 226)) == [ '426x226', '452x240', '678x360', '904x480', '1018x540', '1358x720', '2036x1080', '2714x1440', '4072x2160', '8144x4320' ] + assert create_video_resolutions((226, 426)) == [ '226x426', '240x452', '360x678', '480x904', '540x1018', '720x1358', '1080x2036', '1440x2714', '2160x4072', '4320x8144' ] + assert create_video_resolutions((2048, 1080)) == [ '456x240', '682x360', '910x480', '1024x540', '1366x720', '2048x1080', '2730x1440', '4096x2160', '8192x4320' ] + assert create_video_resolutions((1080, 2048)) == [ '240x456', '360x682', '480x910', '540x1024', '720x1366', '1080x2048', '1440x2730', '2160x4096', '4320x8192' ] + assert create_video_resolutions(None) == [] + + +def test_normalize_resolution() -> None: + assert normalize_resolution((2.5, 2.5)) == (2, 2) + assert normalize_resolution((3.0, 3.0)) == (4, 4) + assert normalize_resolution((6.5, 6.5)) == (6, 6) + + def test_pack_resolution() -> None: - assert pack_resolution((1.0, 1.0)) == '0x0' - assert pack_resolution((2.0, 2.0)) == '2x2' + assert pack_resolution((1, 1)) == '0x0' + assert pack_resolution((2, 2)) == '2x2' def test_unpack_resolution() -> None: assert unpack_resolution('0x0') == (0, 0) assert unpack_resolution('2x2') == (2, 2) - - -def test_create_video_resolutions() -> None: - assert create_video_resolutions('.assets/examples/target-240p.mp4') == [ '426x226', '452x240', '678x360', '904x480', '1018x540', '1358x720', '2036x1080', '2714x1440', '4072x2160' ] - assert create_video_resolutions('.assets/examples/target-240p-90deg.mp4') == [ '226x426', '240x452', '360x678', '480x904', '540x1018', '720x1358', '1080x2036', '1440x2714', '2160x4072' ] - assert create_video_resolutions('.assets/examples/target-1080p.mp4') == [ '456x240', '682x360', '910x480', '1024x540', '1366x720', '2048x1080', '2730x1440', '4096x2160' ] - assert create_video_resolutions('.assets/examples/target-1080p-90deg.mp4') == [ '240x456', '360x682', '480x910', '540x1024', '720x1366', '1080x2048', '1440x2730', '2160x4096' ] - assert create_video_resolutions('invalid') is None
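The tests above pin down the behaviour of the new image resolution helpers in facefusion/vision.py; a short usage sketch may make the intended flow clearer. This is a minimal sketch, not code from the diff: the example path is one of the assets the test fixture above generates, and the 3840x2160 cap is an arbitrary illustrative value.

from facefusion.vision import detect_image_resolution, restrict_image_resolution, create_image_resolutions, pack_resolution, unpack_resolution

image_path = '.assets/examples/target-240p.jpg' # created by the test fixture above
image_resolution = detect_image_resolution(image_path) # (426, 226)
# the requested output resolution is capped at the source resolution, mirroring restrict_video_resolution
output_resolution = restrict_image_resolution(image_path, unpack_resolution('3840x2160'))
print(pack_resolution(output_resolution)) # '426x226'
# candidate choices for the OUTPUT IMAGE RESOLUTION dropdown
print(create_image_resolutions(image_resolution)) # [ '106x56', '212x112', ..., '1704x904' ]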
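The new create_tile_frames and merge_tile_frames helpers added to facefusion/vision.py split a frame into overlapping tiles and stitch processed tiles back together, presumably so the reworked frame enhancer can run fixed-size models over frames of any resolution. The following round-trip sketch is illustrative only: the tile configuration (256 px tiles, 16 px frame padding, 16 px overlap) and the passthrough_enhance stand-in are assumptions, not values taken from the repository.

import numpy

from facefusion.vision import create_tile_frames, merge_tile_frames


def passthrough_enhance(tile_vision_frame : numpy.ndarray) -> numpy.ndarray:
	# stand-in for a per-tile model call; a real enhancer would upscale the tile here
	return tile_vision_frame


vision_frame = numpy.random.randint(0, 255, (226, 426, 3), dtype = numpy.uint8)
temp_height, temp_width = vision_frame.shape[:2]
tile_size = (256, 16, 16) # assumed interpretation: (tile size, frame padding, tile overlap)

# split the frame into overlapping tiles, process each tile, then stitch them back together
tile_vision_frames, pad_width, pad_height = create_tile_frames(vision_frame, tile_size)
tile_vision_frames = [ passthrough_enhance(tile_vision_frame) for tile_vision_frame in tile_vision_frames ]
merge_vision_frame = merge_tile_frames(tile_vision_frames, temp_width, temp_height, pad_width, pad_height, tile_size)

assert merge_vision_frame.shape == vision_frame.shape

With a model that actually upscales its tiles, the merged dimensions, pad dimensions and tile size passed to merge_tile_frames would presumably need to be multiplied by the model's scale factor; the identity round trip above only demonstrates that the tiling and stitching are lossless.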