diff --git a/.flake8 b/.flake8 index 483099ec..1ca6ddab 100644 --- a/.flake8 +++ b/.flake8 @@ -1,3 +1,3 @@ [flake8] select = E3, E4, F -per-file-ignores = facefusion/core.py:E402, facefusion/installer.py:E402 +per-file-ignores = facefusion/core.py:E402 diff --git a/.github/preview.png b/.github/preview.png index 1ee2d023..1287db0f 100644 Binary files a/.github/preview.png and b/.github/preview.png differ diff --git a/README.md b/README.md index 1f7b530f..404f9cc1 100644 --- a/README.md +++ b/README.md @@ -30,73 +30,74 @@ Run the command: python run.py [options] options: - -h, --help show this help message and exit - -s SOURCE_PATHS, --source SOURCE_PATHS select a source image - -t TARGET_PATH, --target TARGET_PATH select a target image or video - -o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory - -v, --version show program's version number and exit + -h, --help show this help message and exit + -s SOURCE_PATHS, --source SOURCE_PATHS choose single or multiple source images + -t TARGET_PATH, --target TARGET_PATH choose single target image or video + -o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory + -v, --version show program's version number and exit misc: - --skip-download omit automate downloads and lookups - --headless run the program in headless mode - --log-level {error,warn,info,debug} choose from the available log levels + --skip-download omit automate downloads and remote lookups + --headless run the program without a user interface + --log-level {error,warn,info,debug} adjust the message severity displayed in the terminal execution: - --execution-providers EXECUTION_PROVIDERS [EXECUTION_PROVIDERS ...] choose from the available execution providers (choices: cpu, ...) - --execution-thread-count [1-128] specify the number of execution threads - --execution-queue-count [1-32] specify the number of execution queries + --execution-providers EXECUTION_PROVIDERS [EXECUTION_PROVIDERS ...] accelerate the model inference using different providers (choices: cpu, ...) + --execution-thread-count [1-128] specify the amount of parallel threads while processing + --execution-queue-count [1-32] specify the amount of frames each thread is processing memory: - --video-memory-strategy {strict,moderate,tolerant} specify strategy to handle the video memory - --system-memory-limit [0-128] specify the amount (gb) of system memory to be used + --video-memory-strategy {strict,moderate,tolerant} balance fast frame processing and low vram usage + --system-memory-limit [0-128] limit the available ram that can be used while processing face analyser: - --face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order used for the face analyser - --face-analyser-age {child,teen,adult,senior} specify the age used for the face analyser - --face-analyser-gender {male,female} specify the gender used for the face analyser - --face-detector-model {retinaface,yunet} specify the model used for the face detector - --face-detector-size {160x160,320x320,480x480,512x512,640x640,768x768,960x960,1024x1024} specify the size threshold used for the face detector - --face-detector-score [0.0-1.0] specify the score threshold used for the face detector + --face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order in which the face analyser detects faces. 
+ --face-analyser-age {child,teen,adult,senior} filter the detected faces based on their age
+ --face-analyser-gender {female,male} filter the detected faces based on their gender
+ --face-detector-model {retinaface,yoloface,yunet} choose the model responsible for detecting the face
+ --face-detector-size FACE_DETECTOR_SIZE specify the size of the frame provided to the face detector
+ --face-detector-score [0.0-1.0] filter the detected faces based on the confidence score
face selector:
- --face-selector-mode {reference,one,many} specify the mode for the face selector
- --reference-face-position REFERENCE_FACE_POSITION specify the position of the reference face
- --reference-face-distance [0.0-1.5] specify the distance between the reference face and the target face
- --reference-frame-number REFERENCE_FRAME_NUMBER specify the number of the reference frame
+ --face-selector-mode {reference,one,many} use reference based tracking with simple matching
+ --reference-face-position REFERENCE_FACE_POSITION specify the position used to create the reference face
+ --reference-face-distance [0.0-1.5] specify the desired similarity between the reference face and target face
+ --reference-frame-number REFERENCE_FRAME_NUMBER specify the frame used to create the reference face
face mask:
- --face-mask-types FACE_MASK_TYPES [FACE_MASK_TYPES ...] choose from the available face mask types (choices: box, occlusion, region)
- --face-mask-blur [0.0-1.0] specify the blur amount for face mask
- --face-mask-padding FACE_MASK_PADDING [FACE_MASK_PADDING ...] specify the face mask padding (top, right, bottom, left) in percent
- --face-mask-regions FACE_MASK_REGIONS [FACE_MASK_REGIONS ...] choose from the available face mask regions (choices: skin, left-eyebrow, right-eyebrow, left-eye, right-eye, eye-glasses, nose, mouth, upper-lip, lower-lip)
+ --face-mask-types FACE_MASK_TYPES [FACE_MASK_TYPES ...] mix and match different face mask types (choices: box, occlusion, region)
+ --face-mask-blur [0.0-1.0] specify the degree of blur applied to the box mask
+ --face-mask-padding FACE_MASK_PADDING [FACE_MASK_PADDING ...] apply top, right, bottom and left padding to the box mask
+ --face-mask-regions FACE_MASK_REGIONS [FACE_MASK_REGIONS ...] choose the facial features used for the region mask (choices: skin, left-eyebrow, right-eyebrow, left-eye, right-eye, eye-glasses, nose, mouth, upper-lip, lower-lip)
frame extraction:
- --trim-frame-start TRIM_FRAME_START specify the start frame for extraction
- --trim-frame-end TRIM_FRAME_END specify the end frame for extraction
- --temp-frame-format {jpg,png,bmp} specify the image format used for frame extraction
- --temp-frame-quality [0-100] specify the image quality used for frame extraction
- --keep-temp retain temporary frames after processing
+ --trim-frame-start TRIM_FRAME_START specify the start frame of the target video
+ --trim-frame-end TRIM_FRAME_END specify the end frame of the target video
+ --temp-frame-format {bmp,jpg,png} specify the temporary resources format
+ --temp-frame-quality [0-100] specify the temporary resources quality
+ --keep-temp keep the temporary resources after processing
output creation:
- --output-image-quality [0-100] specify the quality used for the output image
- --output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc} specify the encoder used for the output video
- --output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} specify the preset used for the output video
- --output-video-quality [0-100] specify the quality used for the output video
- --output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the resolution used for the output video
- --output-video-fps OUTPUT_VIDEO_FPS specify the frames per second (fps) used for the output video
- --skip-audio omit audio from the target
+ --output-image-quality [0-100] specify the image quality which translates to the compression factor
+ --output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc} specify the encoder used for the video compression
+ --output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} balance fast video processing and video file size
+ --output-video-quality [0-100] specify the video quality which translates to the compression factor
+ --output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the video output resolution based on the target video
+ --output-video-fps OUTPUT_VIDEO_FPS specify the video output fps based on the target video
+ --skip-audio omit the audio from the target video
frame processors:
- --frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] choose from the available frame processors (choices: face_debugger, face_enhancer, face_swapper, frame_enhancer, ...)
- --face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] specify the face debugger items (choices: bbox, kps, face-mask, score)
- --face-enhancer-model {codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,restoreformer} choose the model for the frame processor
- --face-enhancer-blend [0-100] specify the blend amount for the frame processor
- --face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial} choose the model for the frame processor
- --frame-enhancer-model {real_esrgan_x2plus,real_esrgan_x4plus,real_esrnet_x4plus} choose the model for the frame processor
- --frame-enhancer-blend [0-100] specify the blend amount for the frame processor
+ --frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] load a single or multiple frame processors (choices: face_debugger, face_enhancer, face_swapper, frame_enhancer, lip_syncer, ...)
+ --face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] 
load a single or multiple frame processors (choices: bounding-box, landmark-5, landmark-68, face-mask, score, age, gender) + --face-enhancer-model {codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,restoreformer_plus_plus} choose the model responsible for enhancing the face + --face-enhancer-blend [0-100] blend the enhanced into the previous face + --face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial,uniface_256} choose the model responsible for swapping the face + --frame-enhancer-model {real_esrgan_x2plus,real_esrgan_x4plus,real_esrnet_x4plus} choose the model responsible for enhancing the frame + --frame-enhancer-blend [0-100] blend the enhanced into the previous frame + --lip-syncer-model {wav2lip_gan} choose the model responsible for syncing the lips uis: - --ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] choose from the available ui layouts (choices: benchmark, webcam, default, ...) + --ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] launch a single or multiple UI layouts (choices: benchmark, default, webcam, ...) ``` diff --git a/facefusion.ini b/facefusion.ini index 2c7be36a..b2932ad4 100644 --- a/facefusion.ini +++ b/facefusion.ini @@ -61,6 +61,7 @@ face_enhancer_blend = face_swapper_model = frame_enhancer_model = frame_enhancer_blend = +lip_syncer_model = [uis] ui_layouts = diff --git a/facefusion/audio.py b/facefusion/audio.py new file mode 100644 index 00000000..cd4a08b1 --- /dev/null +++ b/facefusion/audio.py @@ -0,0 +1,76 @@ +from typing import Optional, Any, List +from functools import lru_cache +import numpy +import scipy + +from facefusion.filesystem import is_audio +from facefusion.ffmpeg import read_audio_buffer +from facefusion.typing import Fps, Audio, Spectrogram, AudioFrame + + +def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]: + if is_audio(audio_path): + audio_frames = read_static_audio(audio_path, fps) + if frame_number < len(audio_frames): + return audio_frames[frame_number] + return None + + +@lru_cache(maxsize = None) +def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + if is_audio(audio_path): + audio_buffer = read_audio_buffer(audio_path, 16000, 2) + audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2) + audio = normalize_audio(audio) + audio = filter_audio(audio, -0.97) + spectrogram = create_spectrogram(audio, 16000, 80, 800, 55.0, 7600.0) + audio_frames = extract_audio_frames(spectrogram, 80, 16, fps) + return audio_frames + return None + + +def normalize_audio(audio : numpy.ndarray[Any, Any]) -> Audio: + if audio.ndim > 1: + audio = numpy.mean(audio, axis = 1) + audio = audio / numpy.max(numpy.abs(audio), axis = 0) + return audio + + +def filter_audio(audio : Audio, filter_coefficient : float) -> Audio: + audio = scipy.signal.lfilter([ 1.0, filter_coefficient ], [1.0], audio) + return audio + + +def convert_hertz_to_mel(hertz : float) -> float: + return 2595 * numpy.log10(1 + hertz / 700) + + +def convert_mel_to_hertz(mel : numpy.ndarray[Any, Any]) -> numpy.ndarray[Any, Any]: + return 700 * (10 ** (mel / 2595) - 1) + + +@lru_cache(maxsize = None) +def create_static_mel_filter(sample_rate : int, filter_total : int, filter_size : int, frequency_minimum : float, frequency_maximum : float) -> numpy.ndarray[Any, Any]: + frequency_maximum = min(sample_rate / 2, frequency_maximum) + mel_filter = numpy.zeros((filter_total, filter_size // 2 + 1)) + mel_bins = 
numpy.linspace(convert_hertz_to_mel(frequency_minimum), convert_hertz_to_mel(frequency_maximum), filter_total + 2) + indices = numpy.floor((filter_size + 1) * convert_mel_to_hertz(mel_bins) / sample_rate).astype(numpy.int16) + for index in range(filter_total): + mel_filter[index, indices[index]: indices[index + 1]] = scipy.signal.windows.triang(indices[index + 1] - indices[index]) + return mel_filter + + +def create_spectrogram(audio : Audio, sample_rate : int, filter_total : int, filter_size : int, frequency_minimum : float, frequency_maximum : float) -> Spectrogram: + mel_filter = create_static_mel_filter(sample_rate, filter_total, filter_size, frequency_minimum, frequency_maximum) + spectrogram = scipy.signal.stft(audio, nperseg = filter_size, noverlap = 600, nfft = filter_size)[2] + spectrogram = numpy.dot(mel_filter, numpy.abs(spectrogram)) + return spectrogram + + +def extract_audio_frames(spectrogram : Spectrogram, filter_total : int, audio_frame_step : int, fps : Fps) -> List[AudioFrame]: + indices = numpy.arange(0, spectrogram.shape[1], filter_total / fps).astype(numpy.int16) + indices = indices[indices >= audio_frame_step] + audio_frames = [] + for index in indices: + audio_frames.append(spectrogram[:, max(0, index - audio_frame_step) : index]) + return audio_frames diff --git a/facefusion/choices.py b/facefusion/choices.py index b2fb133f..d1dc4fce 100755 --- a/facefusion/choices.py +++ b/facefusion/choices.py @@ -1,18 +1,22 @@ -from typing import List +from typing import List, Dict -from facefusion.typing import VideoMemoryStrategy, FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceMaskType, FaceMaskRegion, TempFrameFormat, OutputVideoEncoder, OutputVideoPreset +from facefusion.typing import VideoMemoryStrategy, FaceSelectorMode, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, FaceDetectorModel, FaceMaskType, FaceMaskRegion, TempFrameFormat, OutputVideoEncoder, OutputVideoPreset from facefusion.common_helper import create_int_range, create_float_range video_memory_strategies : List[VideoMemoryStrategy] = [ 'strict', 'moderate', 'tolerant' ] face_analyser_orders : List[FaceAnalyserOrder] = [ 'left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best' ] face_analyser_ages : List[FaceAnalyserAge] = [ 'child', 'teen', 'adult', 'senior' ] -face_analyser_genders : List[FaceAnalyserGender] = [ 'male', 'female' ] -face_detector_models : List[str] = [ 'retinaface', 'yunet' ] -face_detector_sizes : List[str] = [ '160x160', '320x320', '480x480', '512x512', '640x640', '768x768', '960x960', '1024x1024' ] +face_analyser_genders : List[FaceAnalyserGender] = [ 'female', 'male' ] +face_detector_set : Dict[FaceDetectorModel, List[str]] =\ +{ + 'retinaface': [ '160x160', '320x320', '480x480', '512x512', '640x640' ], + 'yoloface': [ '640x640' ], + 'yunet': [ '160x160', '320x320', '480x480', '512x512', '640x640', '768x768', '960x960', '1024x1024' ] +} face_selector_modes : List[FaceSelectorMode] = [ 'reference', 'one', 'many' ] face_mask_types : List[FaceMaskType] = [ 'box', 'occlusion', 'region' ] face_mask_regions : List[FaceMaskRegion] = [ 'skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'eye-glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip' ] -temp_frame_formats : List[TempFrameFormat] = [ 'jpg', 'png', 'bmp' ] +temp_frame_formats : List[TempFrameFormat] = [ 'bmp', 'jpg', 'png' ] output_video_encoders : List[OutputVideoEncoder] = [ 'libx264', 'libx265', 'libvpx-vp9', 'h264_nvenc', 
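The new audio module above reduces a waveform to a mel spectrogram and then slices it into one short window per video frame for downstream consumers such as the new lip_syncer. A minimal, self-contained sketch of the hertz-to-mel conversion and the frame-slicing arithmetic from extract_audio_frames, using a synthetic spectrogram instead of a decoded audio file:

```python
import numpy

def convert_hertz_to_mel(hertz : float) -> float:
	# same formula as facefusion/audio.py above
	return 2595 * numpy.log10(1 + hertz / 700)

def convert_mel_to_hertz(mel : numpy.ndarray) -> numpy.ndarray:
	return 700 * (10 ** (mel / 2595) - 1)

# round trip: 440 Hz survives the mel conversion
print(convert_mel_to_hertz(numpy.array([ convert_hertz_to_mel(440.0) ])))

# slice a (mel bins x time) spectrogram into one window per video frame,
# mirroring extract_audio_frames: 80 mel bins, 16 column windows, 25 fps
spectrogram = numpy.random.rand(80, 1000) # synthetic stand-in for a real spectrogram
filter_total, audio_frame_step, fps = 80, 16, 25.0
indices = numpy.arange(0, spectrogram.shape[1], filter_total / fps).astype(numpy.int16)
indices = indices[indices >= audio_frame_step]
audio_frames = [ spectrogram[:, index - audio_frame_step : index] for index in indices ]
print(len(audio_frames), audio_frames[0].shape) # each window is (80, 16)
```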
'hevc_nvenc' ] output_video_presets : List[OutputVideoPreset] = [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast', 'medium', 'slow', 'slower', 'veryslow' ] diff --git a/facefusion/common_helper.py b/facefusion/common_helper.py index 5e258511..ea1ec830 100644 --- a/facefusion/common_helper.py +++ b/facefusion/common_helper.py @@ -12,3 +12,7 @@ def create_int_range(start : int, stop : int, step : int) -> List[int]: def create_float_range(start : float, stop : float, step : float) -> List[float]: return (numpy.around(numpy.arange(start, stop + step, step), decimals = 2)).tolist() + + +def get_first(__list__ : Any) -> Any: + return next(iter(__list__), None) diff --git a/facefusion/config.py b/facefusion/config.py index a58d24dd..f6aaabae 100644 --- a/facefusion/config.py +++ b/facefusion/config.py @@ -82,8 +82,11 @@ def get_float_list(key : str, fallback : Optional[str] = None) -> Optional[List[ def get_value_by_notation(key : str) -> Optional[Any]: config = get_config() - section, name = key.split('.') - if section in config and name in config[section]: - return config[section][name] + if '.' in key: + section, name = key.split('.') + if section in config and name in config[section]: + return config[section][name] + if key in config: + return config[key] return None diff --git a/facefusion/content_analyser.py b/facefusion/content_analyser.py index 4c8b9804..5f6133d2 100644 --- a/facefusion/content_analyser.py +++ b/facefusion/content_analyser.py @@ -8,7 +8,7 @@ from tqdm import tqdm import facefusion.globals from facefusion import wording -from facefusion.typing import Frame, ModelValue, Fps +from facefusion.typing import VisionFrame, ModelValue, Fps from facefusion.execution_helper import apply_execution_provider_options from facefusion.vision import get_video_frame, count_video_frame_total, read_image, detect_video_fps from facefusion.filesystem import resolve_relative_path @@ -53,28 +53,28 @@ def pre_check() -> bool: return True -def analyse_stream(frame : Frame, video_fps : Fps) -> bool: +def analyse_stream(vision_frame : VisionFrame, video_fps : Fps) -> bool: global STREAM_COUNTER STREAM_COUNTER = STREAM_COUNTER + 1 if STREAM_COUNTER % int(video_fps) == 0: - return analyse_frame(frame) + return analyse_frame(vision_frame) return False -def prepare_frame(frame : Frame) -> Frame: - frame = cv2.resize(frame, (224, 224)).astype(numpy.float32) - frame -= numpy.array([ 104, 117, 123 ]).astype(numpy.float32) - frame = numpy.expand_dims(frame, axis = 0) - return frame +def prepare_frame(vision_frame : VisionFrame) -> VisionFrame: + vision_frame = cv2.resize(vision_frame, (224, 224)).astype(numpy.float32) + vision_frame -= numpy.array([ 104, 117, 123 ]).astype(numpy.float32) + vision_frame = numpy.expand_dims(vision_frame, axis = 0) + return vision_frame -def analyse_frame(frame : Frame) -> bool: +def analyse_frame(vision_frame : VisionFrame) -> bool: content_analyser = get_content_analyser() - frame = prepare_frame(frame) + vision_frame = prepare_frame(vision_frame) probability = content_analyser.run(None, { - 'input:0': frame + 'input:0': vision_frame })[0][0][1] return probability > PROBABILITY_LIMIT @@ -92,6 +92,7 @@ def analyse_video(video_path : str, start_frame : int, end_frame : int) -> bool: frame_range = range(start_frame or 0, end_frame or video_frame_total) rate = 0.0 counter = 0 + with tqdm(total = len(frame_range), desc = wording.get('analysing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: for frame_number in 
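prepare_frame in the content analyser above resizes the frame to the 224x224 network input, subtracts the per-channel means and adds a batch dimension. A small sketch of that preprocessing on a dummy frame; no ONNX session is created here and the frame contents are placeholders:

```python
import cv2
import numpy

def prepare_frame(vision_frame : numpy.ndarray) -> numpy.ndarray:
	# resize to the 224x224 network input, subtract the channel means, add a batch axis
	vision_frame = cv2.resize(vision_frame, (224, 224)).astype(numpy.float32)
	vision_frame -= numpy.array([ 104, 117, 123 ]).astype(numpy.float32)
	return numpy.expand_dims(vision_frame, axis = 0)

dummy_frame = numpy.zeros((720, 1280, 3), dtype = numpy.uint8) # stand-in for a decoded video frame
print(prepare_frame(dummy_frame).shape) # (1, 224, 224, 3)
```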
frame_range: if frame_number % int(video_fps) == 0: diff --git a/facefusion/core.py b/facefusion/core.py index 5ac4c8f4..4d06e902 100755 --- a/facefusion/core.py +++ b/facefusion/core.py @@ -19,12 +19,12 @@ from facefusion.face_store import get_reference_faces, append_reference_face from facefusion import face_analyser, face_masker, content_analyser, config, metadata, logger, wording from facefusion.content_analyser import analyse_image, analyse_video from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module -from facefusion.common_helper import create_metavar +from facefusion.common_helper import create_metavar, get_first from facefusion.execution_helper import encode_execution_providers, decode_execution_providers from facefusion.normalizer import normalize_output_path, normalize_padding, normalize_fps from facefusion.memory import limit_system_memory -from facefusion.filesystem import list_directory, get_temp_frame_paths, create_temp, move_temp, clear_temp, is_image, is_video -from facefusion.ffmpeg import extract_frames, compress_image, merge_video, restore_audio +from facefusion.filesystem import list_directory, get_temp_frame_paths, create_temp, move_temp, clear_temp, is_image, is_video, filter_audio_paths +from facefusion.ffmpeg import extract_frames, compress_image, merge_video, restore_audio, replace_audio from facefusion.vision import get_video_frame, read_image, read_static_images, pack_resolution, detect_video_resolution, detect_video_fps, create_video_resolutions onnxruntime.set_default_logger_severity(3) @@ -34,75 +34,75 @@ warnings.filterwarnings('ignore', category = UserWarning, module = 'torchvision' def cli() -> None: signal.signal(signal.SIGINT, lambda signal_number, frame: destroy()) - program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 120), add_help = False) + program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 130), add_help = False) # general - program.add_argument('-s', '--source', help = wording.get('source_help'), action = 'append', dest = 'source_paths', default = config.get_str_list('general.source_paths')) - program.add_argument('-t', '--target', help = wording.get('target_help'), dest = 'target_path', default = config.get_str_value('general.target_path')) - program.add_argument('-o', '--output', help = wording.get('output_help'), dest = 'output_path', default = config.get_str_value('general.output_path')) + program.add_argument('-s', '--source', help = wording.get('help.source'), action = 'append', dest = 'source_paths', default = config.get_str_list('general.source_paths')) + program.add_argument('-t', '--target', help = wording.get('help.target'), dest = 'target_path', default = config.get_str_value('general.target_path')) + program.add_argument('-o', '--output', help = wording.get('help.output'), dest = 'output_path', default = config.get_str_value('general.output_path')) program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') # misc group_misc = program.add_argument_group('misc') - group_misc.add_argument('--skip-download', help = wording.get('skip_download_help'), action = 'store_true', default = config.get_bool_value('misc.skip_download')) - group_misc.add_argument('--headless', help = wording.get('headless_help'), action = 'store_true', default = config.get_bool_value('misc.headless')) - group_misc.add_argument('--log-level', help = 
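The argument defaults above are pulled from facefusion.ini through helpers like config.get_str_value, which resolve keys via get_value_by_notation. A small sketch of that dot-notation lookup, including the new fallback to a whole section, against a hypothetical in-memory ini:

```python
from configparser import ConfigParser
from typing import Any, Optional

config = ConfigParser()
config.read_string('''
[general]
target_path = target.mp4

[misc]
log_level = info
''') # hypothetical ini content

def get_value_by_notation(key : str) -> Optional[Any]:
	# 'section.name' returns a single value, a bare 'section' key returns the whole section
	if '.' in key:
		section, name = key.split('.')
		if section in config and name in config[section]:
			return config[section][name]
	if key in config:
		return config[key]
	return None

print(get_value_by_notation('general.target_path')) # target.mp4
print(dict(get_value_by_notation('misc'))) # {'log_level': 'info'}
```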
wording.get('log_level_help'), default = config.get_str_value('misc.log_level', 'info'), choices = logger.get_log_levels()) + group_misc.add_argument('--skip-download', help = wording.get('help.skip_download'), action = 'store_true', default = config.get_bool_value('misc.skip_download')) + group_misc.add_argument('--headless', help = wording.get('help.headless'), action = 'store_true', default = config.get_bool_value('misc.headless')) + group_misc.add_argument('--log-level', help = wording.get('help.log_level'), default = config.get_str_value('misc.log_level', 'info'), choices = logger.get_log_levels()) # execution execution_providers = encode_execution_providers(onnxruntime.get_available_providers()) group_execution = program.add_argument_group('execution') - group_execution.add_argument('--execution-providers', help = wording.get('execution_providers_help').format(choices = ', '.join(execution_providers)), default = config.get_str_list('execution.execution_providers', 'cpu'), choices = execution_providers, nargs = '+', metavar = 'EXECUTION_PROVIDERS') - group_execution.add_argument('--execution-thread-count', help = wording.get('execution_thread_count_help'), type = int, default = config.get_int_value('execution.execution_thread_count', '4'), choices = facefusion.choices.execution_thread_count_range, metavar = create_metavar(facefusion.choices.execution_thread_count_range)) - group_execution.add_argument('--execution-queue-count', help = wording.get('execution_queue_count_help'), type = int, default = config.get_int_value('execution.execution_queue_count', '1'), choices = facefusion.choices.execution_queue_count_range, metavar = create_metavar(facefusion.choices.execution_queue_count_range)) + group_execution.add_argument('--execution-providers', help = wording.get('help.execution_providers').format(choices = ', '.join(execution_providers)), default = config.get_str_list('execution.execution_providers', 'cpu'), choices = execution_providers, nargs = '+', metavar = 'EXECUTION_PROVIDERS') + group_execution.add_argument('--execution-thread-count', help = wording.get('help.execution_thread_count'), type = int, default = config.get_int_value('execution.execution_thread_count', '4'), choices = facefusion.choices.execution_thread_count_range, metavar = create_metavar(facefusion.choices.execution_thread_count_range)) + group_execution.add_argument('--execution-queue-count', help = wording.get('help.execution_queue_count'), type = int, default = config.get_int_value('execution.execution_queue_count', '1'), choices = facefusion.choices.execution_queue_count_range, metavar = create_metavar(facefusion.choices.execution_queue_count_range)) # memory group_memory = program.add_argument_group('memory') - group_memory.add_argument('--video-memory-strategy', help = wording.get('video_memory_strategy_help'), default = config.get_str_value('memory.video_memory_strategy', 'strict'), choices = facefusion.choices.video_memory_strategies) - group_memory.add_argument('--system-memory-limit', help = wording.get('system_memory_limit_help'), type = int, default = config.get_int_value('memory.system_memory_limit', '0'), choices = facefusion.choices.system_memory_limit_range, metavar = create_metavar(facefusion.choices.system_memory_limit_range)) + group_memory.add_argument('--video-memory-strategy', help = wording.get('help.video_memory_strategy'), default = config.get_str_value('memory.video_memory_strategy', 'strict'), choices = facefusion.choices.video_memory_strategies) + 
group_memory.add_argument('--system-memory-limit', help = wording.get('help.system_memory_limit'), type = int, default = config.get_int_value('memory.system_memory_limit', '0'), choices = facefusion.choices.system_memory_limit_range, metavar = create_metavar(facefusion.choices.system_memory_limit_range)) # face analyser group_face_analyser = program.add_argument_group('face analyser') - group_face_analyser.add_argument('--face-analyser-order', help = wording.get('face_analyser_order_help'), default = config.get_str_value('face_analyser.face_analyser_order', 'left-right'), choices = facefusion.choices.face_analyser_orders) - group_face_analyser.add_argument('--face-analyser-age', help = wording.get('face_analyser_age_help'), default = config.get_str_value('face_analyser.face_analyser_age'), choices = facefusion.choices.face_analyser_ages) - group_face_analyser.add_argument('--face-analyser-gender', help = wording.get('face_analyser_gender_help'), default = config.get_str_value('face_analyser.face_analyser_gender'), choices = facefusion.choices.face_analyser_genders) - group_face_analyser.add_argument('--face-detector-model', help = wording.get('face_detector_model_help'), default = config.get_str_value('face_analyser.face_detector_model', 'retinaface'), choices = facefusion.choices.face_detector_models) - group_face_analyser.add_argument('--face-detector-size', help = wording.get('face_detector_size_help'), default = config.get_str_value('face_analyser.face_detector_size', '640x640'), choices = facefusion.choices.face_detector_sizes) - group_face_analyser.add_argument('--face-detector-score', help = wording.get('face_detector_score_help'), type = float, default = config.get_float_value('face_analyser.face_detector_score', '0.5'), choices = facefusion.choices.face_detector_score_range, metavar = create_metavar(facefusion.choices.face_detector_score_range)) + group_face_analyser.add_argument('--face-analyser-order', help = wording.get('help.face_analyser_order'), default = config.get_str_value('face_analyser.face_analyser_order', 'left-right'), choices = facefusion.choices.face_analyser_orders) + group_face_analyser.add_argument('--face-analyser-age', help = wording.get('help.face_analyser_age'), default = config.get_str_value('face_analyser.face_analyser_age'), choices = facefusion.choices.face_analyser_ages) + group_face_analyser.add_argument('--face-analyser-gender', help = wording.get('help.face_analyser_gender'), default = config.get_str_value('face_analyser.face_analyser_gender'), choices = facefusion.choices.face_analyser_genders) + group_face_analyser.add_argument('--face-detector-model', help = wording.get('help.face_detector_model'), default = config.get_str_value('face_analyser.face_detector_model', 'yoloface'), choices = facefusion.choices.face_detector_set.keys()) + group_face_analyser.add_argument('--face-detector-size', help = wording.get('help.face_detector_size'), default = config.get_str_value('face_analyser.face_detector_size', '640x640')) + group_face_analyser.add_argument('--face-detector-score', help = wording.get('help.face_detector_score'), type = float, default = config.get_float_value('face_analyser.face_detector_score', '0.5'), choices = facefusion.choices.face_detector_score_range, metavar = create_metavar(facefusion.choices.face_detector_score_range)) # face selector group_face_selector = program.add_argument_group('face selector') - group_face_selector.add_argument('--face-selector-mode', help = wording.get('face_selector_mode_help'), default = 
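The --face-detector-model argument above now derives its choices directly from the keys of face_detector_set, while --face-detector-size drops its choices entirely so any size string can be passed and validated later in apply_args. A small sketch of that pattern with a trimmed registry:

```python
from argparse import ArgumentParser
from typing import Dict, List

# trimmed copy of face_detector_set from facefusion/choices.py above
face_detector_set : Dict[str, List[str]] =\
{
	'retinaface': [ '160x160', '320x320', '480x480', '512x512', '640x640' ],
	'yoloface': [ '640x640' ],
	'yunet': [ '160x160', '320x320', '480x480', '512x512', '640x640', '768x768', '960x960', '1024x1024' ]
}

program = ArgumentParser()
# the valid models are simply the keys of the detector registry
program.add_argument('--face-detector-model', default = 'yoloface', choices = face_detector_set.keys())
program.add_argument('--face-detector-size', default = '640x640')
args = program.parse_args([ '--face-detector-model', 'retinaface', '--face-detector-size', '320x320' ])
print(args.face_detector_model, args.face_detector_size)
```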
config.get_str_value('face_selector.face_selector_mode', 'reference'), choices = facefusion.choices.face_selector_modes) - group_face_selector.add_argument('--reference-face-position', help = wording.get('reference_face_position_help'), type = int, default = config.get_int_value('face_selector.reference_face_position', '0')) - group_face_selector.add_argument('--reference-face-distance', help = wording.get('reference_face_distance_help'), type = float, default = config.get_float_value('face_selector.reference_face_distance', '0.6'), choices = facefusion.choices.reference_face_distance_range, metavar = create_metavar(facefusion.choices.reference_face_distance_range)) - group_face_selector.add_argument('--reference-frame-number', help = wording.get('reference_frame_number_help'), type = int, default = config.get_int_value('face_selector.reference_frame_number', '0')) + group_face_selector.add_argument('--face-selector-mode', help = wording.get('help.face_selector_mode'), default = config.get_str_value('face_selector.face_selector_mode', 'reference'), choices = facefusion.choices.face_selector_modes) + group_face_selector.add_argument('--reference-face-position', help = wording.get('help.reference_face_position'), type = int, default = config.get_int_value('face_selector.reference_face_position', '0')) + group_face_selector.add_argument('--reference-face-distance', help = wording.get('help.reference_face_distance'), type = float, default = config.get_float_value('face_selector.reference_face_distance', '0.6'), choices = facefusion.choices.reference_face_distance_range, metavar = create_metavar(facefusion.choices.reference_face_distance_range)) + group_face_selector.add_argument('--reference-frame-number', help = wording.get('help.reference_frame_number'), type = int, default = config.get_int_value('face_selector.reference_frame_number', '0')) # face mask group_face_mask = program.add_argument_group('face mask') - group_face_mask.add_argument('--face-mask-types', help = wording.get('face_mask_types_help').format(choices = ', '.join(facefusion.choices.face_mask_types)), default = config.get_str_list('face_mask.face_mask_types', 'box'), choices = facefusion.choices.face_mask_types, nargs = '+', metavar = 'FACE_MASK_TYPES') - group_face_mask.add_argument('--face-mask-blur', help = wording.get('face_mask_blur_help'), type = float, default = config.get_float_value('face_mask.face_mask_blur', '0.3'), choices = facefusion.choices.face_mask_blur_range, metavar = create_metavar(facefusion.choices.face_mask_blur_range)) - group_face_mask.add_argument('--face-mask-padding', help = wording.get('face_mask_padding_help'), type = int, default = config.get_int_list('face_mask.face_mask_padding', '0 0 0 0'), nargs = '+') - group_face_mask.add_argument('--face-mask-regions', help = wording.get('face_mask_regions_help').format(choices = ', '.join(facefusion.choices.face_mask_regions)), default = config.get_str_list('face_mask.face_mask_regions', ' '.join(facefusion.choices.face_mask_regions)), choices = facefusion.choices.face_mask_regions, nargs = '+', metavar = 'FACE_MASK_REGIONS') + group_face_mask.add_argument('--face-mask-types', help = wording.get('help.face_mask_types').format(choices = ', '.join(facefusion.choices.face_mask_types)), default = config.get_str_list('face_mask.face_mask_types', 'box'), choices = facefusion.choices.face_mask_types, nargs = '+', metavar = 'FACE_MASK_TYPES') + group_face_mask.add_argument('--face-mask-blur', help = wording.get('help.face_mask_blur'), type = float, default = 
config.get_float_value('face_mask.face_mask_blur', '0.3'), choices = facefusion.choices.face_mask_blur_range, metavar = create_metavar(facefusion.choices.face_mask_blur_range)) + group_face_mask.add_argument('--face-mask-padding', help = wording.get('help.face_mask_padding'), type = int, default = config.get_int_list('face_mask.face_mask_padding', '0 0 0 0'), nargs = '+') + group_face_mask.add_argument('--face-mask-regions', help = wording.get('help.face_mask_regions').format(choices = ', '.join(facefusion.choices.face_mask_regions)), default = config.get_str_list('face_mask.face_mask_regions', ' '.join(facefusion.choices.face_mask_regions)), choices = facefusion.choices.face_mask_regions, nargs = '+', metavar = 'FACE_MASK_REGIONS') # frame extraction group_frame_extraction = program.add_argument_group('frame extraction') - group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('trim_frame_start_help'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_start')) - group_frame_extraction.add_argument('--trim-frame-end', help = wording.get('trim_frame_end_help'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_end')) - group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('temp_frame_format_help'), default = config.get_str_value('frame_extraction.temp_frame_format', 'jpg'), choices = facefusion.choices.temp_frame_formats) - group_frame_extraction.add_argument('--temp-frame-quality', help = wording.get('temp_frame_quality_help'), type = int, default = config.get_int_value('frame_extraction.temp_frame_quality', '100'), choices = facefusion.choices.temp_frame_quality_range, metavar = create_metavar(facefusion.choices.temp_frame_quality_range)) - group_frame_extraction.add_argument('--keep-temp', help = wording.get('keep_temp_help'), action = 'store_true', default = config.get_bool_value('frame_extraction.keep_temp')) + group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('help.trim_frame_start'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_start')) + group_frame_extraction.add_argument('--trim-frame-end', help = wording.get('help.trim_frame_end'), type = int, default = facefusion.config.get_int_value('frame_extraction.trim_frame_end')) + group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('help.temp_frame_format'), default = config.get_str_value('frame_extraction.temp_frame_format', 'jpg'), choices = facefusion.choices.temp_frame_formats) + group_frame_extraction.add_argument('--temp-frame-quality', help = wording.get('help.temp_frame_quality'), type = int, default = config.get_int_value('frame_extraction.temp_frame_quality', '100'), choices = facefusion.choices.temp_frame_quality_range, metavar = create_metavar(facefusion.choices.temp_frame_quality_range)) + group_frame_extraction.add_argument('--keep-temp', help = wording.get('help.keep_temp'), action = 'store_true', default = config.get_bool_value('frame_extraction.keep_temp')) # output creation group_output_creation = program.add_argument_group('output creation') - group_output_creation.add_argument('--output-image-quality', help = wording.get('output_image_quality_help'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_metavar(facefusion.choices.output_image_quality_range)) - 
group_output_creation.add_argument('--output-video-encoder', help = wording.get('output_video_encoder_help'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders) - group_output_creation.add_argument('--output-video-preset', help = wording.get('output_video_preset_help'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) - group_output_creation.add_argument('--output-video-quality', help = wording.get('output_video_quality_help'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_metavar(facefusion.choices.output_video_quality_range)) - group_output_creation.add_argument('--output-video-resolution', help = wording.get('output_video_resolution_help'), default = config.get_str_value('output_creation.output_video_resolution')) - group_output_creation.add_argument('--output-video-fps', help = wording.get('output_video_fps_help'), type = float) - group_output_creation.add_argument('--skip-audio', help = wording.get('skip_audio_help'), action = 'store_true', default = config.get_bool_value('output_creation.skip_audio')) + group_output_creation.add_argument('--output-image-quality', help = wording.get('help.output_image_quality'), type = int, default = config.get_int_value('output_creation.output_image_quality', '80'), choices = facefusion.choices.output_image_quality_range, metavar = create_metavar(facefusion.choices.output_image_quality_range)) + group_output_creation.add_argument('--output-video-encoder', help = wording.get('help.output_video_encoder'), default = config.get_str_value('output_creation.output_video_encoder', 'libx264'), choices = facefusion.choices.output_video_encoders) + group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) + group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_metavar(facefusion.choices.output_video_quality_range)) + group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution')) + group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float) + group_output_creation.add_argument('--skip-audio', help = wording.get('help.skip_audio'), action = 'store_true', default = config.get_bool_value('output_creation.skip_audio')) # frame processors available_frame_processors = list_directory('facefusion/processors/frame/modules') program = ArgumentParser(parents = [ program ], formatter_class = program.formatter_class, add_help = True) group_frame_processors = program.add_argument_group('frame processors') - group_frame_processors.add_argument('--frame-processors', help = wording.get('frame_processors_help').format(choices = ', '.join(available_frame_processors)), default = config.get_str_list('frame_processors.frame_processors', 'face_swapper'), nargs = '+') + group_frame_processors.add_argument('--frame-processors', 
help = wording.get('help.frame_processors').format(choices = ', '.join(available_frame_processors)), default = config.get_str_list('frame_processors.frame_processors', 'face_swapper'), nargs = '+') for frame_processor in available_frame_processors: frame_processor_module = load_frame_processor_module(frame_processor) frame_processor_module.register_args(group_frame_processors) # uis available_ui_layouts = list_directory('facefusion/uis/layouts') group_uis = program.add_argument_group('uis') - group_uis.add_argument('--ui-layouts', help = wording.get('ui_layouts_help').format(choices = ', '.join(available_ui_layouts)), default = config.get_str_list('uis.ui_layout', 'default'), nargs = '+') + group_uis.add_argument('--ui-layouts', help = wording.get('help.ui_layouts').format(choices = ', '.join(available_ui_layouts)), default = config.get_str_list('uis.ui_layouts', 'default'), nargs = '+') run(program) @@ -128,7 +128,10 @@ def apply_args(program : ArgumentParser) -> None: facefusion.globals.face_analyser_age = args.face_analyser_age facefusion.globals.face_analyser_gender = args.face_analyser_gender facefusion.globals.face_detector_model = args.face_detector_model - facefusion.globals.face_detector_size = args.face_detector_size + if args.face_detector_size in facefusion.choices.face_detector_set[args.face_detector_model]: + facefusion.globals.face_detector_size = args.face_detector_size + else: + facefusion.globals.face_detector_size = '640x640' facefusion.globals.face_detector_score = args.face_detector_score # face selector facefusion.globals.face_selector_mode = args.face_selector_mode @@ -253,9 +256,10 @@ def process_image(start_time : float) -> None: frame_processor_module.process_image(facefusion.globals.source_paths, facefusion.globals.output_path, facefusion.globals.output_path) frame_processor_module.post_process() # compress image - logger.info(wording.get('compressing_image'), __name__.upper()) - if not compress_image(facefusion.globals.output_path): - logger.error(wording.get('compressing_image_failed'), __name__.upper()) + if compress_image(facefusion.globals.output_path): + logger.info(wording.get('compressing_image_succeed'), __name__.upper()) + else: + logger.warn(wording.get('compressing_image_skipped'), __name__.upper()) # validate image if is_image(facefusion.globals.output_path): seconds = '{:.2f}'.format((time.time() - start_time) % 60) @@ -267,8 +271,11 @@ def process_image(start_time : float) -> None: def process_video(start_time : float) -> None: if analyse_video(facefusion.globals.target_path, facefusion.globals.trim_frame_start, facefusion.globals.trim_frame_end): return + # clear temp + logger.debug(wording.get('clearing_temp'), __name__.upper()) + clear_temp(facefusion.globals.target_path) # create temp - logger.info(wording.get('creating_temp'), __name__.upper()) + logger.debug(wording.get('creating_temp'), __name__.upper()) create_temp(facefusion.globals.target_path) # extract frames logger.info(wording.get('extracting_frames_fps').format(video_fps = facefusion.globals.output_video_fps), __name__.upper()) @@ -285,7 +292,7 @@ def process_video(start_time : float) -> None: return # merge video logger.info(wording.get('merging_video_fps').format(video_fps = facefusion.globals.output_video_fps), __name__.upper()) - if not merge_video(facefusion.globals.target_path, facefusion.globals.output_video_fps): + if not merge_video(facefusion.globals.target_path, facefusion.globals.output_video_resolution, facefusion.globals.output_video_fps): 
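apply_args above now validates --face-detector-size against the sizes supported by the chosen detector and silently falls back to 640x640. A standalone sketch of that check; resolve_face_detector_size is a hypothetical helper name, not part of the codebase:

```python
from typing import Dict, List

# copied from facefusion/choices.py above
face_detector_set : Dict[str, List[str]] =\
{
	'retinaface': [ '160x160', '320x320', '480x480', '512x512', '640x640' ],
	'yoloface': [ '640x640' ],
	'yunet': [ '160x160', '320x320', '480x480', '512x512', '640x640', '768x768', '960x960', '1024x1024' ]
}

def resolve_face_detector_size(face_detector_model : str, face_detector_size : str) -> str:
	# fall back to 640x640 when the requested size is not supported by the chosen model
	if face_detector_size in face_detector_set.get(face_detector_model, []):
		return face_detector_size
	return '640x640'

print(resolve_face_detector_size('yoloface', '320x320')) # 640x640
print(resolve_face_detector_size('yunet', '320x320')) # 320x320
```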
logger.error(wording.get('merging_video_failed'), __name__.upper()) return # handle audio @@ -293,12 +300,21 @@ def process_video(start_time : float) -> None: logger.info(wording.get('skipping_audio'), __name__.upper()) move_temp(facefusion.globals.target_path, facefusion.globals.output_path) else: - logger.info(wording.get('restoring_audio'), __name__.upper()) - if not restore_audio(facefusion.globals.target_path, facefusion.globals.output_path, facefusion.globals.output_video_fps): - logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) - move_temp(facefusion.globals.target_path, facefusion.globals.output_path) + if 'lip_syncer' in facefusion.globals.frame_processors: + source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) + if source_audio_path and replace_audio(facefusion.globals.target_path, source_audio_path, facefusion.globals.output_path): + logger.info(wording.get('restoring_audio_succeed'), __name__.upper()) + else: + logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) + move_temp(facefusion.globals.target_path, facefusion.globals.output_path) + else: + if restore_audio(facefusion.globals.target_path, facefusion.globals.output_path, facefusion.globals.output_video_fps): + logger.info(wording.get('restoring_audio_succeed'), __name__.upper()) + else: + logger.warn(wording.get('restoring_audio_skipped'), __name__.upper()) + move_temp(facefusion.globals.target_path, facefusion.globals.output_path) # clear temp - logger.info(wording.get('clearing_temp'), __name__.upper()) + logger.debug(wording.get('clearing_temp'), __name__.upper()) clear_temp(facefusion.globals.target_path) # validate video if is_video(facefusion.globals.output_path): diff --git a/facefusion/face_analyser.py b/facefusion/face_analyser.py index ffb0741f..3df55012 100644 --- a/facefusion/face_analyser.py +++ b/facefusion/face_analyser.py @@ -5,12 +5,13 @@ import numpy import onnxruntime import facefusion.globals -from facefusion.download import conditional_download +from facefusion.common_helper import get_first +from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_translation, create_static_anchors, distance_to_face_landmark_5, distance_to_bounding_box, convert_face_landmark_68_to_5, apply_nms, categorize_age, categorize_gender from facefusion.face_store import get_static_faces, set_static_faces from facefusion.execution_helper import apply_execution_provider_options -from facefusion.face_helper import warp_face_by_kps, create_static_anchors, distance_to_kps, distance_to_bbox, apply_nms +from facefusion.download import conditional_download from facefusion.filesystem import resolve_relative_path -from facefusion.typing import Frame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, Bbox, Kps, Score, Embedding +from facefusion.typing import VisionFrame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, BoundingBox, FaceLandmarkSet, FaceLandmark5, FaceLandmark68, Score, Embedding from facefusion.vision import resize_frame_resolution, unpack_resolution FACE_ANALYSER = None @@ -23,6 +24,11 @@ MODELS : ModelSet =\ 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/retinaface_10g.onnx', 'path': resolve_relative_path('../.assets/models/retinaface_10g.onnx') }, + 'face_detector_yoloface': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yoloface_8n.onnx', + 'path': 
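When the lip_syncer processor is active, the audio handling above picks the first audio file among the sources and passes it to replace_audio instead of restoring the target's original track. A sketch of that selection logic; filter_audio_paths here is a simplified stand-in for the real filesystem helper and the extension list is an assumption:

```python
from typing import List, Optional

def get_first(__list__ : List[str]) -> Optional[str]:
	# same helper as facefusion/common_helper.py above
	return next(iter(__list__), None)

def filter_audio_paths(paths : List[str]) -> List[str]:
	# simplified stand-in for facefusion.filesystem.filter_audio_paths (extension check only)
	return [ path for path in paths if path.lower().endswith(('.mp3', '.wav', '.flac')) ]

source_paths = [ 'face.jpg', 'voice.wav' ] # hypothetical --source arguments
source_audio_path = get_first(filter_audio_paths(source_paths))
if source_audio_path:
	print('replace audio with ' + source_audio_path) # lip_syncer branch: replace_audio(...)
else:
	print('restore the original audio') # fallback branch: restore_audio(...)
```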
resolve_relative_path('../.assets/models/yoloface_8n.onnx') + }, 'face_detector_yunet': { 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/yunet_2023mar.onnx', @@ -43,6 +49,16 @@ MODELS : ModelSet =\ 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_simswap.onnx', 'path': resolve_relative_path('../.assets/models/arcface_simswap.onnx') }, + 'face_recognizer_arcface_uniface': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx', + 'path': resolve_relative_path('../.assets/models/arcface_w600k_r50.onnx') + }, + 'face_landmarker': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/2dfan4.onnx', + 'path': resolve_relative_path('../.assets/models/2dfan4.onnx') + }, 'gender_age': { 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gender_age.onnx', @@ -58,6 +74,8 @@ def get_face_analyser() -> Any: if FACE_ANALYSER is None: if facefusion.globals.face_detector_model == 'retinaface': face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + if facefusion.globals.face_detector_model == 'yoloface': + face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_detector_model == 'yunet': face_detector = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0)) if facefusion.globals.face_recognizer_model == 'arcface_blendswap': @@ -66,11 +84,15 @@ def get_face_analyser() -> Any: face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_inswapper').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_recognizer_model == 'arcface_simswap': face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_simswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + if facefusion.globals.face_recognizer_model == 'arcface_uniface': + face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_uniface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + face_landmarker = onnxruntime.InferenceSession(MODELS.get('face_landmarker').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) gender_age = onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) FACE_ANALYSER =\ { 'face_detector': face_detector, 'face_recognizer': face_recognizer, + 'face_landmarker': face_landmarker, 'gender_age': gender_age } return FACE_ANALYSER @@ -88,47 +110,36 @@ def pre_check() -> bool: model_urls =\ [ MODELS.get('face_detector_retinaface').get('url'), + MODELS.get('face_detector_yoloface').get('url'), MODELS.get('face_detector_yunet').get('url'), + MODELS.get('face_recognizer_arcface_blendswap').get('url'), MODELS.get('face_recognizer_arcface_inswapper').get('url'), MODELS.get('face_recognizer_arcface_simswap').get('url'), - MODELS.get('gender_age').get('url') + MODELS.get('face_recognizer_arcface_uniface').get('url'), + MODELS.get('face_landmarker').get('url'), + 
MODELS.get('gender_age').get('url'), ] conditional_download(download_directory_path, model_urls) return True -def extract_faces(frame : Frame) -> List[Face]: - face_detector_width, face_detector_height = unpack_resolution(facefusion.globals.face_detector_size) - frame_height, frame_width, _ = frame.shape - temp_frame = resize_frame_resolution(frame, face_detector_width, face_detector_height) - temp_frame_height, temp_frame_width, _ = temp_frame.shape - ratio_height = frame_height / temp_frame_height - ratio_width = frame_width / temp_frame_width - if facefusion.globals.face_detector_model == 'retinaface': - bbox_list, kps_list, score_list = detect_with_retinaface(temp_frame, temp_frame_height, temp_frame_width, face_detector_height, face_detector_width, ratio_height, ratio_width) - return create_faces(frame, bbox_list, kps_list, score_list) - elif facefusion.globals.face_detector_model == 'yunet': - bbox_list, kps_list, score_list = detect_with_yunet(temp_frame, temp_frame_height, temp_frame_width, ratio_height, ratio_width) - return create_faces(frame, bbox_list, kps_list, score_list) - return [] - - -def detect_with_retinaface(temp_frame : Frame, temp_frame_height : int, temp_frame_width : int, face_detector_height : int, face_detector_width : int, ratio_height : float, ratio_width : float) -> Tuple[List[Bbox], List[Kps], List[Score]]: +def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: face_detector = get_face_analyser().get('face_detector') - bbox_list = [] - kps_list = [] - score_list = [] + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, face_detector_width, face_detector_height) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] feature_strides = [ 8, 16, 32 ] feature_map_channel = 3 anchor_total = 2 - prepare_frame = numpy.zeros((face_detector_height, face_detector_width, 3)) - prepare_frame[:temp_frame_height, :temp_frame_width, :] = temp_frame - temp_frame = (prepare_frame - 127.5) / 128.0 - temp_frame = numpy.expand_dims(temp_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) + bounding_box_list = [] + face_landmark5_list = [] + score_list = [] + with THREAD_SEMAPHORE: detections = face_detector.run(None, { - face_detector.get_inputs()[0].name: temp_frame + face_detector.get_inputs()[0].name: prepare_detect_frame(temp_vision_frame, face_detector_size) }) for index, feature_stride in enumerate(feature_strides): keep_indices = numpy.where(detections[index] >= facefusion.globals.face_detector_score)[0] @@ -136,63 +147,119 @@ def detect_with_retinaface(temp_frame : Frame, temp_frame_height : int, temp_fra stride_height = face_detector_height // feature_stride stride_width = face_detector_width // feature_stride anchors = create_static_anchors(feature_stride, anchor_total, stride_height, stride_width) - bbox_raw = detections[index + feature_map_channel] * feature_stride - kps_raw = detections[index + feature_map_channel * 2] * feature_stride - for bbox in distance_to_bbox(anchors, bbox_raw)[keep_indices]: - bbox_list.append(numpy.array( + bounding_box_raw = detections[index + feature_map_channel] * feature_stride + face_landmark_5_raw = detections[index + feature_map_channel * 2] * feature_stride + for bounding_box in distance_to_bounding_box(anchors, bounding_box_raw)[keep_indices]: + 
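detect_with_retinaface decodes anchor-free predictions with distance_to_bounding_box from face_helper, whose implementation is not part of this diff. A sketch of the usual decoding, assuming the offsets are distances from each anchor point to the left, top, right and bottom edges, scaled back to the original frame:

```python
import numpy

def distance_to_bounding_box(points : numpy.ndarray, distance : numpy.ndarray) -> numpy.ndarray:
	# offsets are distances from the anchor point to the left, top, right and bottom edges
	x1 = points[:, 0] - distance[:, 0]
	y1 = points[:, 1] - distance[:, 1]
	x2 = points[:, 0] + distance[:, 2]
	y2 = points[:, 1] + distance[:, 3]
	return numpy.column_stack([ x1, y1, x2, y2 ])

anchors = numpy.array([ [ 32.0, 32.0 ], [ 64.0, 64.0 ] ]) # anchor points in detector space
distances = numpy.array([ [ 10.0, 12.0, 10.0, 12.0 ], [ 8.0, 8.0, 8.0, 8.0 ] ])
ratio_width, ratio_height = 1280 / 640, 720 / 640 # scale back to the original frame
bounding_boxes = distance_to_bounding_box(anchors, distances)
bounding_boxes *= numpy.array([ ratio_width, ratio_height, ratio_width, ratio_height ])
print(bounding_boxes)
```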
bounding_box_list.append(numpy.array( [ - bbox[0] * ratio_width, - bbox[1] * ratio_height, - bbox[2] * ratio_width, - bbox[3] * ratio_height + bounding_box[0] * ratio_width, + bounding_box[1] * ratio_height, + bounding_box[2] * ratio_width, + bounding_box[3] * ratio_height ])) - for kps in distance_to_kps(anchors, kps_raw)[keep_indices]: - kps_list.append(kps * [ ratio_width, ratio_height ]) + for face_landmark5 in distance_to_face_landmark_5(anchors, face_landmark_5_raw)[keep_indices]: + face_landmark5_list.append(face_landmark5 * [ ratio_width, ratio_height ]) for score in detections[index][keep_indices]: score_list.append(score[0]) - return bbox_list, kps_list, score_list + return bounding_box_list, face_landmark5_list, score_list -def detect_with_yunet(temp_frame : Frame, temp_frame_height : int, temp_frame_width : int, ratio_height : float, ratio_width : float) -> Tuple[List[Bbox], List[Kps], List[Score]]: +def detect_with_yoloface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: face_detector = get_face_analyser().get('face_detector') - face_detector.setInputSize((temp_frame_width, temp_frame_height)) - face_detector.setScoreThreshold(facefusion.globals.face_detector_score) - bbox_list = [] - kps_list = [] + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, face_detector_width, face_detector_height) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + bounding_box_list = [] + face_landmark5_list = [] score_list = [] + with THREAD_SEMAPHORE: - _, detections = face_detector.detect(temp_frame) + detections = face_detector.run(None, + { + face_detector.get_inputs()[0].name: prepare_detect_frame(temp_vision_frame, face_detector_size) + }) + detections = numpy.squeeze(detections).T + bounding_box_raw, score_raw, face_landmark_5_raw = numpy.split(detections, [ 4, 5 ], axis = 1) + keep_indices = numpy.where(score_raw > facefusion.globals.face_detector_score)[0] + if keep_indices.any(): + bounding_box_raw, face_landmark_5_raw, score_raw = bounding_box_raw[keep_indices], face_landmark_5_raw[keep_indices], score_raw[keep_indices] + for bounding_box in bounding_box_raw: + bounding_box_list.append(numpy.array( + [ + (bounding_box[0] - bounding_box[2] / 2) * ratio_width, + (bounding_box[1] - bounding_box[3] / 2) * ratio_height, + (bounding_box[0] + bounding_box[2] / 2) * ratio_width, + (bounding_box[1] + bounding_box[3] / 2) * ratio_height + ])) + face_landmark_5_raw[:, 0::3] = (face_landmark_5_raw[:, 0::3]) * ratio_width + face_landmark_5_raw[:, 1::3] = (face_landmark_5_raw[:, 1::3]) * ratio_height + for face_landmark_5 in face_landmark_5_raw: + face_landmark5_list.append(numpy.array(face_landmark_5.reshape(-1, 3)[:, :2])) + score_list = score_raw.ravel().tolist() + return bounding_box_list, face_landmark5_list, score_list + + +def detect_with_yunet(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: + face_detector = get_face_analyser().get('face_detector') + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + temp_vision_frame = resize_frame_resolution(vision_frame, face_detector_width, face_detector_height) + ratio_height = vision_frame.shape[0] / temp_vision_frame.shape[0] + ratio_width = vision_frame.shape[1] / temp_vision_frame.shape[1] + bounding_box_list 
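detect_with_yoloface above receives boxes as centre/width/height plus landmark triplets of x, y and score. A small numpy sketch of the same conversion to corner coordinates and of stripping the landmark scores, using fabricated values in place of real detector output:

```python
import numpy

# fake yoloface-style rows: centre x, centre y, width, height in detector space
bounding_box_raw = numpy.array([ [ 320.0, 320.0, 100.0, 120.0 ] ])
ratio_width, ratio_height = 1920 / 640, 1080 / 640

# convert centre/size boxes into corner boxes and scale back to the original frame
x1 = (bounding_box_raw[:, 0] - bounding_box_raw[:, 2] / 2) * ratio_width
y1 = (bounding_box_raw[:, 1] - bounding_box_raw[:, 3] / 2) * ratio_height
x2 = (bounding_box_raw[:, 0] + bounding_box_raw[:, 2] / 2) * ratio_width
y2 = (bounding_box_raw[:, 1] + bounding_box_raw[:, 3] / 2) * ratio_height
print(numpy.column_stack([ x1, y1, x2, y2 ]))

# landmarks arrive as x, y, score triplets; keep only the coordinates
face_landmark_5_raw = numpy.arange(15, dtype = numpy.float32) # 5 landmarks x (x, y, score)
face_landmark_5 = face_landmark_5_raw.reshape(-1, 3)[:, :2]
print(face_landmark_5.shape) # (5, 2)
```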
= [] + face_landmark5_list = [] + score_list = [] + + face_detector.setInputSize((temp_vision_frame.shape[1], temp_vision_frame.shape[0])) + face_detector.setScoreThreshold(facefusion.globals.face_detector_score) + with THREAD_SEMAPHORE: + _, detections = face_detector.detect(temp_vision_frame) if detections.any(): for detection in detections: - bbox_list.append(numpy.array( + bounding_box_list.append(numpy.array( [ detection[0] * ratio_width, detection[1] * ratio_height, (detection[0] + detection[2]) * ratio_width, (detection[1] + detection[3]) * ratio_height ])) - kps_list.append(detection[4:14].reshape((5, 2)) * [ ratio_width, ratio_height]) + face_landmark5_list.append(detection[4:14].reshape((5, 2)) * [ ratio_width, ratio_height ]) score_list.append(detection[14]) - return bbox_list, kps_list, score_list + return bounding_box_list, face_landmark5_list, score_list -def create_faces(frame : Frame, bbox_list : List[Bbox], kps_list : List[Kps], score_list : List[Score]) -> List[Face]: +def prepare_detect_frame(temp_vision_frame : VisionFrame, face_detector_size : str) -> VisionFrame: + face_detector_width, face_detector_height = unpack_resolution(face_detector_size) + detect_vision_frame = numpy.zeros((face_detector_height, face_detector_width, 3)) + detect_vision_frame[:temp_vision_frame.shape[0], :temp_vision_frame.shape[1], :] = temp_vision_frame + detect_vision_frame = (detect_vision_frame - 127.5) / 128.0 + detect_vision_frame = numpy.expand_dims(detect_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) + return detect_vision_frame + + +def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBox], face_landmark5_list : List[FaceLandmark5], score_list : List[Score]) -> List[Face]: faces = [] if facefusion.globals.face_detector_score > 0: sort_indices = numpy.argsort(-numpy.array(score_list)) - bbox_list = [ bbox_list[index] for index in sort_indices ] - kps_list = [ kps_list[index] for index in sort_indices ] + bounding_box_list = [ bounding_box_list[index] for index in sort_indices ] + face_landmark5_list = [ face_landmark5_list[index] for index in sort_indices ] score_list = [ score_list[index] for index in sort_indices ] - keep_indices = apply_nms(bbox_list, 0.4) + keep_indices = apply_nms(bounding_box_list, 0.4) for index in keep_indices: - bbox = bbox_list[index] - kps = kps_list[index] + bounding_box = bounding_box_list[index] + face_landmark_68 = detect_face_landmark_68(vision_frame, bounding_box) + landmark : FaceLandmarkSet =\ + { + '5': face_landmark5_list[index], + '5/68': convert_face_landmark_68_to_5(face_landmark_68), + '68': face_landmark_68 + } score = score_list[index] - embedding, normed_embedding = calc_embedding(frame, kps) - gender, age = detect_gender_age(frame, bbox) + embedding, normed_embedding = calc_embedding(vision_frame, landmark['5/68']) + gender, age = detect_gender_age(vision_frame, bounding_box) faces.append(Face( - bbox = bbox, - kps = kps, + bounding_box = bounding_box, + landmark = landmark, score = score, embedding = embedding, normed_embedding = normed_embedding, @@ -202,41 +269,57 @@ def create_faces(frame : Frame, bbox_list : List[Bbox], kps_list : List[Kps], sc return faces -def calc_embedding(temp_frame : Frame, kps : Kps) -> Tuple[Embedding, Embedding]: +def calc_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5) -> Tuple[Embedding, Embedding]: face_recognizer = get_face_analyser().get('face_recognizer') - crop_frame, matrix = warp_face_by_kps(temp_frame, kps, 'arcface_112_v2', 
(112, 112)) - crop_frame = crop_frame.astype(numpy.float32) / 127.5 - 1 - crop_frame = crop_frame[:, :, ::-1].transpose(2, 0, 1) - crop_frame = numpy.expand_dims(crop_frame, axis = 0) + crop_vision_frame, matrix = warp_face_by_face_landmark_5(temp_vision_frame, face_landmark_5, 'arcface_112_v2', (112, 112)) + crop_vision_frame = crop_vision_frame / 127.5 - 1 + crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) embedding = face_recognizer.run(None, { - face_recognizer.get_inputs()[0].name: crop_frame + face_recognizer.get_inputs()[0].name: crop_vision_frame })[0] embedding = embedding.ravel() normed_embedding = embedding / numpy.linalg.norm(embedding) return embedding, normed_embedding -def detect_gender_age(frame : Frame, bbox : Bbox) -> Tuple[int, int]: +def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> FaceLandmark68: + face_landmarker = get_face_analyser().get('face_landmarker') + scale = 195 / numpy.subtract(bounding_box[2:], bounding_box[:2]).max() + translation = (256 - numpy.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5 + crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (256, 256)) + crop_vision_frame = crop_vision_frame.transpose(2, 0, 1).astype(numpy.float32) / 255.0 + face_landmark_68 = face_landmarker.run(None, + { + face_landmarker.get_inputs()[0].name: [ crop_vision_frame ] + })[0] + face_landmark_68 = face_landmark_68[:, :, :2][0] / 64 + face_landmark_68 = face_landmark_68.reshape(1, -1, 2) * 256 + face_landmark_68 = cv2.transform(face_landmark_68, cv2.invertAffineTransform(affine_matrix)) + face_landmark_68 = face_landmark_68.reshape(-1, 2) + return face_landmark_68 + + +def detect_gender_age(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[int, int]: gender_age = get_face_analyser().get('gender_age') - bbox = bbox.reshape(2, -1) - scale = 64 / numpy.subtract(*bbox[::-1]).max() - translation = 48 - bbox.sum(axis = 0) * 0.5 * scale - affine_matrix = numpy.array([[ scale, 0, translation[0] ], [ 0, scale, translation[1] ]]) - crop_frame = cv2.warpAffine(frame, affine_matrix, (96, 96)) - crop_frame = crop_frame.astype(numpy.float32)[:, :, ::-1].transpose(2, 0, 1) - crop_frame = numpy.expand_dims(crop_frame, axis = 0) + bounding_box = bounding_box.reshape(2, -1) + scale = 64 / numpy.subtract(*bounding_box[::-1]).max() + translation = 48 - bounding_box.sum(axis = 0) * scale * 0.5 + crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (96, 96)) + crop_vision_frame = crop_vision_frame[:, :, ::-1].transpose(2, 0, 1).astype(numpy.float32) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) prediction = gender_age.run(None, { - gender_age.get_inputs()[0].name: crop_frame + gender_age.get_inputs()[0].name: crop_vision_frame })[0][0] gender = int(numpy.argmax(prediction[:2])) age = int(numpy.round(prediction[2] * 100)) return gender, age -def get_one_face(frame : Frame, position : int = 0) -> Optional[Face]: - many_faces = get_many_faces(frame) +def get_one_face(vision_frame : VisionFrame, position : int = 0) -> Optional[Face]: + many_faces = get_many_faces(vision_frame) if many_faces: try: return many_faces[position] @@ -245,52 +328,64 @@ def get_one_face(frame : Frame, position : int = 0) -> Optional[Face]: return None -def get_average_face(frames : List[Frame], position : int = 0) -> 
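Since calc_embedding now returns an L2-normalised embedding alongside the raw one, comparing faces reduces to a dot product. A minimal sketch of that comparison (the body of calc_face_distance is not shown in this hunk, so treat this as an assumption about how the normed embeddings are meant to be used):

import numpy

def face_distance_sketch(normed_embedding, reference_normed_embedding):
    # both vectors have unit length, so cosine distance is one minus their dot product
    return 1 - numpy.dot(normed_embedding, reference_normed_embedding)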
Optional[Face]: +def get_average_face(vision_frames : List[VisionFrame], position : int = 0) -> Optional[Face]: average_face = None faces = [] embedding_list = [] normed_embedding_list = [] - for frame in frames: - face = get_one_face(frame, position) + + for vision_frame in vision_frames: + face = get_one_face(vision_frame, position) if face: faces.append(face) embedding_list.append(face.embedding) normed_embedding_list.append(face.normed_embedding) if faces: + first_face = get_first(faces) average_face = Face( - bbox = faces[0].bbox, - kps = faces[0].kps, - score = faces[0].score, + bounding_box = first_face.bounding_box, + landmark = first_face.landmark, + score = first_face.score, embedding = numpy.mean(embedding_list, axis = 0), normed_embedding = numpy.mean(normed_embedding_list, axis = 0), - gender = faces[0].gender, - age = faces[0].age + gender = first_face.gender, + age = first_face.age ) return average_face -def get_many_faces(frame : Frame) -> List[Face]: +def get_many_faces(vision_frame : VisionFrame) -> List[Face]: + faces = [] try: - faces_cache = get_static_faces(frame) + faces_cache = get_static_faces(vision_frame) if faces_cache: faces = faces_cache else: - faces = extract_faces(frame) - set_static_faces(frame, faces) + if facefusion.globals.face_detector_model == 'retinaface': + bounding_box_list, face_landmark5_list, score_list = detect_with_retinaface(vision_frame, facefusion.globals.face_detector_size) + faces = create_faces(vision_frame, bounding_box_list, face_landmark5_list, score_list) + if facefusion.globals.face_detector_model == 'yoloface': + bounding_box_list, face_landmark5_list, score_list = detect_with_yoloface(vision_frame, facefusion.globals.face_detector_size) + faces = create_faces(vision_frame, bounding_box_list, face_landmark5_list, score_list) + if facefusion.globals.face_detector_model == 'yunet': + bounding_box_list, face_landmark5_list, score_list = detect_with_yunet(vision_frame, facefusion.globals.face_detector_size) + faces = create_faces(vision_frame, bounding_box_list, face_landmark5_list, score_list) + if faces: + set_static_faces(vision_frame, faces) if facefusion.globals.face_analyser_order: faces = sort_by_order(faces, facefusion.globals.face_analyser_order) if facefusion.globals.face_analyser_age: faces = filter_by_age(faces, facefusion.globals.face_analyser_age) if facefusion.globals.face_analyser_gender: faces = filter_by_gender(faces, facefusion.globals.face_analyser_gender) - return faces except (AttributeError, ValueError): - return [] + pass + return faces -def find_similar_faces(frame : Frame, reference_faces : FaceSet, face_distance : float) -> List[Face]: +def find_similar_faces(reference_faces : FaceSet, vision_frame : VisionFrame, face_distance : float) -> List[Face]: similar_faces : List[Face] = [] - many_faces = get_many_faces(frame) + many_faces = get_many_faces(vision_frame) if reference_faces: for reference_set in reference_faces: @@ -315,17 +410,17 @@ def calc_face_distance(face : Face, reference_face : Face) -> float: def sort_by_order(faces : List[Face], order : FaceAnalyserOrder) -> List[Face]: if order == 'left-right': - return sorted(faces, key = lambda face: face.bbox[0]) + return sorted(faces, key = lambda face: face.bounding_box[0]) if order == 'right-left': - return sorted(faces, key = lambda face: face.bbox[0], reverse = True) + return sorted(faces, key = lambda face: face.bounding_box[0], reverse = True) if order == 'top-bottom': - return sorted(faces, key = lambda face: face.bbox[1]) + return sorted(faces, 
key = lambda face: face.bounding_box[1]) if order == 'bottom-top': - return sorted(faces, key = lambda face: face.bbox[1], reverse = True) + return sorted(faces, key = lambda face: face.bounding_box[1], reverse = True) if order == 'small-large': - return sorted(faces, key = lambda face: (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1])) + return sorted(faces, key = lambda face: (face.bounding_box[2] - face.bounding_box[0]) * (face.bounding_box[3] - face.bounding_box[1])) if order == 'large-small': - return sorted(faces, key = lambda face: (face.bbox[2] - face.bbox[0]) * (face.bbox[3] - face.bbox[1]), reverse = True) + return sorted(faces, key = lambda face: (face.bounding_box[2] - face.bounding_box[0]) * (face.bounding_box[3] - face.bounding_box[1]), reverse = True) if order == 'best-worst': return sorted(faces, key = lambda face: face.score, reverse = True) if order == 'worst-best': @@ -336,13 +431,7 @@ def sort_by_order(faces : List[Face], order : FaceAnalyserOrder) -> List[Face]: def filter_by_age(faces : List[Face], age : FaceAnalyserAge) -> List[Face]: filter_faces = [] for face in faces: - if face.age < 13 and age == 'child': - filter_faces.append(face) - elif face.age < 19 and age == 'teen': - filter_faces.append(face) - elif face.age < 60 and age == 'adult': - filter_faces.append(face) - elif face.age > 59 and age == 'senior': + if categorize_age(face.age) == age: filter_faces.append(face) return filter_faces @@ -350,8 +439,6 @@ def filter_by_age(faces : List[Face], age : FaceAnalyserAge) -> List[Face]: def filter_by_gender(faces : List[Face], gender : FaceAnalyserGender) -> List[Face]: filter_faces = [] for face in faces: - if face.gender == 0 and gender == 'female': - filter_faces.append(face) - if face.gender == 1 and gender == 'male': + if categorize_gender(face.gender) == gender: filter_faces.append(face) return filter_faces diff --git a/facefusion/face_helper.py b/facefusion/face_helper.py index d49a2e6a..2391eedb 100644 --- a/facefusion/face_helper.py +++ b/facefusion/face_helper.py @@ -4,7 +4,7 @@ from functools import lru_cache import cv2 import numpy -from facefusion.typing import Bbox, Kps, Frame, Mask, Matrix, Template +from facefusion.typing import BoundingBox, FaceLandmark5, FaceLandmark68, VisionFrame, Mask, Matrix, Translation, Template, FaceAnalyserAge, FaceAnalyserGender TEMPLATES : Dict[Template, numpy.ndarray[Any, Any]] =\ { @@ -43,35 +43,41 @@ TEMPLATES : Dict[Template, numpy.ndarray[Any, Any]] =\ } -def warp_face_by_kps(temp_frame : Frame, kps : Kps, template : Template, crop_size : Size) -> Tuple[Frame, Matrix]: +def warp_face_by_face_landmark_5(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5, template : Template, crop_size : Size) -> Tuple[VisionFrame, Matrix]: normed_template = TEMPLATES.get(template) * crop_size - affine_matrix = cv2.estimateAffinePartial2D(kps, normed_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] - crop_frame = cv2.warpAffine(temp_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA) - return crop_frame, affine_matrix + affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA) + return crop_vision_frame, affine_matrix -def warp_face_by_bbox(temp_frame : Frame, bbox : Bbox, crop_size : Size) -> Tuple[Frame, Matrix]: - source_kps = numpy.array([[ 
bbox[0], bbox[1] ], [bbox[2], bbox[1] ], [bbox[0], bbox[3] ]], dtype = numpy.float32) - target_kps = numpy.array([[ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ]], dtype = numpy.float32) - affine_matrix = cv2.getAffineTransform(source_kps, target_kps) - if bbox[2] - bbox[0] > crop_size[0] or bbox[3] - bbox[1] > crop_size[1]: +def warp_face_by_bounding_box(temp_vision_frame : VisionFrame, bounding_box : BoundingBox, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + source_points = numpy.array([[bounding_box[0], bounding_box[1]], [bounding_box[2], bounding_box[1]], [bounding_box[0], bounding_box[3]]], dtype = numpy.float32) + target_points = numpy.array([[ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ]], dtype = numpy.float32) + affine_matrix = cv2.getAffineTransform(source_points, target_points) + if bounding_box[2] - bounding_box[0] > crop_size[0] or bounding_box[3] - bounding_box[1] > crop_size[1]: interpolation_method = cv2.INTER_AREA else: interpolation_method = cv2.INTER_LINEAR - crop_frame = cv2.warpAffine(temp_frame, affine_matrix, crop_size, flags = interpolation_method) - return crop_frame, affine_matrix + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, flags = interpolation_method) + return crop_vision_frame, affine_matrix -def paste_back(temp_frame : Frame, crop_frame : Frame, crop_mask : Mask, affine_matrix : Matrix) -> Frame: +def warp_face_by_translation(temp_vision_frame : VisionFrame, translation : Translation, scale : float, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + affine_matrix = numpy.array([[ scale, 0, translation[0] ], [ 0, scale, translation[1] ]]) + crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size) + return crop_vision_frame, affine_matrix + + +def paste_back(temp_vision_frame : VisionFrame, crop_vision_frame : VisionFrame, crop_mask : Mask, affine_matrix : Matrix) -> VisionFrame: inverse_matrix = cv2.invertAffineTransform(affine_matrix) - temp_frame_size = temp_frame.shape[:2][::-1] - inverse_crop_mask = cv2.warpAffine(crop_mask, inverse_matrix, temp_frame_size).clip(0, 1) - inverse_crop_frame = cv2.warpAffine(crop_frame, inverse_matrix, temp_frame_size, borderMode = cv2.BORDER_REPLICATE) - paste_frame = temp_frame.copy() - paste_frame[:, :, 0] = inverse_crop_mask * inverse_crop_frame[:, :, 0] + (1 - inverse_crop_mask) * temp_frame[:, :, 0] - paste_frame[:, :, 1] = inverse_crop_mask * inverse_crop_frame[:, :, 1] + (1 - inverse_crop_mask) * temp_frame[:, :, 1] - paste_frame[:, :, 2] = inverse_crop_mask * inverse_crop_frame[:, :, 2] + (1 - inverse_crop_mask) * temp_frame[:, :, 2] - return paste_frame + temp_size = temp_vision_frame.shape[:2][::-1] + inverse_mask = cv2.warpAffine(crop_mask, inverse_matrix, temp_size).clip(0, 1) + inverse_vision_frame = cv2.warpAffine(crop_vision_frame, inverse_matrix, temp_size, borderMode = cv2.BORDER_REPLICATE) + paste_vision_frame = temp_vision_frame.copy() + paste_vision_frame[:, :, 0] = inverse_mask * inverse_vision_frame[:, :, 0] + (1 - inverse_mask) * temp_vision_frame[:, :, 0] + paste_vision_frame[:, :, 1] = inverse_mask * inverse_vision_frame[:, :, 1] + (1 - inverse_mask) * temp_vision_frame[:, :, 1] + paste_vision_frame[:, :, 2] = inverse_mask * inverse_vision_frame[:, :, 2] + (1 - inverse_mask) * temp_vision_frame[:, :, 2] + return paste_vision_frame @lru_cache(maxsize = None) @@ -83,31 +89,48 @@ def create_static_anchors(feature_stride : int, anchor_total : int, stride_heigh return anchors -def distance_to_bbox(points : numpy.ndarray[Any, Any], 
distance : numpy.ndarray[Any, Any]) -> Bbox: +def create_bounding_box_from_landmark(face_landmark_68 : FaceLandmark68) -> BoundingBox: + min_x, min_y = numpy.min(face_landmark_68, axis = 0) + max_x, max_y = numpy.max(face_landmark_68, axis = 0) + bounding_box = numpy.array([ min_x, min_y, max_x, max_y ]).astype(numpy.int16) + return bounding_box + + +def distance_to_bounding_box(points : numpy.ndarray[Any, Any], distance : numpy.ndarray[Any, Any]) -> BoundingBox: x1 = points[:, 0] - distance[:, 0] y1 = points[:, 1] - distance[:, 1] x2 = points[:, 0] + distance[:, 2] y2 = points[:, 1] + distance[:, 3] - bbox = numpy.column_stack([ x1, y1, x2, y2 ]) - return bbox + bounding_box = numpy.column_stack([ x1, y1, x2, y2 ]) + return bounding_box -def distance_to_kps(points : numpy.ndarray[Any, Any], distance : numpy.ndarray[Any, Any]) -> Kps: +def distance_to_face_landmark_5(points : numpy.ndarray[Any, Any], distance : numpy.ndarray[Any, Any]) -> FaceLandmark5: x = points[:, 0::2] + distance[:, 0::2] y = points[:, 1::2] + distance[:, 1::2] - kps = numpy.stack((x, y), axis = -1) - return kps + face_landmark_5 = numpy.stack((x, y), axis = -1) + return face_landmark_5 -def apply_nms(bbox_list : List[Bbox], iou_threshold : float) -> List[int]: +def convert_face_landmark_68_to_5(landmark_68 : FaceLandmark68) -> FaceLandmark5: + left_eye = numpy.mean(landmark_68[36:42], axis = 0) + right_eye = numpy.mean(landmark_68[42:48], axis = 0) + nose = landmark_68[30] + left_mouth_end = landmark_68[48] + right_mouth_end = landmark_68[54] + face_landmark_5 = numpy.array([ left_eye, right_eye, nose, left_mouth_end, right_mouth_end ]) + return face_landmark_5 + + +def apply_nms(bounding_box_list : List[BoundingBox], iou_threshold : float) -> List[int]: keep_indices = [] - dimension_list = numpy.reshape(bbox_list, (-1, 4)) + dimension_list = numpy.reshape(bounding_box_list, (-1, 4)) x1 = dimension_list[:, 0] y1 = dimension_list[:, 1] x2 = dimension_list[:, 2] y2 = dimension_list[:, 3] areas = (x2 - x1 + 1) * (y2 - y1 + 1) - indices = numpy.arange(len(bbox_list)) + indices = numpy.arange(len(bounding_box_list)) while indices.size > 0: index = indices[0] remain_indices = indices[1:] @@ -121,3 +144,19 @@ def apply_nms(bbox_list : List[Bbox], iou_threshold : float) -> List[int]: iou = width * height / (areas[index] + areas[remain_indices] - width * height) indices = indices[numpy.where(iou <= iou_threshold)[0] + 1] return keep_indices + + +def categorize_age(age : int) -> FaceAnalyserAge: + if age < 13: + return 'child' + elif age < 19: + return 'teen' + elif age < 60: + return 'adult' + return 'senior' + + +def categorize_gender(gender : int) -> FaceAnalyserGender: + if gender == 0: + return 'female' + return 'male' diff --git a/facefusion/face_masker.py b/facefusion/face_masker.py index 9884654b..0e76fd8d 100755 --- a/facefusion/face_masker.py +++ b/facefusion/face_masker.py @@ -7,7 +7,7 @@ import numpy import onnxruntime import facefusion.globals -from facefusion.typing import Frame, Mask, Padding, FaceMaskRegion, ModelSet +from facefusion.typing import FaceLandmark68, VisionFrame, Mask, Padding, FaceMaskRegion, ModelSet from facefusion.execution_helper import apply_execution_provider_options from facefusion.filesystem import resolve_relative_path from facefusion.download import conditional_download @@ -91,7 +91,7 @@ def pre_check() -> bool: def create_static_box_mask(crop_size : Size, face_mask_blur : float, face_mask_padding : Padding) -> Mask: blur_amount = int(crop_size[0] * 0.5 * face_mask_blur) blur_area = 
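A rough usage sketch of the renamed apply_nms (the boxes below are invented for illustration): two heavily overlapping detections and one distant detection, filtered at the 0.4 IoU threshold used by create_faces, should keep only the first and the third box.

import numpy

bounding_box_list = [
    numpy.array([ 10, 10, 110, 110 ]),
    numpy.array([ 12, 8, 108, 112 ]),    # overlaps the first box almost completely
    numpy.array([ 300, 300, 400, 400 ])  # far away from the other two
]
keep_indices = apply_nms(bounding_box_list, 0.4)  # expected: [0, 2]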
max(blur_amount // 2, 1) - box_mask = numpy.ones(crop_size, numpy.float32) + box_mask : Mask = numpy.ones(crop_size, numpy.float32) box_mask[:max(blur_area, int(crop_size[1] * face_mask_padding[0] / 100)), :] = 0 box_mask[-max(blur_area, int(crop_size[1] * face_mask_padding[2] / 100)):, :] = 0 box_mask[:, :max(blur_area, int(crop_size[0] * face_mask_padding[3] / 100))] = 0 @@ -101,29 +101,40 @@ def create_static_box_mask(crop_size : Size, face_mask_blur : float, face_mask_p return box_mask -def create_occlusion_mask(crop_frame : Frame) -> Mask: +def create_occlusion_mask(crop_vision_frame : VisionFrame) -> Mask: face_occluder = get_face_occluder() - prepare_frame = cv2.resize(crop_frame, face_occluder.get_inputs()[0].shape[1:3][::-1]) - prepare_frame = numpy.expand_dims(prepare_frame, axis = 0).astype(numpy.float32) / 255 - prepare_frame = prepare_frame.transpose(0, 1, 2, 3) - occlusion_mask = face_occluder.run(None, + prepare_vision_frame = cv2.resize(crop_vision_frame, face_occluder.get_inputs()[0].shape[1:3][::-1]) + prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32) / 255 + prepare_vision_frame = prepare_vision_frame.transpose(0, 1, 2, 3) + occlusion_mask : Mask = face_occluder.run(None, { - face_occluder.get_inputs()[0].name: prepare_frame + face_occluder.get_inputs()[0].name: prepare_vision_frame })[0][0] occlusion_mask = occlusion_mask.transpose(0, 1, 2).clip(0, 1).astype(numpy.float32) - occlusion_mask = cv2.resize(occlusion_mask, crop_frame.shape[:2][::-1]) + occlusion_mask = cv2.resize(occlusion_mask, crop_vision_frame.shape[:2][::-1]) + occlusion_mask = (cv2.GaussianBlur(occlusion_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2 return occlusion_mask -def create_region_mask(crop_frame : Frame, face_mask_regions : List[FaceMaskRegion]) -> Mask: +def create_region_mask(crop_vision_frame : VisionFrame, face_mask_regions : List[FaceMaskRegion]) -> Mask: face_parser = get_face_parser() - prepare_frame = cv2.flip(cv2.resize(crop_frame, (512, 512)), 1) - prepare_frame = numpy.expand_dims(prepare_frame, axis = 0).astype(numpy.float32)[:, :, ::-1] / 127.5 - 1 - prepare_frame = prepare_frame.transpose(0, 3, 1, 2) - region_mask = face_parser.run(None, + prepare_vision_frame = cv2.flip(cv2.resize(crop_vision_frame, (512, 512)), 1) + prepare_vision_frame = numpy.expand_dims(prepare_vision_frame, axis = 0).astype(numpy.float32)[:, :, ::-1] / 127.5 - 1 + prepare_vision_frame = prepare_vision_frame.transpose(0, 3, 1, 2) + region_mask : Mask = face_parser.run(None, { - face_parser.get_inputs()[0].name: prepare_frame + face_parser.get_inputs()[0].name: prepare_vision_frame })[0][0] region_mask = numpy.isin(region_mask.argmax(0), [ FACE_MASK_REGIONS[region] for region in face_mask_regions ]) - region_mask = cv2.resize(region_mask.astype(numpy.float32), crop_frame.shape[:2][::-1]) + region_mask = cv2.resize(region_mask.astype(numpy.float32), crop_vision_frame.shape[:2][::-1]) + region_mask = (cv2.GaussianBlur(region_mask.clip(0, 1), (0, 0), 5).clip(0.5, 1) - 0.5) * 2 return region_mask + + +def create_mouth_mask(face_landmark_68 : FaceLandmark68) -> Mask: + convex_hull = cv2.convexHull(face_landmark_68[numpy.r_[3:14, 31:36]].astype(numpy.int32)) + mouth_mask : Mask = numpy.zeros((512, 512), dtype = numpy.float32) + mouth_mask = cv2.fillConvexPoly(mouth_mask, convex_hull, 1.0) + mouth_mask = cv2.erode(mouth_mask.clip(0, 1), numpy.ones((21, 3))) + mouth_mask = cv2.GaussianBlur(mouth_mask, (0, 0), sigmaX = 1, sigmaY = 15) + return mouth_mask diff --git 
a/facefusion/face_store.py b/facefusion/face_store.py index 36c8b514..7540bc9d 100644 --- a/facefusion/face_store.py +++ b/facefusion/face_store.py @@ -2,7 +2,7 @@ from typing import Optional, List import hashlib import numpy -from facefusion.typing import Frame, Face, FaceStore, FaceSet +from facefusion.typing import VisionFrame, Face, FaceStore, FaceSet FACE_STORE: FaceStore =\ { @@ -11,15 +11,15 @@ FACE_STORE: FaceStore =\ } -def get_static_faces(frame : Frame) -> Optional[List[Face]]: - frame_hash = create_frame_hash(frame) +def get_static_faces(vision_frame : VisionFrame) -> Optional[List[Face]]: + frame_hash = create_frame_hash(vision_frame) if frame_hash in FACE_STORE['static_faces']: return FACE_STORE['static_faces'][frame_hash] return None -def set_static_faces(frame : Frame, faces : List[Face]) -> None: - frame_hash = create_frame_hash(frame) +def set_static_faces(vision_frame : VisionFrame, faces : List[Face]) -> None: + frame_hash = create_frame_hash(vision_frame) if frame_hash: FACE_STORE['static_faces'][frame_hash] = faces @@ -28,8 +28,8 @@ def clear_static_faces() -> None: FACE_STORE['static_faces'] = {} -def create_frame_hash(frame : Frame) -> Optional[str]: - return hashlib.sha1(frame.tobytes()).hexdigest() if numpy.any(frame) else None +def create_frame_hash(vision_frame : VisionFrame) -> Optional[str]: + return hashlib.sha1(vision_frame.tobytes()).hexdigest() if numpy.any(vision_frame) else None def get_reference_faces() -> Optional[FaceSet]: diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index 9da029d3..f6cf4071 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -3,7 +3,7 @@ import subprocess import facefusion.globals from facefusion import logger -from facefusion.typing import OutputVideoPreset, Fps +from facefusion.typing import OutputVideoPreset, Fps, AudioBuffer from facefusion.filesystem import get_temp_frames_pattern, get_temp_output_video_path @@ -21,7 +21,7 @@ def run_ffmpeg(args : List[str]) -> bool: def open_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]: commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error' ] commands.extend(args) - return subprocess.Popen(commands, stdin = subprocess.PIPE) + return subprocess.Popen(commands, stdin = subprocess.PIPE, stdout = subprocess.PIPE) def extract_frames(target_path : str, video_resolution : str, video_fps : Fps) -> bool: @@ -48,10 +48,10 @@ def compress_image(output_path : str) -> bool: return run_ffmpeg(commands) -def merge_video(target_path : str, video_fps : Fps) -> bool: +def merge_video(target_path : str, video_resolution : str, video_fps : Fps) -> bool: temp_output_video_path = get_temp_output_video_path(target_path) temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') - commands = [ '-hwaccel', 'auto', '-r', str(video_fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ] + commands = [ '-hwaccel', 'auto', '-s', str(video_resolution), '-r', str(video_fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ] if facefusion.globals.output_video_encoder in [ 'libx264', 'libx265' ]: output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) commands.extend([ '-crf', str(output_video_compression), '-preset', facefusion.globals.output_video_preset ]) @@ -65,6 +65,15 @@ def merge_video(target_path : str, video_fps : Fps) -> bool: return run_ffmpeg(commands) +def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]: + commands = [ '-i', 
target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-' ] + process = open_ffmpeg(commands) + audio_buffer, error = process.communicate() + if process.returncode == 0: + return audio_buffer + return None + + def restore_audio(target_path : str, output_path : str, video_fps : Fps) -> bool: trim_frame_start = facefusion.globals.trim_frame_start trim_frame_end = facefusion.globals.trim_frame_end @@ -80,6 +89,12 @@ def restore_audio(target_path : str, output_path : str, video_fps : Fps) -> bool return run_ffmpeg(commands) +def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool: + temp_output_path = get_temp_output_video_path(target_path) + commands = [ '-hwaccel', 'auto', '-i', temp_output_path, '-i', audio_path, '-c:v', 'copy', '-af', 'apad', '-shortest', '-map', '0:v:0', '-map', '1:a:0', '-y', output_path ] + return run_ffmpeg(commands) + + def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast' ]: return 'p1' diff --git a/facefusion/filesystem.py b/facefusion/filesystem.py index 87923d2c..a3320fd9 100644 --- a/facefusion/filesystem.py +++ b/facefusion/filesystem.py @@ -62,22 +62,40 @@ def is_directory(directory_path : str) -> bool: return bool(directory_path and os.path.isdir(directory_path)) -def is_image(image_path : str) -> bool: - if is_file(image_path): - return filetype.helpers.is_image(image_path) +def is_audio(audio_path : str) -> bool: + return is_file(audio_path) and filetype.helpers.is_audio(audio_path) + + +def has_audio(audio_paths : List[str]) -> bool: + if audio_paths: + return any(is_audio(audio_path) for audio_path in audio_paths) return False -def are_images(image_paths : List[str]) -> bool: +def is_image(image_path : str) -> bool: + return is_file(image_path) and filetype.helpers.is_image(image_path) + + +def has_image(image_paths: List[str]) -> bool: if image_paths: - return all(is_image(image_path) for image_path in image_paths) + return any(is_image(image_path) for image_path in image_paths) return False def is_video(video_path : str) -> bool: - if is_file(video_path): - return filetype.helpers.is_video(video_path) - return False + return is_file(video_path) and filetype.helpers.is_video(video_path) + + +def filter_audio_paths(paths : List[str]) -> List[str]: + if paths: + return [ path for path in paths if is_audio(path) ] + return [] + + +def filter_image_paths(paths : List[str]) -> List[str]: + if paths: + return [ path for path in paths if is_image(path) ] + return [] def resolve_relative_path(path : str) -> str: @@ -87,5 +105,5 @@ def resolve_relative_path(path : str) -> str: def list_directory(directory_path : str) -> Optional[List[str]]: if is_directory(directory_path): files = os.listdir(directory_path) - return [ Path(file).stem for file in files if not Path(file).stem.startswith(('.', '__')) ] + return sorted([ Path(file).stem for file in files if not Path(file).stem.startswith(('.', '__')) ]) return None diff --git a/facefusion/installer.py b/facefusion/installer.py index cf802ae6..aa8e073a 100644 --- a/facefusion/installer.py +++ b/facefusion/installer.py @@ -4,11 +4,8 @@ import os import platform import tempfile import subprocess -from argparse import ArgumentParser, HelpFormatter - -subprocess.call([ 'pip', 'install' , 'inquirer', '-q' ]) - import inquirer +from argparse import ArgumentParser, HelpFormatter from facefusion import metadata, wording @@ -17,31 +14,32 @@ TORCH : 
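A minimal sketch (numpy usage assumed, this helper is not part of the patch) of consuming the bytes returned by the read_audio_buffer helper added a little earlier: pcm_s16le produces interleaved signed 16-bit samples, so the buffer can be viewed as int16 and reshaped per channel.

import numpy

def audio_buffer_to_samples(audio_buffer : bytes, channel_total : int):
    # interpret the raw 16-bit PCM stream and split it into one column per
    # channel, giving an array of shape (frame_count, channel_total)
    samples = numpy.frombuffer(audio_buffer, dtype = numpy.int16)
    return samples.reshape(-1, channel_total)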
Dict[str, str] =\ 'default': 'default', 'cpu': 'cpu' } -ONNXRUNTIMES : Dict[str, Tuple[str, str]] =\ -{ - 'default': ('onnxruntime', '1.16.3') -} +ONNXRUNTIMES : Dict[str, Tuple[str, str]] = {} + +if platform.system().lower() == 'darwin': + ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.0') +else: + ONNXRUNTIMES['default'] = ('onnxruntime', '1.16.3') if platform.system().lower() == 'linux' or platform.system().lower() == 'windows': - TORCH['cuda'] = 'cu118' - TORCH['cuda-nightly'] = 'cu121' - ONNXRUNTIMES['cuda'] = ('onnxruntime-gpu', '1.16.3') - ONNXRUNTIMES['cuda-nightly'] = ('onnxruntime-gpu', '1.17.0') + TORCH['cuda-12.1'] = 'cu121' + TORCH['cuda-11.8'] = 'cu118' + ONNXRUNTIMES['cuda-12.1'] = ('onnxruntime-gpu', '1.17.0') + ONNXRUNTIMES['cuda-11.8'] = ('onnxruntime-gpu', '1.16.3') ONNXRUNTIMES['openvino'] = ('onnxruntime-openvino', '1.16.0') if platform.system().lower() == 'linux': - TORCH['rocm'] = 'rocm5.6' - ONNXRUNTIMES['rocm'] = ('onnxruntime-rocm', '1.16.3') -if platform.system().lower() == 'darwin': - ONNXRUNTIMES['coreml-legacy'] = ('onnxruntime-coreml', '1.13.1') - ONNXRUNTIMES['coreml-silicon'] = ('onnxruntime-silicon', '1.16.0') + TORCH['rocm-5.4.2'] = 'rocm5.4.2' + TORCH['rocm-5.6'] = 'rocm5.6' + ONNXRUNTIMES['rocm-5.4.2'] = ('onnxruntime-rocm', '1.16.3') + ONNXRUNTIMES['rocm-5.6'] = ('onnxruntime-rocm', '1.16.3') if platform.system().lower() == 'windows': - ONNXRUNTIMES['directml'] = ('onnxruntime-directml', '1.16.3') + ONNXRUNTIMES['directml'] = ('onnxruntime-directml', '1.16.0') def cli() -> None: - program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 120)) - program.add_argument('--torch', help = wording.get('install_dependency_help').format(dependency = 'torch'), choices = TORCH.keys()) - program.add_argument('--onnxruntime', help = wording.get('install_dependency_help').format(dependency = 'onnxruntime'), choices = ONNXRUNTIMES.keys()) - program.add_argument('--skip-venv', help = wording.get('skip_venv_help'), action = 'store_true') + program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 130)) + program.add_argument('--torch', help = wording.get('help.install_dependency').format(dependency = 'torch'), choices = TORCH.keys()) + program.add_argument('--onnxruntime', help = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = ONNXRUNTIMES.keys()) + program.add_argument('--skip-venv', help = wording.get('help.skip_venv'), action = 'store_true') program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') run(program) @@ -61,8 +59,8 @@ def run(program : ArgumentParser) -> None: else: answers = inquirer.prompt( [ - inquirer.List('torch', message = wording.get('install_dependency_help').format(dependency = 'torch'), choices = list(TORCH.keys())), - inquirer.List('onnxruntime', message = wording.get('install_dependency_help').format(dependency = 'onnxruntime'), choices = list(ONNXRUNTIMES.keys())) + inquirer.List('torch', message = wording.get('help.install_dependency').format(dependency = 'torch'), choices = list(TORCH.keys())), + inquirer.List('onnxruntime', message = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = list(ONNXRUNTIMES.keys())) ]) if answers: torch = answers['torch'] @@ -75,9 +73,11 @@ def run(program : ArgumentParser) -> None: subprocess.call([ 'pip', 'install', '-r', 'requirements.txt', '--force-reinstall' ]) else: subprocess.call([ 'pip', 
'install', '-r', 'requirements.txt', '--extra-index-url', 'https://download.pytorch.org/whl/' + torch_wheel, '--force-reinstall' ]) - if onnxruntime == 'rocm': + if onnxruntime == 'rocm-5.4.2' or onnxruntime == 'rocm-5.6': if python_id in [ 'cp39', 'cp310', 'cp311' ]: - wheel_name = 'onnxruntime_training-' + onnxruntime_version + '+rocm56-' + python_id + '-' + python_id + '-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' + rocm_version = onnxruntime.replace('-', '') + rocm_version = rocm_version.replace('.', '') + wheel_name = 'onnxruntime_training-' + onnxruntime_version + '+' + rocm_version + '-' + python_id + '-' + python_id + '-manylinux_2_17_x86_64.manylinux2014_x86_64.whl' wheel_path = os.path.join(tempfile.gettempdir(), wheel_name) wheel_url = 'https://download.onnxruntime.ai/' + wheel_name subprocess.call([ 'curl', '--silent', '--location', '--continue-at', '-', '--output', wheel_path, wheel_url ]) @@ -86,7 +86,7 @@ def run(program : ArgumentParser) -> None: os.remove(wheel_path) else: subprocess.call([ 'pip', 'uninstall', 'onnxruntime', onnxruntime_name, '-y', '-q' ]) - if onnxruntime == 'cuda-nightly': + if onnxruntime == 'cuda-12.1': subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--extra-index-url', 'https://pkgs.dev.azure.com/onnxruntime/onnxruntime/_packaging/onnxruntime-cuda-12/pypi/simple', '--force-reinstall' ]) else: subprocess.call([ 'pip', 'install', onnxruntime_name + '==' + onnxruntime_version, '--force-reinstall' ]) diff --git a/facefusion/metadata.py b/facefusion/metadata.py index 96a4a8c4..9d2d88aa 100644 --- a/facefusion/metadata.py +++ b/facefusion/metadata.py @@ -2,7 +2,7 @@ METADATA =\ { 'name': 'FaceFusion', 'description': 'Next generation face swapper and enhancer', - 'version': '2.2.1', + 'version': '2.3.0', 'license': 'MIT', 'author': 'Henry Ruhs', 'url': 'https://facefusion.io' diff --git a/facefusion/processors/frame/choices.py b/facefusion/processors/frame/choices.py index 9da63980..f0daeccf 100755 --- a/facefusion/processors/frame/choices.py +++ b/facefusion/processors/frame/choices.py @@ -1,13 +1,13 @@ from typing import List from facefusion.common_helper import create_int_range -from facefusion.processors.frame.typings import FaceSwapperModel, FaceEnhancerModel, FrameEnhancerModel, FaceDebuggerItem +from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameEnhancerModel, LipSyncerModel -face_swapper_models : List[FaceSwapperModel] = [ 'blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial' ] +face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'landmark-5', 'landmark-68', 'face-mask', 'score', 'age', 'gender' ] face_enhancer_models : List[FaceEnhancerModel] = [ 'codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'restoreformer_plus_plus' ] +face_swapper_models : List[FaceSwapperModel] = [ 'blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256' ] frame_enhancer_models : List[FrameEnhancerModel] = [ 'real_esrgan_x2plus', 'real_esrgan_x4plus', 'real_esrnet_x4plus' ] -face_debugger_items : List[FaceDebuggerItem] = [ 'bbox', 'kps', 'face-mask', 'score' ] +lip_syncer_models : List[LipSyncerModel] = [ 'wav2lip_gan' ] face_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) frame_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) - diff --git a/facefusion/processors/frame/core.py 
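To make the new wheel-name construction concrete: selecting --onnxruntime rocm-5.6 collapses rocm_version to 'rocm56', so (assuming python_id resolves to cp310) the installer downloads onnxruntime_training-1.16.3+rocm56-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl from download.onnxruntime.ai, and rocm-5.4.2 yields a +rocm542 wheel the same way.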
b/facefusion/processors/frame/core.py index a236ddb0..f09ba2ad 100644 --- a/facefusion/processors/frame/core.py +++ b/facefusion/processors/frame/core.py @@ -1,3 +1,4 @@ +import os import sys import importlib from concurrent.futures import ThreadPoolExecutor, as_completed @@ -7,7 +8,7 @@ from typing import Any, List from tqdm import tqdm import facefusion.globals -from facefusion.typing import Process_Frames +from facefusion.typing import Process_Frames, QueuePayload from facefusion.execution_helper import encode_execution_providers from facefusion import logger, wording @@ -67,7 +68,8 @@ def clear_frame_processors_modules() -> None: def multi_process_frames(source_paths : List[str], temp_frame_paths : List[str], process_frames : Process_Frames) -> None: - with tqdm(total = len(temp_frame_paths), desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: + queue_payloads = create_queue_payloads(temp_frame_paths) + with tqdm(total = len(queue_payloads), desc = wording.get('processing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: progress.set_postfix( { 'execution_providers': encode_execution_providers(facefusion.globals.execution_providers), @@ -76,26 +78,39 @@ def multi_process_frames(source_paths : List[str], temp_frame_paths : List[str], }) with ThreadPoolExecutor(max_workers = facefusion.globals.execution_thread_count) as executor: futures = [] - queue_frame_paths : Queue[str] = create_queue(temp_frame_paths) - queue_per_future = max(len(temp_frame_paths) // facefusion.globals.execution_thread_count * facefusion.globals.execution_queue_count, 1) - while not queue_frame_paths.empty(): - submit_frame_paths = pick_queue(queue_frame_paths, queue_per_future) - future = executor.submit(process_frames, source_paths, submit_frame_paths, progress.update) + queue : Queue[QueuePayload] = create_queue(queue_payloads) + queue_per_future = max(len(queue_payloads) // facefusion.globals.execution_thread_count * facefusion.globals.execution_queue_count, 1) + while not queue.empty(): + future = executor.submit(process_frames, source_paths, pick_queue(queue, queue_per_future), progress.update) futures.append(future) for future_done in as_completed(futures): future_done.result() -def create_queue(temp_frame_paths : List[str]) -> Queue[str]: - queue : Queue[str] = Queue() - for frame_path in temp_frame_paths: - queue.put(frame_path) +def create_queue(queue_payloads : List[QueuePayload]) -> Queue[QueuePayload]: + queue : Queue[QueuePayload] = Queue() + for queue_payload in queue_payloads: + queue.put(queue_payload) return queue -def pick_queue(queue : Queue[str], queue_per_future : int) -> List[str]: +def pick_queue(queue : Queue[QueuePayload], queue_per_future : int) -> List[QueuePayload]: queues = [] for _ in range(queue_per_future): if not queue.empty(): queues.append(queue.get()) return queues + + +def create_queue_payloads(temp_frame_paths : List[str]) -> List[QueuePayload]: + queue_payloads = [] + temp_frame_paths = sorted(temp_frame_paths, key = os.path.basename) + + for frame_number, frame_path in enumerate(temp_frame_paths): + frame_payload : QueuePayload =\ + { + 'frame_number' : frame_number, + 'frame_path' : frame_path + } + queue_payloads.append(frame_payload) + return queue_payloads diff --git a/facefusion/processors/frame/globals.py b/facefusion/processors/frame/globals.py index 526b8573..5fa85395 100755 --- a/facefusion/processors/frame/globals.py +++ 
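A minimal sketch of the QueuePayload that create_queue_payloads builds for each frame; the import above shows it comes from facefusion.typing, but its definition is not part of this hunk, so the TypedDict below is an assumption based on the two keys used here, and the example entry is purely hypothetical.

from typing import TypedDict

class QueuePayload(TypedDict):
    frame_number : int
    frame_path : str

queue_payload : QueuePayload = { 'frame_number': 0, 'frame_path': '0001.png' }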
b/facefusion/processors/frame/globals.py @@ -1,10 +1,11 @@ from typing import List, Optional -from facefusion.processors.frame.typings import FaceSwapperModel, FaceEnhancerModel, FrameEnhancerModel, FaceDebuggerItem +from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameEnhancerModel, LipSyncerModel -face_swapper_model : Optional[FaceSwapperModel] = None +face_debugger_items : Optional[List[FaceDebuggerItem]] = None face_enhancer_model : Optional[FaceEnhancerModel] = None face_enhancer_blend : Optional[int] = None +face_swapper_model : Optional[FaceSwapperModel] = None frame_enhancer_model : Optional[FrameEnhancerModel] = None frame_enhancer_blend : Optional[int] = None -face_debugger_items : Optional[List[FaceDebuggerItem]] = None +lip_syncer_model : Optional[LipSyncerModel] = None diff --git a/facefusion/processors/frame/modules/face_debugger.py b/facefusion/processors/frame/modules/face_debugger.py index 443d875c..de7c136a 100755 --- a/facefusion/processors/frame/modules/face_debugger.py +++ b/facefusion/processors/frame/modules/face_debugger.py @@ -6,13 +6,14 @@ import numpy import facefusion.globals import facefusion.processors.frame.core as frame_processors from facefusion import config, wording -from facefusion.face_analyser import get_one_face, get_average_face, get_many_faces, find_similar_faces, clear_face_analyser +from facefusion.face_analyser import get_one_face, get_many_faces, find_similar_faces, clear_face_analyser +from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser +from facefusion.face_helper import warp_face_by_face_landmark_5, categorize_age, categorize_gender from facefusion.face_store import get_reference_faces from facefusion.content_analyser import clear_content_analyser -from facefusion.typing import Face, FaceSet, Frame, Update_Process, ProcessMode -from facefusion.vision import read_image, read_static_image, read_static_images, write_image -from facefusion.face_helper import warp_face_by_kps -from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser +from facefusion.typing import Face, VisionFrame, Update_Process, ProcessMode, QueuePayload +from facefusion.vision import read_image, read_static_image, write_image +from facefusion.processors.frame.typings import FaceDebuggerInputs from facefusion.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices NAME = __name__.upper() @@ -35,7 +36,7 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - program.add_argument('--face-debugger-items', help = wording.get('face_debugger_items_help').format(choices = ', '.join(frame_processors_choices.face_debugger_items)), default = config.get_str_list('frame_processors.face_debugger_items', 'kps face-mask'), choices = frame_processors_choices.face_debugger_items, nargs = '+', metavar = 'FACE_DEBUGGER_ITEMS') + program.add_argument('--face-debugger-items', help = wording.get('help.face_debugger_items').format(choices = ', '.join(frame_processors_choices.face_debugger_items)), default = config.get_str_list('frame_processors.face_debugger_items', 'landmark-5 face-mask'), choices = frame_processors_choices.face_debugger_items, nargs = '+', metavar = 'FACE_DEBUGGER_ITEMS') def apply_args(program : ArgumentParser) -> None: @@ -66,82 +67,109 @@ def 
post_process() -> None: clear_face_parser() -def debug_face(source_face : Face, target_face : Face, reference_faces : FaceSet, temp_frame : Frame) -> Frame: +def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: primary_color = (0, 0, 255) secondary_color = (0, 255, 0) - bounding_box = target_face.bbox.astype(numpy.int32) - temp_frame = temp_frame.copy() - if 'bbox' in frame_processors_globals.face_debugger_items: - cv2.rectangle(temp_frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], bounding_box[3]), secondary_color, 2) + bounding_box = target_face.bounding_box.astype(numpy.int32) + temp_vision_frame = temp_vision_frame.copy() + + if 'bounding-box' in frame_processors_globals.face_debugger_items: + cv2.rectangle(temp_vision_frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], bounding_box[3]), secondary_color, 2) if 'face-mask' in frame_processors_globals.face_debugger_items: - crop_frame, affine_matrix = warp_face_by_kps(temp_frame, target_face.kps, 'arcface_128_v2', (512, 512)) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark['5/68'], 'arcface_128_v2', (512, 512)) inverse_matrix = cv2.invertAffineTransform(affine_matrix) - temp_frame_size = temp_frame.shape[:2][::-1] + temp_size = temp_vision_frame.shape[:2][::-1] crop_mask_list = [] if 'box' in facefusion.globals.face_mask_types: - crop_mask_list.append(create_static_box_mask(crop_frame.shape[:2][::-1], 0, facefusion.globals.face_mask_padding)) + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], 0, facefusion.globals.face_mask_padding) + crop_mask_list.append(box_mask) if 'occlusion' in facefusion.globals.face_mask_types: - crop_mask_list.append(create_occlusion_mask(crop_frame)) + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) if 'region' in facefusion.globals.face_mask_types: - crop_mask_list.append(create_region_mask(crop_frame, facefusion.globals.face_mask_regions)) + region_mask = create_region_mask(crop_vision_frame, facefusion.globals.face_mask_regions) + crop_mask_list.append(region_mask) crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) crop_mask = (crop_mask * 255).astype(numpy.uint8) - inverse_mask_frame = cv2.warpAffine(crop_mask, inverse_matrix, temp_frame_size) - inverse_mask_frame = cv2.threshold(inverse_mask_frame, 100, 255, cv2.THRESH_BINARY)[1] - inverse_mask_frame[inverse_mask_frame > 0] = 255 - inverse_mask_contours = cv2.findContours(inverse_mask_frame, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0] - cv2.drawContours(temp_frame, inverse_mask_contours, -1, primary_color, 2) + inverse_vision_frame = cv2.warpAffine(crop_mask, inverse_matrix, temp_size) + inverse_vision_frame = cv2.threshold(inverse_vision_frame, 100, 255, cv2.THRESH_BINARY)[1] + inverse_vision_frame[inverse_vision_frame > 0] = 255 + inverse_contours = cv2.findContours(inverse_vision_frame, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[0] + cv2.drawContours(temp_vision_frame, inverse_contours, -1, primary_color, 2) if bounding_box[3] - bounding_box[1] > 60 and bounding_box[2] - bounding_box[0] > 60: - if 'kps' in frame_processors_globals.face_debugger_items: - kps = target_face.kps.astype(numpy.int32) - for index in range(kps.shape[0]): - cv2.circle(temp_frame, (kps[index][0], kps[index][1]), 3, primary_color, -1) + top = bounding_box[1] + left = bounding_box[0] + 20 + if 'landmark-5' in frame_processors_globals.face_debugger_items: + face_landmark_5 = 
target_face.landmark['5/68'].astype(numpy.int32) + for index in range(face_landmark_5.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_5[index][0], face_landmark_5[index][1]), 3, primary_color, -1) + if 'landmark-68' in frame_processors_globals.face_debugger_items: + face_landmark_68 = target_face.landmark['68'].astype(numpy.int32) + for index in range(face_landmark_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, secondary_color, -1) if 'score' in frame_processors_globals.face_debugger_items: face_score_text = str(round(target_face.score, 2)) - face_score_position = (bounding_box[0] + 10, bounding_box[1] + 20) - cv2.putText(temp_frame, face_score_text, face_score_position, cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) - return temp_frame + top = top + 20 + cv2.putText(temp_vision_frame, face_score_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) + if 'age' in frame_processors_globals.face_debugger_items: + face_age_text = categorize_age(target_face.age) + top = top + 20 + cv2.putText(temp_vision_frame, face_age_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) + if 'gender' in frame_processors_globals.face_debugger_items: + face_gender_text = categorize_gender(target_face.gender) + top = top + 20 + cv2.putText(temp_vision_frame, face_gender_text, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.5, secondary_color, 2) + return temp_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame: +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: pass -def process_frame(source_face : Face, reference_faces : FaceSet, temp_frame : Frame) -> Frame: +def process_frame(inputs : FaceDebuggerInputs) -> VisionFrame: + reference_faces = inputs['reference_faces'] + target_vision_frame = inputs['target_vision_frame'] + if 'reference' in facefusion.globals.face_selector_mode: - similar_faces = find_similar_faces(temp_frame, reference_faces, facefusion.globals.reference_face_distance) + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) if similar_faces: for similar_face in similar_faces: - temp_frame = debug_face(source_face, similar_face, reference_faces, temp_frame) + target_vision_frame = debug_face(similar_face, target_vision_frame) if 'one' in facefusion.globals.face_selector_mode: - target_face = get_one_face(temp_frame) + target_face = get_one_face(target_vision_frame) if target_face: - temp_frame = debug_face(source_face, target_face, None, temp_frame) + target_vision_frame = debug_face(target_face, target_vision_frame) if 'many' in facefusion.globals.face_selector_mode: - many_faces = get_many_faces(temp_frame) + many_faces = get_many_faces(target_vision_frame) if many_faces: for target_face in many_faces: - temp_frame = debug_face(source_face, target_face, None, temp_frame) - return temp_frame + target_vision_frame = debug_face(target_face, target_vision_frame) + return target_vision_frame -def process_frames(source_paths : List[str], temp_frame_paths : List[str], update_progress : Update_Process) -> None: - source_frames = read_static_images(source_paths) - source_face = get_average_face(source_frames) +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode 
else None - for temp_frame_path in temp_frame_paths: - temp_frame = read_image(temp_frame_path) - result_frame = process_frame(source_face, reference_faces, temp_frame) - write_image(temp_frame_path, result_frame) + + for queue_payload in queue_payloads: + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + result_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, result_frame) update_progress() def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: - source_frames = read_static_images(source_paths) - source_face = get_average_face(source_frames) - target_frame = read_static_image(target_path) reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None - result_frame = process_frame(source_face, reference_faces, target_frame) + target_vision_frame = read_static_image(target_path) + result_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) write_image(output_path, result_frame) diff --git a/facefusion/processors/frame/modules/face_enhancer.py b/facefusion/processors/frame/modules/face_enhancer.py index 4be35e81..c4ea5bb9 100755 --- a/facefusion/processors/frame/modules/face_enhancer.py +++ b/facefusion/processors/frame/modules/face_enhancer.py @@ -9,18 +9,19 @@ import facefusion.globals import facefusion.processors.frame.core as frame_processors from facefusion import config, logger, wording from facefusion.face_analyser import get_many_faces, clear_face_analyser, find_similar_faces, get_one_face +from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, clear_face_occluder +from facefusion.face_helper import warp_face_by_face_landmark_5, paste_back from facefusion.execution_helper import apply_execution_provider_options -from facefusion.face_helper import warp_face_by_kps, paste_back from facefusion.content_analyser import clear_content_analyser from facefusion.face_store import get_reference_faces -from facefusion.typing import Face, FaceSet, Frame, Update_Process, ProcessMode, ModelSet, OptionsWithModel +from facefusion.typing import Face, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, QueuePayload from facefusion.common_helper import create_metavar from facefusion.filesystem import is_file, is_image, is_video, resolve_relative_path from facefusion.download import conditional_download, is_download_done from facefusion.vision import read_image, read_static_image, write_image +from facefusion.processors.frame.typings import FaceEnhancerInputs from facefusion.processors.frame import globals as frame_processors_globals from facefusion.processors.frame import choices as frame_processors_choices -from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, clear_face_occluder FRAME_PROCESSOR = None THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore() @@ -115,8 +116,8 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - program.add_argument('--face-enhancer-model', help = wording.get('frame_processor_model_help'), default = config.get_str_value('frame_processors.face_enhancer_model', 'gfpgan_1.4'), choices = frame_processors_choices.face_enhancer_models) - program.add_argument('--face-enhancer-blend', help = wording.get('frame_processor_blend_help'), type = 
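A sketch of the new inputs-dict calling convention for process_frame (the target path is hypothetical; FaceDebuggerInputs is assumed to be a TypedDict carrying exactly the 'reference_faces' and 'target_vision_frame' keys used in this hunk):

# hypothetical call site using helpers imported by the face debugger module
result_frame = process_frame(
{
    'reference_faces': get_reference_faces(),
    'target_vision_frame': read_static_image('target.jpg')
})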
int, default = config.get_int_value('frame_processors.face_enhancer_blend', '80'), choices = frame_processors_choices.face_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.face_enhancer_blend_range)) + program.add_argument('--face-enhancer-model', help = wording.get('help.face_enhancer_model'), default = config.get_str_value('frame_processors.face_enhancer_model', 'gfpgan_1.4'), choices = frame_processors_choices.face_enhancer_models) + program.add_argument('--face-enhancer-blend', help = wording.get('help.face_enhancer_blend'), type = int, default = config.get_int_value('frame_processors.face_enhancer_blend', '80'), choices = frame_processors_choices.face_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.face_enhancer_blend_range)) def apply_args(program : ArgumentParser) -> None: @@ -165,97 +166,113 @@ def post_process() -> None: clear_face_occluder() -def enhance_face(target_face: Face, temp_frame : Frame) -> Frame: +def enhance_face(target_face: Face, temp_vision_frame : VisionFrame) -> VisionFrame: model_template = get_options('model').get('template') model_size = get_options('model').get('size') - crop_frame, affine_matrix = warp_face_by_kps(temp_frame, target_face.kps, model_template, model_size) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark['5/68'], model_template, model_size) + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, (0, 0, 0, 0)) crop_mask_list =\ [ - create_static_box_mask(crop_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, (0, 0, 0, 0)) + box_mask ] + if 'occlusion' in facefusion.globals.face_mask_types: - crop_mask_list.append(create_occlusion_mask(crop_frame)) - crop_frame = prepare_crop_frame(crop_frame) - crop_frame = apply_enhance(crop_frame) - crop_frame = normalize_crop_frame(crop_frame) + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + crop_vision_frame = prepare_crop_frame(crop_vision_frame) + crop_vision_frame = apply_enhance(crop_vision_frame) + crop_vision_frame = normalize_crop_frame(crop_vision_frame) crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) - paste_frame = paste_back(temp_frame, crop_frame, crop_mask, affine_matrix) - temp_frame = blend_frame(temp_frame, paste_frame) - return temp_frame + paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + temp_vision_frame = blend_frame(temp_vision_frame, paste_vision_frame) + return temp_vision_frame -def apply_enhance(crop_frame : Frame) -> Frame: +def apply_enhance(crop_vision_frame : VisionFrame) -> VisionFrame: frame_processor = get_frame_processor() frame_processor_inputs = {} for frame_processor_input in frame_processor.get_inputs(): if frame_processor_input.name == 'input': - frame_processor_inputs[frame_processor_input.name] = crop_frame + frame_processor_inputs[frame_processor_input.name] = crop_vision_frame if frame_processor_input.name == 'weight': weight = numpy.array([ 1 ], dtype = numpy.double) frame_processor_inputs[frame_processor_input.name] = weight with THREAD_SEMAPHORE: - crop_frame = frame_processor.run(None, frame_processor_inputs)[0][0] - return crop_frame + crop_vision_frame = frame_processor.run(None, frame_processor_inputs)[0][0] + return crop_vision_frame -def prepare_crop_frame(crop_frame : Frame) -> Frame: - crop_frame = crop_frame[:, :, ::-1] / 255.0 - crop_frame = (crop_frame - 0.5) / 0.5 - crop_frame = 
numpy.expand_dims(crop_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) - return crop_frame +def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = crop_vision_frame[:, :, ::-1] / 255.0 + crop_vision_frame = (crop_vision_frame - 0.5) / 0.5 + crop_vision_frame = numpy.expand_dims(crop_vision_frame.transpose(2, 0, 1), axis = 0).astype(numpy.float32) + return crop_vision_frame -def normalize_crop_frame(crop_frame : Frame) -> Frame: - crop_frame = numpy.clip(crop_frame, -1, 1) - crop_frame = (crop_frame + 1) / 2 - crop_frame = crop_frame.transpose(1, 2, 0) - crop_frame = (crop_frame * 255.0).round() - crop_frame = crop_frame.astype(numpy.uint8)[:, :, ::-1] - return crop_frame +def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = numpy.clip(crop_vision_frame, -1, 1) + crop_vision_frame = (crop_vision_frame + 1) / 2 + crop_vision_frame = crop_vision_frame.transpose(1, 2, 0) + crop_vision_frame = (crop_vision_frame * 255.0).round() + crop_vision_frame = crop_vision_frame.astype(numpy.uint8)[:, :, ::-1] + return crop_vision_frame -def blend_frame(temp_frame : Frame, paste_frame : Frame) -> Frame: +def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: face_enhancer_blend = 1 - (frame_processors_globals.face_enhancer_blend / 100) - temp_frame = cv2.addWeighted(temp_frame, face_enhancer_blend, paste_frame, 1 - face_enhancer_blend, 0) - return temp_frame + temp_vision_frame = cv2.addWeighted(temp_vision_frame, face_enhancer_blend, paste_vision_frame, 1 - face_enhancer_blend, 0) + return temp_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame: - return enhance_face(target_face, temp_frame) +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + return enhance_face(target_face, temp_vision_frame) -def process_frame(source_face : Face, reference_faces : FaceSet, temp_frame : Frame) -> Frame: +def process_frame(inputs : FaceEnhancerInputs) -> VisionFrame: + reference_faces = inputs['reference_faces'] + target_vision_frame = inputs['target_vision_frame'] + if 'reference' in facefusion.globals.face_selector_mode: - similar_faces = find_similar_faces(temp_frame, reference_faces, facefusion.globals.reference_face_distance) + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) if similar_faces: for similar_face in similar_faces: - temp_frame = enhance_face(similar_face, temp_frame) + target_vision_frame = enhance_face(similar_face, target_vision_frame) if 'one' in facefusion.globals.face_selector_mode: - target_face = get_one_face(temp_frame) + target_face = get_one_face(target_vision_frame) if target_face: - temp_frame = enhance_face(target_face, temp_frame) + target_vision_frame = enhance_face(target_face, target_vision_frame) if 'many' in facefusion.globals.face_selector_mode: - many_faces = get_many_faces(temp_frame) + many_faces = get_many_faces(target_vision_frame) if many_faces: for target_face in many_faces: - temp_frame = enhance_face(target_face, temp_frame) - return temp_frame + target_vision_frame = enhance_face(target_face, target_vision_frame) + return target_vision_frame -def process_frames(source_path : List[str], temp_frame_paths : List[str], update_progress : Update_Process) -> None: +def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : 
Update_Process) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None - for temp_frame_path in temp_frame_paths: - temp_frame = read_image(temp_frame_path) - result_frame = process_frame(None, reference_faces, temp_frame) - write_image(temp_frame_path, result_frame) + + for queue_payload in queue_payloads: + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + result_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, result_frame) update_progress() def process_image(source_path : str, target_path : str, output_path : str) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None - target_frame = read_static_image(target_path) - result_frame = process_frame(None, reference_faces, target_frame) + target_vision_frame = read_static_image(target_path) + result_frame = process_frame( + { + 'reference_faces': reference_faces, + 'target_vision_frame': target_vision_frame + }) write_image(output_path, result_frame) diff --git a/facefusion/processors/frame/modules/face_swapper.py b/facefusion/processors/frame/modules/face_swapper.py index 9b880d7e..0eca5aad 100755 --- a/facefusion/processors/frame/modules/face_swapper.py +++ b/facefusion/processors/frame/modules/face_swapper.py @@ -1,6 +1,5 @@ from typing import Any, List, Literal, Optional from argparse import ArgumentParser -import platform import threading import numpy import onnx @@ -12,16 +11,17 @@ import facefusion.processors.frame.core as frame_processors from facefusion import config, logger, wording from facefusion.execution_helper import apply_execution_provider_options from facefusion.face_analyser import get_one_face, get_average_face, get_many_faces, find_similar_faces, clear_face_analyser -from facefusion.face_helper import warp_face_by_kps, paste_back +from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser +from facefusion.face_helper import warp_face_by_face_landmark_5, paste_back from facefusion.face_store import get_reference_faces from facefusion.content_analyser import clear_content_analyser -from facefusion.typing import Face, FaceSet, Frame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, Embedding -from facefusion.filesystem import is_file, is_image, are_images, is_video, resolve_relative_path +from facefusion.typing import Face, Embedding, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.filesystem import is_file, is_image, has_image, is_video, filter_image_paths, resolve_relative_path from facefusion.download import conditional_download, is_download_done from facefusion.vision import read_image, read_static_image, read_static_images, write_image +from facefusion.processors.frame.typings import FaceSwapperInputs from facefusion.processors.frame import globals as frame_processors_globals from facefusion.processors.frame import choices as frame_processors_choices -from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser FRAME_PROCESSOR = None MODEL_MATRIX = None @@ -78,7 +78,17 @@ MODELS : ModelSet =\ 'size': (512, 512), 'mean': [ 0.0, 0.0, 0.0 ], 'standard_deviation': [ 1.0, 1.0, 1.0 ] - } + }, + 'uniface_256': + { + 'type': 'uniface', + 
'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/uniface_256.onnx', + 'path': resolve_relative_path('../.assets/models/uniface_256.onnx'), + 'template': 'ffhq_512', + 'size': (256, 256), + 'mean': [ 0.0, 0.0, 0.0 ], + 'standard_deviation': [ 1.0, 1.0, 1.0 ] + }, } OPTIONS : Optional[OptionsWithModel] = None @@ -134,11 +144,11 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - if platform.system().lower() == 'darwin': + if onnxruntime.__version__ == '1.17.0': face_swapper_model_fallback = 'inswapper_128' else: face_swapper_model_fallback = 'inswapper_128_fp16' - program.add_argument('--face-swapper-model', help = wording.get('frame_processor_model_help'), default = config.get_str_value('frame_processors.face_swapper_model', face_swapper_model_fallback), choices = frame_processors_choices.face_swapper_models) + program.add_argument('--face-swapper-model', help = wording.get('help.face_swapper_model'), default = config.get_str_value('frame_processors.face_swapper_model', face_swapper_model_fallback), choices = frame_processors_choices.face_swapper_models) def apply_args(program : ArgumentParser) -> None: @@ -150,6 +160,8 @@ def apply_args(program : ArgumentParser) -> None: facefusion.globals.face_recognizer_model = 'arcface_inswapper' if args.face_swapper_model == 'simswap_256' or args.face_swapper_model == 'simswap_512_unofficial': facefusion.globals.face_recognizer_model = 'arcface_simswap' + if args.face_swapper_model == 'uniface_256': + facefusion.globals.face_recognizer_model = 'arcface_uniface' def pre_check() -> bool: @@ -173,10 +185,12 @@ def post_check() -> bool: def pre_process(mode : ProcessMode) -> bool: - if not are_images(facefusion.globals.source_paths): + if not has_image(facefusion.globals.source_paths): logger.error(wording.get('select_image_source') + wording.get('exclamation_mark'), NAME) return False - for source_frame in read_static_images(facefusion.globals.source_paths): + source_image_paths = filter_image_paths(facefusion.globals.source_paths) + source_frames = read_static_images(source_image_paths) + for source_frame in source_frames: if not get_one_face(source_frame): logger.error(wording.get('no_source_face_detected') + wording.get('exclamation_mark'), NAME) return False @@ -201,50 +215,57 @@ def post_process() -> None: clear_face_parser() -def swap_face(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame: +def swap_face(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: model_template = get_options('model').get('template') model_size = get_options('model').get('size') - crop_frame, affine_matrix = warp_face_by_kps(temp_frame, target_face.kps, model_template, model_size) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark['5/68'], model_template, model_size) crop_mask_list = [] if 'box' in facefusion.globals.face_mask_types: - crop_mask_list.append(create_static_box_mask(crop_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, facefusion.globals.face_mask_padding)) + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, facefusion.globals.face_mask_padding) + crop_mask_list.append(box_mask) if 'occlusion' in facefusion.globals.face_mask_types: - crop_mask_list.append(create_occlusion_mask(crop_frame)) - crop_frame = prepare_crop_frame(crop_frame) - crop_frame = apply_swap(source_face, crop_frame) - 
crop_frame = normalize_crop_frame(crop_frame) + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + crop_vision_frame = prepare_crop_frame(crop_vision_frame) + crop_vision_frame = apply_swap(source_face, crop_vision_frame) + crop_vision_frame = normalize_crop_frame(crop_vision_frame) if 'region' in facefusion.globals.face_mask_types: - crop_mask_list.append(create_region_mask(crop_frame, facefusion.globals.face_mask_regions)) + region_mask = create_region_mask(crop_vision_frame, facefusion.globals.face_mask_regions) + crop_mask_list.append(region_mask) crop_mask = numpy.minimum.reduce(crop_mask_list).clip(0, 1) - temp_frame = paste_back(temp_frame, crop_frame, crop_mask, affine_matrix) - return temp_frame + temp_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + return temp_vision_frame -def apply_swap(source_face : Face, crop_frame : Frame) -> Frame: +def apply_swap(source_face : Face, crop_vision_frame : VisionFrame) -> VisionFrame: frame_processor = get_frame_processor() model_type = get_options('model').get('type') frame_processor_inputs = {} for frame_processor_input in frame_processor.get_inputs(): if frame_processor_input.name == 'source': - if model_type == 'blendswap': + if model_type == 'blendswap' or model_type == 'uniface': frame_processor_inputs[frame_processor_input.name] = prepare_source_frame(source_face) else: frame_processor_inputs[frame_processor_input.name] = prepare_source_embedding(source_face) if frame_processor_input.name == 'target': - frame_processor_inputs[frame_processor_input.name] = crop_frame - crop_frame = frame_processor.run(None, frame_processor_inputs)[0][0] - return crop_frame + frame_processor_inputs[frame_processor_input.name] = crop_vision_frame + crop_vision_frame = frame_processor.run(None, frame_processor_inputs)[0][0] + return crop_vision_frame -def prepare_source_frame(source_face : Face) -> Frame: - source_frame = read_static_image(facefusion.globals.source_paths[0]) - source_frame, _ = warp_face_by_kps(source_frame, source_face.kps, 'arcface_112_v2', (112, 112)) - source_frame = source_frame[:, :, ::-1] / 255.0 - source_frame = source_frame.transpose(2, 0, 1) - source_frame = numpy.expand_dims(source_frame, axis = 0).astype(numpy.float32) - return source_frame +def prepare_source_frame(source_face : Face) -> VisionFrame: + model_type = get_options('model').get('type') + source_vision_frame = read_static_image(facefusion.globals.source_paths[0]) + if model_type == 'blendswap': + source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmark['5/68'], 'arcface_112_v2', (112, 112)) + if model_type == 'uniface': + source_vision_frame, _ = warp_face_by_face_landmark_5(source_vision_frame, source_face.landmark['5/68'], 'ffhq_512', (256, 256)) + source_vision_frame = source_vision_frame[:, :, ::-1] / 255.0 + source_vision_frame = source_vision_frame.transpose(2, 0, 1) + source_vision_frame = numpy.expand_dims(source_vision_frame, axis = 0).astype(numpy.float32) + return source_vision_frame def prepare_source_embedding(source_face : Face) -> Embedding: @@ -258,62 +279,78 @@ def prepare_source_embedding(source_face : Face) -> Embedding: return source_embedding -def prepare_crop_frame(crop_frame : Frame) -> Frame: +def prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: model_mean = get_options('model').get('mean') model_standard_deviation = get_options('model').get('standard_deviation') - crop_frame = crop_frame[:, :, 
::-1] / 255.0 - crop_frame = (crop_frame - model_mean) / model_standard_deviation - crop_frame = crop_frame.transpose(2, 0, 1) - crop_frame = numpy.expand_dims(crop_frame, axis = 0).astype(numpy.float32) - return crop_frame + crop_vision_frame = crop_vision_frame[:, :, ::-1] / 255.0 + crop_vision_frame = (crop_vision_frame - model_mean) / model_standard_deviation + crop_vision_frame = crop_vision_frame.transpose(2, 0, 1) + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0).astype(numpy.float32) + return crop_vision_frame -def normalize_crop_frame(crop_frame : Frame) -> Frame: - crop_frame = crop_frame.transpose(1, 2, 0) - crop_frame = (crop_frame * 255.0).round() - crop_frame = crop_frame[:, :, ::-1] - return crop_frame +def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = crop_vision_frame.transpose(1, 2, 0) + crop_vision_frame = (crop_vision_frame * 255.0).round() + crop_vision_frame = crop_vision_frame[:, :, ::-1] + return crop_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame: - return swap_face(source_face, target_face, temp_frame) +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + return swap_face(source_face, target_face, temp_vision_frame) -def process_frame(source_face : Face, reference_faces : FaceSet, temp_frame : Frame) -> Frame: +def process_frame(inputs : FaceSwapperInputs) -> VisionFrame: + reference_faces = inputs['reference_faces'] + source_face = inputs['source_face'] + target_vision_frame = inputs['target_vision_frame'] + if 'reference' in facefusion.globals.face_selector_mode: - similar_faces = find_similar_faces(temp_frame, reference_faces, facefusion.globals.reference_face_distance) + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) if similar_faces: for similar_face in similar_faces: - temp_frame = swap_face(source_face, similar_face, temp_frame) + target_vision_frame = swap_face(source_face, similar_face, target_vision_frame) if 'one' in facefusion.globals.face_selector_mode: - target_face = get_one_face(temp_frame) + target_face = get_one_face(target_vision_frame) if target_face: - temp_frame = swap_face(source_face, target_face, temp_frame) + target_vision_frame = swap_face(source_face, target_face, target_vision_frame) if 'many' in facefusion.globals.face_selector_mode: - many_faces = get_many_faces(temp_frame) + many_faces = get_many_faces(target_vision_frame) if many_faces: for target_face in many_faces: - temp_frame = swap_face(source_face, target_face, temp_frame) - return temp_frame + target_vision_frame = swap_face(source_face, target_face, target_vision_frame) + return target_vision_frame -def process_frames(source_paths : List[str], temp_frame_paths : List[str], update_progress : Update_Process) -> None: +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None source_frames = read_static_images(source_paths) source_face = get_average_face(source_frames) - reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None - for temp_frame_path in temp_frame_paths: - temp_frame = read_image(temp_frame_path) - result_frame = process_frame(source_face, reference_faces, temp_frame) - 
write_image(temp_frame_path, result_frame) + + for queue_payload in queue_payloads: + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + result_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, result_frame) update_progress() def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None source_frames = read_static_images(source_paths) source_face = get_average_face(source_frames) - reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None - target_frame = read_static_image(target_path) - result_frame = process_frame(source_face, reference_faces, target_frame) + target_vision_frame = read_static_image(target_path) + result_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'target_vision_frame': target_vision_frame + }) write_image(output_path, result_frame) diff --git a/facefusion/processors/frame/modules/frame_enhancer.py b/facefusion/processors/frame/modules/frame_enhancer.py index 07b0e933..bf85b61d 100644 --- a/facefusion/processors/frame/modules/frame_enhancer.py +++ b/facefusion/processors/frame/modules/frame_enhancer.py @@ -10,12 +10,13 @@ import facefusion.processors.frame.core as frame_processors from facefusion import config, logger, wording from facefusion.face_analyser import clear_face_analyser from facefusion.content_analyser import clear_content_analyser -from facefusion.typing import Face, FaceSet, Frame, Update_Process, ProcessMode, ModelSet, OptionsWithModel +from facefusion.typing import Face, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, QueuePayload from facefusion.common_helper import create_metavar from facefusion.execution_helper import map_torch_backend from facefusion.filesystem import is_file, resolve_relative_path from facefusion.download import conditional_download, is_download_done from facefusion.vision import read_image, read_static_image, write_image +from facefusion.processors.frame.typings import FrameEnhancerInputs from facefusion.processors.frame import globals as frame_processors_globals from facefusion.processors.frame import choices as frame_processors_choices @@ -91,8 +92,8 @@ def set_options(key : Literal['model'], value : Any) -> None: def register_args(program : ArgumentParser) -> None: - program.add_argument('--frame-enhancer-model', help = wording.get('frame_processor_model_help'), default = config.get_str_value('frame_processors.frame_enhancer_model', 'real_esrgan_x2plus'), choices = frame_processors_choices.frame_enhancer_models) - program.add_argument('--frame-enhancer-blend', help = wording.get('frame_processor_blend_help'), type = int, default = config.get_int_value('frame_processors.frame_enhancer_blend', '80'), choices = frame_processors_choices.frame_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.frame_enhancer_blend_range)) + program.add_argument('--frame-enhancer-model', help = wording.get('help.frame_enhancer_model'), default = config.get_str_value('frame_processors.frame_enhancer_model', 'real_esrgan_x2plus'), choices = frame_processors_choices.frame_enhancer_models) + program.add_argument('--frame-enhancer-blend', help = wording.get('help.frame_enhancer_blend'), type = int, 
default = config.get_int_value('frame_processors.frame_enhancer_blend', '80'), choices = frame_processors_choices.frame_enhancer_blend_range, metavar = create_metavar(frame_processors_choices.frame_enhancer_blend_range)) def apply_args(program : ArgumentParser) -> None: @@ -137,41 +138,48 @@ def post_process() -> None: clear_content_analyser() -def enhance_frame(temp_frame : Frame) -> Frame: +def enhance_frame(temp_vision_frame : VisionFrame) -> VisionFrame: with THREAD_SEMAPHORE: - paste_frame, _ = get_frame_processor().enhance(temp_frame) - temp_frame = blend_frame(temp_frame, paste_frame) - return temp_frame + paste_vision_frame, _ = get_frame_processor().enhance(temp_vision_frame) + temp_vision_frame = blend_frame(temp_vision_frame, paste_vision_frame) + return temp_vision_frame -def blend_frame(temp_frame : Frame, paste_frame : Frame) -> Frame: +def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: frame_enhancer_blend = 1 - (frame_processors_globals.frame_enhancer_blend / 100) - paste_frame_height, paste_frame_width = paste_frame.shape[0:2] - temp_frame = cv2.resize(temp_frame, (paste_frame_width, paste_frame_height)) - temp_frame = cv2.addWeighted(temp_frame, frame_enhancer_blend, paste_frame, 1 - frame_enhancer_blend, 0) - return temp_frame + temp_vision_frame = cv2.resize(temp_vision_frame, (paste_vision_frame.shape[1], paste_vision_frame.shape[0])) + temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_enhancer_blend, paste_vision_frame, 1 - frame_enhancer_blend, 0) + return temp_vision_frame -def get_reference_frame(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame: +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: pass -def process_frame(source_face : Face, reference_faces : FaceSet, temp_frame : Frame) -> Frame: - return enhance_frame(temp_frame) +def process_frame(inputs : FrameEnhancerInputs) -> VisionFrame: + target_vision_frame = inputs['target_vision_frame'] + return enhance_frame(target_vision_frame) -def process_frames(source_paths : List[str], temp_frame_paths : List[str], update_progress : Update_Process) -> None: - for temp_frame_path in temp_frame_paths: - temp_frame = read_image(temp_frame_path) - result_frame = process_frame(None, None, temp_frame) - write_image(temp_frame_path, result_frame) +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: + for queue_payload in queue_payloads: + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + result_frame = process_frame( + { + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, result_frame) update_progress() def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: - target_frame = read_static_image(target_path) - result = process_frame(None, None, target_frame) - write_image(output_path, result) + target_vision_frame = read_static_image(target_path) + result_frame = process_frame( + { + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, result_frame) def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: diff --git a/facefusion/processors/frame/modules/lip_syncer.py b/facefusion/processors/frame/modules/lip_syncer.py new file mode 100755 index 00000000..39fb371c --- /dev/null +++ b/facefusion/processors/frame/modules/lip_syncer.py @@ -0,0 +1,248 
@@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +import threading +import cv2 +import numpy +import onnxruntime + +import facefusion.globals +import facefusion.processors.frame.core as frame_processors +from facefusion import config, logger, wording +from facefusion.execution_helper import apply_execution_provider_options +from facefusion.face_analyser import get_one_face, get_many_faces, find_similar_faces, clear_face_analyser +from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_mouth_mask, clear_face_occluder, clear_face_parser +from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_bounding_box, paste_back, create_bounding_box_from_landmark +from facefusion.face_store import get_reference_faces +from facefusion.content_analyser import clear_content_analyser +from facefusion.typing import Face, VisionFrame, Update_Process, ProcessMode, ModelSet, OptionsWithModel, AudioFrame, QueuePayload +from facefusion.filesystem import is_file, has_audio, resolve_relative_path +from facefusion.download import conditional_download, is_download_done +from facefusion.audio import read_static_audio, get_audio_frame +from facefusion.filesystem import is_image, is_video, filter_audio_paths +from facefusion.common_helper import get_first +from facefusion.vision import read_image, write_image, read_static_image +from facefusion.processors.frame.typings import LipSyncerInputs +from facefusion.processors.frame import globals as frame_processors_globals +from facefusion.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +MODEL_MATRIX = None +THREAD_LOCK : threading.Lock = threading.Lock() +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'wav2lip_gan': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/wav2lip_gan.onnx', + 'path': resolve_relative_path('../.assets/models/wav2lip_gan.onnx'), + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with THREAD_LOCK: + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.lip_syncer_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--lip-syncer-model', help = wording.get('help.lip_syncer_model'), default = config.get_str_value('frame_processors.lip_syncer_model', 'wav2lip_gan'), choices = frame_processors_choices.lip_syncer_models) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.lip_syncer_model = args.lip_syncer_model + + +def pre_check() -> bool: + if not facefusion.globals.skip_download: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + conditional_download(download_directory_path, [ model_url ]) + return True + + +def post_check() -> bool: + model_url = get_options('model').get('url') + 
model_path = get_options('model').get('path') + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + elif not is_file(model_path): + logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if not has_audio(facefusion.globals.source_paths): + logger.error(wording.get('select_audio_source') + wording.get('exclamation_mark'), NAME) + return False + if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not facefusion.globals.output_path: + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + read_static_audio.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + clear_face_occluder() + clear_face_parser() + + +def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + temp_audio_frame = prepare_audio_frame(temp_audio_frame) + crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmark['5/68'], 'ffhq_512', (512, 512)) + face_landmark_68 = cv2.transform(target_face.landmark['68'].reshape(1, -1, 2), affine_matrix).reshape(-1, 2) + bounding_box = create_bounding_box_from_landmark(face_landmark_68) + bounding_box[1] -= numpy.abs(bounding_box[3] - bounding_box[1]) * 0.125 + mouth_mask = create_mouth_mask(face_landmark_68) + box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, facefusion.globals.face_mask_padding) + crop_mask_list =\ + [ + mouth_mask, + box_mask + ] + + if 'occlusion' in facefusion.globals.face_mask_types: + occlusion_mask = create_occlusion_mask(crop_vision_frame) + crop_mask_list.append(occlusion_mask) + close_vision_frame, closeup_matrix = warp_face_by_bounding_box(crop_vision_frame, bounding_box, (96, 96)) + close_vision_frame = prepare_crop_frame(close_vision_frame) + close_vision_frame = frame_processor.run(None, + { + 'source': temp_audio_frame, + 'target': close_vision_frame + })[0] + crop_vision_frame = normalize_crop_frame(close_vision_frame) + crop_vision_frame = cv2.warpAffine(crop_vision_frame, cv2.invertAffineTransform(closeup_matrix), (512, 512), borderMode = cv2.BORDER_REPLICATE) + crop_mask = numpy.minimum.reduce(crop_mask_list) + paste_vision_frame = paste_back(temp_vision_frame, crop_vision_frame, crop_mask, affine_matrix) + return paste_vision_frame + + +def prepare_audio_frame(temp_audio_frame : AudioFrame) -> AudioFrame: + temp_audio_frame = numpy.maximum(numpy.exp(-5 * numpy.log(10)), temp_audio_frame) + temp_audio_frame = numpy.log10(temp_audio_frame) * 1.6 + 3.2 + temp_audio_frame = temp_audio_frame.clip(-4, 4).astype(numpy.float32) + temp_audio_frame = numpy.expand_dims(temp_audio_frame, axis = (0, 1)) + return temp_audio_frame + + +def 
prepare_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = numpy.expand_dims(crop_vision_frame, axis = 0) + prepare_vision_frame = crop_vision_frame.copy() + prepare_vision_frame[:, 48:] = 0 + crop_vision_frame = numpy.concatenate((prepare_vision_frame, crop_vision_frame), axis = 3) + crop_vision_frame = crop_vision_frame.transpose(0, 3, 1, 2).astype('float32') / 255.0 + return crop_vision_frame + + +def normalize_crop_frame(crop_vision_frame : VisionFrame) -> VisionFrame: + crop_vision_frame = crop_vision_frame[0].transpose(1, 2, 0) + crop_vision_frame = crop_vision_frame.clip(0, 1) * 255 + crop_vision_frame = crop_vision_frame.astype(numpy.uint8) + return crop_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + pass + + +def process_frame(inputs : LipSyncerInputs) -> VisionFrame: + reference_faces = inputs['reference_faces'] + source_audio_frame = inputs['source_audio_frame'] + target_vision_frame = inputs['target_vision_frame'] + is_source_audio_frame = isinstance(source_audio_frame, numpy.ndarray) and source_audio_frame.any() + + if 'reference' in facefusion.globals.face_selector_mode: + similar_faces = find_similar_faces(reference_faces, target_vision_frame, facefusion.globals.reference_face_distance) + if similar_faces and is_source_audio_frame: + for similar_face in similar_faces: + target_vision_frame = sync_lip(similar_face, source_audio_frame, target_vision_frame) + if 'one' in facefusion.globals.face_selector_mode: + target_face = get_one_face(target_vision_frame) + if target_face and is_source_audio_frame: + target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) + if 'many' in facefusion.globals.face_selector_mode: + many_faces = get_many_faces(target_vision_frame) + if many_faces and is_source_audio_frame: + for target_face in many_faces: + target_vision_frame = sync_lip(target_face, source_audio_frame, target_vision_frame) + return target_vision_frame + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : Update_Process) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_audio_path = get_first(filter_audio_paths(source_paths)) + target_video_fps = facefusion.globals.output_video_fps + + for queue_payload in queue_payloads: + frame_number = queue_payload['frame_number'] + target_vision_path = queue_payload['frame_path'] + source_audio_frame = get_audio_frame(source_audio_path, target_video_fps, frame_number) + target_vision_frame = read_image(target_vision_path) + result_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_audio_frame': source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, result_frame) + update_progress() + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_audio_path = get_first(filter_audio_paths(source_paths)) + source_audio_frame = get_audio_frame(source_audio_path, 25) + target_vision_frame = read_static_image(target_path) + result_frame = process_frame( + { + 'reference_faces': reference_faces, + 'source_audio_frame': source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, result_frame) + + +def process_video(source_paths : 
List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(source_paths, temp_frame_paths, process_frames) diff --git a/facefusion/processors/frame/typings.py b/facefusion/processors/frame/typings.py index 6db5a1b7..e7a93fd3 100644 --- a/facefusion/processors/frame/typings.py +++ b/facefusion/processors/frame/typings.py @@ -1,6 +1,36 @@ -from typing import Literal +from typing import Literal, TypedDict -FaceSwapperModel = Literal['blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial'] +from facefusion.typing import Face, FaceSet, AudioFrame, VisionFrame + +FaceDebuggerItem = Literal['bounding-box', 'landmark-5', 'landmark-68', 'face-mask', 'score', 'age', 'gender'] FaceEnhancerModel = Literal['codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'restoreformer_plus_plus'] +FaceSwapperModel = Literal['blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256'] FrameEnhancerModel = Literal['real_esrgan_x2plus', 'real_esrgan_x4plus', 'real_esrnet_x4plus'] -FaceDebuggerItem = Literal['bbox', 'kps', 'face-mask', 'score', 'distance'] +LipSyncerModel = Literal['wav2lip_gan'] + +FaceDebuggerInputs = TypedDict('FaceDebuggerInputs', +{ + 'reference_faces' : FaceSet, + 'target_vision_frame' : VisionFrame +}) +FaceEnhancerInputs = TypedDict('FaceEnhancerInputs', +{ + 'reference_faces' : FaceSet, + 'target_vision_frame' : VisionFrame +}) +FaceSwapperInputs = TypedDict('FaceSwapperInputs', +{ + 'reference_faces' : FaceSet, + 'source_face' : Face, + 'target_vision_frame' : VisionFrame +}) +FrameEnhancerInputs = TypedDict('FrameEnhancerInputs', +{ + 'target_vision_frame' : VisionFrame +}) +LipSyncerInputs = TypedDict('LipSyncerInputs', +{ + 'reference_faces' : FaceSet, + 'source_audio_frame' : AudioFrame, + 'target_vision_frame' : VisionFrame +}) diff --git a/facefusion/typing.py b/facefusion/typing.py index 7a2459ff..cd302420 100755 --- a/facefusion/typing.py +++ b/facefusion/typing.py @@ -2,14 +2,21 @@ from typing import Any, Literal, Callable, List, Tuple, Dict, TypedDict from collections import namedtuple import numpy -Bbox = numpy.ndarray[Any, Any] -Kps = numpy.ndarray[Any, Any] +BoundingBox = numpy.ndarray[Any, Any] +FaceLandmark5 = numpy.ndarray[Any, Any] +FaceLandmark68 = numpy.ndarray[Any, Any] +FaceLandmarkSet = TypedDict('FaceLandmarkSet', +{ + '5' : FaceLandmark5, # type: ignore[valid-type] + '5/68' : FaceLandmark5, # type: ignore[valid-type] + '68' : FaceLandmark68 # type: ignore[valid-type] +}) Score = float Embedding = numpy.ndarray[Any, Any] Face = namedtuple('Face', [ - 'bbox', - 'kps', + 'bounding_box', + 'landmark', 'score', 'embedding', 'normed_embedding', @@ -22,16 +29,27 @@ FaceStore = TypedDict('FaceStore', 'static_faces' : FaceSet, 'reference_faces': FaceSet }) -Frame = numpy.ndarray[Any, Any] +VisionFrame = numpy.ndarray[Any, Any] Mask = numpy.ndarray[Any, Any] Matrix = numpy.ndarray[Any, Any] +Translation = numpy.ndarray[Any, Any] + +AudioBuffer = bytes +Audio = numpy.ndarray[Any, Any] +AudioFrame = numpy.ndarray[Any, Any] +Spectrogram = numpy.ndarray[Any, Any] Fps = float Padding = Tuple[int, int, int, int] Resolution = Tuple[int, int] +QueuePayload = TypedDict('QueuePayload', +{ + 'frame_number' : int, + 'frame_path' : str +}) Update_Process = Callable[[], None] -Process_Frames = Callable[[List[str], List[str], Update_Process], None] +Process_Frames = Callable[[List[str], List[QueuePayload], Update_Process], None] Template 
= Literal['arcface_112_v1', 'arcface_112_v2', 'arcface_128_v2', 'ffhq_512'] ProcessMode = Literal['output', 'preview', 'stream'] @@ -41,9 +59,9 @@ VideoMemoryStrategy = Literal['strict', 'moderate', 'tolerant'] FaceSelectorMode = Literal['reference', 'one', 'many'] FaceAnalyserOrder = Literal['left-right', 'right-left', 'top-bottom', 'bottom-top', 'small-large', 'large-small', 'best-worst', 'worst-best'] FaceAnalyserAge = Literal['child', 'teen', 'adult', 'senior'] -FaceAnalyserGender = Literal['male', 'female'] -FaceDetectorModel = Literal['retinaface', 'yunet'] -FaceRecognizerModel = Literal['arcface_blendswap', 'arcface_inswapper', 'arcface_simswap'] +FaceAnalyserGender = Literal['female', 'male'] +FaceDetectorModel = Literal['retinaface', 'yoloface', 'yunet'] +FaceRecognizerModel = Literal['arcface_blendswap', 'arcface_inswapper', 'arcface_simswap', 'arcface_uniface'] FaceMaskType = Literal['box', 'occlusion', 'region'] FaceMaskRegion = Literal['skin', 'left-eyebrow', 'right-eyebrow', 'left-eye', 'right-eye', 'eye-glasses', 'nose', 'mouth', 'upper-lip', 'lower-lip'] TempFrameFormat = Literal['jpg', 'png', 'bmp'] diff --git a/facefusion/uis/components/about.py b/facefusion/uis/components/about.py index e2c52caa..01ecb00d 100644 --- a/facefusion/uis/components/about.py +++ b/facefusion/uis/components/about.py @@ -17,7 +17,7 @@ def render() -> None: link = metadata.get('url') ) DONATE_BUTTON = gradio.Button( - value = wording.get('donate_button_label'), + value = wording.get('uis.donate_button'), link = 'https://donate.facefusion.io', size = 'sm' ) diff --git a/facefusion/uis/components/benchmark.py b/facefusion/uis/components/benchmark.py index e8280c7a..572fcc3e 100644 --- a/facefusion/uis/components/benchmark.py +++ b/facefusion/uis/components/benchmark.py @@ -36,7 +36,7 @@ def render() -> None: global BENCHMARK_CLEAR_BUTTON BENCHMARK_RESULTS_DATAFRAME = gradio.Dataframe( - label = wording.get('benchmark_results_dataframe_label'), + label = wording.get('uis.benchmark_results_dataframe'), headers = [ 'target_path', @@ -57,12 +57,12 @@ def render() -> None: ] ) BENCHMARK_START_BUTTON = gradio.Button( - value = wording.get('start_button_label'), + value = wording.get('uis.start_button'), variant = 'primary', size = 'sm' ) BENCHMARK_CLEAR_BUTTON = gradio.Button( - value = wording.get('clear_button_label'), + value = wording.get('uis.clear_button'), size = 'sm' ) diff --git a/facefusion/uis/components/benchmark_options.py b/facefusion/uis/components/benchmark_options.py index 91500613..6748dd9b 100644 --- a/facefusion/uis/components/benchmark_options.py +++ b/facefusion/uis/components/benchmark_options.py @@ -14,12 +14,12 @@ def render() -> None: global BENCHMARK_CYCLES_SLIDER BENCHMARK_RUNS_CHECKBOX_GROUP = gradio.CheckboxGroup( - label = wording.get('benchmark_runs_checkbox_group_label'), + label = wording.get('uis.benchmark_runs_checkbox_group'), value = list(BENCHMARKS.keys()), choices = list(BENCHMARKS.keys()) ) BENCHMARK_CYCLES_SLIDER = gradio.Slider( - label = wording.get('benchmark_cycles_slider_label'), + label = wording.get('uis.benchmark_cycles_slider'), value = 5, step = 1, minimum = 1, diff --git a/facefusion/uis/components/common_options.py b/facefusion/uis/components/common_options.py index 17d73647..43817413 100644 --- a/facefusion/uis/components/common_options.py +++ b/facefusion/uis/components/common_options.py @@ -19,7 +19,7 @@ def render() -> None: if facefusion.globals.skip_download: value.append('skip-download') COMMON_OPTIONS_CHECKBOX_GROUP = gradio.Checkboxgroup( - 
label = wording.get('common_options_checkbox_group_label'), + label = wording.get('uis.common_options_checkbox_group'), choices = uis_choices.common_options, value = value ) diff --git a/facefusion/uis/components/execution.py b/facefusion/uis/components/execution.py index e8df28fd..b3b6ec0e 100644 --- a/facefusion/uis/components/execution.py +++ b/facefusion/uis/components/execution.py @@ -15,7 +15,7 @@ def render() -> None: global EXECUTION_PROVIDERS_CHECKBOX_GROUP EXECUTION_PROVIDERS_CHECKBOX_GROUP = gradio.CheckboxGroup( - label = wording.get('execution_providers_checkbox_group_label'), + label = wording.get('uis.execution_providers_checkbox_group'), choices = encode_execution_providers(onnxruntime.get_available_providers()), value = encode_execution_providers(facefusion.globals.execution_providers) ) diff --git a/facefusion/uis/components/execution_queue_count.py b/facefusion/uis/components/execution_queue_count.py index fc8a3c87..18456a51 100644 --- a/facefusion/uis/components/execution_queue_count.py +++ b/facefusion/uis/components/execution_queue_count.py @@ -12,7 +12,7 @@ def render() -> None: global EXECUTION_QUEUE_COUNT_SLIDER EXECUTION_QUEUE_COUNT_SLIDER = gradio.Slider( - label = wording.get('execution_queue_count_slider_label'), + label = wording.get('uis.execution_queue_count_slider'), value = facefusion.globals.execution_queue_count, step = facefusion.choices.execution_queue_count_range[1] - facefusion.choices.execution_queue_count_range[0], minimum = facefusion.choices.execution_queue_count_range[0], diff --git a/facefusion/uis/components/execution_thread_count.py b/facefusion/uis/components/execution_thread_count.py index 615d1642..df0d5dfa 100644 --- a/facefusion/uis/components/execution_thread_count.py +++ b/facefusion/uis/components/execution_thread_count.py @@ -12,7 +12,7 @@ def render() -> None: global EXECUTION_THREAD_COUNT_SLIDER EXECUTION_THREAD_COUNT_SLIDER = gradio.Slider( - label = wording.get('execution_thread_count_slider_label'), + label = wording.get('uis.execution_thread_count_slider'), value = facefusion.globals.execution_thread_count, step = facefusion.choices.execution_thread_count_range[1] - facefusion.choices.execution_thread_count_range[0], minimum = facefusion.choices.execution_thread_count_range[0], diff --git a/facefusion/uis/components/face_analyser.py b/facefusion/uis/components/face_analyser.py index 77bf3a64..eb7b68f7 100644 --- a/facefusion/uis/components/face_analyser.py +++ b/facefusion/uis/components/face_analyser.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Dict, Any import gradio @@ -24,34 +24,37 @@ def render() -> None: global FACE_DETECTOR_SCORE_SLIDER global FACE_DETECTOR_MODEL_DROPDOWN + face_detector_size_dropdown_args : Dict[str, Any] =\ + { + 'label': wording.get('uis.face_detector_size_dropdown'), + 'value': facefusion.globals.face_detector_size + } + if facefusion.globals.face_detector_size in facefusion.choices.face_detector_set[facefusion.globals.face_detector_model]: + face_detector_size_dropdown_args['choices'] = facefusion.choices.face_detector_set[facefusion.globals.face_detector_model] with gradio.Row(): FACE_ANALYSER_ORDER_DROPDOWN = gradio.Dropdown( - label = wording.get('face_analyser_order_dropdown_label'), + label = wording.get('uis.face_analyser_order_dropdown'), choices = facefusion.choices.face_analyser_orders, value = facefusion.globals.face_analyser_order ) FACE_ANALYSER_AGE_DROPDOWN = gradio.Dropdown( - label = wording.get('face_analyser_age_dropdown_label'), + label = 
wording.get('uis.face_analyser_age_dropdown'), choices = [ 'none' ] + facefusion.choices.face_analyser_ages, value = facefusion.globals.face_analyser_age or 'none' ) FACE_ANALYSER_GENDER_DROPDOWN = gradio.Dropdown( - label = wording.get('face_analyser_gender_dropdown_label'), + label = wording.get('uis.face_analyser_gender_dropdown'), choices = [ 'none' ] + facefusion.choices.face_analyser_genders, value = facefusion.globals.face_analyser_gender or 'none' ) FACE_DETECTOR_MODEL_DROPDOWN = gradio.Dropdown( - label = wording.get('face_detector_model_dropdown_label'), - choices = facefusion.choices.face_detector_models, + label = wording.get('uis.face_detector_model_dropdown'), + choices = facefusion.choices.face_detector_set.keys(), value = facefusion.globals.face_detector_model ) - FACE_DETECTOR_SIZE_DROPDOWN = gradio.Dropdown( - label = wording.get('face_detector_size_dropdown_label'), - choices = facefusion.choices.face_detector_sizes, - value = facefusion.globals.face_detector_size - ) + FACE_DETECTOR_SIZE_DROPDOWN = gradio.Dropdown(**face_detector_size_dropdown_args) FACE_DETECTOR_SCORE_SLIDER = gradio.Slider( - label = wording.get('face_detector_score_slider_label'), + label = wording.get('uis.face_detector_score_slider'), value = facefusion.globals.face_detector_score, step = facefusion.choices.face_detector_score_range[1] - facefusion.choices.face_detector_score_range[0], minimum = facefusion.choices.face_detector_score_range[0], @@ -69,7 +72,7 @@ def listen() -> None: FACE_ANALYSER_ORDER_DROPDOWN.change(update_face_analyser_order, inputs = FACE_ANALYSER_ORDER_DROPDOWN) FACE_ANALYSER_AGE_DROPDOWN.change(update_face_analyser_age, inputs = FACE_ANALYSER_AGE_DROPDOWN) FACE_ANALYSER_GENDER_DROPDOWN.change(update_face_analyser_gender, inputs = FACE_ANALYSER_GENDER_DROPDOWN) - FACE_DETECTOR_MODEL_DROPDOWN.change(update_face_detector_model, inputs = FACE_DETECTOR_MODEL_DROPDOWN) + FACE_DETECTOR_MODEL_DROPDOWN.change(update_face_detector_model, inputs = FACE_DETECTOR_MODEL_DROPDOWN, outputs = FACE_DETECTOR_SIZE_DROPDOWN) FACE_DETECTOR_SIZE_DROPDOWN.change(update_face_detector_size, inputs = FACE_DETECTOR_SIZE_DROPDOWN) FACE_DETECTOR_SCORE_SLIDER.change(update_face_detector_score, inputs = FACE_DETECTOR_SCORE_SLIDER) @@ -86,8 +89,11 @@ def update_face_analyser_gender(face_analyser_gender : FaceAnalyserGender) -> No facefusion.globals.face_analyser_gender = face_analyser_gender if face_analyser_gender != 'none' else None -def update_face_detector_model(face_detector_model : FaceDetectorModel) -> None: +def update_face_detector_model(face_detector_model : FaceDetectorModel) -> gradio.Dropdown: facefusion.globals.face_detector_model = face_detector_model + if facefusion.globals.face_detector_size in facefusion.choices.face_detector_set[face_detector_model]: + return gradio.Dropdown(value = '640x640', choices = facefusion.choices.face_detector_set[face_detector_model]) + return gradio.Dropdown(value = '640x640', choices = [ '640x640' ]) def update_face_detector_size(face_detector_size : str) -> None: diff --git a/facefusion/uis/components/face_masker.py b/facefusion/uis/components/face_masker.py index 978a0199..51289973 100755 --- a/facefusion/uis/components/face_masker.py +++ b/facefusion/uis/components/face_masker.py @@ -32,13 +32,13 @@ def render() -> None: has_box_mask = 'box' in facefusion.globals.face_mask_types has_region_mask = 'region' in facefusion.globals.face_mask_types FACE_MASK_TYPES_CHECKBOX_GROUP = gradio.CheckboxGroup( - label = wording.get('face_mask_types_checkbox_group_label'), 
+ label = wording.get('uis.face_mask_types_checkbox_group'), choices = facefusion.choices.face_mask_types, value = facefusion.globals.face_mask_types ) with gradio.Group(visible = has_box_mask) as FACE_MASK_BOX_GROUP: FACE_MASK_BLUR_SLIDER = gradio.Slider( - label = wording.get('face_mask_blur_slider_label'), + label = wording.get('uis.face_mask_blur_slider'), step = facefusion.choices.face_mask_blur_range[1] - facefusion.choices.face_mask_blur_range[0], minimum = facefusion.choices.face_mask_blur_range[0], maximum = facefusion.choices.face_mask_blur_range[-1], @@ -46,14 +46,14 @@ def render() -> None: ) with gradio.Row(): FACE_MASK_PADDING_TOP_SLIDER = gradio.Slider( - label = wording.get('face_mask_padding_top_slider_label'), + label = wording.get('uis.face_mask_padding_top_slider'), step = facefusion.choices.face_mask_padding_range[1] - facefusion.choices.face_mask_padding_range[0], minimum = facefusion.choices.face_mask_padding_range[0], maximum = facefusion.choices.face_mask_padding_range[-1], value = facefusion.globals.face_mask_padding[0] ) FACE_MASK_PADDING_RIGHT_SLIDER = gradio.Slider( - label = wording.get('face_mask_padding_right_slider_label'), + label = wording.get('uis.face_mask_padding_right_slider'), step = facefusion.choices.face_mask_padding_range[1] - facefusion.choices.face_mask_padding_range[0], minimum = facefusion.choices.face_mask_padding_range[0], maximum = facefusion.choices.face_mask_padding_range[-1], @@ -61,14 +61,14 @@ def render() -> None: ) with gradio.Row(): FACE_MASK_PADDING_BOTTOM_SLIDER = gradio.Slider( - label = wording.get('face_mask_padding_bottom_slider_label'), + label = wording.get('uis.face_mask_padding_bottom_slider'), step = facefusion.choices.face_mask_padding_range[1] - facefusion.choices.face_mask_padding_range[0], minimum = facefusion.choices.face_mask_padding_range[0], maximum = facefusion.choices.face_mask_padding_range[-1], value = facefusion.globals.face_mask_padding[2] ) FACE_MASK_PADDING_LEFT_SLIDER = gradio.Slider( - label = wording.get('face_mask_padding_left_slider_label'), + label = wording.get('uis.face_mask_padding_left_slider'), step = facefusion.choices.face_mask_padding_range[1] - facefusion.choices.face_mask_padding_range[0], minimum = facefusion.choices.face_mask_padding_range[0], maximum = facefusion.choices.face_mask_padding_range[-1], @@ -76,7 +76,7 @@ def render() -> None: ) with gradio.Row(): FACE_MASK_REGION_CHECKBOX_GROUP = gradio.CheckboxGroup( - label = wording.get('face_mask_region_checkbox_group_label'), + label = wording.get('uis.face_mask_region_checkbox_group'), choices = facefusion.choices.face_mask_regions, value = facefusion.globals.face_mask_regions, visible = has_region_mask diff --git a/facefusion/uis/components/face_selector.py b/facefusion/uis/components/face_selector.py index 5d9bb90d..d429d5a3 100644 --- a/facefusion/uis/components/face_selector.py +++ b/facefusion/uis/components/face_selector.py @@ -9,7 +9,7 @@ from facefusion.face_store import clear_static_faces, clear_reference_faces from facefusion.vision import get_video_frame, read_static_image, normalize_frame_color from facefusion.filesystem import is_image, is_video from facefusion.face_analyser import get_many_faces -from facefusion.typing import Frame, FaceSelectorMode +from facefusion.typing import VisionFrame, FaceSelectorMode from facefusion.uis.core import get_ui_component, register_ui_component from facefusion.uis.typing import ComponentName @@ -25,7 +25,7 @@ def render() -> None: reference_face_gallery_args: Dict[str, Any] =\ { - 
'label': wording.get('reference_face_gallery_label'), + 'label': wording.get('uis.reference_face_gallery'), 'object_fit': 'cover', 'columns': 8, 'allow_preview': False, @@ -38,13 +38,13 @@ def render() -> None: reference_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number) reference_face_gallery_args['value'] = extract_gallery_frames(reference_frame) FACE_SELECTOR_MODE_DROPDOWN = gradio.Dropdown( - label = wording.get('face_selector_mode_dropdown_label'), + label = wording.get('uis.face_selector_mode_dropdown'), choices = facefusion.choices.face_selector_modes, value = facefusion.globals.face_selector_mode ) REFERENCE_FACE_POSITION_GALLERY = gradio.Gallery(**reference_face_gallery_args) REFERENCE_FACE_DISTANCE_SLIDER = gradio.Slider( - label = wording.get('reference_face_distance_slider_label'), + label = wording.get('uis.reference_face_distance_slider'), value = facefusion.globals.reference_face_distance, step = facefusion.choices.reference_face_distance_range[1] - facefusion.choices.reference_face_distance_range[0], minimum = facefusion.choices.reference_face_distance_range[0], @@ -135,30 +135,31 @@ def clear_and_update_reference_position_gallery() -> gradio.Gallery: def update_reference_position_gallery() -> gradio.Gallery: - gallery_frames = [] + gallery_vision_frames = [] if is_image(facefusion.globals.target_path): - reference_frame = read_static_image(facefusion.globals.target_path) - gallery_frames = extract_gallery_frames(reference_frame) + temp_vision_frame = read_static_image(facefusion.globals.target_path) + gallery_vision_frames = extract_gallery_frames(temp_vision_frame) if is_video(facefusion.globals.target_path): - reference_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number) - gallery_frames = extract_gallery_frames(reference_frame) - if gallery_frames: - return gradio.Gallery(value = gallery_frames) + temp_vision_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number) + gallery_vision_frames = extract_gallery_frames(temp_vision_frame) + if gallery_vision_frames: + return gradio.Gallery(value = gallery_vision_frames) return gradio.Gallery(value = None) -def extract_gallery_frames(reference_frame : Frame) -> List[Frame]: - crop_frames = [] - faces = get_many_faces(reference_frame) +def extract_gallery_frames(temp_vision_frame : VisionFrame) -> List[VisionFrame]: + gallery_vision_frames = [] + faces = get_many_faces(temp_vision_frame) + for face in faces: - start_x, start_y, end_x, end_y = map(int, face.bbox) + start_x, start_y, end_x, end_y = map(int, face.bounding_box) padding_x = int((end_x - start_x) * 0.25) padding_y = int((end_y - start_y) * 0.25) start_x = max(0, start_x - padding_x) start_y = max(0, start_y - padding_y) end_x = max(0, end_x + padding_x) end_y = max(0, end_y + padding_y) - crop_frame = reference_frame[start_y:end_y, start_x:end_x] - crop_frame = normalize_frame_color(crop_frame) - crop_frames.append(crop_frame) - return crop_frames + crop_vision_frame = temp_vision_frame[start_y:end_y, start_x:end_x] + crop_vision_frame = normalize_frame_color(crop_vision_frame) + gallery_vision_frames.append(crop_vision_frame) + return gallery_vision_frames diff --git a/facefusion/uis/components/frame_processors.py b/facefusion/uis/components/frame_processors.py index e86fd446..65ef5759 100644 --- a/facefusion/uis/components/frame_processors.py +++ b/facefusion/uis/components/frame_processors.py @@ -14,7 +14,7 @@ def render() -> 
None: global FRAME_PROCESSORS_CHECKBOX_GROUP FRAME_PROCESSORS_CHECKBOX_GROUP = gradio.CheckboxGroup( - label = wording.get('frame_processors_checkbox_group_label'), + label = wording.get('uis.frame_processors_checkbox_group'), choices = sort_frame_processors(facefusion.globals.frame_processors), value = facefusion.globals.frame_processors ) diff --git a/facefusion/uis/components/frame_processors_options.py b/facefusion/uis/components/frame_processors_options.py index 89044b5e..d7172f2a 100755 --- a/facefusion/uis/components/frame_processors_options.py +++ b/facefusion/uis/components/frame_processors_options.py @@ -5,84 +5,120 @@ import facefusion.globals from facefusion import wording from facefusion.processors.frame.core import load_frame_processor_module from facefusion.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices -from facefusion.processors.frame.typings import FaceSwapperModel, FaceEnhancerModel, FrameEnhancerModel, FaceDebuggerItem +from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameEnhancerModel, LipSyncerModel from facefusion.uis.core import get_ui_component, register_ui_component -FACE_SWAPPER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None FACE_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None FACE_ENHANCER_BLEND_SLIDER : Optional[gradio.Slider] = None +FACE_SWAPPER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None FRAME_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None FRAME_ENHANCER_BLEND_SLIDER : Optional[gradio.Slider] = None -FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None +LIP_SYNCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None def render() -> None: - global FACE_SWAPPER_MODEL_DROPDOWN + global FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP global FACE_ENHANCER_MODEL_DROPDOWN global FACE_ENHANCER_BLEND_SLIDER + global FACE_SWAPPER_MODEL_DROPDOWN global FRAME_ENHANCER_MODEL_DROPDOWN global FRAME_ENHANCER_BLEND_SLIDER - global FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP + global LIP_SYNCER_MODEL_DROPDOWN - FACE_SWAPPER_MODEL_DROPDOWN = gradio.Dropdown( - label = wording.get('face_swapper_model_dropdown_label'), - choices = frame_processors_choices.face_swapper_models, - value = frame_processors_globals.face_swapper_model, - visible = 'face_swapper' in facefusion.globals.frame_processors + FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP = gradio.CheckboxGroup( + label = wording.get('uis.face_debugger_items_checkbox_group'), + choices = frame_processors_choices.face_debugger_items, + value = frame_processors_globals.face_debugger_items, + visible = 'face_debugger' in facefusion.globals.frame_processors ) FACE_ENHANCER_MODEL_DROPDOWN = gradio.Dropdown( - label = wording.get('face_enhancer_model_dropdown_label'), + label = wording.get('uis.face_enhancer_model_dropdown'), choices = frame_processors_choices.face_enhancer_models, value = frame_processors_globals.face_enhancer_model, visible = 'face_enhancer' in facefusion.globals.frame_processors ) FACE_ENHANCER_BLEND_SLIDER = gradio.Slider( - label = wording.get('face_enhancer_blend_slider_label'), + label = wording.get('uis.face_enhancer_blend_slider'), value = frame_processors_globals.face_enhancer_blend, step = frame_processors_choices.face_enhancer_blend_range[1] - frame_processors_choices.face_enhancer_blend_range[0], minimum = frame_processors_choices.face_enhancer_blend_range[0], maximum = 
frame_processors_choices.face_enhancer_blend_range[-1], visible = 'face_enhancer' in facefusion.globals.frame_processors ) + FACE_SWAPPER_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.face_swapper_model_dropdown'), + choices = frame_processors_choices.face_swapper_models, + value = frame_processors_globals.face_swapper_model, + visible = 'face_swapper' in facefusion.globals.frame_processors + ) FRAME_ENHANCER_MODEL_DROPDOWN = gradio.Dropdown( - label = wording.get('frame_enhancer_model_dropdown_label'), + label = wording.get('uis.frame_enhancer_model_dropdown'), choices = frame_processors_choices.frame_enhancer_models, value = frame_processors_globals.frame_enhancer_model, visible = 'frame_enhancer' in facefusion.globals.frame_processors ) FRAME_ENHANCER_BLEND_SLIDER = gradio.Slider( - label = wording.get('frame_enhancer_blend_slider_label'), + label = wording.get('uis.frame_enhancer_blend_slider'), value = frame_processors_globals.frame_enhancer_blend, step = frame_processors_choices.frame_enhancer_blend_range[1] - frame_processors_choices.frame_enhancer_blend_range[0], minimum = frame_processors_choices.frame_enhancer_blend_range[0], maximum = frame_processors_choices.frame_enhancer_blend_range[-1], visible = 'frame_enhancer' in facefusion.globals.frame_processors ) - FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP = gradio.CheckboxGroup( - label = wording.get('face_debugger_items_checkbox_group_label'), - choices = frame_processors_choices.face_debugger_items, - value = frame_processors_globals.face_debugger_items, - visible = 'face_debugger' in facefusion.globals.frame_processors + LIP_SYNCER_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.lip_syncer_model_dropdown'), + choices = frame_processors_choices.lip_syncer_models, + value = frame_processors_globals.lip_syncer_model, + visible = 'lip_syncer' in facefusion.globals.frame_processors ) - - register_ui_component('face_swapper_model_dropdown', FACE_SWAPPER_MODEL_DROPDOWN) + register_ui_component('face_debugger_items_checkbox_group', FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP) register_ui_component('face_enhancer_model_dropdown', FACE_ENHANCER_MODEL_DROPDOWN) register_ui_component('face_enhancer_blend_slider', FACE_ENHANCER_BLEND_SLIDER) + register_ui_component('face_swapper_model_dropdown', FACE_SWAPPER_MODEL_DROPDOWN) register_ui_component('frame_enhancer_model_dropdown', FRAME_ENHANCER_MODEL_DROPDOWN) register_ui_component('frame_enhancer_blend_slider', FRAME_ENHANCER_BLEND_SLIDER) - register_ui_component('face_debugger_items_checkbox_group', FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP) + register_ui_component('lip_syncer_model_dropdown', LIP_SYNCER_MODEL_DROPDOWN) def listen() -> None: - FACE_SWAPPER_MODEL_DROPDOWN.change(update_face_swapper_model, inputs = FACE_SWAPPER_MODEL_DROPDOWN, outputs = FACE_SWAPPER_MODEL_DROPDOWN) + FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP.change(update_face_debugger_items, inputs = FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP) FACE_ENHANCER_MODEL_DROPDOWN.change(update_face_enhancer_model, inputs = FACE_ENHANCER_MODEL_DROPDOWN, outputs = FACE_ENHANCER_MODEL_DROPDOWN) FACE_ENHANCER_BLEND_SLIDER.change(update_face_enhancer_blend, inputs = FACE_ENHANCER_BLEND_SLIDER) + FACE_SWAPPER_MODEL_DROPDOWN.change(update_face_swapper_model, inputs = FACE_SWAPPER_MODEL_DROPDOWN, outputs = FACE_SWAPPER_MODEL_DROPDOWN) FRAME_ENHANCER_MODEL_DROPDOWN.change(update_frame_enhancer_model, inputs = FRAME_ENHANCER_MODEL_DROPDOWN, outputs = FRAME_ENHANCER_MODEL_DROPDOWN) FRAME_ENHANCER_BLEND_SLIDER.change(update_frame_enhancer_blend, inputs = 
FRAME_ENHANCER_BLEND_SLIDER) - FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP.change(update_face_debugger_items, inputs = FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP) + LIP_SYNCER_MODEL_DROPDOWN.change(update_lip_syncer_model, inputs = LIP_SYNCER_MODEL_DROPDOWN, outputs = LIP_SYNCER_MODEL_DROPDOWN) frame_processors_checkbox_group = get_ui_component('frame_processors_checkbox_group') if frame_processors_checkbox_group: - frame_processors_checkbox_group.change(toggle_face_swapper_model, inputs = frame_processors_checkbox_group, outputs = [ FACE_SWAPPER_MODEL_DROPDOWN, FACE_ENHANCER_MODEL_DROPDOWN, FACE_ENHANCER_BLEND_SLIDER, FRAME_ENHANCER_MODEL_DROPDOWN, FRAME_ENHANCER_BLEND_SLIDER, FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP ]) + frame_processors_checkbox_group.change(update_frame_processors, inputs = frame_processors_checkbox_group, outputs = [ FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP, FACE_ENHANCER_MODEL_DROPDOWN, FACE_ENHANCER_BLEND_SLIDER, FACE_SWAPPER_MODEL_DROPDOWN, FRAME_ENHANCER_MODEL_DROPDOWN, FRAME_ENHANCER_BLEND_SLIDER, LIP_SYNCER_MODEL_DROPDOWN ]) + + +def update_frame_processors(frame_processors : List[str]) -> Tuple[gradio.CheckboxGroup, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown]: + has_face_debugger = 'face_debugger' in frame_processors + has_face_enhancer = 'face_enhancer' in frame_processors + has_face_swapper = 'face_swapper' in frame_processors + has_frame_enhancer = 'frame_enhancer' in frame_processors + has_lip_syncer = 'lip_syncer' in frame_processors + return gradio.CheckboxGroup(visible = has_face_debugger), gradio.Dropdown(visible = has_face_enhancer), gradio.Slider(visible = has_face_enhancer), gradio.Dropdown(visible = has_face_swapper), gradio.Dropdown(visible = has_frame_enhancer), gradio.Slider(visible = has_frame_enhancer), gradio.Dropdown(visible = has_lip_syncer) + + +def update_face_debugger_items(face_debugger_items : List[FaceDebuggerItem]) -> None: + frame_processors_globals.face_debugger_items = face_debugger_items + + +def update_face_enhancer_model(face_enhancer_model : FaceEnhancerModel) -> gradio.Dropdown: + frame_processors_globals.face_enhancer_model = face_enhancer_model + face_enhancer_module = load_frame_processor_module('face_enhancer') + face_enhancer_module.clear_frame_processor() + face_enhancer_module.set_options('model', face_enhancer_module.MODELS[face_enhancer_model]) + if face_enhancer_module.pre_check(): + return gradio.Dropdown(value = face_enhancer_model) + return gradio.Dropdown() + + +def update_face_enhancer_blend(face_enhancer_blend : int) -> None: + frame_processors_globals.face_enhancer_blend = face_enhancer_blend def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> gradio.Dropdown: @@ -93,26 +129,14 @@ def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> gradio.D facefusion.globals.face_recognizer_model = 'arcface_inswapper' if face_swapper_model == 'simswap_256' or face_swapper_model == 'simswap_512_unofficial': facefusion.globals.face_recognizer_model = 'arcface_simswap' + if face_swapper_model == 'uniface_256': + facefusion.globals.face_recognizer_model = 'arcface_uniface' face_swapper_module = load_frame_processor_module('face_swapper') face_swapper_module.clear_frame_processor() face_swapper_module.set_options('model', face_swapper_module.MODELS[face_swapper_model]) - if not face_swapper_module.pre_check(): - return gradio.Dropdown() - return gradio.Dropdown(value = face_swapper_model) - - -def update_face_enhancer_model(face_enhancer_model : FaceEnhancerModel) -> 
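update_frame_processors above is the single handler that keeps every options component in sync with the FRAME PROCESSORS checkbox group: it returns one component per output slot with only the visibility changed. A reduced sketch of the same pattern for two components; toggle_options and the wiring comment are placeholders, not part of the diff:

from typing import List, Tuple
import gradio

def toggle_options(frame_processors : List[str]) -> Tuple[gradio.Dropdown, gradio.Slider]:
    # show the face enhancer options only while 'face_enhancer' is selected
    has_face_enhancer = 'face_enhancer' in frame_processors
    return gradio.Dropdown(visible = has_face_enhancer), gradio.Slider(visible = has_face_enhancer)

# wired the same way listen() does above:
# FRAME_PROCESSORS_CHECKBOX_GROUP.change(toggle_options, inputs = FRAME_PROCESSORS_CHECKBOX_GROUP, outputs = [ FACE_ENHANCER_MODEL_DROPDOWN, FACE_ENHANCER_BLEND_SLIDER ])

The order of the returned components has to match the outputs list, which is why the returned tuple in update_frame_processors mirrors the outputs list passed in listen().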
gradio.Dropdown: - frame_processors_globals.face_enhancer_model = face_enhancer_model - face_enhancer_module = load_frame_processor_module('face_enhancer') - face_enhancer_module.clear_frame_processor() - face_enhancer_module.set_options('model', face_enhancer_module.MODELS[face_enhancer_model]) - if not face_enhancer_module.pre_check(): - return gradio.Dropdown() - return gradio.Dropdown(value = face_enhancer_model) - - -def update_face_enhancer_blend(face_enhancer_blend : int) -> None: - frame_processors_globals.face_enhancer_blend = face_enhancer_blend + if face_swapper_module.pre_check(): + return gradio.Dropdown(value = face_swapper_model) + return gradio.Dropdown() def update_frame_enhancer_model(frame_enhancer_model : FrameEnhancerModel) -> gradio.Dropdown: @@ -120,22 +144,20 @@ def update_frame_enhancer_model(frame_enhancer_model : FrameEnhancerModel) -> gr frame_enhancer_module = load_frame_processor_module('frame_enhancer') frame_enhancer_module.clear_frame_processor() frame_enhancer_module.set_options('model', frame_enhancer_module.MODELS[frame_enhancer_model]) - if not frame_enhancer_module.pre_check(): - return gradio.Dropdown() - return gradio.Dropdown(value = frame_enhancer_model) + if frame_enhancer_module.pre_check(): + return gradio.Dropdown(value = frame_enhancer_model) + return gradio.Dropdown() def update_frame_enhancer_blend(frame_enhancer_blend : int) -> None: frame_processors_globals.frame_enhancer_blend = frame_enhancer_blend -def update_face_debugger_items(face_debugger_items : List[FaceDebuggerItem]) -> None: - frame_processors_globals.face_debugger_items = face_debugger_items - - -def toggle_face_swapper_model(frame_processors : List[str]) -> Tuple[gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider, gradio.CheckboxGroup]: - has_face_swapper = 'face_swapper' in frame_processors - has_face_enhancer = 'face_enhancer' in frame_processors - has_frame_enhancer = 'frame_enhancer' in frame_processors - has_face_debugger = 'face_debugger' in frame_processors - return gradio.Dropdown(visible = has_face_swapper), gradio.Dropdown(visible = has_face_enhancer), gradio.Slider(visible = has_face_enhancer), gradio.Dropdown(visible = has_frame_enhancer), gradio.Slider(visible = has_frame_enhancer), gradio.CheckboxGroup(visible = has_face_debugger) +def update_lip_syncer_model(lip_syncer_model : LipSyncerModel) -> gradio.Dropdown: + frame_processors_globals.lip_syncer_model = lip_syncer_model + lip_syncer_module = load_frame_processor_module('lip_syncer') + lip_syncer_module.clear_frame_processor() + lip_syncer_module.set_options('model', lip_syncer_module.MODELS[lip_syncer_model]) + if lip_syncer_module.pre_check(): + return gradio.Dropdown(value = lip_syncer_model) + return gradio.Dropdown() diff --git a/facefusion/uis/components/memory.py b/facefusion/uis/components/memory.py index cda45549..fe0d9723 100644 --- a/facefusion/uis/components/memory.py +++ b/facefusion/uis/components/memory.py @@ -15,12 +15,12 @@ def render() -> None: global SYSTEM_MEMORY_LIMIT_SLIDER VIDEO_MEMORY_STRATEGY = gradio.Dropdown( - label = wording.get('video_memory_strategy_dropdown_label'), + label = wording.get('uis.video_memory_strategy_dropdown'), choices = facefusion.choices.video_memory_strategies, value = facefusion.globals.video_memory_strategy ) SYSTEM_MEMORY_LIMIT_SLIDER = gradio.Slider( - label = wording.get('system_memory_limit_slider_label'), + label = wording.get('uis.system_memory_limit_slider'), step =facefusion.choices.system_memory_limit_range[1] - 
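The model dropdown handlers above (face enhancer, face swapper, frame enhancer, lip syncer) all follow the same sequence: store the choice, clear the cached processor, point the module at the new model and only confirm the dropdown once pre_check has located or downloaded the weights. The shared flow as one generic sketch; update_processor_model is a hypothetical name:

import gradio
from facefusion.processors.frame.core import load_frame_processor_module

def update_processor_model(frame_processor : str, model_name : str) -> gradio.Dropdown:
    frame_processor_module = load_frame_processor_module(frame_processor)
    frame_processor_module.clear_frame_processor()  # drop the cached inference session
    frame_processor_module.set_options('model', frame_processor_module.MODELS[model_name])
    if frame_processor_module.pre_check():  # resolves or downloads the model files
        return gradio.Dropdown(value = model_name)
    return gradio.Dropdown()  # fall back to an empty update when pre_check fails

Flipping the earlier negative check (if not ... pre_check) into the positive form, as the diff does, keeps the success path first without changing behaviour.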
facefusion.choices.system_memory_limit_range[0], minimum = facefusion.choices.system_memory_limit_range[0], maximum = facefusion.choices.system_memory_limit_range[-1], diff --git a/facefusion/uis/components/output.py b/facefusion/uis/components/output.py index fb6460e0..cbbd3972 100644 --- a/facefusion/uis/components/output.py +++ b/facefusion/uis/components/output.py @@ -22,19 +22,19 @@ def render() -> None: global OUTPUT_CLEAR_BUTTON OUTPUT_IMAGE = gradio.Image( - label = wording.get('output_image_or_video_label'), + label = wording.get('uis.output_image_or_video'), visible = False ) OUTPUT_VIDEO = gradio.Video( - label = wording.get('output_image_or_video_label') + label = wording.get('uis.output_image_or_video') ) OUTPUT_START_BUTTON = gradio.Button( - value = wording.get('start_button_label'), + value = wording.get('uis.start_button'), variant = 'primary', size = 'sm' ) OUTPUT_CLEAR_BUTTON = gradio.Button( - value = wording.get('clear_button_label'), + value = wording.get('uis.clear_button'), size = 'sm' ) diff --git a/facefusion/uis/components/output_options.py b/facefusion/uis/components/output_options.py index 01a406ed..b828c715 100644 --- a/facefusion/uis/components/output_options.py +++ b/facefusion/uis/components/output_options.py @@ -30,12 +30,12 @@ def render() -> None: global OUTPUT_VIDEO_FPS_SLIDER OUTPUT_PATH_TEXTBOX = gradio.Textbox( - label = wording.get('output_path_textbox_label'), + label = wording.get('uis.output_path_textbox'), value = facefusion.globals.output_path or tempfile.gettempdir(), max_lines = 1 ) OUTPUT_IMAGE_QUALITY_SLIDER = gradio.Slider( - label = wording.get('output_image_quality_slider_label'), + label = wording.get('uis.output_image_quality_slider'), value = facefusion.globals.output_image_quality, step = facefusion.choices.output_image_quality_range[1] - facefusion.choices.output_image_quality_range[0], minimum = facefusion.choices.output_image_quality_range[0], @@ -43,19 +43,19 @@ def render() -> None: visible = is_image(facefusion.globals.target_path) ) OUTPUT_VIDEO_ENCODER_DROPDOWN = gradio.Dropdown( - label = wording.get('output_video_encoder_dropdown_label'), + label = wording.get('uis.output_video_encoder_dropdown'), choices = facefusion.choices.output_video_encoders, value = facefusion.globals.output_video_encoder, visible = is_video(facefusion.globals.target_path) ) OUTPUT_VIDEO_PRESET_DROPDOWN = gradio.Dropdown( - label = wording.get('output_video_preset_dropdown_label'), + label = wording.get('uis.output_video_preset_dropdown'), choices = facefusion.choices.output_video_presets, value = facefusion.globals.output_video_preset, visible = is_video(facefusion.globals.target_path) ) OUTPUT_VIDEO_QUALITY_SLIDER = gradio.Slider( - label = wording.get('output_video_quality_slider_label'), + label = wording.get('uis.output_video_quality_slider'), value = facefusion.globals.output_video_quality, step = facefusion.choices.output_video_quality_range[1] - facefusion.choices.output_video_quality_range[0], minimum = facefusion.choices.output_video_quality_range[0], @@ -63,13 +63,13 @@ def render() -> None: visible = is_video(facefusion.globals.target_path) ) OUTPUT_VIDEO_RESOLUTION_DROPDOWN = gradio.Dropdown( - label = wording.get('output_video_resolution_dropdown_label'), + label = wording.get('uis.output_video_resolution_dropdown'), choices = create_video_resolutions(facefusion.globals.target_path), value = facefusion.globals.output_video_resolution, visible = is_video(facefusion.globals.target_path) ) OUTPUT_VIDEO_FPS_SLIDER = gradio.Slider( - label = 
wording.get('output_video_fps_slider_label'), + label = wording.get('uis.output_video_fps_slider'), value = facefusion.globals.output_video_fps, step = 0.01, minimum = 1, @@ -77,6 +77,7 @@ def render() -> None: visible = is_video(facefusion.globals.target_path) ) register_ui_component('output_path_textbox', OUTPUT_PATH_TEXTBOX) + register_ui_component('output_video_fps_slider', OUTPUT_VIDEO_FPS_SLIDER) def listen() -> None: @@ -89,7 +90,6 @@ def listen() -> None: OUTPUT_VIDEO_FPS_SLIDER.change(update_output_video_fps, inputs = OUTPUT_VIDEO_FPS_SLIDER) multi_component_names : List[ComponentName] =\ [ - 'source_image', 'target_image', 'target_video' ] diff --git a/facefusion/uis/components/preview.py b/facefusion/uis/components/preview.py index fe01f932..07b8b01d 100755 --- a/facefusion/uis/components/preview.py +++ b/facefusion/uis/components/preview.py @@ -5,12 +5,14 @@ import gradio import facefusion.globals from facefusion import wording, logger +from facefusion.audio import get_audio_frame +from facefusion.common_helper import get_first from facefusion.core import conditional_append_reference_faces -from facefusion.face_store import clear_static_faces, get_reference_faces, clear_reference_faces -from facefusion.typing import Frame, Face, FaceSet -from facefusion.vision import get_video_frame, count_video_frame_total, normalize_frame_color, resize_frame_resolution, read_static_image, read_static_images -from facefusion.filesystem import is_image, is_video from facefusion.face_analyser import get_average_face, clear_face_analyser +from facefusion.face_store import clear_static_faces, get_reference_faces, clear_reference_faces +from facefusion.typing import Face, FaceSet, AudioFrame, VisionFrame +from facefusion.vision import get_video_frame, count_video_frame_total, normalize_frame_color, resize_frame_resolution, read_static_image, read_static_images +from facefusion.filesystem import is_image, is_video, filter_audio_paths from facefusion.content_analyser import analyse_frame from facefusion.processors.frame.core import load_frame_processor_module from facefusion.uis.typing import ComponentName @@ -26,29 +28,34 @@ def render() -> None: preview_image_args: Dict[str, Any] =\ { - 'label': wording.get('preview_image_label'), + 'label': wording.get('uis.preview_image'), 'interactive': False } preview_frame_slider_args: Dict[str, Any] =\ { - 'label': wording.get('preview_frame_slider_label'), + 'label': wording.get('uis.preview_frame_slider'), 'step': 1, 'minimum': 0, 'maximum': 100, 'visible': False } conditional_append_reference_faces() + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None source_frames = read_static_images(facefusion.globals.source_paths) source_face = get_average_face(source_frames) - reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) + if source_audio_path and facefusion.globals.output_video_fps: + source_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, facefusion.globals.reference_frame_number) + else: + source_audio_frame = None if is_image(facefusion.globals.target_path): - target_frame = read_static_image(facefusion.globals.target_path) - preview_frame = process_preview_frame(source_face, reference_faces, target_frame) - preview_image_args['value'] = normalize_frame_color(preview_frame) + target_vision_frame = 
read_static_image(facefusion.globals.target_path) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) + preview_image_args['value'] = normalize_frame_color(preview_vision_frame) if is_video(facefusion.globals.target_path): - temp_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number) - preview_frame = process_preview_frame(source_face, reference_faces, temp_frame) - preview_image_args['value'] = normalize_frame_color(preview_frame) + temp_vision_frame = get_video_frame(facefusion.globals.target_path, facefusion.globals.reference_frame_number) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, temp_vision_frame) + preview_image_args['value'] = normalize_frame_color(preview_vision_frame) preview_image_args['visible'] = True preview_frame_slider_args['value'] = facefusion.globals.reference_frame_number preview_frame_slider_args['maximum'] = count_video_frame_total(facefusion.globals.target_path) @@ -60,8 +67,12 @@ def render() -> None: def listen() -> None: PREVIEW_FRAME_SLIDER.release(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + reference_face_position_gallery = get_ui_component('reference_face_position_gallery') + if reference_face_position_gallery: + reference_face_position_gallery.select(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) multi_one_component_names : List[ComponentName] =\ [ + 'source_audio', 'source_image', 'target_image', 'target_video' @@ -81,17 +92,6 @@ def listen() -> None: if component: for method in [ 'upload', 'change', 'clear' ]: getattr(component, method)(update_preview_frame_slider, outputs = PREVIEW_FRAME_SLIDER) - select_component_names : List[ComponentName] =\ - [ - 'reference_face_position_gallery', - 'face_analyser_order_dropdown', - 'face_analyser_age_dropdown', - 'face_analyser_gender_dropdown' - ] - for component_name in select_component_names: - component = get_ui_component(component_name) - if component: - component.select(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) change_one_component_names : List[ComponentName] =\ [ 'face_debugger_items_checkbox_group', @@ -105,7 +105,11 @@ def listen() -> None: 'face_mask_padding_bottom_slider', 'face_mask_padding_left_slider', 'face_mask_padding_right_slider', - 'face_mask_region_checkbox_group' + 'face_mask_region_checkbox_group', + 'face_analyser_order_dropdown', + 'face_analyser_age_dropdown', + 'face_analyser_gender_dropdown', + 'output_video_fps_slider' ] for component_name in change_one_component_names: component = get_ui_component(component_name) @@ -117,6 +121,7 @@ def listen() -> None: 'face_enhancer_model_dropdown', 'face_swapper_model_dropdown', 'frame_enhancer_model_dropdown', + 'lip_syncer_model_dropdown', 'face_detector_model_dropdown', 'face_detector_size_dropdown', 'face_detector_score_slider' @@ -143,19 +148,25 @@ def update_preview_image(frame_number : int = 0) -> gradio.Image: sleep(0.5) logger.enable() conditional_append_reference_faces() + reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None source_frames = read_static_images(facefusion.globals.source_paths) source_face = get_average_face(source_frames) - reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None + source_audio_path = get_first(filter_audio_paths(facefusion.globals.source_paths)) + 
if source_audio_path and facefusion.globals.output_video_fps: + source_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, facefusion.globals.reference_frame_number) + else: + source_audio_frame = None + if is_image(facefusion.globals.target_path): - target_frame = read_static_image(facefusion.globals.target_path) - preview_frame = process_preview_frame(source_face, reference_faces, target_frame) - preview_frame = normalize_frame_color(preview_frame) - return gradio.Image(value = preview_frame) + target_vision_frame = read_static_image(facefusion.globals.target_path) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, target_vision_frame) + preview_vision_frame = normalize_frame_color(preview_vision_frame) + return gradio.Image(value = preview_vision_frame) if is_video(facefusion.globals.target_path): - temp_frame = get_video_frame(facefusion.globals.target_path, frame_number) - preview_frame = process_preview_frame(source_face, reference_faces, temp_frame) - preview_frame = normalize_frame_color(preview_frame) - return gradio.Image(value = preview_frame) + temp_vision_frame = get_video_frame(facefusion.globals.target_path, frame_number) + preview_vision_frame = process_preview_frame(reference_faces, source_face, source_audio_frame, temp_vision_frame) + preview_vision_frame = normalize_frame_color(preview_vision_frame) + return gradio.Image(value = preview_vision_frame) return gradio.Image(value = None) @@ -166,18 +177,20 @@ def update_preview_frame_slider() -> gradio.Slider: return gradio.Slider(value = None, maximum = None, visible = False) -def process_preview_frame(source_face : Face, reference_faces : FaceSet, temp_frame : Frame) -> Frame: - temp_frame = resize_frame_resolution(temp_frame, 640, 640) - if analyse_frame(temp_frame): - return cv2.GaussianBlur(temp_frame, (99, 99), 0) +def process_preview_frame(reference_faces : FaceSet, source_face : Face, source_audio_frame : AudioFrame, target_vision_frame : VisionFrame) -> VisionFrame: + target_vision_frame = resize_frame_resolution(target_vision_frame, 640, 640) + if analyse_frame(target_vision_frame): + return cv2.GaussianBlur(target_vision_frame, (99, 99), 0) for frame_processor in facefusion.globals.frame_processors: frame_processor_module = load_frame_processor_module(frame_processor) logger.disable() if frame_processor_module.pre_process('preview'): logger.enable() - temp_frame = frame_processor_module.process_frame( - source_face, - reference_faces, - temp_frame - ) - return temp_frame + target_vision_frame = frame_processor_module.process_frame( + { + 'reference_faces': reference_faces, + 'source_face': source_face, + 'source_audio_frame': source_audio_frame, + 'target_vision_frame': target_vision_frame + }) + return target_vision_frame diff --git a/facefusion/uis/components/source.py b/facefusion/uis/components/source.py index 5fd7a6e7..4e79d89d 100644 --- a/facefusion/uis/components/source.py +++ b/facefusion/uis/components/source.py @@ -1,49 +1,67 @@ -from typing import Optional, List +from typing import Optional, List, Tuple import gradio import facefusion.globals from facefusion import wording from facefusion.uis.typing import File -from facefusion.filesystem import are_images +from facefusion.common_helper import get_first +from facefusion.filesystem import has_audio, has_image, filter_audio_paths, filter_image_paths from facefusion.uis.core import register_ui_component SOURCE_FILE : Optional[gradio.File] = None +SOURCE_AUDIO : 
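Two threads run through the preview changes above: the source audio frame is only resolved when an audio source and an output fps are both available, and process_frame now receives one dictionary of named inputs instead of positional arguments, so processors such as the lip syncer can pick out the audio frame without widening every signature. A condensed sketch of how a caller assembles that payload, using the helpers imported in preview.py; build_frame_payload is a hypothetical name:

from typing import Any, Dict, List, Optional
from facefusion.audio import get_audio_frame
from facefusion.common_helper import get_first
from facefusion.filesystem import filter_audio_paths
from facefusion.typing import Face, FaceSet, VisionFrame

def build_frame_payload(source_paths : List[str], output_video_fps : float, frame_number : int, reference_faces : Optional[FaceSet], source_face : Face, target_vision_frame : VisionFrame) -> Dict[str, Any]:
    source_audio_path = get_first(filter_audio_paths(source_paths))
    if source_audio_path and output_video_fps:
        # the fps is needed to map the frame number onto the matching slice of audio
        source_audio_frame = get_audio_frame(source_audio_path, output_video_fps, frame_number)
    else:
        source_audio_frame = None
    return\
    {
        'reference_faces': reference_faces,
        'source_face': source_face,
        'source_audio_frame': source_audio_frame,
        'target_vision_frame': target_vision_frame
    }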
Optional[gradio.Audio] = None SOURCE_IMAGE : Optional[gradio.Image] = None def render() -> None: global SOURCE_FILE + global SOURCE_AUDIO global SOURCE_IMAGE - are_source_images = are_images(facefusion.globals.source_paths) + has_source_audio = has_audio(facefusion.globals.source_paths) + has_source_image = has_image(facefusion.globals.source_paths) SOURCE_FILE = gradio.File( file_count = 'multiple', file_types = [ + '.mp3', + '.wav', '.png', '.jpg', '.webp' ], - label = wording.get('source_file_label'), - value = facefusion.globals.source_paths if are_source_images else None + label = wording.get('uis.source_file'), + value = facefusion.globals.source_paths if has_source_audio or has_source_image else None ) source_file_names = [ source_file_value['name'] for source_file_value in SOURCE_FILE.value ] if SOURCE_FILE.value else None - SOURCE_IMAGE = gradio.Image( - value = source_file_names[0] if are_source_images else None, - visible = are_source_images, + source_audio_path = get_first(filter_audio_paths(source_file_names)) + source_image_path = get_first(filter_image_paths(source_file_names)) + SOURCE_AUDIO = gradio.Audio( + value = source_audio_path if has_source_audio else None, + visible = has_source_audio, show_label = False ) + SOURCE_IMAGE = gradio.Image( + value = source_image_path if has_source_image else None, + visible = has_source_image, + show_label = False + ) + register_ui_component('source_audio', SOURCE_AUDIO) register_ui_component('source_image', SOURCE_IMAGE) def listen() -> None: - SOURCE_FILE.change(update, inputs = SOURCE_FILE, outputs = SOURCE_IMAGE) + SOURCE_FILE.change(update, inputs = SOURCE_FILE, outputs = [ SOURCE_AUDIO, SOURCE_IMAGE ]) -def update(files : List[File]) -> gradio.Image: +def update(files : List[File]) -> Tuple[gradio.Audio, gradio.Image]: file_names = [ file.name for file in files ] if files else None - if are_images(file_names): + has_source_audio = has_audio(file_names) + has_source_image = has_image(file_names) + if has_source_audio or has_source_image: + source_audio_path = get_first(filter_audio_paths(file_names)) + source_image_path = get_first(filter_image_paths(file_names)) facefusion.globals.source_paths = file_names - return gradio.Image(value = file_names[0], visible = True) + return gradio.Audio(value = source_audio_path, visible = has_source_audio), gradio.Image(value = source_image_path, visible = has_source_image) facefusion.globals.source_paths = None - return gradio.Image(value = None, visible = False) + return gradio.Audio(value = None, visible = False), gradio.Image(value = None, visible = False) diff --git a/facefusion/uis/components/target.py b/facefusion/uis/components/target.py index 307b670c..2d11d713 100644 --- a/facefusion/uis/components/target.py +++ b/facefusion/uis/components/target.py @@ -21,7 +21,7 @@ def render() -> None: is_target_image = is_image(facefusion.globals.target_path) is_target_video = is_video(facefusion.globals.target_path) TARGET_FILE = gradio.File( - label = wording.get('target_file_label'), + label = wording.get('uis.target_file'), file_count = 'single', file_types = [ diff --git a/facefusion/uis/components/temp_frame.py b/facefusion/uis/components/temp_frame.py index 21d0312e..d9d851dd 100644 --- a/facefusion/uis/components/temp_frame.py +++ b/facefusion/uis/components/temp_frame.py @@ -17,13 +17,13 @@ def render() -> None: global TEMP_FRAME_QUALITY_SLIDER TEMP_FRAME_FORMAT_DROPDOWN = gradio.Dropdown( - label = wording.get('temp_frame_format_dropdown_label'), + label = 
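The source component above now accepts audio and image files through one gradio.File and routes the first match of each kind to its own preview widget. The routing in isolation; update_source_previews is a hypothetical name, the filter helpers are the ones imported in source.py:

from typing import List, Optional, Tuple
import gradio
from facefusion.common_helper import get_first
from facefusion.filesystem import filter_audio_paths, filter_image_paths

def update_source_previews(file_names : Optional[List[str]]) -> Tuple[gradio.Audio, gradio.Image]:
    source_audio_path = get_first(filter_audio_paths(file_names)) if file_names else None
    source_image_path = get_first(filter_image_paths(file_names)) if file_names else None
    # each preview is only visible while a file of its kind is part of the selection
    return gradio.Audio(value = source_audio_path, visible = bool(source_audio_path)), gradio.Image(value = source_image_path, visible = bool(source_image_path))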
wording.get('uis.temp_frame_format_dropdown'), choices = facefusion.choices.temp_frame_formats, value = facefusion.globals.temp_frame_format, visible = is_video(facefusion.globals.target_path) ) TEMP_FRAME_QUALITY_SLIDER = gradio.Slider( - label = wording.get('temp_frame_quality_slider_label'), + label = wording.get('uis.temp_frame_quality_slider'), value = facefusion.globals.temp_frame_quality, step = facefusion.choices.temp_frame_quality_range[1] - facefusion.choices.temp_frame_quality_range[0], minimum = facefusion.choices.temp_frame_quality_range[0], diff --git a/facefusion/uis/components/trim_frame.py b/facefusion/uis/components/trim_frame.py index 10d6089a..fdfb240c 100644 --- a/facefusion/uis/components/trim_frame.py +++ b/facefusion/uis/components/trim_frame.py @@ -17,7 +17,7 @@ def render() -> None: trim_frame_start_slider_args : Dict[str, Any] =\ { - 'label': wording.get('trim_frame_start_slider_label'), + 'label': wording.get('uis.trim_frame_start_slider'), 'step': 1, 'minimum': 0, 'maximum': 100, @@ -25,7 +25,7 @@ def render() -> None: } trim_frame_end_slider_args : Dict[str, Any] =\ { - 'label': wording.get('trim_frame_end_slider_label'), + 'label': wording.get('uis.trim_frame_end_slider'), 'step': 1, 'minimum': 0, 'maximum': 100, diff --git a/facefusion/uis/components/webcam.py b/facefusion/uis/components/webcam.py index f3fcbe43..6534bf4b 100644 --- a/facefusion/uis/components/webcam.py +++ b/facefusion/uis/components/webcam.py @@ -12,7 +12,7 @@ from tqdm import tqdm import facefusion.globals from facefusion import logger, wording from facefusion.content_analyser import analyse_stream -from facefusion.typing import Frame, Face, Fps +from facefusion.typing import VisionFrame, Face, Fps from facefusion.face_analyser import get_average_face from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module from facefusion.ffmpeg import open_ffmpeg @@ -53,15 +53,15 @@ def render() -> None: global WEBCAM_STOP_BUTTON WEBCAM_IMAGE = gradio.Image( - label = wording.get('webcam_image_label') + label = wording.get('uis.webcam_image') ) WEBCAM_START_BUTTON = gradio.Button( - value = wording.get('start_button_label'), + value = wording.get('uis.start_button'), variant = 'primary', size = 'sm' ) WEBCAM_STOP_BUTTON = gradio.Button( - value = wording.get('stop_button_label'), + value = wording.get('uis.stop_button'), size = 'sm' ) @@ -80,6 +80,7 @@ def listen() -> None: 'face_swapper_model_dropdown', 'face_enhancer_model_dropdown', 'frame_enhancer_model_dropdown', + 'lip_syncer_model_dropdown', 'source_image' ] for component_name in change_two_component_names: @@ -88,7 +89,7 @@ def listen() -> None: component.change(update, cancels = start_event) -def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[Frame, None, None]: +def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: facefusion.globals.face_selector_mode = 'one' facefusion.globals.face_analyser_order = 'large-small' source_frames = read_static_images(facefusion.globals.source_paths) @@ -114,11 +115,11 @@ def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) - yield None -def multi_process_capture(source_face : Face, webcam_capture : cv2.VideoCapture, webcam_fps : Fps) -> Generator[Frame, None, None]: +def multi_process_capture(source_face : Face, webcam_capture : cv2.VideoCapture, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: with tqdm(desc = 
wording.get('processing'), unit = 'frame', ascii = ' =', disable = facefusion.globals.log_level in [ 'warn', 'error' ]) as progress: with ThreadPoolExecutor(max_workers = facefusion.globals.execution_thread_count) as executor: futures = [] - deque_capture_frames : Deque[Frame] = deque() + deque_capture_frames : Deque[VisionFrame] = deque() while webcam_capture and webcam_capture.isOpened(): _, capture_frame = webcam_capture.read() if analyse_stream(capture_frame, webcam_fps): @@ -148,17 +149,19 @@ def stop() -> gradio.Image: return gradio.Image(value = None) -def process_stream_frame(source_face : Face, temp_frame : Frame) -> Frame: +def process_stream_frame(source_face : Face, target_vision_frame : VisionFrame) -> VisionFrame: for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): logger.disable() if frame_processor_module.pre_process('stream'): logger.enable() - temp_frame = frame_processor_module.process_frame( - source_face, - None, - temp_frame - ) - return temp_frame + target_vision_frame = frame_processor_module.process_frame( + { + 'source_face': source_face, + 'reference_faces': None, + 'source_audio_frame': None, + 'target_vision_frame': target_vision_frame + }) + return target_vision_frame def open_stream(stream_mode : StreamMode, stream_resolution : str, stream_fps : Fps) -> subprocess.Popen[bytes]: diff --git a/facefusion/uis/components/webcam_options.py b/facefusion/uis/components/webcam_options.py index edb245c8..ea707b05 100644 --- a/facefusion/uis/components/webcam_options.py +++ b/facefusion/uis/components/webcam_options.py @@ -16,17 +16,17 @@ def render() -> None: global WEBCAM_FPS_SLIDER WEBCAM_MODE_RADIO = gradio.Radio( - label = wording.get('webcam_mode_radio_label'), + label = wording.get('uis.webcam_mode_radio'), choices = uis_choices.webcam_modes, value = 'inline' ) WEBCAM_RESOLUTION_DROPDOWN = gradio.Dropdown( - label = wording.get('webcam_resolution_dropdown'), + label = wording.get('uis.webcam_resolution_dropdown'), choices = uis_choices.webcam_resolutions, value = uis_choices.webcam_resolutions[0] ) WEBCAM_FPS_SLIDER = gradio.Slider( - label = wording.get('webcam_fps_slider'), + label = wording.get('uis.webcam_fps_slider'), value = 25, step = 1, minimum = 1, diff --git a/facefusion/uis/layouts/benchmark.py b/facefusion/uis/layouts/benchmark.py index 83cde03a..2943fe2f 100644 --- a/facefusion/uis/layouts/benchmark.py +++ b/facefusion/uis/layouts/benchmark.py @@ -34,6 +34,7 @@ def render() -> gradio.Blocks: about.render() with gradio.Blocks(): frame_processors.render() + with gradio.Blocks(): frame_processors_options.render() with gradio.Blocks(): execution.render() @@ -60,4 +61,4 @@ def listen() -> None: def run(ui : gradio.Blocks) -> None: - ui.queue(concurrency_count = 2, api_open = False).launch(show_api = False) + ui.queue(concurrency_count = 2).launch(show_api = False, quiet = True) diff --git a/facefusion/uis/layouts/default.py b/facefusion/uis/layouts/default.py index ef8ac404..06d7d4de 100755 --- a/facefusion/uis/layouts/default.py +++ b/facefusion/uis/layouts/default.py @@ -19,6 +19,7 @@ def render() -> gradio.Blocks: about.render() with gradio.Blocks(): frame_processors.render() + with gradio.Blocks(): frame_processors_options.render() with gradio.Blocks(): execution.render() @@ -74,4 +75,4 @@ def listen() -> None: def run(ui : gradio.Blocks) -> None: - ui.launch(show_api = False) + ui.launch(show_api = False, quiet = True) diff --git a/facefusion/uis/layouts/webcam.py b/facefusion/uis/layouts/webcam.py index 
a5b6e184..9d23190d 100644 --- a/facefusion/uis/layouts/webcam.py +++ b/facefusion/uis/layouts/webcam.py @@ -19,6 +19,7 @@ def render() -> gradio.Blocks: about.render() with gradio.Blocks(): frame_processors.render() + with gradio.Blocks(): frame_processors_options.render() with gradio.Blocks(): execution.render() @@ -43,4 +44,4 @@ def listen() -> None: def run(ui : gradio.Blocks) -> None: - ui.queue(concurrency_count = 2, api_open = False).launch(show_api = False) + ui.queue(concurrency_count = 2).launch(show_api = False, quiet = True) diff --git a/facefusion/uis/typing.py b/facefusion/uis/typing.py index fe651125..2e7a1afe 100644 --- a/facefusion/uis/typing.py +++ b/facefusion/uis/typing.py @@ -5,6 +5,7 @@ File = IO[Any] Component = gradio.File or gradio.Image or gradio.Video or gradio.Slider ComponentName = Literal\ [ + 'source_audio', 'source_image', 'target_image', 'target_video', @@ -26,13 +27,15 @@ ComponentName = Literal\ 'face_mask_padding_right_slider', 'face_mask_region_checkbox_group', 'frame_processors_checkbox_group', - 'face_swapper_model_dropdown', + 'face_debugger_items_checkbox_group', 'face_enhancer_model_dropdown', 'face_enhancer_blend_slider', + 'face_swapper_model_dropdown', 'frame_enhancer_model_dropdown', 'frame_enhancer_blend_slider', - 'face_debugger_items_checkbox_group', + 'lip_syncer_model_dropdown', 'output_path_textbox', + 'output_video_fps_slider', 'benchmark_runs_checkbox_group', 'benchmark_cycles_slider', 'webcam_mode_radio', diff --git a/facefusion/vision.py b/facefusion/vision.py index fc5e84ab..d0a2a219 100644 --- a/facefusion/vision.py +++ b/facefusion/vision.py @@ -2,21 +2,21 @@ from typing import Optional, List, Tuple from functools import lru_cache import cv2 -from facefusion.typing import Frame, Resolution +from facefusion.typing import VisionFrame, Resolution from facefusion.choices import video_template_sizes from facefusion.filesystem import is_image, is_video -def get_video_frame(video_path : str, frame_number : int = 0) -> Optional[Frame]: +def get_video_frame(video_path : str, frame_number : int = 0) -> Optional[VisionFrame]: if is_video(video_path): video_capture = cv2.VideoCapture(video_path) if video_capture.isOpened(): frame_total = video_capture.get(cv2.CAP_PROP_FRAME_COUNT) video_capture.set(cv2.CAP_PROP_POS_FRAMES, min(frame_total, frame_number - 1)) - has_frame, frame = video_capture.read() + has_vision_frame, vision_frame = video_capture.read() video_capture.release() - if has_frame: - return frame + if has_vision_frame: + return vision_frame return None @@ -91,27 +91,27 @@ def unpack_resolution(resolution : str) -> Resolution: return width, height -def resize_frame_resolution(frame : Frame, max_width : int, max_height : int) -> Frame: - height, width = frame.shape[:2] +def resize_frame_resolution(vision_frame : VisionFrame, max_width : int, max_height : int) -> VisionFrame: + height, width = vision_frame.shape[:2] if height > max_height or width > max_width: scale = min(max_height / height, max_width / width) new_width = int(width * scale) new_height = int(height * scale) - return cv2.resize(frame, (new_width, new_height)) - return frame + return cv2.resize(vision_frame, (new_width, new_height)) + return vision_frame -def normalize_frame_color(frame : Frame) -> Frame: - return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) +def normalize_frame_color(vision_frame : VisionFrame) -> VisionFrame: + return cv2.cvtColor(vision_frame, cv2.COLOR_BGR2RGB) @lru_cache(maxsize = 128) -def read_static_image(image_path : str) -> Optional[Frame]: +def 
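resize_frame_resolution above only ever downscales and preserves the aspect ratio by applying the smaller of the two scale factors. The arithmetic in isolation; fit_within is a hypothetical name for the same behaviour:

import cv2
from facefusion.typing import VisionFrame

def fit_within(vision_frame : VisionFrame, max_width : int, max_height : int) -> VisionFrame:
    height, width = vision_frame.shape[:2]
    if height > max_height or width > max_width:
        scale = min(max_height / height, max_width / width)  # shrink just enough to satisfy both limits
        return cv2.resize(vision_frame, (int(width * scale), int(height * scale)))
    return vision_frame  # frames that already fit are returned untouched

For example, a 1920x1080 frame limited to 640x640 is scaled by min(640 / 1080, 640 / 1920) = 1 / 3 and comes out as 640x360; the preview pipeline above uses this to cap frames at 640x640 before analysis and processing.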
read_static_image(image_path : str) -> Optional[VisionFrame]: return read_image(image_path) -def read_static_images(image_paths : List[str]) -> Optional[List[Frame]]: +def read_static_images(image_paths : List[str]) -> Optional[List[VisionFrame]]: frames = [] if image_paths: for image_path in image_paths: @@ -119,13 +119,13 @@ def read_static_images(image_paths : List[str]) -> Optional[List[Frame]]: return frames -def read_image(image_path : str) -> Optional[Frame]: +def read_image(image_path : str) -> Optional[VisionFrame]: if is_image(image_path): return cv2.imread(image_path) return None -def write_image(image_path : str, frame : Frame) -> bool: +def write_image(image_path : str, frame : VisionFrame) -> bool: if image_path: return cv2.imwrite(image_path, frame) return False diff --git a/facefusion/wording.py b/facefusion/wording.py index a02455ac..31569d76 100755 --- a/facefusion/wording.py +++ b/facefusion/wording.py @@ -1,63 +1,21 @@ -WORDING =\ +from typing import Any, Dict, Optional + +WORDING : Dict[str, Any] =\ { 'python_not_supported': 'Python version is not supported, upgrade to {version} or higher', 'ffmpeg_not_installed': 'FFMpeg is not installed', - 'install_dependency_help': 'select the variant of {dependency} to install', - 'skip_venv_help': 'skip the virtual environment check', - 'source_help': 'select a source image', - 'target_help': 'select a target image or video', - 'output_help': 'specify the output file or directory', - 'frame_processors_help': 'choose from the available frame processors (choices: {choices}, ...)', - 'frame_processor_model_help': 'choose the model for the frame processor', - 'frame_processor_blend_help': 'specify the blend amount for the frame processor', - 'face_debugger_items_help': 'specify the face debugger items (choices: {choices})', - 'ui_layouts_help': 'choose from the available ui layouts (choices: {choices}, ...)', - 'keep_temp_help': 'retain temporary frames after processing', - 'skip_audio_help': 'omit audio from the target', - 'face_analyser_order_help': 'specify the order used for the face analyser', - 'face_analyser_age_help': 'specify the age used for the face analyser', - 'face_analyser_gender_help': 'specify the gender used for the face analyser', - 'face_detector_model_help': 'specify the model used for the face detector', - 'face_detector_size_help': 'specify the size threshold used for the face detector', - 'face_detector_score_help': 'specify the score threshold used for the face detector', - 'face_selector_mode_help': 'specify the mode for the face selector', - 'reference_face_position_help': 'specify the position of the reference face', - 'reference_face_distance_help': 'specify the distance between the reference face and the target face', - 'reference_frame_number_help': 'specify the number of the reference frame', - 'face_mask_types_help': 'choose from the available face mask types (choices: {choices})', - 'face_mask_blur_help': 'specify the blur amount for face mask', - 'face_mask_padding_help': 'specify the face mask padding (top, right, bottom, left) in percent', - 'face_mask_regions_help': 'choose from the available face mask regions (choices: {choices})', - 'trim_frame_start_help': 'specify the start frame for extraction', - 'trim_frame_end_help': 'specify the end frame for extraction', - 'temp_frame_format_help': 'specify the image format used for frame extraction', - 'temp_frame_quality_help': 'specify the image quality used for frame extraction', - 'output_image_quality_help': 'specify the quality used for the 
output image', - 'output_video_encoder_help': 'specify the encoder used for the output video', - 'output_video_preset_help': 'specify the preset used for the output video', - 'output_video_quality_help': 'specify the quality used for the output video', - 'output_video_resolution_help': 'specify the resolution used for the output video', - 'output_video_fps_help': 'specify the frames per second (fps) used for the output video', - 'video_memory_strategy_help': 'specify strategy to handle the video memory', - 'system_memory_limit_help': 'specify the amount (gb) of system memory to be used', - 'execution_providers_help': 'choose from the available execution providers (choices: {choices}, ...)', - 'execution_thread_count_help': 'specify the number of execution threads', - 'execution_queue_count_help': 'specify the number of execution queries', - 'skip_download_help': 'omit automate downloads and lookups', - 'headless_help': 'run the program in headless mode', - 'log_level_help': 'choose from the available log levels', 'creating_temp': 'Creating temporary resources', 'extracting_frames_fps': 'Extracting frames with {video_fps} FPS', 'analysing': 'Analysing', 'processing': 'Processing', 'downloading': 'Downloading', 'temp_frames_not_found': 'Temporary frames not found', - 'compressing_image': 'Compressing image', - 'compressing_image_failed': 'Compressing image failed', + 'compressing_image_succeed': 'Compressing image succeed', + 'compressing_image_skipped': 'Compressing image skipped', 'merging_video_fps': 'Merging video with {video_fps} FPS', 'merging_video_failed': 'Merging video failed', 'skipping_audio': 'Skipping audio', - 'restoring_audio': 'Restoring audio', + 'restoring_audio_succeed': 'Restoring audio succeed', 'restoring_audio_skipped': 'Restoring audio skipped', 'clearing_temp': 'Clearing temporary resources', 'processing_image_succeed': 'Processing to image succeed in {seconds} seconds', @@ -66,78 +24,176 @@ WORDING =\ 'processing_video_failed': 'Processing to video failed', 'model_download_not_done': 'Download of the model is not done', 'model_file_not_present': 'File of the model is not present', - 'select_image_source': 'Select an image for source path', - 'select_image_or_video_target': 'Select an image or video for target path', - 'select_file_or_directory_output': 'Select an file or directory for output path', + 'select_image_source': 'Select a image for source path', + 'select_audio_source': 'Select a audio for source path', + 'select_video_target': 'Select a video for target path', + 'select_image_or_video_target': 'Select a image or video for target path', + 'select_file_or_directory_output': 'Select a file or directory for output path', 'no_source_face_detected': 'No source face detected', 'frame_processor_not_loaded': 'Frame processor {frame_processor} could not be loaded', 'frame_processor_not_implemented': 'Frame processor {frame_processor} not implemented correctly', 'ui_layout_not_loaded': 'UI layout {ui_layout} could not be loaded', 'ui_layout_not_implemented': 'UI layout {ui_layout} not implemented correctly', 'stream_not_loaded': 'Stream {stream_mode} could not be loaded', - 'donate_button_label': 'DONATE', - 'start_button_label': 'START', - 'stop_button_label': 'STOP', - 'clear_button_label': 'CLEAR', - 'benchmark_runs_checkbox_group_label': 'BENCHMARK RUNS', - 'benchmark_results_dataframe_label': 'BENCHMARK RESULTS', - 'benchmark_cycles_slider_label': 'BENCHMARK CYCLES', - 'execution_providers_checkbox_group_label': 'EXECUTION PROVIDERS', - 
'execution_thread_count_slider_label': 'EXECUTION THREAD COUNT', - 'execution_queue_count_slider_label': 'EXECUTION QUEUE COUNT', - 'face_analyser_order_dropdown_label': 'FACE ANALYSER ORDER', - 'face_analyser_age_dropdown_label': 'FACE ANALYSER AGE', - 'face_analyser_gender_dropdown_label': 'FACE ANALYSER GENDER', - 'face_detector_model_dropdown_label': 'FACE DETECTOR MODEL', - 'face_detector_size_dropdown_label': 'FACE DETECTOR SIZE', - 'face_detector_score_slider_label': 'FACE DETECTOR SCORE', - 'face_selector_mode_dropdown_label': 'FACE SELECTOR MODE', - 'reference_face_gallery_label': 'REFERENCE FACE', - 'reference_face_distance_slider_label': 'REFERENCE FACE DISTANCE', - 'face_mask_types_checkbox_group_label': 'FACE MASK TYPES', - 'face_mask_blur_slider_label': 'FACE MASK BLUR', - 'face_mask_padding_top_slider_label': 'FACE MASK PADDING TOP', - 'face_mask_padding_bottom_slider_label': 'FACE MASK PADDING BOTTOM', - 'face_mask_padding_left_slider_label': 'FACE MASK PADDING LEFT', - 'face_mask_padding_right_slider_label': 'FACE MASK PADDING RIGHT', - 'face_mask_region_checkbox_group_label': 'FACE MASK REGIONS', - 'video_memory_strategy_dropdown_label': 'VIDEO MEMORY STRATEGY', - 'system_memory_limit_slider_label': 'SYSTEM MEMORY LIMIT', - 'output_image_or_video_label': 'OUTPUT', - 'output_path_textbox_label': 'OUTPUT PATH', - 'output_image_quality_slider_label': 'OUTPUT IMAGE QUALITY', - 'output_video_encoder_dropdown_label': 'OUTPUT VIDEO ENCODER', - 'output_video_preset_dropdown_label': 'OUTPUT VIDEO PRESET', - 'output_video_quality_slider_label': 'OUTPUT VIDEO QUALITY', - 'output_video_resolution_dropdown_label': 'OUTPUT VIDEO RESOLUTION', - 'output_video_fps_slider_label': 'OUTPUT VIDEO FPS', - 'preview_image_label': 'PREVIEW', - 'preview_frame_slider_label': 'PREVIEW FRAME', - 'frame_processors_checkbox_group_label': 'FRAME PROCESSORS', - 'face_swapper_model_dropdown_label': 'FACE SWAPPER MODEL', - 'face_enhancer_model_dropdown_label': 'FACE ENHANCER MODEL', - 'face_enhancer_blend_slider_label': 'FACE ENHANCER BLEND', - 'frame_enhancer_model_dropdown_label': 'FRAME ENHANCER MODEL', - 'frame_enhancer_blend_slider_label': 'FRAME ENHANCER BLEND', - 'face_debugger_items_checkbox_group_label': 'FACE DEBUGGER ITEMS', - 'common_options_checkbox_group_label': 'OPTIONS', - 'temp_frame_format_dropdown_label': 'TEMP FRAME FORMAT', - 'temp_frame_quality_slider_label': 'TEMP FRAME QUALITY', - 'trim_frame_start_slider_label': 'TRIM FRAME START', - 'trim_frame_end_slider_label': 'TRIM FRAME END', - 'source_file_label': 'SOURCE', - 'target_file_label': 'TARGET', - 'webcam_image_label': 'WEBCAM', - 'webcam_mode_radio_label': 'WEBCAM MODE', - 'webcam_resolution_dropdown': 'WEBCAM RESOLUTION', - 'webcam_fps_slider': 'WEBCAM FPS', 'point': '.', 'comma': ',', 'colon': ':', 'question_mark': '?', - 'exclamation_mark': '!' 
+ 'exclamation_mark': '!', + 'help': + { + # installer + 'install_dependency': 'select the variant of {dependency} to install', + 'skip_venv': 'skip the virtual environment check', + # general + 'source': 'choose single or multiple source images', + 'target': 'choose single target image or video', + 'output': 'specify the output file or directory', + # misc + 'skip_download': 'omit automate downloads and remote lookups', + 'headless': 'run the program without a user interface', + 'log_level': 'adjust the message severity displayed in the terminal', + # execution + 'execution_providers': 'accelerate the model inference using different providers (choices: {choices}, ...)', + 'execution_thread_count': 'specify the amount of parallel threads while processing', + 'execution_queue_count': 'specify the amount of frames each thread is processing', + # memory + 'video_memory_strategy': 'balance fast frame processing and low vram usage', + 'system_memory_limit': 'limit the available ram that can be used while processing', + # face analyser + 'face_analyser_order': 'specify the order in which the face analyser detects faces.', + 'face_analyser_age': 'filter the detected faces based on their age', + 'face_analyser_gender': 'filter the detected faces based on their gender', + 'face_detector_model': 'choose the model responsible for detecting the face', + 'face_detector_size': 'specify the size of the frame provided to the face detector', + 'face_detector_score': 'filter the detected faces base on the confidence score', + # face selector + 'face_selector_mode': 'use reference based tracking with simple matching', + 'reference_face_position': 'specify the position used to create the reference face', + 'reference_face_distance': 'specify the desired similarity between the reference face and target face', + 'reference_frame_number': 'specify the frame used to create the reference face', + # face mask + 'face_mask_types': 'mix and match different face mask types (choices: {choices})', + 'face_mask_blur': 'specify the degree of blur applied the box mask', + 'face_mask_padding': 'apply top, right, bottom and left padding to the box mask', + 'face_mask_regions': 'choose the facial features used for the region mask (choices: {choices})', + # frame extraction + 'trim_frame_start': 'specify the the start frame of the target video', + 'trim_frame_end': 'specify the the end frame of the target video', + 'temp_frame_format': 'specify the temporary resources format', + 'temp_frame_quality': 'specify the temporary resources quality', + 'keep_temp': 'keep the temporary resources after processing', + # output creation + 'output_image_quality': 'specify the image quality which translates to the compression factor', + 'output_video_encoder': 'specify the encoder use for the video compression', + 'output_video_preset': 'balance fast video processing and video file size', + 'output_video_quality': 'specify the video quality which translates to the compression factor', + 'output_video_resolution': 'specify the video output resolution based on the target video', + 'output_video_fps': 'specify the video output fps based on the target video', + 'skip_audio': 'omit the audio from the target video', + # frame processors + 'frame_processors': 'load a single or multiple frame processors. 
(choices: {choices}, ...)', + 'face_debugger_items': 'load a single or multiple frame processors (choices: {choices})', + 'face_enhancer_model': 'choose the model responsible for enhancing the face', + 'face_enhancer_blend': 'blend the enhanced into the previous face', + 'face_swapper_model': 'choose the model responsible for swapping the face', + 'frame_enhancer_model': 'choose the model responsible for enhancing the frame', + 'frame_enhancer_blend': 'blend the enhanced into the previous frame', + 'lip_syncer_model': 'choose the model responsible for syncing the lips', + # uis + 'ui_layouts': 'launch a single or multiple UI layouts (choices: {choices}, ...)' + }, + 'uis': + { + # general + 'start_button': 'START', + 'stop_button': 'STOP', + 'clear_button': 'CLEAR', + # about + 'donate_button': 'DONATE', + # benchmark + 'benchmark_results_dataframe': 'BENCHMARK RESULTS', + # benchmark options + 'benchmark_runs_checkbox_group': 'BENCHMARK RUNS', + 'benchmark_cycles_slider': 'BENCHMARK CYCLES', + # common options + 'common_options_checkbox_group': 'OPTIONS', + # execution + 'execution_providers_checkbox_group': 'EXECUTION PROVIDERS', + # execution queue count + 'execution_queue_count_slider': 'EXECUTION QUEUE COUNT', + # execution thread count + 'execution_thread_count_slider': 'EXECUTION THREAD COUNT', + # face analyser + 'face_analyser_order_dropdown': 'FACE ANALYSER ORDER', + 'face_analyser_age_dropdown': 'FACE ANALYSER AGE', + 'face_analyser_gender_dropdown': 'FACE ANALYSER GENDER', + 'face_detector_model_dropdown': 'FACE DETECTOR MODEL', + 'face_detector_size_dropdown': 'FACE DETECTOR SIZE', + 'face_detector_score_slider': 'FACE DETECTOR SCORE', + # face masker + 'face_mask_types_checkbox_group': 'FACE MASK TYPES', + 'face_mask_blur_slider': 'FACE MASK BLUR', + 'face_mask_padding_top_slider': 'FACE MASK PADDING TOP', + 'face_mask_padding_right_slider': 'FACE MASK PADDING RIGHT', + 'face_mask_padding_bottom_slider': 'FACE MASK PADDING BOTTOM', + 'face_mask_padding_left_slider': 'FACE MASK PADDING LEFT', + 'face_mask_region_checkbox_group': 'FACE MASK REGIONS', + # face selector + 'face_selector_mode_dropdown': 'FACE SELECTOR MODE', + 'reference_face_gallery': 'REFERENCE FACE', + 'reference_face_distance_slider': 'REFERENCE FACE DISTANCE', + # frame processors + 'frame_processors_checkbox_group': 'FRAME PROCESSORS', + # frame processors options + 'face_debugger_items_checkbox_group': 'FACE DEBUGGER ITEMS', + 'face_enhancer_model_dropdown': 'FACE ENHANCER MODEL', + 'face_enhancer_blend_slider': 'FACE ENHANCER BLEND', + 'face_swapper_model_dropdown': 'FACE SWAPPER MODEL', + 'frame_enhancer_model_dropdown': 'FRAME ENHANCER MODEL', + 'frame_enhancer_blend_slider': 'FRAME ENHANCER BLEND', + 'lip_syncer_model_dropdown': 'LIP SYNCER MODEL', + # memory + 'video_memory_strategy_dropdown': 'VIDEO MEMORY STRATEGY', + 'system_memory_limit_slider': 'SYSTEM MEMORY LIMIT', + # output + 'output_image_or_video': 'OUTPUT', + # output options + 'output_path_textbox': 'OUTPUT PATH', + 'output_image_quality_slider': 'OUTPUT IMAGE QUALITY', + 'output_video_encoder_dropdown': 'OUTPUT VIDEO ENCODER', + 'output_video_preset_dropdown': 'OUTPUT VIDEO PRESET', + 'output_video_quality_slider': 'OUTPUT VIDEO QUALITY', + 'output_video_resolution_dropdown': 'OUTPUT VIDEO RESOLUTION', + 'output_video_fps_slider': 'OUTPUT VIDEO FPS', + # preview + 'preview_image': 'PREVIEW', + 'preview_frame_slider': 'PREVIEW FRAME', + # source + 'source_file': 'SOURCE', + # target + 'target_file': 'TARGET', + # temp frame + 
'temp_frame_format_dropdown': 'TEMP FRAME FORMAT', + 'temp_frame_quality_slider': 'TEMP FRAME QUALITY', + # trim frame + 'trim_frame_start_slider': 'TRIM FRAME START', + 'trim_frame_end_slider': 'TRIM FRAME END', + # webcam + 'webcam_image': 'WEBCAM', + # webcam options + 'webcam_mode_radio': 'WEBCAM MODE', + 'webcam_resolution_dropdown': 'WEBCAM RESOLUTION', + 'webcam_fps_slider': 'WEBCAM FPS' + } } -def get(key : str) -> str: - return WORDING[key] +def get(key : str) -> Optional[str]: + if '.' in key: + section, name = key.split('.') + if section in WORDING and name in WORDING[section]: + return WORDING[section][name] + if key in WORDING: + return WORDING[key] + return None diff --git a/install.py b/install.py index 307f686f..9ce0d08a 100755 --- a/install.py +++ b/install.py @@ -1,5 +1,9 @@ #!/usr/bin/env python3 +import subprocess + +subprocess.call([ 'pip', 'install' , 'inquirer', '-q' ]) + from facefusion import installer if __name__ == '__main__': diff --git a/requirements.txt b/requirements.txt index d4f0bc51..3d1c41c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ gradio==3.50.2 numpy==1.26.2 onnx==1.15.0 onnxruntime==1.16.3 -opencv-python==4.8.1.78 +opencv-python==4.9.0.80 psutil==5.9.6 realesrgan==0.3.0 torch==2.1.2 diff --git a/run.py b/run.py index 3b796757..1c2a8bd9 100755 --- a/run.py +++ b/run.py @@ -3,4 +3,4 @@ from facefusion import core if __name__ == '__main__': - core.cli() + core.cli() diff --git a/tests/test_audio.py b/tests/test_audio.py new file mode 100644 index 00000000..08f8d471 --- /dev/null +++ b/tests/test_audio.py @@ -0,0 +1,26 @@ +import subprocess +import pytest + +from facefusion.audio import get_audio_frame, read_static_audio +from facefusion.download import conditional_download + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.mp3', '.assets/examples/source.wav' ]) + + +def test_get_audio_frame() -> None: + assert get_audio_frame('.assets/examples/source.mp3', 25) is not None + assert get_audio_frame('.assets/examples/source.wav', 25) is not None + assert get_audio_frame('invalid', 25) is None + + +def test_read_static_audio() -> None: + assert len(read_static_audio('.assets/examples/source.mp3', 25)) == 91 + assert len(read_static_audio('.assets/examples/source.wav', 25)) == 91 + assert read_static_audio('invalid', 25) is None diff --git a/tests/test_cli.py b/tests/test_cli.py index cad4ffbb..0ef96ebf 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -10,21 +10,70 @@ def before_all() -> None: conditional_download('.assets/examples', [ 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', - 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-1080p.mp4' + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' ]) - subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-1080p.mp4', '-vframes', '1', '.assets/examples/target-1080p.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vframes', '1', '.assets/examples/target-240p.jpg' ]) -def test_image_to_image() -> None: - commands = [ sys.executable, 'run.py', '-s', '.assets/examples/source.jpg', '-t', 
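With the wording table split into nested help and uis sections, get() above resolves dotted keys, falls back to the flat top-level entries and returns None for anything unknown instead of raising a KeyError. A few lookups that follow directly from the table:

from facefusion import wording

assert wording.get('uis.start_button') == 'START'   # nested lookup via the dotted key
assert wording.get('processing') == 'Processing'    # flat keys keep working
assert wording.get('uis.does_not_exist') is None    # unknown entries no longer raise
assert wording.get('does_not_exist') is None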
'.assets/examples/target-1080p.jpg', '-o', '.assets/examples', '--headless' ] +def test_debug_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_debug_face_to_image.jpg', '--headless' ] run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) assert run.returncode == 0 assert 'image succeed' in run.stdout.decode() -def test_image_to_video() -> None: - commands = [ sys.executable, 'run.py', '-s', '.assets/examples/source.jpg', '-t', '.assets/examples/target-1080p.mp4', '-o', '.assets/examples', '--trim-frame-end', '10', '--headless' ] +def test_debug_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_debugger', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_debug_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() + + +def test_enhance_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_enhance_face_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_enhance_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_enhancer', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_enhance_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() + + +def test_swap_face_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '.assets/examples/source.jpg', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_swap_face_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_swap_face_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'face_swapper', '-s', '.assets/examples/source.jpg', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_swap_face_to_video.mp4', '--trim-frame-end', '10', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'video succeed' in run.stdout.decode() + + +def test_sync_lip_to_image() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'lip_syncer', '-s', '.assets/examples/source.mp3', '-t', '.assets/examples/target-240p.jpg', '-o', '.assets/examples/test_sync_lip_to_image.jpg', '--headless' ] + run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) + + assert run.returncode == 0 + assert 'image succeed' in run.stdout.decode() + + +def test_sync_lip_to_video() -> None: + commands = [ sys.executable, 'run.py', '--frame-processors', 'lip_syncer', '-s', '.assets/examples/source.mp3', '-t', '.assets/examples/target-240p.mp4', '-o', '.assets/examples/test_sync_lip_to_video.mp4', 
'--trim-frame-end', '10', '--headless' ] run = subprocess.run(commands, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) assert run.returncode == 0 diff --git a/tests/test_download.py b/tests/test_download.py index f80c44b8..6df94b17 100644 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -19,5 +19,5 @@ def test_get_download_size() -> None: def test_is_download_done() -> None: assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', '.assets/examples/target-240p.mp4') is True - assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4','invalid') is False + assert is_download_done('https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', 'invalid') is False assert is_download_done('invalid', 'invalid') is False diff --git a/tests/test_face_analyser.py b/tests/test_face_analyser.py new file mode 100644 index 00000000..6ec1f7f1 --- /dev/null +++ b/tests/test_face_analyser.py @@ -0,0 +1,84 @@ +import subprocess +import pytest + +import facefusion.globals +from facefusion.download import conditional_download +from facefusion.face_analyser import clear_face_analyser, get_one_face +from facefusion.typing import Face +from facefusion.vision import read_static_image + + +@pytest.fixture(scope = 'module', autouse = True) +def before_all() -> None: + conditional_download('.assets/examples', + [ + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg' + ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.jpg', '-vf', 'crop=iw*0.8:ih*0.8', '.assets/examples/source-80crop.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.jpg', '-vf', 'crop=iw*0.7:ih*0.7', '.assets/examples/source-70crop.jpg' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.jpg', '-vf', 'crop=iw*0.6:ih*0.6', '.assets/examples/source-60crop.jpg' ]) + + +@pytest.fixture(autouse = True) +def before_each() -> None: + clear_face_analyser() + + +def test_get_one_face_with_retinaface() -> None: + facefusion.globals.face_detector_model = 'retinaface' + facefusion.globals.face_detector_size = '320x320' + facefusion.globals.face_detector_score = 0.5 + facefusion.globals.face_recognizer_model = 'arcface_inswapper' + + source_paths =\ + [ + '.assets/examples/source.jpg', + '.assets/examples/source-80crop.jpg', + '.assets/examples/source-70crop.jpg', + '.assets/examples/source-60crop.jpg' + ] + for source_path in source_paths: + source_frame = read_static_image(source_path) + face = get_one_face(source_frame) + + assert isinstance(face, Face) + + +def test_get_one_face_with_yoloface() -> None: + facefusion.globals.face_detector_model = 'yoloface' + facefusion.globals.face_detector_size = '640x640' + facefusion.globals.face_detector_score = 0.5 + facefusion.globals.face_recognizer_model = 'arcface_inswapper' + + source_paths =\ + [ + '.assets/examples/source.jpg', + '.assets/examples/source-80crop.jpg', + '.assets/examples/source-70crop.jpg', + '.assets/examples/source-60crop.jpg' + ] + for source_path in source_paths: + source_frame = read_static_image(source_path) + face = get_one_face(source_frame) + + assert isinstance(face, Face) + + +def test_get_one_face_with_yunet() -> None: + facefusion.globals.face_detector_model = 'yunet' + facefusion.globals.face_detector_size = '640x640' + facefusion.globals.face_detector_score = 0.5 + facefusion.globals.face_recognizer_model = 'arcface_inswapper' + + 
source_paths =\ + [ + '.assets/examples/source.jpg', + '.assets/examples/source-80crop.jpg', + '.assets/examples/source-70crop.jpg', + '.assets/examples/source-60crop.jpg' + ] + for source_path in source_paths: + source_frame = read_static_image(source_path) + face = get_one_face(source_frame) + + assert isinstance(face, Face) diff --git a/tests/test_ffmpeg.py b/tests/test_ffmpeg.py index 55a6bcb4..f46467c0 100644 --- a/tests/test_ffmpeg.py +++ b/tests/test_ffmpeg.py @@ -5,7 +5,7 @@ import pytest import facefusion.globals from facefusion.filesystem import get_temp_directory_path, create_temp, clear_temp from facefusion.download import conditional_download -from facefusion.ffmpeg import extract_frames +from facefusion.ffmpeg import extract_frames, read_audio_buffer @pytest.fixture(scope = 'module', autouse = True) @@ -13,8 +13,10 @@ def before_all() -> None: conditional_download('.assets/examples', [ 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' ]) + subprocess.run([ 'ffmpeg', '-i', '.assets/examples/source.mp3', '.assets/examples/source.wav' ]) subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=25', '.assets/examples/target-240p-25fps.mp4' ]) subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=30', '.assets/examples/target-240p-30fps.mp4' ]) subprocess.run([ 'ffmpeg', '-i', '.assets/examples/target-240p.mp4', '-vf', 'fps=60', '.assets/examples/target-240p-60fps.mp4' ]) @@ -98,3 +100,9 @@ def test_extract_frames_with_trim_end() -> None: assert len(glob.glob1(temp_directory_path, '*.jpg')) == frame_total clear_temp(target_path) + + +def test_read_audio_buffer() -> None: + assert isinstance(read_audio_buffer('.assets/examples/source.mp3', 1, 1), bytes) + assert isinstance(read_audio_buffer('.assets/examples/source.wav', 1, 1), bytes) + assert read_audio_buffer('.assets/examples/invalid.mp3', 1, 1) is None diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index 6db581d9..d149cba2 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -1,14 +1,16 @@ import pytest from facefusion.download import conditional_download -from facefusion.filesystem import is_file, is_directory, is_image, are_images, is_video, list_directory +from facefusion.filesystem import is_file, is_directory, is_audio, has_audio, is_image, has_image, is_video, filter_audio_paths, filter_image_paths, list_directory @pytest.fixture(scope = 'module', autouse = True) def before_all() -> None: conditional_download('.assets/examples', [ - 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg' + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4' ]) @@ -24,16 +26,30 @@ def test_is_directory() -> None: assert is_directory('invalid') is False +def test_is_audio() -> None: + assert is_audio('.assets/examples/source.mp3') is True + assert is_audio('.assets/examples/source.jpg') is False + assert is_audio('invalid') is False + + +def test_has_audio() -> None: + assert has_audio([ '.assets/examples/source.mp3' ]) is True + assert has_audio([ '.assets/examples/source.mp3', 
'.assets/examples/source.jpg' ]) is True + assert has_audio([ '.assets/examples/source.jpg', '.assets/examples/source.jpg' ]) is False + assert has_audio([ 'invalid' ]) is False + + def test_is_image() -> None: assert is_image('.assets/examples/source.jpg') is True assert is_image('.assets/examples/target-240p.mp4') is False assert is_image('invalid') is False -def test_are_images() -> None: - assert are_images([ '.assets/examples/source.jpg' ]) is True - assert are_images([ '.assets/examples/source.jpg', '.assets/examples/target-240p.mp4' ]) is False - assert are_images([ 'invalid' ]) is False +def test_has_image() -> None: + assert has_image([ '.assets/examples/source.jpg' ]) is True + assert has_image([ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) is True + assert has_image([ '.assets/examples/source.mp3', '.assets/examples/source.mp3' ]) is False + assert has_image([ 'invalid' ]) is False def test_is_video() -> None: @@ -42,6 +58,18 @@ def test_is_video() -> None: assert is_video('invalid') is False +def test_filter_audio_paths() -> None: + assert filter_audio_paths([ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) == [ '.assets/examples/source.mp3' ] + assert filter_audio_paths([ '.assets/examples/source.jpg', '.assets/examples/source.jpg' ]) == [] + assert filter_audio_paths([ 'invalid' ]) == [] + + +def test_filter_image_paths() -> None: + assert filter_image_paths([ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ]) == [ '.assets/examples/source.jpg' ] + assert filter_image_paths([ '.assets/examples/source.mp3', '.assets/examples/source.mp3' ]) == [] + assert filter_image_paths([ 'invalid' ]) == [] + + def test_list_directory() -> None: assert list_directory('.assets/examples') assert list_directory('.assets/examples/source.jpg') is None diff --git a/tests/test_wording.py b/tests/test_wording.py new file mode 100644 index 00000000..1deaa773 --- /dev/null +++ b/tests/test_wording.py @@ -0,0 +1,7 @@ +from facefusion import wording + + +def test_get() -> None: + assert wording.get('python_not_supported') + assert wording.get('help.source') + assert wording.get('invalid') is None
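
Note on the reworked wording lookup: the new wording.get in this patch resolves dotted keys into nested sections and falls back to None for unknown keys, which is the behaviour tests/test_wording.py asserts. Below is a minimal, self-contained sketch of that lookup for reference; the sample WORDING entries are illustrative placeholders, not the actual table in facefusion/wording.py.

from typing import Any, Dict, Optional

# illustrative stand-in for the nested WORDING table; the real entries live in facefusion/wording.py
WORDING : Dict[str, Any] =\
{
	'python_not_supported': 'Python version is not supported, upgrade to {version} or higher',
	'help':
	{
		'source': 'choose single or multiple source images or audios'
	}
}


def get(key : str) -> Optional[str]:
	# dotted keys such as 'help.source' resolve into a section, plain keys stay top level, unknown keys yield None
	if '.' in key:
		section, name = key.split('.')
		if section in WORDING and name in WORDING[section]:
			return WORDING[section][name]
	if key in WORDING:
		return WORDING[key]
	return None


assert get('python_not_supported') is not None
assert get('help.source') == 'choose single or multiple source images or audios'
assert get('invalid') is None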