diff --git a/.github/preview.png b/.github/preview.png
index ad94832d..ed05190e 100644
Binary files a/.github/preview.png and b/.github/preview.png differ
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 51fa80e5..8da617ba 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,6 +30,6 @@ jobs:
uses: actions/setup-python@v2
with:
python-version: '3.10'
- - run: python install.py --onnxruntime default --skip-venv
+ - run: python install.py --onnxruntime default --skip-conda
- run: pip install pytest
- run: pytest
diff --git a/README.md b/README.md
index 4fab7d8f..8687565e 100644
--- a/README.md
+++ b/README.md
@@ -30,75 +30,78 @@ Run the command:
python run.py [options]
options:
- -h, --help show this help message and exit
- -s SOURCE_PATHS, --source SOURCE_PATHS choose single or multiple source images or audios
- -t TARGET_PATH, --target TARGET_PATH choose single target image or video
- -o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory
- -v, --version show program's version number and exit
+ -h, --help show this help message and exit
+ -s SOURCE_PATHS, --source SOURCE_PATHS choose single or multiple source images or audios
+ -t TARGET_PATH, --target TARGET_PATH choose single target image or video
+ -o OUTPUT_PATH, --output OUTPUT_PATH specify the output file or directory
+ -v, --version show program's version number and exit
misc:
- --skip-download omit automate downloads and remote lookups
- --headless run the program without a user interface
- --log-level {error,warn,info,debug} adjust the message severity displayed in the terminal
+ --force-download force automate downloads and exit
+ --skip-download omit automate downloads and remote lookups
+ --headless run the program without a user interface
+ --log-level {error,warn,info,debug} adjust the message severity displayed in the terminal
execution:
- --execution-providers EXECUTION_PROVIDERS [EXECUTION_PROVIDERS ...] accelerate the model inference using different providers (choices: cpu, ...)
- --execution-thread-count [1-128] specify the amount of parallel threads while processing
- --execution-queue-count [1-32] specify the amount of frames each thread is processing
+ --execution-providers EXECUTION_PROVIDERS [EXECUTION_PROVIDERS ...] accelerate the model inference using different providers (choices: cpu, ...)
+ --execution-thread-count [1-128] specify the amount of parallel threads while processing
+ --execution-queue-count [1-32] specify the amount of frames each thread is processing
memory:
- --video-memory-strategy {strict,moderate,tolerant} balance fast frame processing and low vram usage
- --system-memory-limit [0-128] limit the available ram that can be used while processing
+ --video-memory-strategy {strict,moderate,tolerant} balance fast frame processing and low VRAM usage
+ --system-memory-limit [0-128] limit the available RAM that can be used while processing
face analyser:
- --face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order in which the face analyser detects faces.
- --face-analyser-age {child,teen,adult,senior} filter the detected faces based on their age
- --face-analyser-gender {female,male} filter the detected faces based on their gender
- --face-detector-model {many,retinaface,scrfd,yoloface,yunet} choose the model responsible for detecting the face
- --face-detector-size FACE_DETECTOR_SIZE specify the size of the frame provided to the face detector
- --face-detector-score [0.0-1.0] filter the detected faces base on the confidence score
- --face-landmarker-score [0.0-1.0] filter the detected landmarks base on the confidence score
+ --face-analyser-order {left-right,right-left,top-bottom,bottom-top,small-large,large-small,best-worst,worst-best} specify the order in which the face analyser detects faces
+ --face-analyser-age {child,teen,adult,senior} filter the detected faces based on their age
+ --face-analyser-gender {female,male} filter the detected faces based on their gender
+ --face-detector-model {many,retinaface,scrfd,yoloface,yunet} choose the model responsible for detecting the face
+ --face-detector-size FACE_DETECTOR_SIZE specify the size of the frame provided to the face detector
+ --face-detector-score [0.0-1.0] filter the detected faces base on the confidence score
+ --face-landmarker-score [0.0-1.0] filter the detected landmarks base on the confidence score
face selector:
- --face-selector-mode {many,one,reference} use reference based tracking or simple matching
- --reference-face-position REFERENCE_FACE_POSITION specify the position used to create the reference face
- --reference-face-distance [0.0-1.5] specify the desired similarity between the reference face and target face
- --reference-frame-number REFERENCE_FRAME_NUMBER specify the frame used to create the reference face
+ --face-selector-mode {many,one,reference} use reference based tracking or simple matching
+ --reference-face-position REFERENCE_FACE_POSITION specify the position used to create the reference face
+ --reference-face-distance [0.0-1.5] specify the desired similarity between the reference face and target face
+ --reference-frame-number REFERENCE_FRAME_NUMBER specify the frame used to create the reference face
face mask:
- --face-mask-types FACE_MASK_TYPES [FACE_MASK_TYPES ...] mix and match different face mask types (choices: box, occlusion, region)
- --face-mask-blur [0.0-1.0] specify the degree of blur applied the box mask
- --face-mask-padding FACE_MASK_PADDING [FACE_MASK_PADDING ...] apply top, right, bottom and left padding to the box mask
- --face-mask-regions FACE_MASK_REGIONS [FACE_MASK_REGIONS ...] choose the facial features used for the region mask (choices: skin, left-eyebrow, right-eyebrow, left-eye, right-eye, eye-glasses, nose, mouth, upper-lip, lower-lip)
+ --face-mask-types FACE_MASK_TYPES [FACE_MASK_TYPES ...] mix and match different face mask types (choices: box, occlusion, region)
+ --face-mask-blur [0.0-1.0] specify the degree of blur applied the box mask
+ --face-mask-padding FACE_MASK_PADDING [FACE_MASK_PADDING ...] apply top, right, bottom and left padding to the box mask
+ --face-mask-regions FACE_MASK_REGIONS [FACE_MASK_REGIONS ...] choose the facial features used for the region mask (choices: skin, left-eyebrow, right-eyebrow, left-eye, right-eye, eye-glasses, nose, mouth, upper-lip, lower-lip)
frame extraction:
- --trim-frame-start TRIM_FRAME_START specify the the start frame of the target video
- --trim-frame-end TRIM_FRAME_END specify the the end frame of the target video
- --temp-frame-format {bmp,jpg,png} specify the temporary resources format
- --keep-temp keep the temporary resources after processing
+ --trim-frame-start TRIM_FRAME_START specify the the start frame of the target video
+ --trim-frame-end TRIM_FRAME_END specify the the end frame of the target video
+ --temp-frame-format {bmp,jpg,png} specify the temporary resources format
+ --keep-temp keep the temporary resources after processing
output creation:
- --output-image-quality [0-100] specify the image quality which translates to the compression factor
- --output-image-resolution OUTPUT_IMAGE_RESOLUTION specify the image output resolution based on the target image
- --output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc,h264_amf,hevc_amf} specify the encoder use for the video compression
- --output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} balance fast video processing and video file size
- --output-video-quality [0-100] specify the video quality which translates to the compression factor
- --output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the video output resolution based on the target video
- --output-video-fps OUTPUT_VIDEO_FPS specify the video output fps based on the target video
- --skip-audio omit the audio from the target video
+ --output-image-quality [0-100] specify the image quality which translates to the compression factor
+ --output-image-resolution OUTPUT_IMAGE_RESOLUTION specify the image output resolution based on the target image
+ --output-video-encoder {libx264,libx265,libvpx-vp9,h264_nvenc,hevc_nvenc,h264_amf,hevc_amf} specify the encoder use for the video compression
+ --output-video-preset {ultrafast,superfast,veryfast,faster,fast,medium,slow,slower,veryslow} balance fast video processing and video file size
+ --output-video-quality [0-100] specify the video quality which translates to the compression factor
+ --output-video-resolution OUTPUT_VIDEO_RESOLUTION specify the video output resolution based on the target video
+ --output-video-fps OUTPUT_VIDEO_FPS specify the video output fps based on the target video
+ --skip-audio omit the audio from the target video
frame processors:
- --frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] load a single or multiple frame processors. (choices: face_debugger, face_enhancer, face_swapper, frame_enhancer, lip_syncer, ...)
- --face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] load a single or multiple frame processors (choices: bounding-box, face-landmark-5, face-landmark-5/68, face-landmark-68, face-mask, face-detector-score, face-landmarker-score, age, gender)
- --face-enhancer-model {codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,restoreformer_plus_plus} choose the model responsible for enhancing the face
- --face-enhancer-blend [0-100] blend the enhanced into the previous face
- --face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial,uniface_256} choose the model responsible for swapping the face
- --frame-enhancer-model {lsdir_x4,nomos8k_sc_x4,real_esrgan_x4,real_esrgan_x4_fp16,span_kendata_x4} choose the model responsible for enhancing the frame
- --frame-enhancer-blend [0-100] blend the enhanced into the previous frame
- --lip-syncer-model {wav2lip_gan} choose the model responsible for syncing the lips
+ --frame-processors FRAME_PROCESSORS [FRAME_PROCESSORS ...] load a single or multiple frame processors. (choices: face_debugger, face_enhancer, face_swapper, frame_colorizer, frame_enhancer, lip_syncer, ...)
+ --face-debugger-items FACE_DEBUGGER_ITEMS [FACE_DEBUGGER_ITEMS ...] load a single or multiple frame processors (choices: bounding-box, face-landmark-5, face-landmark-5/68, face-landmark-68, face-landmark-68/5, face-mask, face-detector-score, face-landmarker-score, age, gender)
+ --face-enhancer-model {codeformer,gfpgan_1.2,gfpgan_1.3,gfpgan_1.4,gpen_bfr_256,gpen_bfr_512,gpen_bfr_1024,gpen_bfr_2048,restoreformer_plus_plus} choose the model responsible for enhancing the face
+ --face-enhancer-blend [0-100] blend the enhanced into the previous face
+ --face-swapper-model {blendswap_256,inswapper_128,inswapper_128_fp16,simswap_256,simswap_512_unofficial,uniface_256} choose the model responsible for swapping the face
+ --frame-colorizer-model {ddcolor,ddcolor_artistic,deoldify_artistic} choose the model responsible for colorizing the frame
+ --frame-colorizer-blend [0-100] blend the colorized into the previous frame
+ --frame-enhancer-model {lsdir_x4,nomos8k_sc_x4,real_esrgan_x2,real_esrgan_x2_fp16,real_esrgan_x4,real_esrgan_x4_fp16,real_hatgan_x4,span_kendata_x4} choose the model responsible for enhancing the frame
+ --frame-enhancer-blend [0-100] blend the enhanced into the previous frame
+ --lip-syncer-model {wav2lip_gan} choose the model responsible for syncing the lips
uis:
- --ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] launch a single or multiple UI layouts (choices: benchmark, default, webcam, ...)
+ --ui-layouts UI_LAYOUTS [UI_LAYOUTS ...] launch a single or multiple UI layouts (choices: benchmark, default, webcam, ...)
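# Illustrative invocation only, a sketch using the new 2.5.0 options listed above
# (file names are hypothetical): frame_colorizer restores color, frame_enhancer upscales the result.
python run.py -t old_film.mp4 -o old_film_restored.mp4 --frame-processors frame_colorizer frame_enhancer --frame-colorizer-model ddcolor --frame-enhancer-model real_esrgan_x2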
``` diff --git a/facefusion.ini b/facefusion.ini index 1021c803..7f56cc68 100644 --- a/facefusion.ini +++ b/facefusion.ini @@ -4,6 +4,7 @@ target_path = output_path = [misc] +force_download = skip_download = headless = log_level = @@ -60,6 +61,8 @@ face_debugger_items = face_enhancer_model = face_enhancer_blend = face_swapper_model = +frame_colorizer_model = +frame_colorizer_blend = frame_enhancer_model = frame_enhancer_blend = lip_syncer_model = diff --git a/facefusion/audio.py b/facefusion/audio.py index a77448f3..fc1d782e 100644 --- a/facefusion/audio.py +++ b/facefusion/audio.py @@ -5,7 +5,49 @@ import scipy from facefusion.filesystem import is_audio from facefusion.ffmpeg import read_audio_buffer -from facefusion.typing import Fps, Audio, Spectrogram, AudioFrame +from facefusion.typing import Fps, Audio, AudioFrame, Spectrogram, MelFilterBank +from facefusion.voice_extractor import batch_extract_voice + + +@lru_cache(maxsize = 128) +def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + return read_audio(audio_path, fps) + + +def read_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + sample_rate = 16000 + channel_total = 2 + + if is_audio(audio_path): + audio_buffer = read_audio_buffer(audio_path, sample_rate, channel_total) + audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2) + audio = prepare_audio(audio) + spectrogram = create_spectrogram(audio) + audio_frames = extract_audio_frames(spectrogram, fps) + return audio_frames + return None + + +@lru_cache(maxsize = 128) +def read_static_voice(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + return read_voice(audio_path, fps) + + +def read_voice(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: + sample_rate = 16000 + channel_total = 2 + chunk_size = 1024 ** 3 + step_size = chunk_size // 4 + + if is_audio(audio_path): + audio_buffer = read_audio_buffer(audio_path, sample_rate, channel_total) + audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2) + audio = batch_extract_voice(audio, chunk_size, step_size) + audio = prepare_audio(audio) + spectrogram = create_spectrogram(audio) + audio_frames = extract_audio_frames(spectrogram, fps) + return audio_frames + return None def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]: @@ -16,33 +58,26 @@ def get_audio_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Opti return None -def create_empty_audio_frame() -> AudioFrame: - audio_frame = numpy.zeros((80, 16), dtype = numpy.int16) - return audio_frame - - -@lru_cache(maxsize = None) -def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]: +def get_voice_frame(audio_path : str, fps : Fps, frame_number : int = 0) -> Optional[AudioFrame]: if is_audio(audio_path): - audio_buffer = read_audio_buffer(audio_path, 16000, 2) - audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2) - audio = normalize_audio(audio) - audio = filter_audio(audio, -0.97) - spectrogram = create_spectrogram(audio, 16000, 80, 800, 55.0, 7600.0) - audio_frames = extract_audio_frames(spectrogram, 80, 16, fps) - return audio_frames + voice_frames = read_static_voice(audio_path, fps) + if frame_number in range(len(voice_frames)): + return voice_frames[frame_number] return None -def normalize_audio(audio : numpy.ndarray[Any, Any]) -> Audio: +def create_empty_audio_frame() -> AudioFrame: + mel_filter_total = 80 + step_size = 16 + audio_frame = 
numpy.zeros((mel_filter_total, step_size)).astype(numpy.int16) + return audio_frame + + +def prepare_audio(audio : numpy.ndarray[Any, Any]) -> Audio: if audio.ndim > 1: audio = numpy.mean(audio, axis = 1) audio = audio / numpy.max(numpy.abs(audio), axis = 0) - return audio - - -def filter_audio(audio : Audio, filter_coefficient : float) -> Audio: - audio = scipy.signal.lfilter([ 1.0, filter_coefficient ], [1.0], audio) + audio = scipy.signal.lfilter([ 1.0, -0.97 ], [ 1.0 ], audio) return audio @@ -54,28 +89,40 @@ def convert_mel_to_hertz(mel : numpy.ndarray[Any, Any]) -> numpy.ndarray[Any, An return 700 * (10 ** (mel / 2595) - 1) -@lru_cache(maxsize = None) -def create_static_mel_filter(sample_rate : int, filter_total : int, filter_size : int, frequency_minimum : float, frequency_maximum : float) -> numpy.ndarray[Any, Any]: - frequency_maximum = min(sample_rate / 2, frequency_maximum) - mel_filter = numpy.zeros((filter_total, filter_size // 2 + 1)) - mel_bins = numpy.linspace(convert_hertz_to_mel(frequency_minimum), convert_hertz_to_mel(frequency_maximum), filter_total + 2) - indices = numpy.floor((filter_size + 1) * convert_mel_to_hertz(mel_bins) / sample_rate).astype(numpy.int16) - for index in range(filter_total): - mel_filter[index, indices[index]: indices[index + 1]] = scipy.signal.windows.triang(indices[index + 1] - indices[index]) - return mel_filter +def create_mel_filter_bank() -> MelFilterBank: + mel_filter_total = 80 + mel_bin_total = 800 + sample_rate = 16000 + min_frequency = 55.0 + max_frequency = 7600.0 + mel_filter_bank = numpy.zeros((mel_filter_total, mel_bin_total // 2 + 1)) + mel_frequency_range = numpy.linspace(convert_hertz_to_mel(min_frequency), convert_hertz_to_mel(max_frequency), mel_filter_total + 2) + indices = numpy.floor((mel_bin_total + 1) * convert_mel_to_hertz(mel_frequency_range) / sample_rate).astype(numpy.int16) + + for index in range(mel_filter_total): + start = indices[index] + end = indices[index + 1] + mel_filter_bank[index, start:end] = scipy.signal.windows.triang(end - start) + return mel_filter_bank -def create_spectrogram(audio : Audio, sample_rate : int, filter_total : int, filter_size : int, frequency_minimum : float, frequency_maximum : float) -> Spectrogram: - mel_filter = create_static_mel_filter(sample_rate, filter_total, filter_size, frequency_minimum, frequency_maximum) - spectrogram = scipy.signal.stft(audio, nperseg = filter_size, noverlap = 600, nfft = filter_size)[2] - spectrogram = numpy.dot(mel_filter, numpy.abs(spectrogram)) +def create_spectrogram(audio : Audio) -> Spectrogram: + mel_bin_total = 800 + mel_bin_overlap = 600 + mel_filter_bank = create_mel_filter_bank() + spectrogram = scipy.signal.stft(audio, nperseg = mel_bin_total, nfft = mel_bin_total, noverlap = mel_bin_overlap)[2] + spectrogram = numpy.dot(mel_filter_bank, numpy.abs(spectrogram)) return spectrogram -def extract_audio_frames(spectrogram : Spectrogram, filter_total : int, audio_frame_step : int, fps : Fps) -> List[AudioFrame]: - indices = numpy.arange(0, spectrogram.shape[1], filter_total / fps).astype(numpy.int16) - indices = indices[indices >= audio_frame_step] +def extract_audio_frames(spectrogram : Spectrogram, fps : Fps) -> List[AudioFrame]: + mel_filter_total = 80 + step_size = 16 audio_frames = [] + indices = numpy.arange(0, spectrogram.shape[1], mel_filter_total / fps).astype(numpy.int16) + indices = indices[indices >= step_size] + for index in indices: - audio_frames.append(spectrogram[:, max(0, index - audio_frame_step) : index]) + start = max(0, index - 
step_size) + audio_frames.append(spectrogram[:, start:index]) return audio_frames diff --git a/facefusion/common_helper.py b/facefusion/common_helper.py index f5d8b68d..ea1ec830 100644 --- a/facefusion/common_helper.py +++ b/facefusion/common_helper.py @@ -1,4 +1,4 @@ -from typing import List, Any, Tuple +from typing import List, Any import numpy @@ -16,12 +16,3 @@ def create_float_range(start : float, stop : float, step : float) -> List[float] def get_first(__list__ : Any) -> Any: return next(iter(__list__), None) - - -def extract_major_version(version : str) -> Tuple[int, int]: - versions = version.split('.') - if len(versions) > 1: - return int(versions[0]), int(versions[1]) - if len(versions) == 1: - return int(versions[0]), 0 - return 0, 0 diff --git a/facefusion/content_analyser.py b/facefusion/content_analyser.py index 7025977b..bf7a3c28 100644 --- a/facefusion/content_analyser.py +++ b/facefusion/content_analyser.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any from functools import lru_cache from time import sleep import threading @@ -9,15 +9,15 @@ from tqdm import tqdm import facefusion.globals from facefusion import process_manager, wording -from facefusion.typing import VisionFrame, ModelValue, Fps +from facefusion.typing import VisionFrame, ModelSet, Fps from facefusion.execution import apply_execution_provider_options from facefusion.vision import get_video_frame, count_video_frame_total, read_image, detect_video_fps -from facefusion.filesystem import resolve_relative_path +from facefusion.filesystem import resolve_relative_path, is_file from facefusion.download import conditional_download CONTENT_ANALYSER = None THREAD_LOCK : threading.Lock = threading.Lock() -MODELS : Dict[str, ModelValue] =\ +MODELS : ModelSet =\ { 'open_nsfw': { @@ -26,7 +26,7 @@ MODELS : Dict[str, ModelValue] =\ } } PROBABILITY_LIMIT = 0.80 -RATE_LIMIT = 5 +RATE_LIMIT = 10 STREAM_COUNTER = 0 @@ -49,13 +49,15 @@ def clear_content_analyser() -> None: def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = MODELS.get('open_nsfw').get('url') + model_path = MODELS.get('open_nsfw').get('path') + if not facefusion.globals.skip_download: - download_directory_path = resolve_relative_path('../.assets/models') - model_url = MODELS.get('open_nsfw').get('url') process_manager.check() conditional_download(download_directory_path, [ model_url ]) process_manager.end() - return True + return is_file(model_path) def analyse_stream(vision_frame : VisionFrame, video_fps : Fps) -> bool: diff --git a/facefusion/core.py b/facefusion/core.py index 505e59f1..a2d6edee 100755 --- a/facefusion/core.py +++ b/facefusion/core.py @@ -15,7 +15,7 @@ import facefusion.choices import facefusion.globals from facefusion.face_analyser import get_one_face, get_average_face from facefusion.face_store import get_reference_faces, append_reference_face -from facefusion import face_analyser, face_masker, content_analyser, config, process_manager, metadata, logger, wording +from facefusion import face_analyser, face_masker, content_analyser, config, process_manager, metadata, logger, wording, voice_extractor from facefusion.content_analyser import analyse_image, analyse_video from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module from facefusion.common_helper import create_metavar, get_first @@ -23,7 +23,8 @@ from facefusion.execution import encode_execution_providers, decode_execution_pr from facefusion.normalizer import 
normalize_output_path, normalize_padding, normalize_fps from facefusion.memory import limit_system_memory from facefusion.statistics import conditional_log_statistics -from facefusion.filesystem import list_directory, get_temp_frame_paths, create_temp, move_temp, clear_temp, is_image, is_video, filter_audio_paths +from facefusion.download import conditional_download +from facefusion.filesystem import list_directory, get_temp_frame_paths, create_temp, move_temp, clear_temp, is_image, is_video, filter_audio_paths, resolve_relative_path from facefusion.ffmpeg import extract_frames, merge_video, copy_image, finalize_image, restore_audio, replace_audio from facefusion.vision import read_image, read_static_images, detect_image_resolution, restrict_video_fps, create_image_resolutions, get_video_frame, detect_video_resolution, detect_video_fps, restrict_video_resolution, restrict_image_resolution, create_video_resolutions, pack_resolution, unpack_resolution @@ -33,7 +34,7 @@ warnings.filterwarnings('ignore', category = UserWarning, module = 'gradio') def cli() -> None: signal.signal(signal.SIGINT, lambda signal_number, frame: destroy()) - program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 130), add_help = False) + program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 160), add_help = False) # general program.add_argument('-s', '--source', help = wording.get('help.source'), action = 'append', dest = 'source_paths', default = config.get_str_list('general.source_paths')) program.add_argument('-t', '--target', help = wording.get('help.target'), dest = 'target_path', default = config.get_str_value('general.target_path')) @@ -41,6 +42,7 @@ def cli() -> None: program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') # misc group_misc = program.add_argument_group('misc') + group_misc.add_argument('--force-download', help = wording.get('help.force_download'), action = 'store_true', default = config.get_bool_value('misc.force_download')) group_misc.add_argument('--skip-download', help = wording.get('help.skip_download'), action = 'store_true', default = config.get_bool_value('misc.skip_download')) group_misc.add_argument('--headless', help = wording.get('help.headless'), action = 'store_true', default = config.get_bool_value('misc.headless')) group_misc.add_argument('--log-level', help = wording.get('help.log_level'), default = config.get_str_value('misc.log_level', 'info'), choices = logger.get_log_levels()) @@ -89,7 +91,7 @@ def cli() -> None: group_output_creation.add_argument('--output-video-preset', help = wording.get('help.output_video_preset'), default = config.get_str_value('output_creation.output_video_preset', 'veryfast'), choices = facefusion.choices.output_video_presets) group_output_creation.add_argument('--output-video-quality', help = wording.get('help.output_video_quality'), type = int, default = config.get_int_value('output_creation.output_video_quality', '80'), choices = facefusion.choices.output_video_quality_range, metavar = create_metavar(facefusion.choices.output_video_quality_range)) group_output_creation.add_argument('--output-video-resolution', help = wording.get('help.output_video_resolution'), default = config.get_str_value('output_creation.output_video_resolution')) - group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float) + 
group_output_creation.add_argument('--output-video-fps', help = wording.get('help.output_video_fps'), type = float, default = config.get_str_value('output_creation.output_video_fps')) group_output_creation.add_argument('--skip-audio', help = wording.get('help.skip_audio'), action = 'store_true', default = config.get_bool_value('output_creation.skip_audio')) # frame processors available_frame_processors = list_directory('facefusion/processors/frame/modules') @@ -113,6 +115,7 @@ def apply_args(program : ArgumentParser) -> None: facefusion.globals.target_path = args.target_path facefusion.globals.output_path = args.output_path # misc + facefusion.globals.force_download = args.force_download facefusion.globals.skip_download = args.skip_download facefusion.globals.headless = args.headless facefusion.globals.log_level = args.log_level @@ -184,9 +187,13 @@ def apply_args(program : ArgumentParser) -> None: def run(program : ArgumentParser) -> None: apply_args(program) logger.init(facefusion.globals.log_level) + if facefusion.globals.system_memory_limit > 0: limit_system_memory(facefusion.globals.system_memory_limit) - if not pre_check() or not content_analyser.pre_check() or not face_analyser.pre_check() or not face_masker.pre_check(): + if facefusion.globals.force_download: + force_download() + return + if not pre_check() or not content_analyser.pre_check() or not face_analyser.pre_check() or not face_masker.pre_check() or not voice_extractor.pre_check(): return for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors): if not frame_processor_module.pre_check(): @@ -256,6 +263,24 @@ def conditional_append_reference_faces() -> None: append_reference_face(frame_processor_module.__name__, reference_face) +def force_download() -> None: + download_directory_path = resolve_relative_path('../.assets/models') + available_frame_processors = list_directory('facefusion/processors/frame/modules') + model_list =\ + [ + content_analyser.MODELS, + face_analyser.MODELS, + face_masker.MODELS, + voice_extractor.MODELS + ] + + for frame_processor_module in get_frame_processors_modules(available_frame_processors): + if hasattr(frame_processor_module, 'MODELS'): + model_list.append(frame_processor_module.MODELS) + model_urls = [ models[model].get('url') for models in model_list for model in models ] + conditional_download(download_directory_path, model_urls) + + def process_image(start_time : float) -> None: normed_output_path = normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path) if analyse_image(facefusion.globals.target_path): diff --git a/facefusion/face_analyser.py b/facefusion/face_analyser.py index 75faffc0..0098ce8e 100644 --- a/facefusion/face_analyser.py +++ b/facefusion/face_analyser.py @@ -8,11 +8,11 @@ import onnxruntime import facefusion.globals from facefusion import process_manager from facefusion.common_helper import get_first -from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_translation, create_static_anchors, distance_to_face_landmark_5, distance_to_bounding_box, convert_face_landmark_68_to_5, apply_nms, categorize_age, categorize_gender +from facefusion.face_helper import estimate_matrix_by_face_landmark_5, warp_face_by_face_landmark_5, warp_face_by_translation, create_static_anchors, distance_to_face_landmark_5, distance_to_bounding_box, convert_face_landmark_68_to_5, apply_nms, categorize_age, categorize_gender from facefusion.face_store import get_static_faces, set_static_faces from 
facefusion.execution import apply_execution_provider_options from facefusion.download import conditional_download -from facefusion.filesystem import resolve_relative_path +from facefusion.filesystem import resolve_relative_path, is_file from facefusion.typing import VisionFrame, Face, FaceSet, FaceAnalyserOrder, FaceAnalyserAge, FaceAnalyserGender, ModelSet, BoundingBox, FaceLandmarkSet, FaceLandmark5, FaceLandmark68, Score, FaceScoreSet, Embedding from facefusion.vision import resize_frame_resolution, unpack_resolution @@ -61,11 +61,16 @@ MODELS : ModelSet =\ 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/arcface_w600k_r50.onnx', 'path': resolve_relative_path('../.assets/models/arcface_w600k_r50.onnx') }, - 'face_landmarker': + 'face_landmarker_68': { 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/2dfan4.onnx', 'path': resolve_relative_path('../.assets/models/2dfan4.onnx') }, + 'face_landmarker_68_5': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/face_landmarker_68_5.onnx', + 'path': resolve_relative_path('../.assets/models/face_landmarker_68_5.onnx') + }, 'gender_age': { 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gender_age.onnx', @@ -78,22 +83,20 @@ def get_face_analyser() -> Any: global FACE_ANALYSER face_detectors = {} + face_landmarkers = {} + with THREAD_LOCK: while process_manager.is_checking(): sleep(0.5) if FACE_ANALYSER is None: if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]: - face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) - face_detectors['retinaface'] = face_detector + face_detectors['retinaface'] = onnxruntime.InferenceSession(MODELS.get('face_detector_retinaface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]: - face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_scrfd').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) - face_detectors['scrfd'] = face_detector + face_detectors['scrfd'] = onnxruntime.InferenceSession(MODELS.get('face_detector_scrfd').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]: - face_detector = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) - face_detectors['yoloface'] = face_detector + face_detectors['yoloface'] = onnxruntime.InferenceSession(MODELS.get('face_detector_yoloface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_detector_model in [ 'yunet' ]: - face_detector = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0)) - face_detectors['yunet'] = face_detector + face_detectors['yunet'] = cv2.FaceDetectorYN.create(MODELS.get('face_detector_yunet').get('path'), '', (0, 0)) if facefusion.globals.face_recognizer_model == 'arcface_blendswap': face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_blendswap').get('path'), providers = 
apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_recognizer_model == 'arcface_inswapper': @@ -102,13 +105,14 @@ def get_face_analyser() -> Any: face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_simswap').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) if facefusion.globals.face_recognizer_model == 'arcface_uniface': face_recognizer = onnxruntime.InferenceSession(MODELS.get('face_recognizer_arcface_uniface').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) - face_landmarker = onnxruntime.InferenceSession(MODELS.get('face_landmarker').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + face_landmarkers['68'] = onnxruntime.InferenceSession(MODELS.get('face_landmarker_68').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + face_landmarkers['68_5'] = onnxruntime.InferenceSession(MODELS.get('face_landmarker_68_5').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) gender_age = onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = apply_execution_provider_options(facefusion.globals.execution_providers)) FACE_ANALYSER =\ { 'face_detectors': face_detectors, 'face_recognizer': face_recognizer, - 'face_landmarker': face_landmarker, + 'face_landmarkers': face_landmarkers, 'gender_age': gender_age } return FACE_ANALYSER @@ -121,34 +125,50 @@ def clear_face_analyser() -> Any: def pre_check() -> bool: - if not facefusion.globals.skip_download: - download_directory_path = resolve_relative_path('../.assets/models') - model_urls =\ - [ - MODELS.get('face_landmarker').get('url'), - MODELS.get('gender_age').get('url') - ] + download_directory_path = resolve_relative_path('../.assets/models') + model_urls =\ + [ + MODELS.get('face_landmarker_68').get('url'), + MODELS.get('face_landmarker_68_5').get('url'), + MODELS.get('gender_age').get('url') + ] + model_paths =\ + [ + MODELS.get('face_landmarker_68').get('path'), + MODELS.get('face_landmarker_68_5').get('path'), + MODELS.get('gender_age').get('path') + ] - if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]: - model_urls.append(MODELS.get('face_detector_retinaface').get('url')) - if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]: - model_urls.append(MODELS.get('face_detector_scrfd').get('url')) - if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]: - model_urls.append(MODELS.get('face_detector_yoloface').get('url')) - if facefusion.globals.face_detector_model in [ 'yunet' ]: - model_urls.append(MODELS.get('face_detector_yunet').get('url')) - if facefusion.globals.face_recognizer_model == 'arcface_blendswap': - model_urls.append(MODELS.get('face_recognizer_arcface_blendswap').get('url')) - if facefusion.globals.face_recognizer_model == 'arcface_inswapper': - model_urls.append(MODELS.get('face_recognizer_arcface_inswapper').get('url')) - if facefusion.globals.face_recognizer_model == 'arcface_simswap': - model_urls.append(MODELS.get('face_recognizer_arcface_simswap').get('url')) - if facefusion.globals.face_recognizer_model == 'arcface_uniface': - model_urls.append(MODELS.get('face_recognizer_arcface_uniface').get('url')) + if facefusion.globals.face_detector_model in [ 'many', 'retinaface' ]: + 
model_urls.append(MODELS.get('face_detector_retinaface').get('url')) + model_paths.append(MODELS.get('face_detector_retinaface').get('path')) + if facefusion.globals.face_detector_model in [ 'many', 'scrfd' ]: + model_urls.append(MODELS.get('face_detector_scrfd').get('url')) + model_paths.append(MODELS.get('face_detector_scrfd').get('path')) + if facefusion.globals.face_detector_model in [ 'many', 'yoloface' ]: + model_urls.append(MODELS.get('face_detector_yoloface').get('url')) + model_paths.append(MODELS.get('face_detector_yoloface').get('path')) + if facefusion.globals.face_detector_model in [ 'yunet' ]: + model_urls.append(MODELS.get('face_detector_yunet').get('url')) + model_paths.append(MODELS.get('face_detector_yunet').get('path')) + if facefusion.globals.face_recognizer_model == 'arcface_blendswap': + model_urls.append(MODELS.get('face_recognizer_arcface_blendswap').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_blendswap').get('path')) + if facefusion.globals.face_recognizer_model == 'arcface_inswapper': + model_urls.append(MODELS.get('face_recognizer_arcface_inswapper').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_inswapper').get('path')) + if facefusion.globals.face_recognizer_model == 'arcface_simswap': + model_urls.append(MODELS.get('face_recognizer_arcface_simswap').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_simswap').get('path')) + if facefusion.globals.face_recognizer_model == 'arcface_uniface': + model_urls.append(MODELS.get('face_recognizer_arcface_uniface').get('url')) + model_paths.append(MODELS.get('face_recognizer_arcface_uniface').get('path')) + + if not facefusion.globals.skip_download: process_manager.check() conditional_download(download_directory_path, model_urls) process_manager.end() - return True + return all(is_file(model_path) for model_path in model_paths) def detect_with_retinaface(vision_frame : VisionFrame, face_detector_size : str) -> Tuple[List[BoundingBox], List[FaceLandmark5], List[Score]]: @@ -321,7 +341,8 @@ def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBo for index in keep_indices: bounding_box = bounding_box_list[index] face_landmark_5_68 = face_landmark_5_list[index] - face_landmark_68 = None + face_landmark_68_5 = expand_face_landmark_68_from_5(face_landmark_5_68) + face_landmark_68 = face_landmark_68_5 face_landmark_68_score = 0.0 if facefusion.globals.face_landmarker_score > 0: face_landmark_68, face_landmark_68_score = detect_face_landmark_68(vision_frame, bounding_box) @@ -331,7 +352,8 @@ def create_faces(vision_frame : VisionFrame, bounding_box_list : List[BoundingBo { '5': face_landmark_5_list[index], '5/68': face_landmark_5_68, - '68': face_landmark_68 + '68': face_landmark_68, + '68/5': face_landmark_68_5 } scores : FaceScoreSet = \ { @@ -368,7 +390,7 @@ def calc_embedding(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandma def detect_face_landmark_68(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[FaceLandmark68, Score]: - face_landmarker = get_face_analyser().get('face_landmarker') + face_landmarker = get_face_analyser().get('face_landmarkers').get('68') scale = 195 / numpy.subtract(bounding_box[2:], bounding_box[:2]).max() translation = (256 - numpy.add(bounding_box[2:], bounding_box[:2]) * scale) * 0.5 crop_vision_frame, affine_matrix = warp_face_by_translation(temp_vision_frame, translation, scale, (256, 256)) @@ -390,6 +412,18 @@ def detect_face_landmark_68(temp_vision_frame : VisionFrame, 
bounding_box : Boun return face_landmark_68, face_landmark_68_score +def expand_face_landmark_68_from_5(face_landmark_5 : FaceLandmark5) -> FaceLandmark68: + face_landmarker = get_face_analyser().get('face_landmarkers').get('68_5') + affine_matrix = estimate_matrix_by_face_landmark_5(face_landmark_5, 'ffhq_512', (1, 1)) + face_landmark_5 = cv2.transform(face_landmark_5.reshape(1, -1, 2), affine_matrix).reshape(-1, 2) + face_landmark_68_5 = face_landmarker.run(None, + { + face_landmarker.get_inputs()[0].name: [ face_landmark_5 ] + })[0][0] + face_landmark_68_5 = cv2.transform(face_landmark_68_5.reshape(1, -1, 2), cv2.invertAffineTransform(affine_matrix)).reshape(-1, 2) + return face_landmark_68_5 + + def detect_gender_age(temp_vision_frame : VisionFrame, bounding_box : BoundingBox) -> Tuple[int, int]: gender_age = get_face_analyser().get('gender_age') bounding_box = bounding_box.reshape(2, -1) diff --git a/facefusion/face_helper.py b/facefusion/face_helper.py index b74c9e45..83eff56e 100644 --- a/facefusion/face_helper.py +++ b/facefusion/face_helper.py @@ -43,16 +43,21 @@ WARP_TEMPLATES : WarpTemplateSet =\ } -def warp_face_by_face_landmark_5(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Tuple[VisionFrame, Matrix]: +def estimate_matrix_by_face_landmark_5(face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Matrix: normed_warp_template = WARP_TEMPLATES.get(warp_template) * crop_size affine_matrix = cv2.estimateAffinePartial2D(face_landmark_5, normed_warp_template, method = cv2.RANSAC, ransacReprojThreshold = 100)[0] + return affine_matrix + + +def warp_face_by_face_landmark_5(temp_vision_frame : VisionFrame, face_landmark_5 : FaceLandmark5, warp_template : WarpTemplate, crop_size : Size) -> Tuple[VisionFrame, Matrix]: + affine_matrix = estimate_matrix_by_face_landmark_5(face_landmark_5, warp_template, crop_size) crop_vision_frame = cv2.warpAffine(temp_vision_frame, affine_matrix, crop_size, borderMode = cv2.BORDER_REPLICATE, flags = cv2.INTER_AREA) return crop_vision_frame, affine_matrix def warp_face_by_bounding_box(temp_vision_frame : VisionFrame, bounding_box : BoundingBox, crop_size : Size) -> Tuple[VisionFrame, Matrix]: - source_points = numpy.array([ [ bounding_box[0], bounding_box[1] ], [bounding_box[2], bounding_box[1] ], [ bounding_box[0], bounding_box[3] ] ], dtype = numpy.float32) - target_points = numpy.array([ [ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ] ], dtype = numpy.float32) + source_points = numpy.array([ [ bounding_box[0], bounding_box[1] ], [bounding_box[2], bounding_box[1] ], [ bounding_box[0], bounding_box[3] ] ]).astype(numpy.float32) + target_points = numpy.array([ [ 0, 0 ], [ crop_size[0], 0 ], [ 0, crop_size[1] ] ]).astype(numpy.float32) affine_matrix = cv2.getAffineTransform(source_points, target_points) if bounding_box[2] - bounding_box[0] > crop_size[0] or bounding_box[3] - bounding_box[1] > crop_size[1]: interpolation_method = cv2.INTER_AREA @@ -112,14 +117,14 @@ def distance_to_face_landmark_5(points : numpy.ndarray[Any, Any], distance : num return face_landmark_5 -def convert_face_landmark_68_to_5(landmark_68 : FaceLandmark68) -> FaceLandmark5: +def convert_face_landmark_68_to_5(face_landmark_68 : FaceLandmark68) -> FaceLandmark5: face_landmark_5 = numpy.array( [ - numpy.mean(landmark_68[36:42], axis = 0), - numpy.mean(landmark_68[42:48], axis = 0), - landmark_68[30], - landmark_68[48], - landmark_68[54] + numpy.mean(face_landmark_68[36:42], axis = 0), 
+ numpy.mean(face_landmark_68[42:48], axis = 0), + face_landmark_68[30], + face_landmark_68[48], + face_landmark_68[54] ]) return face_landmark_5 diff --git a/facefusion/face_masker.py b/facefusion/face_masker.py index 062bc9db..647e44f5 100755 --- a/facefusion/face_masker.py +++ b/facefusion/face_masker.py @@ -11,7 +11,7 @@ import facefusion.globals from facefusion import process_manager from facefusion.typing import FaceLandmark68, VisionFrame, Mask, Padding, FaceMaskRegion, ModelSet from facefusion.execution import apply_execution_provider_options -from facefusion.filesystem import resolve_relative_path +from facefusion.filesystem import resolve_relative_path, is_file from facefusion.download import conditional_download FACE_OCCLUDER = None @@ -49,6 +49,8 @@ def get_face_occluder() -> Any: global FACE_OCCLUDER with THREAD_LOCK: + while process_manager.is_checking(): + sleep(0.5) if FACE_OCCLUDER is None: model_path = MODELS.get('face_occluder').get('path') FACE_OCCLUDER = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) @@ -80,17 +82,23 @@ def clear_face_parser() -> None: def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_urls =\ + [ + MODELS.get('face_occluder').get('url'), + MODELS.get('face_parser').get('url') + ] + model_paths =\ + [ + MODELS.get('face_occluder').get('path'), + MODELS.get('face_parser').get('path') + ] + if not facefusion.globals.skip_download: - download_directory_path = resolve_relative_path('../.assets/models') - model_urls =\ - [ - MODELS.get('face_occluder').get('url'), - MODELS.get('face_parser').get('url'), - ] process_manager.check() conditional_download(download_directory_path, model_urls) process_manager.end() - return True + return all(is_file(model_path) for model_path in model_paths) @lru_cache(maxsize = None) @@ -139,7 +147,7 @@ def create_region_mask(crop_vision_frame : VisionFrame, face_mask_regions : List def create_mouth_mask(face_landmark_68 : FaceLandmark68) -> Mask: convex_hull = cv2.convexHull(face_landmark_68[numpy.r_[3:14, 31:36]].astype(numpy.int32)) - mouth_mask : Mask = numpy.zeros((512, 512), dtype = numpy.float32) + mouth_mask : Mask = numpy.zeros((512, 512)).astype(numpy.float32) mouth_mask = cv2.fillConvexPoly(mouth_mask, convex_hull, 1.0) mouth_mask = cv2.erode(mouth_mask.clip(0, 1), numpy.ones((21, 3))) mouth_mask = cv2.GaussianBlur(mouth_mask, (0, 0), sigmaX = 1, sigmaY = 15) diff --git a/facefusion/ffmpeg.py b/facefusion/ffmpeg.py index 1d3c5a3d..6413e45c 100644 --- a/facefusion/ffmpeg.py +++ b/facefusion/ffmpeg.py @@ -1,20 +1,24 @@ from typing import List, Optional +import os import subprocess import filetype import facefusion.globals -from facefusion import process_manager +from facefusion import logger, process_manager from facefusion.typing import OutputVideoPreset, Fps, AudioBuffer from facefusion.filesystem import get_temp_frames_pattern, get_temp_output_video_path +from facefusion.vision import restrict_video_fps def run_ffmpeg(args : List[str]) -> bool: - commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'quiet' ] + commands = [ 'ffmpeg', '-hide_banner', '-loglevel', 'error' ] commands.extend(args) - process = subprocess.Popen(commands, stdout = subprocess.PIPE) + process = subprocess.Popen(commands, stderr = subprocess.PIPE, stdout = subprocess.PIPE) while process_manager.is_processing(): try: + if facefusion.globals.log_level == 'debug': + log_debug(process) return process.wait(timeout = 
0.5) == 0 except subprocess.TimeoutExpired: continue @@ -27,6 +31,15 @@ def open_ffmpeg(args : List[str]) -> subprocess.Popen[bytes]: return subprocess.Popen(commands, stdin = subprocess.PIPE, stdout = subprocess.PIPE) +def log_debug(process : subprocess.Popen[bytes]) -> None: + _, stderr = process.communicate() + errors = stderr.decode().split(os.linesep) + + for error in errors: + if error.strip(): + logger.debug(error.strip(), __name__.upper()) + + def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fps : Fps) -> bool: trim_frame_start = facefusion.globals.trim_frame_start trim_frame_end = facefusion.globals.trim_frame_end @@ -46,9 +59,10 @@ def extract_frames(target_path : str, temp_video_resolution : str, temp_video_fp def merge_video(target_path : str, output_video_resolution : str, output_video_fps : Fps) -> bool: + temp_video_fps = restrict_video_fps(target_path, output_video_fps) temp_output_video_path = get_temp_output_video_path(target_path) temp_frames_pattern = get_temp_frames_pattern(target_path, '%04d') - commands = [ '-hwaccel', 'auto', '-s', str(output_video_resolution), '-r', str(output_video_fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ] + commands = [ '-hwaccel', 'auto', '-s', str(output_video_resolution), '-r', str(temp_video_fps), '-i', temp_frames_pattern, '-c:v', facefusion.globals.output_video_encoder ] if facefusion.globals.output_video_encoder in [ 'libx264', 'libx265' ]: output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) @@ -62,7 +76,7 @@ def merge_video(target_path : str, output_video_resolution : str, output_video_f if facefusion.globals.output_video_encoder in [ 'h264_amf', 'hevc_amf' ]: output_video_compression = round(51 - (facefusion.globals.output_video_quality * 0.51)) commands.extend([ '-qp_i', str(output_video_compression), '-qp_p', str(output_video_compression), '-quality', map_amf_preset(facefusion.globals.output_video_preset) ]) - commands.extend([ '-pix_fmt', 'yuv420p', '-colorspace', 'bt709', '-y', temp_output_video_path ]) + commands.extend([ '-vf', 'framerate=fps=' + str(output_video_fps), '-pix_fmt', 'yuv420p', '-colorspace', 'bt709', '-y', temp_output_video_path ]) return run_ffmpeg(commands) @@ -79,8 +93,8 @@ def finalize_image(output_path : str, output_image_resolution : str) -> bool: return run_ffmpeg(commands) -def read_audio_buffer(target_path : str, sample_rate : int, total_channel : int) -> Optional[AudioBuffer]: - commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(total_channel), '-' ] +def read_audio_buffer(target_path : str, sample_rate : int, channel_total : int) -> Optional[AudioBuffer]: + commands = [ '-i', target_path, '-vn', '-f', 's16le', '-acodec', 'pcm_s16le', '-ar', str(sample_rate), '-ac', str(channel_total), '-'] process = open_ffmpeg(commands) audio_buffer, _ = process.communicate() if process.returncode == 0: @@ -106,25 +120,17 @@ def restore_audio(target_path : str, output_path : str, output_video_fps : Fps) def replace_audio(target_path : str, audio_path : str, output_path : str) -> bool: temp_output_path = get_temp_output_video_path(target_path) - commands = [ '-hwaccel', 'auto', '-i', temp_output_path, '-i', audio_path, '-c:v', 'copy', '-af', 'apad', '-shortest', '-map', '0:v:0', '-map', '1:a:0', '-y', output_path ] + commands = [ '-hwaccel', 'auto', '-i', temp_output_path, '-i', audio_path, '-c:v', 'copy', '-af', 'apad', '-map', '0:v:0', '-map', '1:a:0', 
'-shortest', '-y', output_path ] return run_ffmpeg(commands) def map_nvenc_preset(output_video_preset : OutputVideoPreset) -> Optional[str]: - if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast' ]: - return 'p1' - if output_video_preset == 'faster': - return 'p2' - if output_video_preset == 'fast': - return 'p3' + if output_video_preset in [ 'ultrafast', 'superfast', 'veryfast', 'faster', 'fast' ]: + return 'fast' if output_video_preset == 'medium': - return 'p4' - if output_video_preset == 'slow': - return 'p5' - if output_video_preset == 'slower': - return 'p6' - if output_video_preset == 'veryslow': - return 'p7' + return 'medium' + if output_video_preset in [ 'slow', 'slower', 'veryslow' ]: + return 'slow' return None diff --git a/facefusion/globals.py b/facefusion/globals.py index 10b0d14b..d736880a 100755 --- a/facefusion/globals.py +++ b/facefusion/globals.py @@ -7,6 +7,7 @@ source_paths : Optional[List[str]] = None target_path : Optional[str] = None output_path : Optional[str] = None # misc +force_download : Optional[bool] = None skip_download : Optional[bool] = None headless : Optional[bool] = None log_level : Optional[LogLevel] = None diff --git a/facefusion/installer.py b/facefusion/installer.py index 1b5ec7b3..9c2b4312 100644 --- a/facefusion/installer.py +++ b/facefusion/installer.py @@ -9,26 +9,29 @@ from argparse import ArgumentParser, HelpFormatter from facefusion import metadata, wording +if platform.system().lower() == 'darwin': + os.environ['SYSTEM_VERSION_COMPAT'] = '0' + ONNXRUNTIMES : Dict[str, Tuple[str, str]] = {} if platform.system().lower() == 'darwin': ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.1') else: - ONNXRUNTIMES['default'] = ('onnxruntime', '1.16.3') + ONNXRUNTIMES['default'] = ('onnxruntime', '1.17.1') ONNXRUNTIMES['cuda-12.2'] = ('onnxruntime-gpu', '1.17.1') - ONNXRUNTIMES['cuda-11.8'] = ('onnxruntime-gpu', '1.16.3') - ONNXRUNTIMES['openvino'] = ('onnxruntime-openvino', '1.16.0') + ONNXRUNTIMES['cuda-11.8'] = ('onnxruntime-gpu', '1.17.1') + ONNXRUNTIMES['openvino'] = ('onnxruntime-openvino', '1.17.1') if platform.system().lower() == 'linux': ONNXRUNTIMES['rocm-5.4.2'] = ('onnxruntime-rocm', '1.16.3') ONNXRUNTIMES['rocm-5.6'] = ('onnxruntime-rocm', '1.16.3') if platform.system().lower() == 'windows': - ONNXRUNTIMES['directml'] = ('onnxruntime-directml', '1.16.0') + ONNXRUNTIMES['directml'] = ('onnxruntime-directml', '1.17.1') def cli() -> None: program = ArgumentParser(formatter_class = lambda prog: HelpFormatter(prog, max_help_position = 130)) program.add_argument('--onnxruntime', help = wording.get('help.install_dependency').format(dependency = 'onnxruntime'), choices = ONNXRUNTIMES.keys()) - program.add_argument('--skip-venv', help = wording.get('help.skip_venv'), action = 'store_true') + program.add_argument('--skip-conda', help = wording.get('help.skip_conda'), action = 'store_true') program.add_argument('-v', '--version', version = metadata.get('name') + ' ' + metadata.get('version'), action = 'version') run(program) @@ -37,10 +40,9 @@ def run(program : ArgumentParser) -> None: args = program.parse_args() python_id = 'cp' + str(sys.version_info.major) + str(sys.version_info.minor) - if platform.system().lower() == 'darwin': - os.environ['SYSTEM_VERSION_COMPAT'] = '0' - if not args.skip_venv: - os.environ['PIP_REQUIRE_VIRTUALENV'] = '1' + if not args.skip_conda and 'CONDA_PREFIX' not in os.environ: + sys.stdout.write(wording.get('conda_not_activated') + os.linesep) + sys.exit(1) if args.onnxruntime: answers =\ { diff --git 
a/facefusion/metadata.py b/facefusion/metadata.py index 13673056..5b8e3aa7 100644 --- a/facefusion/metadata.py +++ b/facefusion/metadata.py @@ -2,7 +2,7 @@ METADATA =\ { 'name': 'FaceFusion', 'description': 'Next generation face swapper and enhancer', - 'version': '2.4.1', + 'version': '2.5.0', 'license': 'MIT', 'author': 'Henry Ruhs', 'url': 'https://facefusion.io' diff --git a/facefusion/processors/frame/choices.py b/facefusion/processors/frame/choices.py index 28f511d8..d6337ba2 100755 --- a/facefusion/processors/frame/choices.py +++ b/facefusion/processors/frame/choices.py @@ -1,13 +1,15 @@ from typing import List from facefusion.common_helper import create_int_range -from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameEnhancerModel, LipSyncerModel +from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel -face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender' ] -face_enhancer_models : List[FaceEnhancerModel] = [ 'codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'restoreformer_plus_plus' ] +face_debugger_items : List[FaceDebuggerItem] = [ 'bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender' ] +face_enhancer_models : List[FaceEnhancerModel] = [ 'codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'gpen_bfr_1024', 'gpen_bfr_2048', 'restoreformer_plus_plus' ] face_swapper_models : List[FaceSwapperModel] = [ 'blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256' ] -frame_enhancer_models : List[FrameEnhancerModel] = [ 'lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'span_kendata_x4' ] +frame_colorizer_models : List[FrameColorizerModel] = [ 'ddcolor', 'ddcolor_artistic', 'deoldify_artistic' ] +frame_enhancer_models : List[FrameEnhancerModel] = [ 'lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x2', 'real_esrgan_x2_fp16', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'real_hatgan_x4', 'span_kendata_x4' ] lip_syncer_models : List[LipSyncerModel] = [ 'wav2lip_gan' ] face_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) +frame_colorizer_blend_range : List[int] = create_int_range(0, 100, 1) frame_enhancer_blend_range : List[int] = create_int_range(0, 100, 1) diff --git a/facefusion/processors/frame/globals.py b/facefusion/processors/frame/globals.py index 5fa85395..9d2841fc 100755 --- a/facefusion/processors/frame/globals.py +++ b/facefusion/processors/frame/globals.py @@ -1,11 +1,13 @@ from typing import List, Optional -from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameEnhancerModel, LipSyncerModel +from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel face_debugger_items : Optional[List[FaceDebuggerItem]] = None face_enhancer_model : Optional[FaceEnhancerModel] = None face_enhancer_blend : Optional[int] = None face_swapper_model : Optional[FaceSwapperModel] = None +frame_colorizer_model : Optional[FrameColorizerModel] = None +frame_colorizer_blend : Optional[int] 
= None frame_enhancer_model : Optional[FrameEnhancerModel] = None frame_enhancer_blend : Optional[int] = None lip_syncer_model : Optional[LipSyncerModel] = None diff --git a/facefusion/processors/frame/modules/face_debugger.py b/facefusion/processors/frame/modules/face_debugger.py index f784d710..ded5c645 100755 --- a/facefusion/processors/frame/modules/face_debugger.py +++ b/facefusion/processors/frame/modules/face_debugger.py @@ -11,7 +11,7 @@ from facefusion.face_masker import create_static_box_mask, create_occlusion_mask from facefusion.face_helper import warp_face_by_face_landmark_5, categorize_age, categorize_gender from facefusion.face_store import get_reference_faces from facefusion.content_analyser import clear_content_analyser -from facefusion.typing import Face, VisionFrame, UpdateProcess, ProcessMode, QueuePayload +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, QueuePayload from facefusion.vision import read_image, read_static_image, write_image from facefusion.processors.frame.typings import FaceDebuggerInputs from facefusion.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices @@ -74,6 +74,7 @@ def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFra bounding_box = target_face.bounding_box.astype(numpy.int32) temp_vision_frame = temp_vision_frame.copy() has_face_landmark_5_fallback = numpy.array_equal(target_face.landmarks.get('5'), target_face.landmarks.get('5/68')) + has_face_landmark_68_fallback = numpy.array_equal(target_face.landmarks.get('68'), target_face.landmarks.get('68/5')) if 'bounding-box' in frame_processors_globals.face_debugger_items: cv2.rectangle(temp_vision_frame, (bounding_box[0], bounding_box[1]), (bounding_box[2], bounding_box[3]), primary_color, 2) @@ -109,7 +110,11 @@ def debug_face(target_face : Face, temp_vision_frame : VisionFrame) -> VisionFra if 'face-landmark-68' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('68')): face_landmark_68 = target_face.landmarks.get('68').astype(numpy.int32) for index in range(face_landmark_68.shape[0]): - cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, secondary_color, -1) + cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, tertiary_color if has_face_landmark_68_fallback else secondary_color, -1) + if 'face-landmark-68/5' in frame_processors_globals.face_debugger_items and numpy.any(target_face.landmarks.get('68')): + face_landmark_68 = target_face.landmarks.get('68/5').astype(numpy.int32) + for index in range(face_landmark_68.shape[0]): + cv2.circle(temp_vision_frame, (face_landmark_68[index][0], face_landmark_68[index][1]), 3, primary_color, -1) if bounding_box[3] - bounding_box[1] > 50 and bounding_box[2] - bounding_box[0] > 50: top = bounding_box[1] left = bounding_box[0] - 20 @@ -157,7 +162,7 @@ def process_frame(inputs : FaceDebuggerInputs) -> VisionFrame: return target_vision_frame -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None for queue_payload in process_manager.manage(queue_payloads): @@ -169,7 +174,7 @@ def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload] 
'target_vision_frame': target_vision_frame }) write_image(target_vision_path, output_vision_frame) - update_progress() + update_progress(1) def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: diff --git a/facefusion/processors/frame/modules/face_enhancer.py b/facefusion/processors/frame/modules/face_enhancer.py index 6e034f40..154aa10c 100755 --- a/facefusion/processors/frame/modules/face_enhancer.py +++ b/facefusion/processors/frame/modules/face_enhancer.py @@ -16,7 +16,7 @@ from facefusion.execution import apply_execution_provider_options from facefusion.content_analyser import clear_content_analyser from facefusion.face_store import get_reference_faces from facefusion.normalizer import normalize_output_path -from facefusion.typing import Face, VisionFrame, UpdateProcess, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload from facefusion.common_helper import create_metavar from facefusion.filesystem import is_file, is_image, is_video, resolve_relative_path from facefusion.download import conditional_download, is_download_done @@ -73,6 +73,20 @@ MODELS : ModelSet =\ 'template': 'ffhq_512', 'size': (512, 512) }, + 'gpen_bfr_1024': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_1024.onnx', + 'path': resolve_relative_path('../.assets/models/gpen_bfr_1024.onnx'), + 'template': 'ffhq_512', + 'size': (1024, 1024) + }, + 'gpen_bfr_2048': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/gpen_bfr_2048.onnx', + 'path': resolve_relative_path('../.assets/models/gpen_bfr_2048.onnx'), + 'template': 'ffhq_512', + 'size': (2048, 2048) + }, 'restoreformer_plus_plus': { 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/restoreformer_plus_plus.onnx', @@ -131,22 +145,25 @@ def apply_args(program : ArgumentParser) -> None: def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + if not facefusion.globals.skip_download: - download_directory_path = resolve_relative_path('../.assets/models') - model_url = get_options('model').get('url') process_manager.check() conditional_download(download_directory_path, [ model_url ]) process_manager.end() - return True + return is_file(model_path) def post_check() -> bool: model_url = get_options('model').get('url') model_path = get_options('model').get('path') + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) return False - elif not is_file(model_path): + if not is_file(model_path): logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) return False return True @@ -202,7 +219,7 @@ def apply_enhance(crop_vision_frame : VisionFrame) -> VisionFrame: if frame_processor_input.name == 'input': frame_processor_inputs[frame_processor_input.name] = crop_vision_frame if frame_processor_input.name == 'weight': - weight = numpy.array([ 1 ], dtype = numpy.double) + weight = numpy.array([ 1 ]).astype(numpy.double) frame_processor_inputs[frame_processor_input.name] = weight with THREAD_SEMAPHORE: crop_vision_frame = frame_processor.run(None, frame_processor_inputs)[0][0] @@ -256,7 +273,7 @@ def process_frame(inputs 
: FaceEnhancerInputs) -> VisionFrame: return target_vision_frame -def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: +def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None for queue_payload in process_manager.manage(queue_payloads): @@ -268,7 +285,7 @@ def process_frames(source_path : List[str], queue_payloads : List[QueuePayload], 'target_vision_frame': target_vision_frame }) write_image(target_vision_path, output_vision_frame) - update_progress() + update_progress(1) def process_image(source_path : str, target_path : str, output_path : str) -> None: diff --git a/facefusion/processors/frame/modules/face_swapper.py b/facefusion/processors/frame/modules/face_swapper.py index c7da87af..11bf32b8 100755 --- a/facefusion/processors/frame/modules/face_swapper.py +++ b/facefusion/processors/frame/modules/face_swapper.py @@ -1,6 +1,7 @@ from typing import Any, List, Literal, Optional from argparse import ArgumentParser from time import sleep +import platform import threading import numpy import onnx @@ -15,10 +16,9 @@ from facefusion.face_analyser import get_one_face, get_average_face, get_many_fa from facefusion.face_masker import create_static_box_mask, create_occlusion_mask, create_region_mask, clear_face_occluder, clear_face_parser from facefusion.face_helper import warp_face_by_face_landmark_5, paste_back from facefusion.face_store import get_reference_faces -from facefusion.common_helper import extract_major_version from facefusion.content_analyser import clear_content_analyser from facefusion.normalizer import normalize_output_path -from facefusion.typing import Face, Embedding, VisionFrame, UpdateProcess, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.typing import Face, Embedding, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload from facefusion.filesystem import is_file, is_image, has_image, is_video, filter_image_paths, resolve_relative_path from facefusion.download import conditional_download, is_download_done from facefusion.vision import read_image, read_static_image, read_static_images, write_image @@ -27,7 +27,7 @@ from facefusion.processors.frame import globals as frame_processors_globals from facefusion.processors.frame import choices as frame_processors_choices FRAME_PROCESSOR = None -MODEL_MATRIX = None +MODEL_INITIALIZER = None THREAD_LOCK : threading.Lock = threading.Lock() NAME = __name__.upper() MODELS : ModelSet =\ @@ -114,23 +114,23 @@ def clear_frame_processor() -> None: FRAME_PROCESSOR = None -def get_model_matrix() -> Any: - global MODEL_MATRIX +def get_model_initializer() -> Any: + global MODEL_INITIALIZER with THREAD_LOCK: while process_manager.is_checking(): sleep(0.5) - if MODEL_MATRIX is None: + if MODEL_INITIALIZER is None: model_path = get_options('model').get('path') model = onnx.load(model_path) - MODEL_MATRIX = numpy_helper.to_array(model.graph.initializer[-1]) - return MODEL_MATRIX + MODEL_INITIALIZER = numpy_helper.to_array(model.graph.initializer[-1]) + return MODEL_INITIALIZER -def clear_model_matrix() -> None: - global MODEL_MATRIX +def clear_model_initializer() -> None: + global MODEL_INITIALIZER - MODEL_MATRIX = None + MODEL_INITIALIZER = None def get_options(key : Literal['model']) -> Any: @@ -151,8 +151,7 @@ def set_options(key : Literal['model'], 
value : Any) -> None: def register_args(program : ArgumentParser) -> None: - onnxruntime_version = extract_major_version(onnxruntime.__version__) - if onnxruntime_version > (1, 16): + if platform.system().lower() == 'darwin': face_swapper_model_fallback = 'inswapper_128' else: face_swapper_model_fallback = 'inswapper_128_fp16' @@ -173,22 +172,25 @@ def apply_args(program : ArgumentParser) -> None: def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + if not facefusion.globals.skip_download: - download_directory_path = resolve_relative_path('../.assets/models') - model_url = get_options('model').get('url') process_manager.check() conditional_download(download_directory_path, [ model_url ]) process_manager.end() - return True + return is_file(model_path) def post_check() -> bool: model_url = get_options('model').get('url') model_path = get_options('model').get('path') + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) return False - elif not is_file(model_path): + if not is_file(model_path): logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) return False return True @@ -216,8 +218,8 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_model_initializer() clear_frame_processor() - clear_model_matrix() if facefusion.globals.video_memory_strategy == 'strict': clear_face_analyser() clear_content_analyser() @@ -281,9 +283,9 @@ def prepare_source_frame(source_face : Face) -> VisionFrame: def prepare_source_embedding(source_face : Face) -> Embedding: model_type = get_options('model').get('type') if model_type == 'inswapper': - model_matrix = get_model_matrix() + model_initializer = get_model_initializer() source_embedding = source_face.embedding.reshape((1, -1)) - source_embedding = numpy.dot(source_embedding, model_matrix) / numpy.linalg.norm(source_embedding) + source_embedding = numpy.dot(source_embedding, model_initializer) / numpy.linalg.norm(source_embedding) else: source_embedding = source_face.normed_embedding.reshape(1, -1) return source_embedding @@ -332,7 +334,7 @@ def process_frame(inputs : FaceSwapperInputs) -> VisionFrame: return target_vision_frame -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None source_frames = read_static_images(source_paths) source_face = get_average_face(source_frames) @@ -347,7 +349,7 @@ def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload] 'target_vision_frame': target_vision_frame }) write_image(target_vision_path, output_vision_frame) - update_progress() + update_progress(1) def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: diff --git a/facefusion/processors/frame/modules/frame_colorizer.py b/facefusion/processors/frame/modules/frame_colorizer.py new file mode 100644 index 00000000..4b36c4a4 --- /dev/null +++ 
b/facefusion/processors/frame/modules/frame_colorizer.py @@ -0,0 +1,232 @@ +from typing import Any, List, Literal, Optional +from argparse import ArgumentParser +from time import sleep +import threading +import cv2 +import numpy +import onnxruntime + +import facefusion.globals +import facefusion.processors.frame.core as frame_processors +from facefusion import config, process_manager, logger, wording +from facefusion.face_analyser import clear_face_analyser +from facefusion.content_analyser import clear_content_analyser +from facefusion.execution import apply_execution_provider_options +from facefusion.normalizer import normalize_output_path +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload +from facefusion.common_helper import create_metavar +from facefusion.filesystem import is_file, resolve_relative_path, is_image, is_video +from facefusion.download import conditional_download, is_download_done +from facefusion.vision import read_image, read_static_image, write_image +from facefusion.processors.frame.typings import FrameColorizerInputs +from facefusion.processors.frame import globals as frame_processors_globals +from facefusion.processors.frame import choices as frame_processors_choices + +FRAME_PROCESSOR = None +THREAD_LOCK : threading.Lock = threading.Lock() +THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore() +NAME = __name__.upper() +MODELS : ModelSet =\ +{ + 'ddcolor': + { + 'type': 'ddcolor', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ddcolor.onnx', + 'path': resolve_relative_path('../.assets/models/ddcolor.onnx'), + 'size': (512, 512) + }, + 'ddcolor_artistic': + { + 'type': 'ddcolor', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/ddcolor_artistic.onnx', + 'path': resolve_relative_path('../.assets/models/ddcolor_artistic.onnx'), + 'size': (512, 512) + }, + 'deoldify_artistic': + { + 'type': 'deoldify', + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/deoldify_artistic.onnx', + 'path': resolve_relative_path('../.assets/models/deoldify_artistic.onnx'), + 'size': (512, 512) + } +} +OPTIONS : Optional[OptionsWithModel] = None + + +def get_frame_processor() -> Any: + global FRAME_PROCESSOR + + with THREAD_LOCK: + while process_manager.is_checking(): + sleep(0.5) + if FRAME_PROCESSOR is None: + model_path = get_options('model').get('path') + FRAME_PROCESSOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return FRAME_PROCESSOR + + +def clear_frame_processor() -> None: + global FRAME_PROCESSOR + + FRAME_PROCESSOR = None + + +def get_options(key : Literal['model']) -> Any: + global OPTIONS + + if OPTIONS is None: + OPTIONS =\ + { + 'model': MODELS[frame_processors_globals.frame_colorizer_model] + } + return OPTIONS.get(key) + + +def set_options(key : Literal['model'], value : Any) -> None: + global OPTIONS + + OPTIONS[key] = value + + +def register_args(program : ArgumentParser) -> None: + program.add_argument('--frame-colorizer-model', help = wording.get('help.frame_colorizer_model'), default = config.get_str_value('frame_processors.frame_colorizer_model', 'ddcolor'), choices = frame_processors_choices.frame_colorizer_models) + program.add_argument('--frame-colorizer-blend', help = wording.get('help.frame_colorizer_blend'), type = int, default = config.get_int_value('frame_processors.frame_colorizer_blend', '100'), 
choices = frame_processors_choices.frame_colorizer_blend_range, metavar = create_metavar(frame_processors_choices.frame_colorizer_blend_range)) + + +def apply_args(program : ArgumentParser) -> None: + args = program.parse_args() + frame_processors_globals.frame_colorizer_model = args.frame_colorizer_model + frame_processors_globals.frame_colorizer_blend = args.frame_colorizer_blend + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def post_check() -> bool: + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): + logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) + return False + if not is_file(model_path): + logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def pre_process(mode : ProcessMode) -> bool: + if mode in [ 'output', 'preview' ] and not is_image(facefusion.globals.target_path) and not is_video(facefusion.globals.target_path): + logger.error(wording.get('select_image_or_video_target') + wording.get('exclamation_mark'), NAME) + return False + if mode == 'output' and not normalize_output_path(facefusion.globals.target_path, facefusion.globals.output_path): + logger.error(wording.get('select_file_or_directory_output') + wording.get('exclamation_mark'), NAME) + return False + return True + + +def post_process() -> None: + read_static_image.cache_clear() + if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': + clear_frame_processor() + if facefusion.globals.video_memory_strategy == 'strict': + clear_face_analyser() + clear_content_analyser() + + +def colorize_frame(temp_vision_frame : VisionFrame) -> VisionFrame: + frame_processor = get_frame_processor() + prepare_vision_frame = prepare_temp_frame(temp_vision_frame) + with THREAD_SEMAPHORE: + color_vision_frame = frame_processor.run(None, + { + frame_processor.get_inputs()[0].name: prepare_vision_frame + })[0][0] + color_vision_frame = merge_color_frame(temp_vision_frame, color_vision_frame) + color_vision_frame = blend_frame(temp_vision_frame, color_vision_frame) + return color_vision_frame + + +def prepare_temp_frame(temp_vision_frame : VisionFrame) -> VisionFrame: + model_size = get_options('model').get('size') + model_type = get_options('model').get('type') + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_BGR2GRAY) + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_GRAY2RGB) + if model_type == 'ddcolor': + temp_vision_frame = (temp_vision_frame / 255.0).astype(numpy.float32) + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_RGB2LAB)[:, :, :1] + temp_vision_frame = numpy.concatenate((temp_vision_frame, numpy.zeros_like(temp_vision_frame), numpy.zeros_like(temp_vision_frame)), axis = -1) + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_LAB2RGB) + temp_vision_frame = cv2.resize(temp_vision_frame, model_size) + temp_vision_frame = temp_vision_frame.transpose((2, 0, 1)) + temp_vision_frame = numpy.expand_dims(temp_vision_frame, axis = 
0).astype(numpy.float32) + return temp_vision_frame + + +def merge_color_frame(temp_vision_frame : VisionFrame, color_vision_frame : VisionFrame) -> VisionFrame: + model_type = get_options('model').get('type') + color_vision_frame = color_vision_frame.transpose(1, 2, 0) + color_vision_frame = cv2.resize(color_vision_frame, (temp_vision_frame.shape[1], temp_vision_frame.shape[0])) + if model_type == 'ddcolor': + temp_vision_frame = (temp_vision_frame / 255.0).astype(numpy.float32) + temp_vision_frame = cv2.cvtColor(temp_vision_frame, cv2.COLOR_BGR2LAB)[:, :, :1] + color_vision_frame = numpy.concatenate((temp_vision_frame, color_vision_frame), axis = -1) + color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_LAB2BGR) + color_vision_frame = (color_vision_frame * 255.0).round().astype(numpy.uint8) + if model_type == 'deoldify': + temp_blue_channel, _, _ = cv2.split(temp_vision_frame) + color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_BGR2RGB).astype(numpy.uint8) + color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_BGR2LAB) + _, color_green_channel, color_red_channel = cv2.split(color_vision_frame) + color_vision_frame = cv2.merge((temp_blue_channel, color_green_channel, color_red_channel)) + color_vision_frame = cv2.cvtColor(color_vision_frame, cv2.COLOR_LAB2BGR) + return color_vision_frame + + +def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : VisionFrame) -> VisionFrame: + frame_colorizer_blend = 1 - (frame_processors_globals.frame_colorizer_blend / 100) + temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_colorizer_blend, paste_vision_frame, 1 - frame_colorizer_blend, 0) + return temp_vision_frame + + +def get_reference_frame(source_face : Face, target_face : Face, temp_vision_frame : VisionFrame) -> VisionFrame: + pass + + +def process_frame(inputs : FrameColorizerInputs) -> VisionFrame: + target_vision_frame = inputs.get('target_vision_frame') + return colorize_frame(target_vision_frame) + + +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: + for queue_payload in process_manager.manage(queue_payloads): + target_vision_path = queue_payload['frame_path'] + target_vision_frame = read_image(target_vision_path) + output_vision_frame = process_frame( + { + 'target_vision_frame': target_vision_frame + }) + write_image(target_vision_path, output_vision_frame) + update_progress(1) + + +def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: + target_vision_frame = read_static_image(target_path) + output_vision_frame = process_frame( + { + 'target_vision_frame': target_vision_frame + }) + write_image(output_path, output_vision_frame) + + +def process_video(source_paths : List[str], temp_frame_paths : List[str]) -> None: + frame_processors.multi_process_frames(None, temp_frame_paths, process_frames) diff --git a/facefusion/processors/frame/modules/frame_enhancer.py b/facefusion/processors/frame/modules/frame_enhancer.py index e29ae84b..e98563ca 100644 --- a/facefusion/processors/frame/modules/frame_enhancer.py +++ b/facefusion/processors/frame/modules/frame_enhancer.py @@ -13,7 +13,7 @@ from facefusion.face_analyser import clear_face_analyser from facefusion.content_analyser import clear_content_analyser from facefusion.execution import apply_execution_provider_options from facefusion.normalizer import normalize_output_path -from facefusion.typing import Face, VisionFrame, UpdateProcess, ProcessMode, ModelSet, 
OptionsWithModel, QueuePayload +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, QueuePayload from facefusion.common_helper import create_metavar from facefusion.filesystem import is_file, resolve_relative_path, is_image, is_video from facefusion.download import conditional_download, is_download_done @@ -41,6 +41,20 @@ MODELS : ModelSet =\ 'size': (128, 8, 2), 'scale': 4 }, + 'real_esrgan_x2': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x2.onnx', + 'path': resolve_relative_path('../.assets/models/real_esrgan_x2.onnx'), + 'size': (128, 8, 2), + 'scale': 2 + }, + 'real_esrgan_x2_fp16': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x2_fp16.onnx', + 'path': resolve_relative_path('../.assets/models/real_esrgan_x2_fp16.onnx'), + 'size': (128, 8, 2), + 'scale': 2 + }, 'real_esrgan_x4': { 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_esrgan_x4.onnx', @@ -55,6 +69,13 @@ MODELS : ModelSet =\ 'size': (128, 8, 2), 'scale': 4 }, + 'real_hatgan_x4': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/real_hatgan_x4.onnx', + 'path': resolve_relative_path('../.assets/models/real_hatgan_x4.onnx'), + 'size': (256, 8, 2), + 'scale': 4 + }, 'span_kendata_x4': { 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/span_kendata_x4.onnx', @@ -113,22 +134,25 @@ def apply_args(program : ArgumentParser) -> None: def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + if not facefusion.globals.skip_download: - download_directory_path = resolve_relative_path('../.assets/models') - model_url = get_options('model').get('url') process_manager.check() conditional_download(download_directory_path, [ model_url ]) process_manager.end() - return True + return is_file(model_path) def post_check() -> bool: model_url = get_options('model').get('url') model_path = get_options('model').get('path') + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) return False - elif not is_file(model_path): + if not is_file(model_path): logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) return False return True @@ -172,7 +196,7 @@ def enhance_frame(temp_vision_frame : VisionFrame) -> VisionFrame: def prepare_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: - vision_tile_frame = numpy.expand_dims(vision_tile_frame[:,:,::-1], axis = 0) + vision_tile_frame = numpy.expand_dims(vision_tile_frame[:, :, ::-1], axis = 0) vision_tile_frame = vision_tile_frame.transpose(0, 3, 1, 2) vision_tile_frame = vision_tile_frame.astype(numpy.float32) / 255 return vision_tile_frame @@ -180,14 +204,14 @@ def prepare_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: def normalize_tile_frame(vision_tile_frame : VisionFrame) -> VisionFrame: vision_tile_frame = vision_tile_frame.transpose(0, 2, 3, 1).squeeze(0) * 255 - vision_tile_frame = vision_tile_frame.clip(0, 255).astype(numpy.uint8)[:,:,::-1] + vision_tile_frame = vision_tile_frame.clip(0, 255).astype(numpy.uint8)[:, :, ::-1] return vision_tile_frame -def blend_frame(temp_vision_frame : VisionFrame, paste_vision_frame : 
VisionFrame) -> VisionFrame: +def blend_frame(temp_vision_frame : VisionFrame, merge_vision_frame : VisionFrame) -> VisionFrame: frame_enhancer_blend = 1 - (frame_processors_globals.frame_enhancer_blend / 100) - temp_vision_frame = cv2.resize(temp_vision_frame, (paste_vision_frame.shape[1], paste_vision_frame.shape[0])) - temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_enhancer_blend, paste_vision_frame, 1 - frame_enhancer_blend, 0) + temp_vision_frame = cv2.resize(temp_vision_frame, (merge_vision_frame.shape[1], merge_vision_frame.shape[0])) + temp_vision_frame = cv2.addWeighted(temp_vision_frame, frame_enhancer_blend, merge_vision_frame, 1 - frame_enhancer_blend, 0) return temp_vision_frame @@ -200,7 +224,7 @@ def process_frame(inputs : FrameEnhancerInputs) -> VisionFrame: return enhance_frame(target_vision_frame) -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: +def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: for queue_payload in process_manager.manage(queue_payloads): target_vision_path = queue_payload['frame_path'] target_vision_frame = read_image(target_vision_path) @@ -209,7 +233,7 @@ def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload] 'target_vision_frame': target_vision_frame }) write_image(target_vision_path, output_vision_frame) - update_progress() + update_progress(1) def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: diff --git a/facefusion/processors/frame/modules/lip_syncer.py b/facefusion/processors/frame/modules/lip_syncer.py index 25a4439b..901b3e4b 100755 --- a/facefusion/processors/frame/modules/lip_syncer.py +++ b/facefusion/processors/frame/modules/lip_syncer.py @@ -16,19 +16,19 @@ from facefusion.face_helper import warp_face_by_face_landmark_5, warp_face_by_bo from facefusion.face_store import get_reference_faces from facefusion.content_analyser import clear_content_analyser from facefusion.normalizer import normalize_output_path -from facefusion.typing import Face, VisionFrame, UpdateProcess, ProcessMode, ModelSet, OptionsWithModel, AudioFrame, QueuePayload +from facefusion.typing import Face, VisionFrame, UpdateProgress, ProcessMode, ModelSet, OptionsWithModel, AudioFrame, QueuePayload from facefusion.filesystem import is_file, has_audio, resolve_relative_path from facefusion.download import conditional_download, is_download_done -from facefusion.audio import read_static_audio, get_audio_frame, create_empty_audio_frame +from facefusion.audio import read_static_voice, get_voice_frame, create_empty_audio_frame from facefusion.filesystem import is_image, is_video, filter_audio_paths from facefusion.common_helper import get_first -from facefusion.vision import read_image, write_image, read_static_image +from facefusion.vision import read_image, read_static_image, write_image, restrict_video_fps from facefusion.processors.frame.typings import LipSyncerInputs +from facefusion.voice_extractor import clear_voice_extractor from facefusion.processors.frame import globals as frame_processors_globals from facefusion.processors.frame import choices as frame_processors_choices FRAME_PROCESSOR = None -MODEL_MATRIX = None THREAD_LOCK : threading.Lock = threading.Lock() NAME = __name__.upper() MODELS : ModelSet =\ @@ -36,7 +36,7 @@ MODELS : ModelSet =\ 'wav2lip_gan': { 'url': 
'https://github.com/facefusion/facefusion-assets/releases/download/models/wav2lip_gan.onnx', - 'path': resolve_relative_path('../.assets/models/wav2lip_gan.onnx'), + 'path': resolve_relative_path('../.assets/models/wav2lip_gan.onnx') } } OPTIONS : Optional[OptionsWithModel] = None @@ -87,22 +87,25 @@ def apply_args(program : ArgumentParser) -> None: def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = get_options('model').get('url') + model_path = get_options('model').get('path') + if not facefusion.globals.skip_download: - download_directory_path = resolve_relative_path('../.assets/models') - model_url = get_options('model').get('url') process_manager.check() conditional_download(download_directory_path, [ model_url ]) process_manager.end() - return True + return is_file(model_path) def post_check() -> bool: model_url = get_options('model').get('url') model_path = get_options('model').get('path') + if not facefusion.globals.skip_download and not is_download_done(model_url, model_path): logger.error(wording.get('model_download_not_done') + wording.get('exclamation_mark'), NAME) return False - elif not is_file(model_path): + if not is_file(model_path): logger.error(wording.get('model_file_not_present') + wording.get('exclamation_mark'), NAME) return False return True @@ -123,7 +126,7 @@ def pre_process(mode : ProcessMode) -> bool: def post_process() -> None: read_static_image.cache_clear() - read_static_audio.cache_clear() + read_static_voice.cache_clear() if facefusion.globals.video_memory_strategy == 'strict' or facefusion.globals.video_memory_strategy == 'moderate': clear_frame_processor() if facefusion.globals.video_memory_strategy == 'strict': @@ -131,6 +134,7 @@ def post_process() -> None: clear_content_analyser() clear_face_occluder() clear_face_parser() + clear_voice_extractor() def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_frame : VisionFrame) -> VisionFrame: @@ -138,14 +142,11 @@ def sync_lip(target_face : Face, temp_audio_frame : AudioFrame, temp_vision_fram crop_mask_list = [] temp_audio_frame = prepare_audio_frame(temp_audio_frame) crop_vision_frame, affine_matrix = warp_face_by_face_landmark_5(temp_vision_frame, target_face.landmarks.get('5/68'), 'ffhq_512', (512, 512)) - if numpy.any(target_face.landmarks.get('68')): - face_landmark_68 = cv2.transform(target_face.landmarks.get('68').reshape(1, -1, 2), affine_matrix).reshape(-1, 2) - bounding_box = create_bounding_box_from_face_landmark_68(face_landmark_68) - bounding_box[1] -= numpy.abs(bounding_box[3] - bounding_box[1]) * 0.125 - mouth_mask = create_mouth_mask(face_landmark_68) - crop_mask_list.append(mouth_mask) - else: - bounding_box = target_face.bounding_box + face_landmark_68 = cv2.transform(target_face.landmarks.get('68').reshape(1, -1, 2), affine_matrix).reshape(-1, 2) + bounding_box = create_bounding_box_from_face_landmark_68(face_landmark_68) + bounding_box[1] -= numpy.abs(bounding_box[3] - bounding_box[1]) * 0.125 + mouth_mask = create_mouth_mask(face_landmark_68) + crop_mask_list.append(mouth_mask) box_mask = create_static_box_mask(crop_vision_frame.shape[:2][::-1], facefusion.globals.face_mask_blur, facefusion.globals.face_mask_padding) crop_mask_list.append(box_mask) @@ -216,14 +217,15 @@ def process_frame(inputs : LipSyncerInputs) -> VisionFrame: return target_vision_frame -def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProcess) -> None: +def 
process_frames(source_paths : List[str], queue_payloads : List[QueuePayload], update_progress : UpdateProgress) -> None: reference_faces = get_reference_faces() if 'reference' in facefusion.globals.face_selector_mode else None source_audio_path = get_first(filter_audio_paths(source_paths)) + temp_video_fps = restrict_video_fps(facefusion.globals.target_path, facefusion.globals.output_video_fps) for queue_payload in process_manager.manage(queue_payloads): frame_number = queue_payload['frame_number'] target_vision_path = queue_payload['frame_path'] - source_audio_frame = get_audio_frame(source_audio_path, facefusion.globals.output_video_fps, frame_number) + source_audio_frame = get_voice_frame(source_audio_path, temp_video_fps, frame_number) if not numpy.any(source_audio_frame): source_audio_frame = create_empty_audio_frame() target_vision_frame = read_image(target_vision_path) @@ -234,7 +236,7 @@ def process_frames(source_paths : List[str], queue_payloads : List[QueuePayload] 'target_vision_frame': target_vision_frame }) write_image(target_vision_path, output_vision_frame) - update_progress() + update_progress(1) def process_image(source_paths : List[str], target_path : str, output_path : str) -> None: diff --git a/facefusion/processors/frame/typings.py b/facefusion/processors/frame/typings.py index c7de5ef1..62391700 100644 --- a/facefusion/processors/frame/typings.py +++ b/facefusion/processors/frame/typings.py @@ -2,10 +2,11 @@ from typing import Literal, TypedDict from facefusion.typing import Face, FaceSet, AudioFrame, VisionFrame -FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender'] -FaceEnhancerModel = Literal['codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'restoreformer_plus_plus'] +FaceDebuggerItem = Literal['bounding-box', 'face-landmark-5', 'face-landmark-5/68', 'face-landmark-68', 'face-landmark-68/5', 'face-mask', 'face-detector-score', 'face-landmarker-score', 'age', 'gender'] +FaceEnhancerModel = Literal['codeformer', 'gfpgan_1.2', 'gfpgan_1.3', 'gfpgan_1.4', 'gpen_bfr_256', 'gpen_bfr_512', 'gpen_bfr_1024', 'gpen_bfr_2048', 'restoreformer_plus_plus'] FaceSwapperModel = Literal['blendswap_256', 'inswapper_128', 'inswapper_128_fp16', 'simswap_256', 'simswap_512_unofficial', 'uniface_256'] -FrameEnhancerModel = Literal['lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'span_kendata_x4'] +FrameColorizerModel = Literal['ddcolor', 'ddcolor_artistic', 'deoldify_artistic'] +FrameEnhancerModel = Literal['lsdir_x4', 'nomos8k_sc_x4', 'real_esrgan_x2', 'real_esrgan_x2_fp16', 'real_esrgan_x4', 'real_esrgan_x4_fp16', 'real_hatgan_x4', 'span_kendata_x4'] LipSyncerModel = Literal['wav2lip_gan'] FaceDebuggerInputs = TypedDict('FaceDebuggerInputs', @@ -24,6 +25,10 @@ FaceSwapperInputs = TypedDict('FaceSwapperInputs', 'source_face' : Face, 'target_vision_frame' : VisionFrame }) +FrameColorizerInputs = TypedDict('FrameColorizerInputs', +{ + 'target_vision_frame' : VisionFrame +}) FrameEnhancerInputs = TypedDict('FrameEnhancerInputs', { 'target_vision_frame' : VisionFrame diff --git a/facefusion/typing.py b/facefusion/typing.py index e16bcb1a..7972c057 100755 --- a/facefusion/typing.py +++ b/facefusion/typing.py @@ -9,7 +9,8 @@ FaceLandmarkSet = TypedDict('FaceLandmarkSet', { '5' : FaceLandmark5, # type: ignore[valid-type] '5/68' : FaceLandmark5, # type: ignore[valid-type] - '68' : FaceLandmark68 # type: 
ignore[valid-type] + '68' : FaceLandmark68, # type: ignore[valid-type] + '68/5' : FaceLandmark68 # type: ignore[valid-type] }) Score = float FaceScoreSet = TypedDict('FaceScoreSet', @@ -42,8 +43,10 @@ Translation = numpy.ndarray[Any, Any] AudioBuffer = bytes Audio = numpy.ndarray[Any, Any] +AudioChunk = numpy.ndarray[Any, Any] AudioFrame = numpy.ndarray[Any, Any] Spectrogram = numpy.ndarray[Any, Any] +MelFilterBank = numpy.ndarray[Any, Any] Fps = float Padding = Tuple[int, int, int, int] @@ -55,8 +58,8 @@ QueuePayload = TypedDict('QueuePayload', 'frame_number' : int, 'frame_path' : str }) -UpdateProcess = Callable[[], None] -ProcessFrames = Callable[[List[str], List[QueuePayload], UpdateProcess], None] +UpdateProgress = Callable[[int], None] +ProcessFrames = Callable[[List[str], List[QueuePayload], UpdateProgress], None] WarpTemplate = Literal['arcface_112_v1', 'arcface_112_v2', 'arcface_128_v2', 'ffhq_512'] WarpTemplateSet = Dict[WarpTemplate, numpy.ndarray[Any, Any]] diff --git a/facefusion/uis/components/benchmark.py b/facefusion/uis/components/benchmark.py index 2160d09d..6322febd 100644 --- a/facefusion/uis/components/benchmark.py +++ b/facefusion/uis/components/benchmark.py @@ -76,7 +76,7 @@ def listen() -> None: def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[Any], None, None]: - facefusion.globals.source_paths = [ '.assets/examples/source.jpg' ] + facefusion.globals.source_paths = [ '.assets/examples/source.jpg', '.assets/examples/source.mp3' ] facefusion.globals.output_path = tempfile.gettempdir() facefusion.globals.face_landmarker_score = 0 facefusion.globals.temp_frame_format = 'bmp' @@ -87,7 +87,8 @@ def start(benchmark_runs : List[str], benchmark_cycles : int) -> Generator[List[ if target_paths: pre_process() for target_path in target_paths: - benchmark_results.append(benchmark(target_path, benchmark_cycles)) + facefusion.globals.target_path = target_path + benchmark_results.append(benchmark(benchmark_cycles)) yield benchmark_results post_process() @@ -103,10 +104,8 @@ def post_process() -> None: clear_static_faces() -def benchmark(target_path : str, benchmark_cycles : int) -> List[Any]: +def benchmark(benchmark_cycles : int) -> List[Any]: process_times = [] - total_fps = 0.0 - facefusion.globals.target_path = target_path video_frame_total = count_video_frame_total(facefusion.globals.target_path) output_video_resolution = detect_video_resolution(facefusion.globals.target_path) facefusion.globals.output_video_resolution = pack_resolution(output_video_resolution) @@ -116,13 +115,12 @@ def benchmark(target_path : str, benchmark_cycles : int) -> List[Any]: start_time = perf_counter() conditional_process() end_time = perf_counter() - process_time = end_time - start_time - total_fps += video_frame_total / process_time - process_times.append(process_time) + process_times.append(end_time - start_time) average_run = round(statistics.mean(process_times), 2) fastest_run = round(min(process_times), 2) slowest_run = round(max(process_times), 2) - relative_fps = round(total_fps / benchmark_cycles, 2) + relative_fps = round(video_frame_total * benchmark_cycles / sum(process_times), 2) + return\ [ facefusion.globals.target_path, diff --git a/facefusion/uis/components/execution_queue_count.py b/facefusion/uis/components/execution_queue_count.py index 18456a51..1b6725e5 100644 --- a/facefusion/uis/components/execution_queue_count.py +++ b/facefusion/uis/components/execution_queue_count.py @@ -21,7 +21,7 @@ def render() -> None: def listen() -> None: - 
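facefusion/typing.py retypes the progress callback: UpdateProcess (Callable[[], None]) becomes UpdateProgress (Callable[[int], None]), and every process_frames implementation in this diff now reports update_progress(1) per written frame instead of a bare update_progress(). A minimal caller-side sketch that satisfies the new contract; the real driver is multi_process_frames() in facefusion/processors/frame/core.py, which is not part of this diff, so the wiring below is only an assumption:

from typing import List
from tqdm import tqdm

def run_with_progress(process_frames, source_paths : List[str], frame_paths : List[str]) -> None:
	# QueuePayload carries 'frame_number' and 'frame_path', matching facefusion/typing.py
	queue_payloads = [ { 'frame_number': frame_number, 'frame_path': frame_path } for frame_number, frame_path in enumerate(frame_paths) ]
	with tqdm(total = len(queue_payloads)) as progress:
		# tqdm's update(n) accepts an int step, so it fits UpdateProgress = Callable[[int], None]
		process_frames(source_paths, queue_payloads, progress.update)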
EXECUTION_QUEUE_COUNT_SLIDER.change(update_execution_queue_count, inputs = EXECUTION_QUEUE_COUNT_SLIDER) + EXECUTION_QUEUE_COUNT_SLIDER.release(update_execution_queue_count, inputs = EXECUTION_QUEUE_COUNT_SLIDER) def update_execution_queue_count(execution_queue_count : int = 1) -> None: diff --git a/facefusion/uis/components/execution_thread_count.py b/facefusion/uis/components/execution_thread_count.py index df0d5dfa..4a1f4646 100644 --- a/facefusion/uis/components/execution_thread_count.py +++ b/facefusion/uis/components/execution_thread_count.py @@ -21,7 +21,7 @@ def render() -> None: def listen() -> None: - EXECUTION_THREAD_COUNT_SLIDER.change(update_execution_thread_count, inputs = EXECUTION_THREAD_COUNT_SLIDER) + EXECUTION_THREAD_COUNT_SLIDER.release(update_execution_thread_count, inputs = EXECUTION_THREAD_COUNT_SLIDER) def update_execution_thread_count(execution_thread_count : int = 1) -> None: diff --git a/facefusion/uis/components/face_masker.py b/facefusion/uis/components/face_masker.py index fb111e03..bb1c28c9 100755 --- a/facefusion/uis/components/face_masker.py +++ b/facefusion/uis/components/face_masker.py @@ -92,11 +92,11 @@ def render() -> None: def listen() -> None: FACE_MASK_TYPES_CHECKBOX_GROUP.change(update_face_mask_type, inputs = FACE_MASK_TYPES_CHECKBOX_GROUP, outputs = [ FACE_MASK_TYPES_CHECKBOX_GROUP, FACE_MASK_BOX_GROUP, FACE_MASK_REGION_CHECKBOX_GROUP ]) - FACE_MASK_BLUR_SLIDER.change(update_face_mask_blur, inputs = FACE_MASK_BLUR_SLIDER) + FACE_MASK_BLUR_SLIDER.release(update_face_mask_blur, inputs = FACE_MASK_BLUR_SLIDER) FACE_MASK_REGION_CHECKBOX_GROUP.change(update_face_mask_regions, inputs = FACE_MASK_REGION_CHECKBOX_GROUP, outputs = FACE_MASK_REGION_CHECKBOX_GROUP) face_mask_padding_sliders = [ FACE_MASK_PADDING_TOP_SLIDER, FACE_MASK_PADDING_RIGHT_SLIDER, FACE_MASK_PADDING_BOTTOM_SLIDER, FACE_MASK_PADDING_LEFT_SLIDER ] for face_mask_padding_slider in face_mask_padding_sliders: - face_mask_padding_slider.change(update_face_mask_padding, inputs = face_mask_padding_sliders) + face_mask_padding_slider.release(update_face_mask_padding, inputs = face_mask_padding_sliders) def update_face_mask_type(face_mask_types : List[FaceMaskType]) -> Tuple[gradio.CheckboxGroup, gradio.Group, gradio.CheckboxGroup]: diff --git a/facefusion/uis/components/face_selector.py b/facefusion/uis/components/face_selector.py index df8c3037..19fef55b 100644 --- a/facefusion/uis/components/face_selector.py +++ b/facefusion/uis/components/face_selector.py @@ -10,8 +10,7 @@ from facefusion.vision import get_video_frame, read_static_image, normalize_fram from facefusion.filesystem import is_image, is_video from facefusion.face_analyser import get_many_faces from facefusion.typing import VisionFrame, FaceSelectorMode -from facefusion.uis.core import get_ui_component, register_ui_component -from facefusion.uis.typing import ComponentName +from facefusion.uis.core import get_ui_component, get_ui_components, register_ui_component FACE_SELECTOR_MODE_DROPDOWN : Optional[gradio.Dropdown] = None REFERENCE_FACE_POSITION_GALLERY : Optional[gradio.Gallery] = None @@ -59,39 +58,39 @@ def render() -> None: def listen() -> None: FACE_SELECTOR_MODE_DROPDOWN.change(update_face_selector_mode, inputs = FACE_SELECTOR_MODE_DROPDOWN, outputs = [ REFERENCE_FACE_POSITION_GALLERY, REFERENCE_FACE_DISTANCE_SLIDER ]) REFERENCE_FACE_POSITION_GALLERY.select(clear_and_update_reference_face_position) - REFERENCE_FACE_DISTANCE_SLIDER.change(update_reference_face_distance, inputs = REFERENCE_FACE_DISTANCE_SLIDER) - 
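The slider listeners in execution_queue_count.py, execution_thread_count.py and face_masker.py (and the remaining UI components below) move from change to release, so globals are written and previews recomputed once, when the user lets go of the slider, rather than for every intermediate value while dragging. A self-contained approximation of the wiring in execution_thread_count.py, with the label and defaults simplified:

import gradio

def update_execution_thread_count(execution_thread_count : int = 1) -> None:
	print(execution_thread_count)  # stand-in for writing facefusion.globals.execution_thread_count

with gradio.Blocks() as demo:
	slider = gradio.Slider(label = 'EXECUTION THREAD COUNT', value = 4, step = 1, minimum = 1, maximum = 128)
	slider.release(update_execution_thread_count, inputs = slider)  # fires once, on mouse release
	# slider.change(update_execution_thread_count, inputs = slider) would fire on every intermediate value

demo.launch()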
multi_component_names : List[ComponentName] =\ + REFERENCE_FACE_DISTANCE_SLIDER.release(update_reference_face_distance, inputs = REFERENCE_FACE_DISTANCE_SLIDER) + + for ui_component in get_ui_components( [ 'target_image', 'target_video' - ] - for component_name in multi_component_names: - component = get_ui_component(component_name) - if component: - for method in [ 'upload', 'change', 'clear' ]: - getattr(component, method)(update_reference_face_position) - getattr(component, method)(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) - change_one_component_names : List[ComponentName] =\ + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(update_reference_face_position) + getattr(ui_component, method)(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + + for ui_component in get_ui_components( [ 'face_analyser_order_dropdown', 'face_analyser_age_dropdown', 'face_analyser_gender_dropdown' - ] - for component_name in change_one_component_names: - component = get_ui_component(component_name) - if component: - component.change(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) - change_two_component_names : List[ComponentName] =\ + ]): + ui_component.change(update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + + for ui_component in get_ui_components( [ 'face_detector_model_dropdown', - 'face_detector_size_dropdown', + 'face_detector_size_dropdown' + ]): + ui_component.change(clear_and_update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + + for ui_component in get_ui_components( + [ 'face_detector_score_slider', 'face_landmarker_score_slider' - ] - for component_name in change_two_component_names: - component = get_ui_component(component_name) - if component: - component.change(clear_and_update_reference_position_gallery, outputs = REFERENCE_FACE_POSITION_GALLERY) + ]): + ui_component.release(clear_and_update_reference_position_gallery, outputs=REFERENCE_FACE_POSITION_GALLERY) + preview_frame_slider = get_ui_component('preview_frame_slider') if preview_frame_slider: preview_frame_slider.change(update_reference_frame_number, inputs = preview_frame_slider) diff --git a/facefusion/uis/components/frame_processors_options.py b/facefusion/uis/components/frame_processors_options.py index 74dcdf7d..6cc2dcdc 100755 --- a/facefusion/uis/components/frame_processors_options.py +++ b/facefusion/uis/components/frame_processors_options.py @@ -5,13 +5,15 @@ import facefusion.globals from facefusion import face_analyser, wording from facefusion.processors.frame.core import load_frame_processor_module from facefusion.processors.frame import globals as frame_processors_globals, choices as frame_processors_choices -from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameEnhancerModel, LipSyncerModel +from facefusion.processors.frame.typings import FaceDebuggerItem, FaceEnhancerModel, FaceSwapperModel, FrameColorizerModel, FrameEnhancerModel, LipSyncerModel from facefusion.uis.core import get_ui_component, register_ui_component FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP : Optional[gradio.CheckboxGroup] = None FACE_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None FACE_ENHANCER_BLEND_SLIDER : Optional[gradio.Slider] = None FACE_SWAPPER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FRAME_COLORIZER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None +FRAME_COLORIZER_BLEND_SLIDER : 
Optional[gradio.Slider] = None FRAME_ENHANCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None FRAME_ENHANCER_BLEND_SLIDER : Optional[gradio.Slider] = None LIP_SYNCER_MODEL_DROPDOWN : Optional[gradio.Dropdown] = None @@ -22,6 +24,8 @@ def render() -> None: global FACE_ENHANCER_MODEL_DROPDOWN global FACE_ENHANCER_BLEND_SLIDER global FACE_SWAPPER_MODEL_DROPDOWN + global FRAME_COLORIZER_MODEL_DROPDOWN + global FRAME_COLORIZER_BLEND_SLIDER global FRAME_ENHANCER_MODEL_DROPDOWN global FRAME_ENHANCER_BLEND_SLIDER global LIP_SYNCER_MODEL_DROPDOWN @@ -52,6 +56,20 @@ def render() -> None: value = frame_processors_globals.face_swapper_model, visible = 'face_swapper' in facefusion.globals.frame_processors ) + FRAME_COLORIZER_MODEL_DROPDOWN = gradio.Dropdown( + label = wording.get('uis.frame_colorizer_model_dropdown'), + choices = frame_processors_choices.frame_colorizer_models, + value = frame_processors_globals.frame_colorizer_model, + visible = 'frame_colorizer' in facefusion.globals.frame_processors + ) + FRAME_COLORIZER_BLEND_SLIDER = gradio.Slider( + label = wording.get('uis.frame_colorizer_blend_slider'), + value = frame_processors_globals.frame_colorizer_blend, + step = frame_processors_choices.frame_colorizer_blend_range[1] - frame_processors_choices.frame_colorizer_blend_range[0], + minimum = frame_processors_choices.frame_colorizer_blend_range[0], + maximum = frame_processors_choices.frame_colorizer_blend_range[-1], + visible = 'frame_colorizer' in facefusion.globals.frame_processors + ) FRAME_ENHANCER_MODEL_DROPDOWN = gradio.Dropdown( label = wording.get('uis.frame_enhancer_model_dropdown'), choices = frame_processors_choices.frame_enhancer_models, @@ -76,6 +94,8 @@ def render() -> None: register_ui_component('face_enhancer_model_dropdown', FACE_ENHANCER_MODEL_DROPDOWN) register_ui_component('face_enhancer_blend_slider', FACE_ENHANCER_BLEND_SLIDER) register_ui_component('face_swapper_model_dropdown', FACE_SWAPPER_MODEL_DROPDOWN) + register_ui_component('frame_colorizer_model_dropdown', FRAME_COLORIZER_MODEL_DROPDOWN) + register_ui_component('frame_colorizer_blend_slider', FRAME_COLORIZER_BLEND_SLIDER) register_ui_component('frame_enhancer_model_dropdown', FRAME_ENHANCER_MODEL_DROPDOWN) register_ui_component('frame_enhancer_blend_slider', FRAME_ENHANCER_BLEND_SLIDER) register_ui_component('lip_syncer_model_dropdown', LIP_SYNCER_MODEL_DROPDOWN) @@ -84,23 +104,26 @@ def render() -> None: def listen() -> None: FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP.change(update_face_debugger_items, inputs = FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP) FACE_ENHANCER_MODEL_DROPDOWN.change(update_face_enhancer_model, inputs = FACE_ENHANCER_MODEL_DROPDOWN, outputs = FACE_ENHANCER_MODEL_DROPDOWN) - FACE_ENHANCER_BLEND_SLIDER.change(update_face_enhancer_blend, inputs = FACE_ENHANCER_BLEND_SLIDER) + FACE_ENHANCER_BLEND_SLIDER.release(update_face_enhancer_blend, inputs = FACE_ENHANCER_BLEND_SLIDER) FACE_SWAPPER_MODEL_DROPDOWN.change(update_face_swapper_model, inputs = FACE_SWAPPER_MODEL_DROPDOWN, outputs = FACE_SWAPPER_MODEL_DROPDOWN) + FRAME_COLORIZER_MODEL_DROPDOWN.change(update_frame_colorizer_model, inputs = FRAME_COLORIZER_MODEL_DROPDOWN, outputs = FRAME_COLORIZER_MODEL_DROPDOWN) + FRAME_COLORIZER_BLEND_SLIDER.release(update_frame_colorizer_blend, inputs = FRAME_COLORIZER_BLEND_SLIDER) FRAME_ENHANCER_MODEL_DROPDOWN.change(update_frame_enhancer_model, inputs = FRAME_ENHANCER_MODEL_DROPDOWN, outputs = FRAME_ENHANCER_MODEL_DROPDOWN) - FRAME_ENHANCER_BLEND_SLIDER.change(update_frame_enhancer_blend, inputs = 
FRAME_ENHANCER_BLEND_SLIDER) + FRAME_ENHANCER_BLEND_SLIDER.release(update_frame_enhancer_blend, inputs = FRAME_ENHANCER_BLEND_SLIDER) LIP_SYNCER_MODEL_DROPDOWN.change(update_lip_syncer_model, inputs = LIP_SYNCER_MODEL_DROPDOWN, outputs = LIP_SYNCER_MODEL_DROPDOWN) frame_processors_checkbox_group = get_ui_component('frame_processors_checkbox_group') if frame_processors_checkbox_group: - frame_processors_checkbox_group.change(update_frame_processors, inputs = frame_processors_checkbox_group, outputs = [ FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP, FACE_ENHANCER_MODEL_DROPDOWN, FACE_ENHANCER_BLEND_SLIDER, FACE_SWAPPER_MODEL_DROPDOWN, FRAME_ENHANCER_MODEL_DROPDOWN, FRAME_ENHANCER_BLEND_SLIDER, LIP_SYNCER_MODEL_DROPDOWN ]) + frame_processors_checkbox_group.change(update_frame_processors, inputs = frame_processors_checkbox_group, outputs = [ FACE_DEBUGGER_ITEMS_CHECKBOX_GROUP, FACE_ENHANCER_MODEL_DROPDOWN, FACE_ENHANCER_BLEND_SLIDER, FACE_SWAPPER_MODEL_DROPDOWN, FRAME_COLORIZER_MODEL_DROPDOWN, FRAME_COLORIZER_BLEND_SLIDER, FRAME_ENHANCER_MODEL_DROPDOWN, FRAME_ENHANCER_BLEND_SLIDER, LIP_SYNCER_MODEL_DROPDOWN ]) -def update_frame_processors(frame_processors : List[str]) -> Tuple[gradio.CheckboxGroup, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown]: +def update_frame_processors(frame_processors : List[str]) -> Tuple[gradio.CheckboxGroup, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider, gradio.Dropdown]: has_face_debugger = 'face_debugger' in frame_processors has_face_enhancer = 'face_enhancer' in frame_processors has_face_swapper = 'face_swapper' in frame_processors + has_frame_colorizer = 'frame_colorizer' in frame_processors has_frame_enhancer = 'frame_enhancer' in frame_processors has_lip_syncer = 'lip_syncer' in frame_processors - return gradio.CheckboxGroup(visible = has_face_debugger), gradio.Dropdown(visible = has_face_enhancer), gradio.Slider(visible = has_face_enhancer), gradio.Dropdown(visible = has_face_swapper), gradio.Dropdown(visible = has_frame_enhancer), gradio.Slider(visible = has_frame_enhancer), gradio.Dropdown(visible = has_lip_syncer) + return gradio.CheckboxGroup(visible = has_face_debugger), gradio.Dropdown(visible = has_face_enhancer), gradio.Slider(visible = has_face_enhancer), gradio.Dropdown(visible = has_face_swapper), gradio.Dropdown(visible = has_frame_colorizer), gradio.Slider(visible = has_frame_colorizer), gradio.Dropdown(visible = has_frame_enhancer), gradio.Slider(visible = has_frame_enhancer), gradio.Dropdown(visible = has_lip_syncer) def update_face_debugger_items(face_debugger_items : List[FaceDebuggerItem]) -> None: @@ -132,6 +155,7 @@ def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> gradio.D if face_swapper_model == 'uniface_256': facefusion.globals.face_recognizer_model = 'arcface_uniface' face_swapper_module = load_frame_processor_module('face_swapper') + face_swapper_module.clear_model_initializer() face_swapper_module.clear_frame_processor() face_swapper_module.set_options('model', face_swapper_module.MODELS[face_swapper_model]) if face_analyser.pre_check() and face_swapper_module.pre_check(): @@ -139,6 +163,20 @@ def update_face_swapper_model(face_swapper_model : FaceSwapperModel) -> gradio.D return gradio.Dropdown() +def update_frame_colorizer_model(frame_colorizer_model : FrameColorizerModel) -> gradio.Dropdown: + frame_processors_globals.frame_colorizer_model = frame_colorizer_model + frame_colorizer_module = 
load_frame_processor_module('frame_colorizer') + frame_colorizer_module.clear_frame_processor() + frame_colorizer_module.set_options('model', frame_colorizer_module.MODELS[frame_colorizer_model]) + if frame_colorizer_module.pre_check(): + return gradio.Dropdown(value = frame_processors_globals.frame_colorizer_model) + return gradio.Dropdown() + + +def update_frame_colorizer_blend(frame_colorizer_blend : int) -> None: + frame_processors_globals.frame_colorizer_blend = frame_colorizer_blend + + def update_frame_enhancer_model(frame_enhancer_model : FrameEnhancerModel) -> gradio.Dropdown: frame_processors_globals.frame_enhancer_model = frame_enhancer_model frame_enhancer_module = load_frame_processor_module('frame_enhancer') diff --git a/facefusion/uis/components/memory.py b/facefusion/uis/components/memory.py index fe0d9723..f67c27ae 100644 --- a/facefusion/uis/components/memory.py +++ b/facefusion/uis/components/memory.py @@ -6,15 +6,15 @@ import facefusion.choices from facefusion.typing import VideoMemoryStrategy from facefusion import wording -VIDEO_MEMORY_STRATEGY : Optional[gradio.Dropdown] = None +VIDEO_MEMORY_STRATEGY_DROPDOWN : Optional[gradio.Dropdown] = None SYSTEM_MEMORY_LIMIT_SLIDER : Optional[gradio.Slider] = None def render() -> None: - global VIDEO_MEMORY_STRATEGY + global VIDEO_MEMORY_STRATEGY_DROPDOWN global SYSTEM_MEMORY_LIMIT_SLIDER - VIDEO_MEMORY_STRATEGY = gradio.Dropdown( + VIDEO_MEMORY_STRATEGY_DROPDOWN = gradio.Dropdown( label = wording.get('uis.video_memory_strategy_dropdown'), choices = facefusion.choices.video_memory_strategies, value = facefusion.globals.video_memory_strategy @@ -29,8 +29,8 @@ def render() -> None: def listen() -> None: - VIDEO_MEMORY_STRATEGY.change(update_video_memory_strategy, inputs = VIDEO_MEMORY_STRATEGY) - SYSTEM_MEMORY_LIMIT_SLIDER.change(update_system_memory_limit, inputs = SYSTEM_MEMORY_LIMIT_SLIDER) + VIDEO_MEMORY_STRATEGY_DROPDOWN.change(update_video_memory_strategy, inputs = VIDEO_MEMORY_STRATEGY_DROPDOWN) + SYSTEM_MEMORY_LIMIT_SLIDER.release(update_system_memory_limit, inputs = SYSTEM_MEMORY_LIMIT_SLIDER) def update_video_memory_strategy(video_memory_strategy : VideoMemoryStrategy) -> None: diff --git a/facefusion/uis/components/output_options.py b/facefusion/uis/components/output_options.py index 757316d4..4919920a 100644 --- a/facefusion/uis/components/output_options.py +++ b/facefusion/uis/components/output_options.py @@ -1,4 +1,4 @@ -from typing import Optional, Tuple, List +from typing import Optional, Tuple import gradio import facefusion.globals @@ -6,8 +6,7 @@ import facefusion.choices from facefusion import wording from facefusion.typing import OutputVideoEncoder, OutputVideoPreset, Fps from facefusion.filesystem import is_image, is_video -from facefusion.uis.typing import ComponentName -from facefusion.uis.core import get_ui_component, register_ui_component +from facefusion.uis.core import get_ui_components, register_ui_component from facefusion.vision import detect_image_resolution, create_image_resolutions, detect_video_fps, detect_video_resolution, create_video_resolutions, pack_resolution OUTPUT_PATH_TEXTBOX : Optional[gradio.Textbox] = None @@ -98,23 +97,21 @@ def render() -> None: def listen() -> None: OUTPUT_PATH_TEXTBOX.change(update_output_path, inputs = OUTPUT_PATH_TEXTBOX) - OUTPUT_IMAGE_QUALITY_SLIDER.change(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER) + OUTPUT_IMAGE_QUALITY_SLIDER.release(update_output_image_quality, inputs = OUTPUT_IMAGE_QUALITY_SLIDER) 
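update_frame_colorizer_model and update_frame_colorizer_blend wire the new frame_colorizer processor into the UI. In the module itself (added above), blend_frame weights the original frame by 1 - blend / 100, so a blend of 100 yields the fully colorized frame and 0 returns the input unchanged. A hedged usage sketch that relies only on functions defined in this diff; it assumes the usual bootstrap (facefusion.globals populated, model downloads permitted) and hypothetical file paths:

from facefusion.processors.frame import globals as frame_processors_globals
from facefusion.processors.frame.modules import frame_colorizer

frame_processors_globals.frame_colorizer_model = 'ddcolor'   # or 'ddcolor_artistic' / 'deoldify_artistic'
frame_processors_globals.frame_colorizer_blend = 100         # weight on the original frame: 1 - 100 / 100 = 0

if frame_colorizer.pre_check() and frame_colorizer.post_check():
	# source_paths is unused by this processor, so an empty list is fine
	frame_colorizer.process_image([], 'grayscale_input.jpg', 'colorized_output.jpg')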
OUTPUT_IMAGE_RESOLUTION_DROPDOWN.change(update_output_image_resolution, inputs = OUTPUT_IMAGE_RESOLUTION_DROPDOWN) OUTPUT_VIDEO_ENCODER_DROPDOWN.change(update_output_video_encoder, inputs = OUTPUT_VIDEO_ENCODER_DROPDOWN) OUTPUT_VIDEO_PRESET_DROPDOWN.change(update_output_video_preset, inputs = OUTPUT_VIDEO_PRESET_DROPDOWN) - OUTPUT_VIDEO_QUALITY_SLIDER.change(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER) + OUTPUT_VIDEO_QUALITY_SLIDER.release(update_output_video_quality, inputs = OUTPUT_VIDEO_QUALITY_SLIDER) OUTPUT_VIDEO_RESOLUTION_DROPDOWN.change(update_output_video_resolution, inputs = OUTPUT_VIDEO_RESOLUTION_DROPDOWN) - OUTPUT_VIDEO_FPS_SLIDER.change(update_output_video_fps, inputs = OUTPUT_VIDEO_FPS_SLIDER) - multi_component_names : List[ComponentName] =\ + OUTPUT_VIDEO_FPS_SLIDER.release(update_output_video_fps, inputs = OUTPUT_VIDEO_FPS_SLIDER) + + for ui_component in get_ui_components( [ 'target_image', 'target_video' - ] - for component_name in multi_component_names: - component = get_ui_component(component_name) - if component: - for method in [ 'upload', 'change', 'clear' ]: - getattr(component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(remote_update, outputs = [ OUTPUT_IMAGE_QUALITY_SLIDER, OUTPUT_IMAGE_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_ENCODER_DROPDOWN, OUTPUT_VIDEO_PRESET_DROPDOWN, OUTPUT_VIDEO_QUALITY_SLIDER, OUTPUT_VIDEO_RESOLUTION_DROPDOWN, OUTPUT_VIDEO_FPS_SLIDER ]) def remote_update() -> Tuple[gradio.Slider, gradio.Dropdown, gradio.Dropdown, gradio.Dropdown, gradio.Slider, gradio.Dropdown, gradio.Slider]: diff --git a/facefusion/uis/components/preview.py b/facefusion/uis/components/preview.py index 5b6468ab..7f69ffa7 100755 --- a/facefusion/uis/components/preview.py +++ b/facefusion/uis/components/preview.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Optional from time import sleep import cv2 import gradio @@ -16,8 +16,7 @@ from facefusion.vision import get_video_frame, count_video_frame_total, normaliz from facefusion.filesystem import is_image, is_video, filter_audio_paths from facefusion.content_analyser import analyse_frame from facefusion.processors.frame.core import load_frame_processor_module -from facefusion.uis.typing import ComponentName -from facefusion.uis.core import get_ui_component, register_ui_component +from facefusion.uis.core import get_ui_component, get_ui_components, register_ui_component PREVIEW_IMAGE : Optional[gradio.Image] = None PREVIEW_FRAME_SLIDER : Optional[gradio.Slider] = None @@ -72,69 +71,73 @@ def listen() -> None: reference_face_position_gallery = get_ui_component('reference_face_position_gallery') if reference_face_position_gallery: reference_face_position_gallery.select(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) - multi_one_component_names : List[ComponentName] =\ + + for ui_component in get_ui_components( [ 'source_audio', 'source_image', 'target_image', 'target_video' - ] - for component_name in multi_one_component_names: - component = get_ui_component(component_name) - if component: - for method in [ 'upload', 'change', 'clear' ]: - getattr(component, method)(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) - 
multi_two_component_names : List[ComponentName] =\ + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( [ 'target_image', 'target_video' - ] - for component_name in multi_two_component_names: - component = get_ui_component(component_name) - if component: - for method in [ 'upload', 'change', 'clear' ]: - getattr(component, method)(update_preview_frame_slider, outputs = PREVIEW_FRAME_SLIDER) - change_one_component_names : List[ComponentName] =\ + ]): + for method in [ 'upload', 'change', 'clear' ]: + getattr(ui_component, method)(update_preview_frame_slider, outputs = PREVIEW_FRAME_SLIDER) + + for ui_component in get_ui_components( [ 'face_debugger_items_checkbox_group', + 'face_selector_mode_dropdown', + 'face_mask_types_checkbox_group', + 'face_mask_region_checkbox_group', + 'face_analyser_order_dropdown', + 'face_analyser_age_dropdown', + 'face_analyser_gender_dropdown' + ]): + ui_component.change(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ 'face_enhancer_blend_slider', + 'frame_colorizer_blend_slider', 'frame_enhancer_blend_slider', 'trim_frame_start_slider', 'trim_frame_end_slider', - 'face_selector_mode_dropdown', 'reference_face_distance_slider', - 'face_mask_types_checkbox_group', 'face_mask_blur_slider', 'face_mask_padding_top_slider', 'face_mask_padding_bottom_slider', 'face_mask_padding_left_slider', 'face_mask_padding_right_slider', - 'face_mask_region_checkbox_group', - 'face_analyser_order_dropdown', - 'face_analyser_age_dropdown', - 'face_analyser_gender_dropdown', 'output_video_fps_slider' - ] - for component_name in change_one_component_names: - component = get_ui_component(component_name) - if component: - component.change(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) - change_two_component_names : List[ComponentName] =\ + ]): + ui_component.release(update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( [ 'frame_processors_checkbox_group', 'face_enhancer_model_dropdown', 'face_swapper_model_dropdown', + 'frame_colorizer_model_dropdown', 'frame_enhancer_model_dropdown', 'lip_syncer_model_dropdown', 'face_detector_model_dropdown', - 'face_detector_size_dropdown', + 'face_detector_size_dropdown' + ]): + ui_component.change(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + + for ui_component in get_ui_components( + [ 'face_detector_score_slider', 'face_landmarker_score_slider' - ] - for component_name in change_two_component_names: - component = get_ui_component(component_name) - if component: - component.change(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) + ]): + ui_component.release(clear_and_update_preview_image, inputs = PREVIEW_FRAME_SLIDER, outputs = PREVIEW_IMAGE) def clear_and_update_preview_image(frame_number : int = 0) -> gradio.Image: diff --git a/facefusion/uis/components/trim_frame.py b/facefusion/uis/components/trim_frame.py index bce70e53..3a33b350 100644 --- a/facefusion/uis/components/trim_frame.py +++ b/facefusion/uis/components/trim_frame.py @@ -47,8 +47,8 @@ def render() -> None: def listen() -> None: - TRIM_FRAME_START_SLIDER.change(update_trim_frame_start, inputs = TRIM_FRAME_START_SLIDER) - TRIM_FRAME_END_SLIDER.change(update_trim_frame_end, 
inputs = TRIM_FRAME_END_SLIDER) + TRIM_FRAME_START_SLIDER.release(update_trim_frame_start, inputs = TRIM_FRAME_START_SLIDER) + TRIM_FRAME_END_SLIDER.release(update_trim_frame_end, inputs = TRIM_FRAME_END_SLIDER) target_video = get_ui_component('target_video') if target_video: for method in [ 'upload', 'change', 'clear' ]: diff --git a/facefusion/uis/components/webcam.py b/facefusion/uis/components/webcam.py index 9f9b967a..e49432a1 100644 --- a/facefusion/uis/components/webcam.py +++ b/facefusion/uis/components/webcam.py @@ -1,4 +1,4 @@ -from typing import Optional, Generator, Deque, List +from typing import Optional, Generator, Deque import os import platform import subprocess @@ -19,8 +19,8 @@ from facefusion.face_analyser import get_average_face from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module from facefusion.ffmpeg import open_ffmpeg from facefusion.vision import normalize_frame_color, read_static_images, unpack_resolution -from facefusion.uis.typing import StreamMode, WebcamMode, ComponentName -from facefusion.uis.core import get_ui_component +from facefusion.uis.typing import StreamMode, WebcamMode +from facefusion.uis.core import get_ui_component, get_ui_components WEBCAM_CAPTURE : Optional[cv2.VideoCapture] = None WEBCAM_IMAGE : Optional[gradio.Image] = None @@ -76,7 +76,8 @@ def listen() -> None: if webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider: start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE) WEBCAM_STOP_BUTTON.click(stop, cancels = start_event) - change_two_component_names : List[ComponentName] =\ + + for ui_component in get_ui_components( [ 'frame_processors_checkbox_group', 'face_swapper_model_dropdown', @@ -84,11 +85,8 @@ def listen() -> None: 'frame_enhancer_model_dropdown', 'lip_syncer_model_dropdown', 'source_image' - ] - for component_name in change_two_component_names: - component = get_ui_component(component_name) - if component: - component.change(update, cancels = start_event) + ]): + ui_component.change(update, cancels = start_event) def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[VisionFrame, None, None]: diff --git a/facefusion/uis/core.py b/facefusion/uis/core.py index 68cdd86e..e5d2e6ff 100644 --- a/facefusion/uis/core.py +++ b/facefusion/uis/core.py @@ -51,14 +51,24 @@ def get_ui_layouts_modules(ui_layouts : List[str]) -> List[ModuleType]: return UI_LAYOUT_MODULES -def get_ui_component(name : ComponentName) -> Optional[Component]: - if name in UI_COMPONENTS: - return UI_COMPONENTS[name] +def get_ui_component(component_name : ComponentName) -> Optional[Component]: + if component_name in UI_COMPONENTS: + return UI_COMPONENTS[component_name] return None -def register_ui_component(name : ComponentName, component: Component) -> None: - UI_COMPONENTS[name] = component +def get_ui_components(component_names : List[ComponentName]) -> Optional[List[Component]]: + ui_components = [] + + for component_name in component_names: + component = get_ui_component(component_name) + if component: + ui_components.append(component) + return ui_components + + +def register_ui_component(component_name : ComponentName, component: Component) -> None: + UI_COMPONENTS[component_name] = component def launch() -> None: diff --git a/facefusion/uis/layouts/benchmark.py b/facefusion/uis/layouts/benchmark.py index 8168bf1d..f70f37ae 100644 --- 
a/facefusion/uis/layouts/benchmark.py +++ b/facefusion/uis/layouts/benchmark.py @@ -11,6 +11,7 @@ def pre_check() -> bool: conditional_download('.assets/examples', [ 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.jpg', + 'https://github.com/facefusion/facefusion-assets/releases/download/examples/source.mp3', 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-240p.mp4', 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-360p.mp4', 'https://github.com/facefusion/facefusion-assets/releases/download/examples/target-540p.mp4', diff --git a/facefusion/uis/overrides.py b/facefusion/uis/overrides.py index 8fff726a..7f3c4707 100644 --- a/facefusion/uis/overrides.py +++ b/facefusion/uis/overrides.py @@ -6,7 +6,7 @@ import base64 def encode_array_to_base64(array : numpy.ndarray[Any, Any]) -> str: buffer = cv2.imencode('.jpg', array[:, :, ::-1])[1] - return 'data:image/jpg;base64,' + base64.b64encode(buffer.tobytes()).decode('utf-8') + return 'data:image/jpeg;base64,' + base64.b64encode(buffer.tobytes()).decode('utf-8') def encode_pil_to_base64(image : Any) -> str: diff --git a/facefusion/uis/typing.py b/facefusion/uis/typing.py index 0dc2b13e..b4942ba0 100644 --- a/facefusion/uis/typing.py +++ b/facefusion/uis/typing.py @@ -34,6 +34,8 @@ ComponentName = Literal\ 'face_enhancer_model_dropdown', 'face_enhancer_blend_slider', 'face_swapper_model_dropdown', + 'frame_colorizer_model_dropdown', + 'frame_colorizer_blend_slider', 'frame_enhancer_model_dropdown', 'frame_enhancer_blend_slider', 'lip_syncer_model_dropdown', diff --git a/facefusion/voice_extractor.py b/facefusion/voice_extractor.py new file mode 100644 index 00000000..9fd24c66 --- /dev/null +++ b/facefusion/voice_extractor.py @@ -0,0 +1,132 @@ +from typing import Any, Tuple +from time import sleep +import threading +import scipy +import numpy +import onnxruntime + +import facefusion.globals +from facefusion import process_manager +from facefusion.typing import ModelSet, AudioChunk, Audio +from facefusion.execution import apply_execution_provider_options +from facefusion.filesystem import resolve_relative_path, is_file +from facefusion.download import conditional_download + +VOICE_EXTRACTOR = None +THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore() +THREAD_LOCK : threading.Lock = threading.Lock() +MODELS : ModelSet =\ +{ + 'voice_extractor': + { + 'url': 'https://github.com/facefusion/facefusion-assets/releases/download/models/voice_extractor.onnx', + 'path': resolve_relative_path('../.assets/models/voice_extractor.onnx') + } +} + + +def get_voice_extractor() -> Any: + global VOICE_EXTRACTOR + + with THREAD_LOCK: + while process_manager.is_checking(): + sleep(0.5) + if VOICE_EXTRACTOR is None: + model_path = MODELS.get('voice_extractor').get('path') + VOICE_EXTRACTOR = onnxruntime.InferenceSession(model_path, providers = apply_execution_provider_options(facefusion.globals.execution_providers)) + return VOICE_EXTRACTOR + + +def clear_voice_extractor() -> None: + global VOICE_EXTRACTOR + + VOICE_EXTRACTOR = None + + +def pre_check() -> bool: + download_directory_path = resolve_relative_path('../.assets/models') + model_url = MODELS.get('voice_extractor').get('url') + model_path = MODELS.get('voice_extractor').get('path') + + if not facefusion.globals.skip_download: + process_manager.check() + conditional_download(download_directory_path, [ model_url ]) + process_manager.end() + return is_file(model_path) + + +def batch_extract_voice(audio 
: Audio, chunk_size : int, step_size : int) -> Audio: + temp_audio = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) + temp_chunk = numpy.zeros((audio.shape[0], 2)).astype(numpy.float32) + + for start in range(0, audio.shape[0], step_size): + end = min(start + chunk_size, audio.shape[0]) + temp_audio[start:end, ...] += extract_voice(audio[start:end, ...]) + temp_chunk[start:end, ...] += 1 + audio = temp_audio / temp_chunk + return audio + + +def extract_voice(temp_audio_chunk : AudioChunk) -> AudioChunk: + voice_extractor = get_voice_extractor() + chunk_size = 1024 * (voice_extractor.get_inputs()[0].shape[3] - 1) + trim_size = 3840 + temp_audio_chunk, pad_size = prepare_audio_chunk(temp_audio_chunk.T, chunk_size, trim_size) + temp_audio_chunk = decompose_audio_chunk(temp_audio_chunk, trim_size) + with THREAD_SEMAPHORE: + temp_audio_chunk = voice_extractor.run(None, + { + voice_extractor.get_inputs()[0].name: temp_audio_chunk + })[0] + temp_audio_chunk = compose_audio_chunk(temp_audio_chunk, trim_size) + temp_audio_chunk = normalize_audio_chunk(temp_audio_chunk, chunk_size, trim_size, pad_size) + return temp_audio_chunk + + +def prepare_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, trim_size : int) -> Tuple[AudioChunk, int]: + step_size = chunk_size - 2 * trim_size + pad_size = step_size - temp_audio_chunk.shape[1] % step_size + audio_chunk_size = temp_audio_chunk.shape[1] + pad_size + temp_audio_chunk = temp_audio_chunk.astype(numpy.float32) / numpy.iinfo(numpy.int16).max + temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (trim_size, trim_size + pad_size))) + temp_audio_chunks = [] + + for index in range(0, audio_chunk_size, step_size): + temp_audio_chunks.append(temp_audio_chunk[:, index:index + chunk_size]) + temp_audio_chunk = numpy.concatenate(temp_audio_chunks, axis = 0) + temp_audio_chunk = temp_audio_chunk.reshape((-1, chunk_size)) + return temp_audio_chunk, pad_size + + +def decompose_audio_chunk(temp_audio_chunk : AudioChunk, trim_size : int) -> AudioChunk: + frame_size = 7680 + frame_overlap = 6656 + voice_extractor_shape = get_voice_extractor().get_inputs()[0].shape + window = scipy.signal.windows.hann(frame_size) + temp_audio_chunk = scipy.signal.stft(temp_audio_chunk, nperseg = frame_size, noverlap = frame_overlap, window = window)[2] + temp_audio_chunk = numpy.stack((numpy.real(temp_audio_chunk), numpy.imag(temp_audio_chunk)), axis = -1).transpose((0, 3, 1, 2)) + temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, 2, trim_size + 1, voice_extractor_shape[3]).reshape(-1, voice_extractor_shape[1], trim_size + 1, voice_extractor_shape[3]) + temp_audio_chunk = temp_audio_chunk[:, :, :voice_extractor_shape[2]] + temp_audio_chunk /= numpy.sqrt(1.0 / window.sum() ** 2) + return temp_audio_chunk + + +def compose_audio_chunk(temp_audio_chunk : AudioChunk, trim_size : int) -> AudioChunk: + frame_size = 7680 + frame_overlap = 6656 + voice_extractor_shape = get_voice_extractor().get_inputs()[0].shape + window = scipy.signal.windows.hann(frame_size) + temp_audio_chunk = numpy.pad(temp_audio_chunk, ((0, 0), (0, 0), (0, trim_size + 1 - voice_extractor_shape[2]), (0, 0))) + temp_audio_chunk = temp_audio_chunk.reshape(-1, 2, trim_size + 1, voice_extractor_shape[3]).transpose((0, 2, 3, 1)) + temp_audio_chunk = temp_audio_chunk[:, :, :, 0] + 1j * temp_audio_chunk[:, :, :, 1] + temp_audio_chunk = scipy.signal.istft(temp_audio_chunk, nperseg = frame_size, noverlap = frame_overlap, window = window)[1] + temp_audio_chunk *=
numpy.sqrt(1.0 / window.sum() ** 2) + return temp_audio_chunk + + +def normalize_audio_chunk(temp_audio_chunk : AudioChunk, chunk_size : int, trim_size : int, pad_size : int) -> AudioChunk: + temp_audio_chunk = temp_audio_chunk.reshape((-1, 2, chunk_size)) + temp_audio_chunk = temp_audio_chunk[:, :, trim_size:-trim_size].transpose(1, 0, 2) + temp_audio_chunk = temp_audio_chunk.reshape(2, -1)[:, :-pad_size].T + return temp_audio_chunk diff --git a/facefusion/wording.py b/facefusion/wording.py index 39befc0d..79b84e5d 100755 --- a/facefusion/wording.py +++ b/facefusion/wording.py @@ -2,6 +2,7 @@ from typing import Any, Dict, Optional WORDING : Dict[str, Any] =\ { + 'conda_not_activated': 'Conda is not activated', 'python_not_supported': 'Python version is not supported, upgrade to {version} or higher', 'ffmpeg_not_installed': 'FFMpeg is not installed', 'creating_temp': 'Creating temporary resources', @@ -52,12 +53,13 @@ WORDING : Dict[str, Any] =\ { # installer 'install_dependency': 'select the variant of {dependency} to install', - 'skip_venv': 'skip the virtual environment check', + 'skip_conda': 'skip the conda environment check', # general 'source': 'choose single or multiple source images or audios', 'target': 'choose single target image or video', 'output': 'specify the output file or directory', # misc + 'force_download': 'force automate downloads and exit', 'skip_download': 'omit automate downloads and remote lookups', 'headless': 'run the program without a user interface', 'log_level': 'adjust the message severity displayed in the terminal', @@ -66,10 +68,10 @@ WORDING : Dict[str, Any] =\ 'execution_thread_count': 'specify the amount of parallel threads while processing', 'execution_queue_count': 'specify the amount of frames each thread is processing', # memory - 'video_memory_strategy': 'balance fast frame processing and low vram usage', - 'system_memory_limit': 'limit the available ram that can be used while processing', + 'video_memory_strategy': 'balance fast frame processing and low VRAM usage', + 'system_memory_limit': 'limit the available RAM that can be used while processing', # face analyser - 'face_analyser_order': 'specify the order in which the face analyser detects faces.', + 'face_analyser_order': 'specify the order in which the face analyser detects faces', 'face_analyser_age': 'filter the detected faces based on their age', 'face_analyser_gender': 'filter the detected faces based on their gender', 'face_detector_model': 'choose the model responsible for detecting the face', @@ -106,6 +108,8 @@ WORDING : Dict[str, Any] =\ 'face_enhancer_model': 'choose the model responsible for enhancing the face', 'face_enhancer_blend': 'blend the enhanced into the previous face', 'face_swapper_model': 'choose the model responsible for swapping the face', + 'frame_colorizer_model': 'choose the model responsible for colorizing the frame', + 'frame_colorizer_blend': 'blend the colorized into the previous frame', 'frame_enhancer_model': 'choose the model responsible for enhancing the frame', 'frame_enhancer_blend': 'blend the enhanced into the previous frame', 'lip_syncer_model': 'choose the model responsible for syncing the lips', @@ -160,6 +164,8 @@ WORDING : Dict[str, Any] =\ 'face_enhancer_model_dropdown': 'FACE ENHANCER MODEL', 'face_enhancer_blend_slider': 'FACE ENHANCER BLEND', 'face_swapper_model_dropdown': 'FACE SWAPPER MODEL', + 'frame_colorizer_model_dropdown': 'FRAME COLORIZER MODEL', + 'frame_colorizer_blend_slider': 'FRAME COLORIZER BLEND', 
'frame_enhancer_model_dropdown': 'FRAME ENHANCER MODEL', 'frame_enhancer_blend_slider': 'FRAME ENHANCER BLEND', 'lip_syncer_model_dropdown': 'LIP SYNCER MODEL', diff --git a/install.py b/install.py index 9ce0d08a..6feca23e 100755 --- a/install.py +++ b/install.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 +import os import subprocess -subprocess.call([ 'pip', 'install' , 'inquirer', '-q' ]) +os.environ['PIP_BREAK_SYSTEM_PACKAGES'] = '1' +subprocess.call([ 'pip', 'install', 'inquirer', '-q' ]) from facefusion import installer diff --git a/requirements.txt b/requirements.txt index 7f72f340..8787cd4f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ filetype==1.2.0 gradio==3.50.2 numpy==1.26.4 -onnx==1.15.0 -onnxruntime==1.16.3 +onnx==1.16.0 +onnxruntime==1.17.1 opencv-python==4.8.1.78 psutil==5.9.8 tqdm==4.66.2 diff --git a/tests/test_common_helper.py b/tests/test_common_helper.py index eaafbeef..145cc69f 100644 --- a/tests/test_common_helper.py +++ b/tests/test_common_helper.py @@ -1,4 +1,4 @@ -from facefusion.common_helper import create_metavar, create_int_range, create_float_range, extract_major_version +from facefusion.common_helper import create_metavar, create_int_range, create_float_range def test_create_metavar() -> None: @@ -14,8 +14,3 @@ def test_create_float_range() -> None: assert create_float_range(0.0, 1.0, 0.5) == [ 0.0, 0.5, 1.0 ] assert create_float_range(0.0, 0.2, 0.05) == [ 0.0, 0.05, 0.10, 0.15, 0.20 ] - -def test_extract_major_version() -> None: - assert extract_major_version('1') == (1, 0) - assert extract_major_version('1.1') == (1, 1) - assert extract_major_version('1.2.0') == (1, 2)
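
The listener loops in output_options.py, preview.py and webcam.py above all converge on the new get_ui_components helper added to facefusion/uis/core.py. The following standalone sketch is not repository code; plain strings stand in for the real gradio components. It shows the registry pattern that helper implements: names are looked up in a shared dict and unregistered names are skipped, so callers can iterate the result without None checks.

from typing import Dict, List, Optional

UI_COMPONENTS : Dict[str, object] = {}


def register_ui_component(component_name : str, component : object) -> None:
	UI_COMPONENTS[component_name] = component


def get_ui_component(component_name : str) -> Optional[object]:
	return UI_COMPONENTS.get(component_name)


def get_ui_components(component_names : List[str]) -> List[object]:
	# collect only the components that were actually registered
	return [ UI_COMPONENTS[component_name] for component_name in component_names if component_name in UI_COMPONENTS ]


register_ui_component('target_image', 'image component')
register_ui_component('target_video', 'video component')

for ui_component in get_ui_components([ 'target_image', 'target_video', 'source_audio' ]):
	print(ui_component)
# prints the two registered components and skips 'source_audio', which was never registered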
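
batch_extract_voice in the new facefusion/voice_extractor.py blends overlapping audio chunks by accumulating each extracted chunk into a running sum and dividing by a per-sample hit count. Below is a minimal sketch of that overlap-add averaging, not repository code: an identity function stands in for the ONNX voice extractor and a mono signal replaces the stereo arrays used above.

import numpy


def extract(chunk : numpy.ndarray) -> numpy.ndarray:
	# stand-in for the ONNX voice extractor; returns the chunk unchanged
	return chunk


def batch_extract(audio : numpy.ndarray, chunk_size : int, step_size : int) -> numpy.ndarray:
	temp_audio = numpy.zeros(audio.shape[0], dtype = numpy.float32)
	temp_chunk = numpy.zeros(audio.shape[0], dtype = numpy.float32)

	for start in range(0, audio.shape[0], step_size):
		end = min(start + chunk_size, audio.shape[0])
		temp_audio[start:end] += extract(audio[start:end])
		temp_chunk[start:end] += 1
	# every sample is covered at least once, so the division is safe
	return temp_audio / temp_chunk


audio = numpy.arange(10, dtype = numpy.float32)
print(batch_extract(audio, chunk_size = 4, step_size = 2))
# matches the input exactly, because averaging identical contributions is lossless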