Replace retinaface with yunet (#168)

* Remove insightface dependency

* Fix urllib

* Some fixes

* Analyse based on matches

* Analyse based on rate

* Fix CI

* Migrate to Yunet

* Something is off here

* We indeed need semaphore for yunet

* Normalize the normed_embedding

* Fix download of models

* Fix download of models

* Fix download of models

* Add score and improve affine_matrix

* Temp fix for bbox out of frame

* Temp fix for bbox out of frame

* ROCM and OpenVINO mapping for torch backends

* Normalize bbox

* Implement gender age

* Cosmetics on cli args

* Prevent face jumping

* Fix the paste back speed

* Fix import

* Introduce detection size
Henry Ruhs, 2023-10-22 12:33:31 +02:00 (committed via GitHub)
parent 738d69a10b
commit 228febd73b
7 changed files with 134 additions and 30 deletions

View File

@@ -13,7 +13,7 @@ from argparse import ArgumentParser, HelpFormatter
 import facefusion.choices
 import facefusion.globals
-from facefusion import metadata, predictor, wording
+from facefusion import face_analyser, predictor, metadata, wording
 from facefusion.predictor import predict_image, predict_video
 from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module
 from facefusion.utilities import is_image, is_video, detect_fps, compress_image, merge_video, extract_frames, get_temp_frame_paths, restore_audio, create_temp, move_temp, clear_temp, list_module_names, encode_execution_providers, decode_execution_providers, normalize_output_path, update_status
@@ -47,22 +47,22 @@ def cli() -> None:
     group_face_recognition.add_argument('--face-analyser-age', help = wording.get('face_analyser_age_help'), dest = 'face_analyser_age', choices = facefusion.choices.face_analyser_ages)
     group_face_recognition.add_argument('--face-analyser-gender', help = wording.get('face_analyser_gender_help'), dest = 'face_analyser_gender', choices = facefusion.choices.face_analyser_genders)
     group_face_recognition.add_argument('--reference-face-position', help = wording.get('reference_face_position_help'), dest = 'reference_face_position', type = int, default = 0)
-    group_face_recognition.add_argument('--reference-face-distance', help = wording.get('reference_face_distance_help'), dest = 'reference_face_distance', type = float, default = 1.5)
+    group_face_recognition.add_argument('--reference-face-distance', help = wording.get('reference_face_distance_help'), dest = 'reference_face_distance', type = float, default = 0.6)
     group_face_recognition.add_argument('--reference-frame-number', help = wording.get('reference_frame_number_help'), dest = 'reference_frame_number', type = int, default = 0)
     # frame extraction
-    group_processing = program.add_argument_group('frame extraction')
-    group_processing.add_argument('--trim-frame-start', help = wording.get('trim_frame_start_help'), dest = 'trim_frame_start', type = int)
-    group_processing.add_argument('--trim-frame-end', help = wording.get('trim_frame_end_help'), dest = 'trim_frame_end', type = int)
-    group_processing.add_argument('--temp-frame-format', help = wording.get('temp_frame_format_help'), dest = 'temp_frame_format', default = 'jpg', choices = facefusion.choices.temp_frame_formats)
-    group_processing.add_argument('--temp-frame-quality', help = wording.get('temp_frame_quality_help'), dest = 'temp_frame_quality', type = int, default = 100, choices = range(101), metavar = '[0-100]')
-    group_processing.add_argument('--keep-temp', help = wording.get('keep_temp_help'), dest = 'keep_temp', action = 'store_true')
+    group_frame_extraction = program.add_argument_group('frame extraction')
+    group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('trim_frame_start_help'), dest = 'trim_frame_start', type = int)
+    group_frame_extraction.add_argument('--trim-frame-end', help = wording.get('trim_frame_end_help'), dest = 'trim_frame_end', type = int)
+    group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('temp_frame_format_help'), dest = 'temp_frame_format', default = 'jpg', choices = facefusion.choices.temp_frame_formats)
+    group_frame_extraction.add_argument('--temp-frame-quality', help = wording.get('temp_frame_quality_help'), dest = 'temp_frame_quality', type = int, default = 100, choices = range(101), metavar = '[0-100]')
+    group_frame_extraction.add_argument('--keep-temp', help = wording.get('keep_temp_help'), dest = 'keep_temp', action = 'store_true')
     # output creation
-    group_output = program.add_argument_group('output creation')
-    group_output.add_argument('--output-image-quality', help=wording.get('output_image_quality_help'), dest = 'output_image_quality', type = int, default = 80, choices = range(101), metavar = '[0-100]')
-    group_output.add_argument('--output-video-encoder', help = wording.get('output_video_encoder_help'), dest = 'output_video_encoder', default = 'libx264', choices = facefusion.choices.output_video_encoders)
-    group_output.add_argument('--output-video-quality', help = wording.get('output_video_quality_help'), dest = 'output_video_quality', type = int, default = 80, choices = range(101), metavar = '[0-100]')
-    group_output.add_argument('--keep-fps', help = wording.get('keep_fps_help'), dest = 'keep_fps', action = 'store_true')
-    group_output.add_argument('--skip-audio', help = wording.get('skip_audio_help'), dest = 'skip_audio', action = 'store_true')
+    group_output_creation = program.add_argument_group('output creation')
+    group_output_creation.add_argument('--output-image-quality', help=wording.get('output_image_quality_help'), dest = 'output_image_quality', type = int, default = 80, choices = range(101), metavar = '[0-100]')
+    group_output_creation.add_argument('--output-video-encoder', help = wording.get('output_video_encoder_help'), dest = 'output_video_encoder', default = 'libx264', choices = facefusion.choices.output_video_encoders)
+    group_output_creation.add_argument('--output-video-quality', help = wording.get('output_video_quality_help'), dest = 'output_video_quality', type = int, default = 80, choices = range(101), metavar = '[0-100]')
+    group_output_creation.add_argument('--keep-fps', help = wording.get('keep_fps_help'), dest = 'keep_fps', action = 'store_true')
+    group_output_creation.add_argument('--skip-audio', help = wording.get('skip_audio_help'), dest = 'skip_audio', action = 'store_true')
     # frame processors
     available_frame_processors = list_module_names('facefusion/processors/frame/modules')
     program = ArgumentParser(parents = [ program ], formatter_class = program.formatter_class, add_help = True)
@@ -124,7 +124,7 @@ def apply_args(program : ArgumentParser) -> None:
 def run(program : ArgumentParser) -> None:
     apply_args(program)
     limit_resources()
-    if not pre_check() or not predictor.pre_check():
+    if not pre_check() or not predictor.pre_check() or not face_analyser.pre_check():
         return
     for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors):
         if not frame_processor_module.pre_check():

View File

@@ -1,14 +1,37 @@
-from typing import Any, Optional, List
+from typing import Any, Optional, List, Dict, Tuple
 import threading
-import insightface
+import cv2
 import numpy
+import onnxruntime

 import facefusion.globals
 from facefusion.face_cache import get_faces_cache, set_faces_cache
-from facefusion.typing import Frame, Face, FaceAnalyserDirection, FaceAnalyserAge, FaceAnalyserGender
+from facefusion.face_helper import warp_face
+from facefusion.typing import Frame, Face, FaceAnalyserDirection, FaceAnalyserAge, FaceAnalyserGender, ModelValue, Kps, Embedding
+from facefusion.utilities import resolve_relative_path, conditional_download
+from facefusion.vision import resize_frame_dimension

 FACE_ANALYSER = None
+THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore()
 THREAD_LOCK : threading.Lock = threading.Lock()
+MODELS : Dict[str, ModelValue] =\
+{
+    'face_recognition_arcface':
+    {
+        'url': 'https://huggingface.co/bluefoxcreation/insightface-retinaface-arcface-model/resolve/main/w600k_r50.onnx',
+        'path': resolve_relative_path('../.assets/models/w600k_r50.onnx')
+    },
+    'face_detection_yunet':
+    {
+        'url': 'https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx',
+        'path': resolve_relative_path('../.assets/models/face_detection_yunet_2023mar.onnx')
+    },
+    'gender_age':
+    {
+        'url': 'https://huggingface.co/facefusion/buffalo_l/resolve/main/genderage.onnx',
+        'path': resolve_relative_path('../.assets/models/genderage.onnx')
+    }
+}

 def get_face_analyser() -> Any:
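
The new pre_check (added further down) feeds these URLs to conditional_download from facefusion.utilities, which, as the name suggests, only fetches files that are not already on disk. A hypothetical stand-in to illustrate the expected behaviour; the real implementation lives in facefusion.utilities and may differ:

    from typing import List
    import os
    import urllib.request

    # Hypothetical equivalent of facefusion.utilities.conditional_download:
    # fetch each URL into the directory unless the file already exists.
    def conditional_download_sketch(download_directory_path : str, urls : List[str]) -> None:
        os.makedirs(download_directory_path, exist_ok = True)
        for url in urls:
            download_file_path = os.path.join(download_directory_path, os.path.basename(url))
            if not os.path.isfile(download_file_path):
                urllib.request.urlretrieve(url, download_file_path)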
@@ -16,8 +39,12 @@ def get_face_analyser() -> Any:
     with THREAD_LOCK:
         if FACE_ANALYSER is None:
-            FACE_ANALYSER = insightface.app.FaceAnalysis(name = 'buffalo_l', providers = facefusion.globals.execution_providers)
-            FACE_ANALYSER.prepare(ctx_id = 0)
+            FACE_ANALYSER =\
+            {
+                'face_detector': cv2.FaceDetectorYN.create(MODELS.get('face_detection_yunet').get('path'), None, (0, 0)),
+                'face_recognition': onnxruntime.InferenceSession(MODELS.get('face_recognition_arcface').get('path'), providers = facefusion.globals.execution_providers),
+                'gender_age': onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = facefusion.globals.execution_providers),
+            }
     return FACE_ANALYSER
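
cv2.FaceDetectorYN is OpenCV's YuNet wrapper: create takes the model path, a config path (unused for ONNX models) and an initial input size. The (0, 0) placeholder works because extract_faces sets the real size per frame before detecting. A minimal standalone sketch, assuming the model file has already been downloaded and example.jpg exists:

    import cv2

    model_path = '.assets/models/face_detection_yunet_2023mar.onnx'  # assumed local path
    face_detector = cv2.FaceDetectorYN.create(model_path, None, (0, 0))
    face_detector.setScoreThreshold(0.5)
    face_detector.setTopK(100)

    frame = cv2.imread('example.jpg')  # hypothetical test image
    frame_height, frame_width, _ = frame.shape
    face_detector.setInputSize((frame_width, frame_height))
    # detect returns (retval, detections); detections can be None when no face is found
    _, detections = face_detector.detect(frame)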
@@ -27,6 +54,80 @@ def clear_face_analyser() -> Any:
     FACE_ANALYSER = None

+def pre_check() -> bool:
+    if not facefusion.globals.skip_download:
+        download_directory_path = resolve_relative_path('../.assets/models')
+        model_urls = [ MODELS.get('face_recognition_arcface').get('url'), MODELS.get('face_detection_yunet').get('url'), MODELS.get('gender_age').get('url') ]
+        conditional_download(download_directory_path, model_urls)
+    return True
+
+def extract_faces(frame : Frame) -> List[Face]:
+    face_detector = get_face_analyser().get('face_detector')
+    faces : List[Face] = []
+    temp_frame = resize_frame_dimension(frame, 640, 640)
+    temp_frame_height, temp_frame_width, _ = temp_frame.shape
+    frame_height, frame_width, _ = frame.shape
+    ratio_height = frame_height / temp_frame_height
+    ratio_width = frame_width / temp_frame_width
+    face_detector.setScoreThreshold(0.5)
+    face_detector.setTopK(100)
+    face_detector.setInputSize((temp_frame_width, temp_frame_height))
+    with THREAD_SEMAPHORE:
+        _, detections = face_detector.detect(temp_frame)
+    if detections.any():
+        for detection in detections:
+            bbox =\
+            [
+                detection[0:4][0] * ratio_width,
+                detection[0:4][1] * ratio_height,
+                (detection[0:4][0] + detection[0:4][2]) * ratio_width,
+                (detection[0:4][1] + detection[0:4][3]) * ratio_height
+            ]
+            kps = (detection[4:14].reshape((5, 2)) * [[ ratio_width, ratio_height ]]).astype(int)
+            score = detection[14]
+            embedding = calc_embedding(frame, kps)
+            normed_embedding = embedding / numpy.linalg.norm(embedding)
+            gender, age = detect_gender_age(frame, kps)
+            faces.append(Face(
+                bbox = bbox,
+                kps = kps,
+                score = score,
+                embedding = embedding,
+                normed_embedding = normed_embedding,
+                gender = gender,
+                age = age
+            ))
+    return faces
+
+def calc_embedding(temp_frame : Frame, kps : Kps) -> Embedding:
+    face_recognition = get_face_analyser().get('face_recognition')
+    crop_frame, matrix = warp_face(temp_frame, kps, 'arcface', (112, 112))
+    crop_frame = crop_frame.astype(numpy.float32) / 127.5 - 1
+    crop_frame = crop_frame[:, :, ::-1].transpose(2, 0, 1)
+    crop_frame = numpy.expand_dims(crop_frame, axis = 0)
+    embedding = face_recognition.run(None,
+    {
+        face_recognition.get_inputs()[0].name: crop_frame
+    })[0]
+    embedding = embedding.ravel()
+    return embedding
+
+def detect_gender_age(frame : Frame, kps : Kps) -> Tuple[int, int]:
+    gender_age = get_face_analyser().get('gender_age')
+    crop_frame, affine_matrix = warp_face(frame, kps, 'arcface', (96, 96))
+    crop_frame = numpy.expand_dims(crop_frame, axis = 0).transpose(0, 3, 1, 2).astype(numpy.float32)
+    prediction = gender_age.run(None,
+    {
+        gender_age.get_inputs()[0].name: crop_frame
+    })[0][0]
+    gender = int(numpy.argmax(prediction[:2]))
+    age = int(numpy.round(prediction[2] * 100))
+    return gender, age
+
 def get_one_face(frame : Frame, position : int = 0) -> Optional[Face]:
     many_faces = get_many_faces(frame)
     if many_faces:
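
For reference, each YuNet detection row is a 15-element vector: the bounding box as x, y, width, height, five landmarks as x/y pairs (eyes, nose tip, mouth corners) and a confidence score. Since detection runs on a downscaled copy of the frame, extract_faces rescales everything back into original coordinates. A minimal sketch of that decoding with a hand-made detection row, assuming a 1920x1080 frame resized to 640x360 for detection:

    import numpy

    # Hand-made detection row (illustrative values, not real model output):
    # [x, y, w, h, five landmark x/y pairs, score]
    detection = numpy.array([ 100.0, 80.0, 50.0, 60.0, 110, 100, 140, 100, 125, 115, 112, 130, 138, 130, 0.91 ])
    ratio_width = 1920 / 640    # original width / detection width
    ratio_height = 1080 / 360   # original height / detection height

    # x, y, w, h becomes x1, y1, x2, y2 in original frame coordinates
    bbox = [
        detection[0] * ratio_width,
        detection[1] * ratio_height,
        (detection[0] + detection[2]) * ratio_width,
        (detection[1] + detection[3]) * ratio_height
    ]
    kps = (detection[4:14].reshape((5, 2)) * [[ ratio_width, ratio_height ]]).astype(int)
    score = detection[14]
    print(bbox, kps.tolist(), score)  # bbox spans [300, 240] to [450, 420]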
@@ -43,7 +144,7 @@ def get_many_faces(frame : Frame) -> List[Face]:
         if faces_cache:
             faces = faces_cache
         else:
-            faces = get_face_analyser().get(frame)
+            faces = extract_faces(frame)
             set_faces_cache(frame, faces)
         if facefusion.globals.face_analyser_direction:
             faces = sort_by_direction(faces, facefusion.globals.face_analyser_direction)
@@ -62,7 +163,7 @@ def find_similar_faces(frame : Frame, reference_face : Face, face_distance : float) -> List[Face]:
     if many_faces:
         for face in many_faces:
             if hasattr(face, 'normed_embedding') and hasattr(reference_face, 'normed_embedding'):
-                current_face_distance = numpy.sum(numpy.square(face.normed_embedding - reference_face.normed_embedding))
+                current_face_distance = 1 - numpy.dot(face.normed_embedding, reference_face.normed_embedding)
                 if current_face_distance < face_distance:
                     similar_faces.append(face)
     return similar_faces
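
The matching metric changes here from the squared Euclidean distance between normalized embeddings to cosine distance. For unit vectors the two differ by exactly a factor of two, which explains the retuned thresholds: the CLI default drops from 1.5 to 0.6 (slightly stricter than the pure rescale of 0.75) and the UI slider maximum, further below, from 3 to 1.5. A minimal sketch of the relationship, using random stand-ins for real ArcFace embeddings:

    import numpy

    rng = numpy.random.default_rng(0)
    face_embedding = rng.standard_normal(512)
    reference_embedding = rng.standard_normal(512)
    face_embedding /= numpy.linalg.norm(face_embedding)            # L2-normalize, as extract_faces does
    reference_embedding /= numpy.linalg.norm(reference_embedding)

    squared_euclidean = numpy.sum(numpy.square(face_embedding - reference_embedding))  # old metric, range [0, 4]
    cosine_distance = 1 - numpy.dot(face_embedding, reference_embedding)               # new metric, range [0, 2]

    # For unit vectors: ||a - b||^2 = 2 - 2 * dot(a, b) = 2 * cosine distance
    assert numpy.isclose(squared_euclidean, 2 * cosine_distance)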

View File

@@ -154,7 +154,7 @@ def swap_face(source_face : Face, target_face : Face, temp_frame : Frame) -> Frame:
         if frame_processor_input.name == 'source':
             frame_processor_inputs[frame_processor_input.name] = source_face
         if frame_processor_input.name == 'target':
-            frame_processor_inputs[frame_processor_input.name] = crop_frame
+            frame_processor_inputs[frame_processor_input.name] = crop_frame # type: ignore[assignment]
     crop_frame = frame_processor.run(None, frame_processor_inputs)[0][0]
     crop_frame = normalize_crop_frame(crop_frame)
     temp_frame = paste_back(temp_frame, crop_frame, affine_matrix)

View File

@@ -137,7 +137,8 @@ def enhance_frame(temp_frame : Frame) -> Frame:
 def blend_frame(temp_frame : Frame, paste_frame : Frame) -> Frame:
     frame_enhancer_blend = 1 - (frame_processors_globals.frame_enhancer_blend / 100)
-    temp_frame = cv2.resize(temp_frame, (paste_frame.shape[1], paste_frame.shape[0]))
+    paste_frame_height, paste_frame_width = paste_frame.shape[0:2]
+    temp_frame = cv2.resize(temp_frame, (paste_frame_width, paste_frame_height))
     temp_frame = cv2.addWeighted(temp_frame, frame_enhancer_blend, paste_frame, 1 - frame_enhancer_blend, 0)
     return temp_frame
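
The rewritten blend_frame unpacks height and width explicitly because cv2.resize expects (width, height), the reverse of numpy's shape order. cv2.addWeighted then computes src1 * alpha + src2 * beta + gamma per pixel. A small sketch with synthetic frames:

    import cv2
    import numpy

    frame_enhancer_blend = 1 - (80 / 100)  # e.g. a blend setting of 80
    temp_frame = numpy.full((50, 100, 3), 255, numpy.uint8)  # original frame (white)
    paste_frame = numpy.zeros((100, 200, 3), numpy.uint8)    # enhanced, upscaled frame (black)

    paste_frame_height, paste_frame_width = paste_frame.shape[0:2]
    temp_frame = cv2.resize(temp_frame, (paste_frame_width, paste_frame_height))
    blended = cv2.addWeighted(temp_frame, frame_enhancer_blend, paste_frame, 1 - frame_enhancer_blend, 0)
    print(blended[0, 0])  # [51 51 51] = 255 * 0.2 + 0 * 0.8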

View File

@@ -1,9 +1,11 @@
+from collections import namedtuple
 from typing import Any, Literal, Callable, List, TypedDict, Dict
-from insightface.app.common import Face
 import numpy

+Bbox = numpy.ndarray[Any, Any]
 Kps = numpy.ndarray[Any, Any]
-Face = Face
+Embedding = numpy.ndarray[Any, Any]
+Face = namedtuple('Face', [ 'bbox', 'kps', 'score', 'embedding', 'normed_embedding', 'gender', 'age' ])
 Frame = numpy.ndarray[Any, Any]
 Matrix = numpy.ndarray[Any, Any]
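
Face is now a plain namedtuple instead of insightface's attribute class, so downstream code keeps using dotted access (face.kps, face.normed_embedding) and hasattr checks unchanged. Constructing one by hand, with made-up values:

    from collections import namedtuple
    import numpy

    Face = namedtuple('Face', [ 'bbox', 'kps', 'score', 'embedding', 'normed_embedding', 'gender', 'age' ])

    embedding = numpy.ones(512, numpy.float32)  # stand-in for a real ArcFace embedding
    face = Face(
        bbox = [ 300.0, 240.0, 450.0, 420.0 ],
        kps = numpy.zeros((5, 2), int),
        score = 0.91,
        embedding = embedding,
        normed_embedding = embedding / numpy.linalg.norm(embedding),
        gender = 1,
        age = 32
    )
    print(face.age, hasattr(face, 'normed_embedding'))  # 32 True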

View File

@@ -5,7 +5,7 @@ import gradio
 import facefusion.choices
 import facefusion.globals
 from facefusion import wording
-from facefusion.vision import get_video_frame, normalize_frame_color, read_static_image
+from facefusion.vision import get_video_frame, read_static_image, normalize_frame_color
 from facefusion.face_analyser import get_many_faces
 from facefusion.face_reference import clear_face_reference
 from facefusion.typing import Frame, FaceRecognition
@@ -47,9 +47,9 @@ def render() -> None:
     REFERENCE_FACE_DISTANCE_SLIDER = gradio.Slider(
         label = wording.get('reference_face_distance_slider_label'),
         value = facefusion.globals.reference_face_distance,
-        step = 0.05,
+        step = 0.025,
         minimum = 0,
-        maximum = 3,
+        maximum = 1.5,
         visible = 'reference' in facefusion.globals.face_recognition
     )
     register_ui_component('face_recognition_dropdown', FACE_RECOGNITION_DROPDOWN)
@@ -134,3 +134,4 @@ def extract_gallery_frames(reference_frame : Frame) -> List[Frame]:
         crop_frame = normalize_frame_color(crop_frame)
         crop_frames.append(crop_frame)
     return crop_frames

View File

@@ -1,6 +1,5 @@
 basicsr==1.4.2
 gradio==3.47.1
-insightface==0.7.3
 numpy==1.24.3
 onnx==1.14.1
 onnxruntime==1.16.0