Replace retinaface with yunet (#168)

* Remove insightface dependency

* Fix urllib

* Some fixes

* Analyse based on matches

* Analyse based on rate

* Fix CI

* Migrate to Yunet

* Something is off here

* We indeed need semaphore for yunet

* Normalize the normed_embedding

* Fix download of models

* Fix download of models

* Fix download of models

* Add score and improve affine_matrix

* Temp fix for bbox out of frame

* Temp fix for bbox out of frame

* ROCm and OpenVINO mapping for torch backends

* Normalize bbox

* Implement gender age

* Cosmetics on cli args

* Prevent face jumping

* Fix the paste back speed

* Fix import

* Introduce detection size
Henry Ruhs 2023-10-22 12:33:31 +02:00 committed by GitHub
parent 738d69a10b
commit 228febd73b
7 changed files with 134 additions and 30 deletions
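For context, a minimal sketch of the OpenCV YuNet API this change builds on. The model path and input image are placeholders, and the size handling is simplified; the actual code in face_analyser.py bounds the frame to 640x640 and rescales the detections back to the original resolution.

import cv2

# placeholder paths; the real model is downloaded to .assets/models by pre_check()
frame = cv2.imread('target.jpg')
frame_height, frame_width, _ = frame.shape

face_detector = cv2.FaceDetectorYN.create('face_detection_yunet_2023mar.onnx', None, (0, 0))
face_detector.setScoreThreshold(0.5)
face_detector.setTopK(100)
face_detector.setInputSize((frame_width, frame_height))

_, detections = face_detector.detect(frame)
# each detection row: x, y, width, height, five (x, y) landmarks, confidence score
if detections is not None:
    for detection in detections:
        bbox = detection[0:4]
        kps = detection[4:14].reshape((5, 2))
        score = detection[14]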


@@ -13,7 +13,7 @@ from argparse import ArgumentParser, HelpFormatter
import facefusion.choices
import facefusion.globals
from facefusion import metadata, predictor, wording
from facefusion import face_analyser, predictor, metadata, wording
from facefusion.predictor import predict_image, predict_video
from facefusion.processors.frame.core import get_frame_processors_modules, load_frame_processor_module
from facefusion.utilities import is_image, is_video, detect_fps, compress_image, merge_video, extract_frames, get_temp_frame_paths, restore_audio, create_temp, move_temp, clear_temp, list_module_names, encode_execution_providers, decode_execution_providers, normalize_output_path, update_status
@@ -47,22 +47,22 @@ def cli() -> None:
group_face_recognition.add_argument('--face-analyser-age', help = wording.get('face_analyser_age_help'), dest = 'face_analyser_age', choices = facefusion.choices.face_analyser_ages)
group_face_recognition.add_argument('--face-analyser-gender', help = wording.get('face_analyser_gender_help'), dest = 'face_analyser_gender', choices = facefusion.choices.face_analyser_genders)
group_face_recognition.add_argument('--reference-face-position', help = wording.get('reference_face_position_help'), dest = 'reference_face_position', type = int, default = 0)
group_face_recognition.add_argument('--reference-face-distance', help = wording.get('reference_face_distance_help'), dest = 'reference_face_distance', type = float, default = 1.5)
group_face_recognition.add_argument('--reference-face-distance', help = wording.get('reference_face_distance_help'), dest = 'reference_face_distance', type = float, default = 0.6)
group_face_recognition.add_argument('--reference-frame-number', help = wording.get('reference_frame_number_help'), dest = 'reference_frame_number', type = int, default = 0)
# frame extraction
group_processing = program.add_argument_group('frame extraction')
group_processing.add_argument('--trim-frame-start', help = wording.get('trim_frame_start_help'), dest = 'trim_frame_start', type = int)
group_processing.add_argument('--trim-frame-end', help = wording.get('trim_frame_end_help'), dest = 'trim_frame_end', type = int)
group_processing.add_argument('--temp-frame-format', help = wording.get('temp_frame_format_help'), dest = 'temp_frame_format', default = 'jpg', choices = facefusion.choices.temp_frame_formats)
group_processing.add_argument('--temp-frame-quality', help = wording.get('temp_frame_quality_help'), dest = 'temp_frame_quality', type = int, default = 100, choices = range(101), metavar = '[0-100]')
group_processing.add_argument('--keep-temp', help = wording.get('keep_temp_help'), dest = 'keep_temp', action = 'store_true')
group_frame_extraction = program.add_argument_group('frame extraction')
group_frame_extraction.add_argument('--trim-frame-start', help = wording.get('trim_frame_start_help'), dest = 'trim_frame_start', type = int)
group_frame_extraction.add_argument('--trim-frame-end', help = wording.get('trim_frame_end_help'), dest = 'trim_frame_end', type = int)
group_frame_extraction.add_argument('--temp-frame-format', help = wording.get('temp_frame_format_help'), dest = 'temp_frame_format', default = 'jpg', choices = facefusion.choices.temp_frame_formats)
group_frame_extraction.add_argument('--temp-frame-quality', help = wording.get('temp_frame_quality_help'), dest = 'temp_frame_quality', type = int, default = 100, choices = range(101), metavar = '[0-100]')
group_frame_extraction.add_argument('--keep-temp', help = wording.get('keep_temp_help'), dest = 'keep_temp', action = 'store_true')
# output creation
group_output = program.add_argument_group('output creation')
group_output.add_argument('--output-image-quality', help=wording.get('output_image_quality_help'), dest = 'output_image_quality', type = int, default = 80, choices = range(101), metavar = '[0-100]')
group_output.add_argument('--output-video-encoder', help = wording.get('output_video_encoder_help'), dest = 'output_video_encoder', default = 'libx264', choices = facefusion.choices.output_video_encoders)
group_output.add_argument('--output-video-quality', help = wording.get('output_video_quality_help'), dest = 'output_video_quality', type = int, default = 80, choices = range(101), metavar = '[0-100]')
group_output.add_argument('--keep-fps', help = wording.get('keep_fps_help'), dest = 'keep_fps', action = 'store_true')
group_output.add_argument('--skip-audio', help = wording.get('skip_audio_help'), dest = 'skip_audio', action = 'store_true')
group_output_creation = program.add_argument_group('output creation')
group_output_creation.add_argument('--output-image-quality', help=wording.get('output_image_quality_help'), dest = 'output_image_quality', type = int, default = 80, choices = range(101), metavar = '[0-100]')
group_output_creation.add_argument('--output-video-encoder', help = wording.get('output_video_encoder_help'), dest = 'output_video_encoder', default = 'libx264', choices = facefusion.choices.output_video_encoders)
group_output_creation.add_argument('--output-video-quality', help = wording.get('output_video_quality_help'), dest = 'output_video_quality', type = int, default = 80, choices = range(101), metavar = '[0-100]')
group_output_creation.add_argument('--keep-fps', help = wording.get('keep_fps_help'), dest = 'keep_fps', action = 'store_true')
group_output_creation.add_argument('--skip-audio', help = wording.get('skip_audio_help'), dest = 'skip_audio', action = 'store_true')
# frame processors
available_frame_processors = list_module_names('facefusion/processors/frame/modules')
program = ArgumentParser(parents = [ program ], formatter_class = program.formatter_class, add_help = True)
@@ -124,7 +124,7 @@ def apply_args(program : ArgumentParser) -> None:
def run(program : ArgumentParser) -> None:
apply_args(program)
limit_resources()
if not pre_check() or not predictor.pre_check():
if not pre_check() or not predictor.pre_check() or not face_analyser.pre_check():
return
for frame_processor_module in get_frame_processors_modules(facefusion.globals.frame_processors):
if not frame_processor_module.pre_check():


@@ -1,14 +1,37 @@
from typing import Any, Optional, List
from typing import Any, Optional, List, Dict, Tuple
import threading
import insightface
import cv2
import numpy
import onnxruntime
import facefusion.globals
from facefusion.face_cache import get_faces_cache, set_faces_cache
from facefusion.typing import Frame, Face, FaceAnalyserDirection, FaceAnalyserAge, FaceAnalyserGender
from facefusion.face_helper import warp_face
from facefusion.typing import Frame, Face, FaceAnalyserDirection, FaceAnalyserAge, FaceAnalyserGender, ModelValue, Kps, Embedding
from facefusion.utilities import resolve_relative_path, conditional_download
from facefusion.vision import resize_frame_dimension
FACE_ANALYSER = None
THREAD_SEMAPHORE : threading.Semaphore = threading.Semaphore()
THREAD_LOCK : threading.Lock = threading.Lock()
MODELS : Dict[str, ModelValue] =\
{
'face_recognition_arcface':
{
'url': 'https://huggingface.co/bluefoxcreation/insightface-retinaface-arcface-model/resolve/main/w600k_r50.onnx',
'path': resolve_relative_path('../.assets/models/w600k_r50.onnx')
},
'face_detection_yunet':
{
'url': 'https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx',
'path': resolve_relative_path('../.assets/models/face_detection_yunet_2023mar.onnx')
},
'gender_age':
{
'url': 'https://huggingface.co/facefusion/buffalo_l/resolve/main/genderage.onnx',
'path': resolve_relative_path('../.assets/models/genderage.onnx')
}
}
def get_face_analyser() -> Any:
@@ -16,8 +39,12 @@ def get_face_analyser() -> Any:
with THREAD_LOCK:
if FACE_ANALYSER is None:
FACE_ANALYSER = insightface.app.FaceAnalysis(name = 'buffalo_l', providers = facefusion.globals.execution_providers)
FACE_ANALYSER.prepare(ctx_id = 0)
FACE_ANALYSER =\
{
'face_detector': cv2.FaceDetectorYN.create(MODELS.get('face_detection_yunet').get('path'), None, (0, 0)),
'face_recognition': onnxruntime.InferenceSession(MODELS.get('face_recognition_arcface').get('path'), providers = facefusion.globals.execution_providers),
'gender_age': onnxruntime.InferenceSession(MODELS.get('gender_age').get('path'), providers = facefusion.globals.execution_providers),
}
return FACE_ANALYSER
@@ -27,6 +54,80 @@ def clear_face_analyser() -> Any:
FACE_ANALYSER = None
def pre_check() -> bool:
if not facefusion.globals.skip_download:
download_directory_path = resolve_relative_path('../.assets/models')
model_urls = [ MODELS.get('face_recognition_arcface').get('url'), MODELS.get('face_detection_yunet').get('url'), MODELS.get('gender_age').get('url') ]
conditional_download(download_directory_path, model_urls)
return True
def extract_faces(frame : Frame) -> List[Face]:
face_detector = get_face_analyser().get('face_detector')
faces: List[Face] = []
temp_frame = resize_frame_dimension(frame, 640, 640)
temp_frame_height, temp_frame_width, _ = temp_frame.shape
frame_height, frame_width, _ = frame.shape
ratio_height = frame_height / temp_frame_height
ratio_width = frame_width / temp_frame_width
face_detector.setScoreThreshold(0.5)
face_detector.setTopK(100)
face_detector.setInputSize((temp_frame_width, temp_frame_height))
with THREAD_SEMAPHORE:
_, detections = face_detector.detect(temp_frame)
if detections.any():
for detection in detections:
bbox =\
[
detection[0:4][0] * ratio_width,
detection[0:4][1] * ratio_height,
(detection[0:4][0] + detection[0:4][2]) * ratio_width,
(detection[0:4][1] + detection[0:4][3]) * ratio_height
]
kps = (detection[4:14].reshape((5, 2)) * [[ ratio_width, ratio_height ]]).astype(int)
score = detection[14]
embedding = calc_embedding(frame, kps)
normed_embedding = embedding / numpy.linalg.norm(embedding)
gender, age = detect_gender_age(frame, kps)
faces.append(Face(
bbox = bbox,
kps = kps,
score = score,
embedding = embedding,
normed_embedding = normed_embedding,
gender = gender,
age = age
))
return faces
def calc_embedding(temp_frame : Frame, kps : Kps) -> Embedding:
face_recognition = get_face_analyser().get('face_recognition')
crop_frame, matrix = warp_face(temp_frame, kps, 'arcface', (112, 112))
crop_frame = crop_frame.astype(numpy.float32) / 127.5 - 1
crop_frame = crop_frame[:, :, ::-1].transpose(2, 0, 1)
crop_frame = numpy.expand_dims(crop_frame, axis = 0)
embedding = face_recognition.run(None,
{
face_recognition.get_inputs()[0].name: crop_frame
})[0]
embedding = embedding.ravel()
return embedding
def detect_gender_age(frame : Frame, kps : Kps) -> Tuple[int, int]:
gender_age = get_face_analyser().get('gender_age')
crop_frame, affine_matrix = warp_face(frame, kps, 'arcface', (96, 96))
crop_frame = numpy.expand_dims(crop_frame, axis = 0).transpose(0, 3, 1, 2).astype(numpy.float32)
prediction = gender_age.run(None,
{
gender_age.get_inputs()[0].name: crop_frame
})[0][0]
gender = int(numpy.argmax(prediction[:2]))
age = int(numpy.round(prediction[2] * 100))
return gender, age
def get_one_face(frame : Frame, position : int = 0) -> Optional[Face]:
many_faces = get_many_faces(frame)
if many_faces:
@@ -43,7 +144,7 @@ def get_many_faces(frame : Frame) -> List[Face]:
if faces_cache:
faces = faces_cache
else:
faces = get_face_analyser().get(frame)
faces = extract_faces(frame)
set_faces_cache(frame, faces)
if facefusion.globals.face_analyser_direction:
faces = sort_by_direction(faces, facefusion.globals.face_analyser_direction)
@@ -62,7 +163,7 @@ def find_similar_faces(frame : Frame, reference_face : Face, face_distance : flo
if many_faces:
for face in many_faces:
if hasattr(face, 'normed_embedding') and hasattr(reference_face, 'normed_embedding'):
current_face_distance = numpy.sum(numpy.square(face.normed_embedding - reference_face.normed_embedding))
current_face_distance = 1 - numpy.dot(face.normed_embedding, reference_face.normed_embedding)
if current_face_distance < face_distance:
similar_faces.append(face)
return similar_faces
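A quick sketch of what the new matching metric means in practice. For unit-length embeddings, the previous squared L2 distance equals twice the cosine distance used here, which is why the default --reference-face-distance drops from 1.5 to 0.6 (slightly stricter than a straight conversion). The 512-dimensional embeddings below are random placeholders.

import numpy

def normalize(embedding):
    return embedding / numpy.linalg.norm(embedding)

face_embedding = normalize(numpy.random.rand(512).astype(numpy.float32))
reference_embedding = normalize(numpy.random.rand(512).astype(numpy.float32))

old_distance = numpy.sum(numpy.square(face_embedding - reference_embedding))  # previous metric, default threshold 1.5
new_distance = 1 - numpy.dot(face_embedding, reference_embedding)             # cosine distance, default threshold 0.6

assert numpy.isclose(old_distance, 2 * new_distance)  # holds for normalized embeddings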


@@ -154,7 +154,7 @@ def swap_face(source_face : Face, target_face : Face, temp_frame : Frame) -> Fra
if frame_processor_input.name == 'source':
frame_processor_inputs[frame_processor_input.name] = source_face
if frame_processor_input.name == 'target':
frame_processor_inputs[frame_processor_input.name] = crop_frame
frame_processor_inputs[frame_processor_input.name] = crop_frame # type: ignore[assignment]
crop_frame = frame_processor.run(None, frame_processor_inputs)[0][0]
crop_frame = normalize_crop_frame(crop_frame)
temp_frame = paste_back(temp_frame, crop_frame, affine_matrix)


@@ -137,7 +137,8 @@ def enhance_frame(temp_frame : Frame) -> Frame:
def blend_frame(temp_frame : Frame, paste_frame : Frame) -> Frame:
frame_enhancer_blend = 1 - (frame_processors_globals.frame_enhancer_blend / 100)
temp_frame = cv2.resize(temp_frame, (paste_frame.shape[1], paste_frame.shape[0]))
paste_frame_height, paste_frame_width = paste_frame.shape[0:2]
temp_frame = cv2.resize(temp_frame, (paste_frame_width, paste_frame_height))
temp_frame = cv2.addWeighted(temp_frame, frame_enhancer_blend, paste_frame, 1 - frame_enhancer_blend, 0)
return temp_frame
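The resize change above only names the paste frame dimensions explicitly; the blend itself stays a weighted average. A short worked example of the weights, assuming temp_frame is the untouched frame, paste_frame the enhanced output and the blend setting is 80:

frame_enhancer_blend = 1 - (80 / 100)             # 0.2
temp_frame_weight = frame_enhancer_blend          # 0.2 -> original frame
paste_frame_weight = 1 - frame_enhancer_blend     # 0.8 -> enhanced frame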


@@ -1,9 +1,11 @@
from collections import namedtuple
from typing import Any, Literal, Callable, List, TypedDict, Dict
from insightface.app.common import Face
import numpy
Bbox = numpy.ndarray[Any, Any]
Kps = numpy.ndarray[Any, Any]
Face = Face
Embedding = numpy.ndarray[Any, Any]
Face = namedtuple('Face', [ 'bbox', 'kps', 'score', 'embedding', 'normed_embedding', 'gender', 'age' ])
Frame = numpy.ndarray[Any, Any]
Matrix = numpy.ndarray[Any, Any]


@@ -5,7 +5,7 @@ import gradio
import facefusion.choices
import facefusion.globals
from facefusion import wording
from facefusion.vision import get_video_frame, normalize_frame_color, read_static_image
from facefusion.vision import get_video_frame, read_static_image, normalize_frame_color
from facefusion.face_analyser import get_many_faces
from facefusion.face_reference import clear_face_reference
from facefusion.typing import Frame, FaceRecognition
@@ -47,9 +47,9 @@ def render() -> None:
REFERENCE_FACE_DISTANCE_SLIDER = gradio.Slider(
label = wording.get('reference_face_distance_slider_label'),
value = facefusion.globals.reference_face_distance,
step = 0.05,
step = 0.025,
minimum = 0,
maximum = 3,
maximum = 1.5,
visible = 'reference' in facefusion.globals.face_recognition
)
register_ui_component('face_recognition_dropdown', FACE_RECOGNITION_DROPDOWN)
@@ -134,3 +134,4 @@ def extract_gallery_frames(reference_frame : Frame) -> List[Frame]:
crop_frame = normalize_frame_color(crop_frame)
crop_frames.append(crop_frame)
return crop_frames


@@ -1,6 +1,5 @@
basicsr==1.4.2
gradio==3.47.1
insightface==0.7.3
numpy==1.24.3
onnx==1.14.1
onnxruntime==1.16.0