From df96eb51e73d1123d24e15dad6b5c1ab720ea09e Mon Sep 17 00:00:00 2001
From: Henry Ruhs <info@henryruhs.com>
Date: Mon, 23 Dec 2024 09:10:24 +0100
Subject: [PATCH] Feat/webcam improvements (#838)

* Detect available webcams

* Fix CI, move webcam ID dropdown to the sidebar, disable warnings

* Fix CI
---
 facefusion/uis/components/webcam.py         | 40 +++++++++++++++------
 facefusion/uis/components/webcam_options.py | 11 ++++++
 facefusion/uis/typing.py                    |  1 +
 facefusion/wording.py                       |  1 +
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/facefusion/uis/components/webcam.py b/facefusion/uis/components/webcam.py
index a9b63d27..8658767b 100644
--- a/facefusion/uis/components/webcam.py
+++ b/facefusion/uis/components/webcam.py
@@ -2,7 +2,7 @@ import os
 import subprocess
 from collections import deque
 from concurrent.futures import ThreadPoolExecutor
-from typing import Deque, Generator, Optional
+from typing import Deque, Generator, List, Optional
 
 import cv2
 import gradio
@@ -10,7 +10,7 @@ from tqdm import tqdm
 
 from facefusion import logger, state_manager, wording
 from facefusion.audio import create_empty_audio_frame
-from facefusion.common_helper import is_windows
+from facefusion.common_helper import get_first, is_windows
 from facefusion.content_analyser import analyse_stream
 from facefusion.face_analyser import get_average_face, get_many_faces
 from facefusion.ffmpeg import open_ffmpeg
@@ -27,14 +27,17 @@ WEBCAM_START_BUTTON : Optional[gradio.Button] = None
 WEBCAM_STOP_BUTTON : Optional[gradio.Button] = None
 
 
-def get_webcam_capture() -> Optional[cv2.VideoCapture]:
+def get_webcam_capture(index : int) -> Optional[cv2.VideoCapture]:
 	global WEBCAM_CAPTURE
 
 	if WEBCAM_CAPTURE is None:
+		cv2.setLogLevel(0)
 		if is_windows():
-			webcam_capture = cv2.VideoCapture(0, cv2.CAP_DSHOW)
+			webcam_capture = cv2.VideoCapture(index, cv2.CAP_DSHOW)
 		else:
-			webcam_capture = cv2.VideoCapture(0)
+			webcam_capture = cv2.VideoCapture(index)
+		cv2.setLogLevel(3)
+
 		if webcam_capture and webcam_capture.isOpened():
 			WEBCAM_CAPTURE = webcam_capture
 	return WEBCAM_CAPTURE
@@ -68,31 +71,35 @@ def render() -> None:
 
 
 def listen() -> None:
+	webcam_device_id_dropdown = get_ui_component('webcam_device_id_dropdown')
 	webcam_mode_radio = get_ui_component('webcam_mode_radio')
 	webcam_resolution_dropdown = get_ui_component('webcam_resolution_dropdown')
 	webcam_fps_slider = get_ui_component('webcam_fps_slider')
 	source_image = get_ui_component('source_image')
 
-	if webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider:
-		start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE)
+	if webcam_device_id_dropdown and webcam_mode_radio and webcam_resolution_dropdown and webcam_fps_slider:
+		start_event = WEBCAM_START_BUTTON.click(start, inputs = [ webcam_device_id_dropdown, webcam_mode_radio, webcam_resolution_dropdown, webcam_fps_slider ], outputs = WEBCAM_IMAGE)
 		WEBCAM_STOP_BUTTON.click(stop, cancels = start_event, outputs = WEBCAM_IMAGE)
 
 	if source_image:
 		source_image.change(stop, cancels = start_event, outputs = WEBCAM_IMAGE)
 
 
-def start(webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[VisionFrame, None, None]:
+def start(webcam_device_id : int, webcam_mode : WebcamMode, webcam_resolution : str, webcam_fps : Fps) -> Generator[VisionFrame, None, None]:
 	state_manager.set_item('face_selector_mode', 'one')
 	source_image_paths = filter_image_paths(state_manager.get_item('source_paths'))
 	source_frames = read_static_images(source_image_paths)
 	source_faces = get_many_faces(source_frames)
 	source_face = get_average_face(source_faces)
 	stream = None
+	webcam_capture = None
 
 	if webcam_mode in [ 'udp', 'v4l2' ]:
 		stream = open_stream(webcam_mode, webcam_resolution, webcam_fps) #type:ignore[arg-type]
 	webcam_width, webcam_height = unpack_resolution(webcam_resolution)
-	webcam_capture = get_webcam_capture()
+
+	if isinstance(webcam_device_id, int):
+		webcam_capture = get_webcam_capture(webcam_device_id)
 
 	if webcam_capture and webcam_capture.isOpened():
 		webcam_capture.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG')) #type:ignore[attr-defined]
@@ -163,9 +170,22 @@ def open_stream(stream_mode : StreamMode, stream_resolution : str, stream_fps :
 		commands.extend([ '-b:v', '2000k', '-f', 'mpegts', 'udp://localhost:27000?pkt_size=1316' ])
 	if stream_mode == 'v4l2':
 		try:
-			device_name = os.listdir('/sys/devices/virtual/video4linux')[0]
+			device_name = get_first(os.listdir('/sys/devices/virtual/video4linux'))
 			if device_name:
 				commands.extend([ '-f', 'v4l2', '/dev/' + device_name ])
 		except FileNotFoundError:
 			logger.error(wording.get('stream_not_loaded').format(stream_mode = stream_mode), __name__)
 	return open_ffmpeg(commands)
+
+
+def get_available_webcam_ids(webcam_id_start : int, webcam_id_end : int) -> List[int]:
+	available_webcam_ids = []
+
+	for index in range(webcam_id_start, webcam_id_end):
+		webcam_capture = get_webcam_capture(index)
+
+		if webcam_capture and webcam_capture.isOpened():
+			available_webcam_ids.append(index)
+			clear_webcam_capture()
+
+	return available_webcam_ids
diff --git a/facefusion/uis/components/webcam_options.py b/facefusion/uis/components/webcam_options.py
index cbe7390c..ec8a4d36 100644
--- a/facefusion/uis/components/webcam_options.py
+++ b/facefusion/uis/components/webcam_options.py
@@ -3,19 +3,29 @@ from typing import Optional
 import gradio
 
 from facefusion import wording
+from facefusion.common_helper import get_first
 from facefusion.uis import choices as uis_choices
+from facefusion.uis.components.webcam import get_available_webcam_ids
 from facefusion.uis.core import register_ui_component
 
+WEBCAM_DEVICE_ID_DROPDOWN : Optional[gradio.Dropdown] = None
 WEBCAM_MODE_RADIO : Optional[gradio.Radio] = None
 WEBCAM_RESOLUTION_DROPDOWN : Optional[gradio.Dropdown] = None
 WEBCAM_FPS_SLIDER : Optional[gradio.Slider] = None
 
 
 def render() -> None:
+	global WEBCAM_DEVICE_ID_DROPDOWN
 	global WEBCAM_MODE_RADIO
 	global WEBCAM_RESOLUTION_DROPDOWN
 	global WEBCAM_FPS_SLIDER
 
+	available_webcam_ids = get_available_webcam_ids(0, 10) or [ 'none' ] #type:ignore[list-item]
+	WEBCAM_DEVICE_ID_DROPDOWN = gradio.Dropdown(
+		value = get_first(available_webcam_ids),
+		label = wording.get('uis.webcam_device_id_dropdown'),
+		choices = available_webcam_ids
+	)
 	WEBCAM_MODE_RADIO = gradio.Radio(
 		label = wording.get('uis.webcam_mode_radio'),
 		choices = uis_choices.webcam_modes,
@@ -33,6 +43,7 @@ def render() -> None:
 		minimum = 1,
 		maximum = 60
 	)
+	register_ui_component('webcam_device_id_dropdown', WEBCAM_DEVICE_ID_DROPDOWN)
 	register_ui_component('webcam_mode_radio', WEBCAM_MODE_RADIO)
 	register_ui_component('webcam_resolution_dropdown', WEBCAM_RESOLUTION_DROPDOWN)
 	register_ui_component('webcam_fps_slider', WEBCAM_FPS_SLIDER)
diff --git a/facefusion/uis/typing.py b/facefusion/uis/typing.py
index 78c6f718..6de5730e 100644
--- a/facefusion/uis/typing.py
+++ b/facefusion/uis/typing.py
@@ -73,6 +73,7 @@ ComponentName = Literal\
 	'target_image',
 	'target_video',
 	'ui_workflow_dropdown',
+	'webcam_device_id_dropdown',
 	'webcam_fps_slider',
 	'webcam_mode_radio',
 	'webcam_resolution_dropdown'
diff --git a/facefusion/wording.py b/facefusion/wording.py
index b27af638..db7faabd 100755
--- a/facefusion/wording.py
+++ b/facefusion/wording.py
@@ -330,6 +330,7 @@ WORDING : Dict[str, Any] =\
 		'video_memory_strategy_dropdown': 'VIDEO MEMORY STRATEGY',
 		'webcam_fps_slider': 'WEBCAM FPS',
 		'webcam_image': 'WEBCAM',
+		'webcam_device_id_dropdown': 'WEBCAM DEVICE ID',
 		'webcam_mode_radio': 'WEBCAM MODE',
 		'webcam_resolution_dropdown': 'WEBCAM RESOLUTION'
 	}