diff --git a/facefusion/audio.py b/facefusion/audio.py
index 4e303ec9..8f1268a5 100644
--- a/facefusion/audio.py
+++ b/facefusion/audio.py
@@ -16,11 +16,11 @@ def read_static_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]
 
 def read_audio(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]:
 	if is_audio(audio_path):
-		audio_buffer = read_audio_buffer(audio_path, 16000, 2)
+		audio_buffer = read_audio_buffer(audio_path, 48000, 2)
 		audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2)
 		audio = normalize_audio(audio)
 		audio = filter_audio(audio, -0.97)
-		spectrogram = create_spectrogram(audio, 16000, 80, 800, 55.0, 7600.0)
+		spectrogram = create_spectrogram(audio, 48000, 80, 800, 55.0, 7600.0)
 		audio_frames = extract_audio_frames(spectrogram, 80, 16, fps)
 		return audio_frames
 	return None
@@ -33,12 +33,12 @@ def read_static_voice(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]
 
 def read_voice(audio_path : str, fps : Fps) -> Optional[List[AudioFrame]]:
 	if is_audio(audio_path):
-		audio_buffer = read_audio_buffer(audio_path, 16000, 2)
+		audio_buffer = read_audio_buffer(audio_path, 48000, 2)
 		audio = numpy.frombuffer(audio_buffer, dtype = numpy.int16).reshape(-1, 2)
 		audio = batch_extract_voice(audio, 1024 ** 3, 0.75)
 		audio = normalize_audio(audio)
 		audio = filter_audio(audio, -0.97)
-		spectrogram = create_spectrogram(audio, 16000, 80, 800, 55.0, 7600.0)
+		spectrogram = create_spectrogram(audio, 48000, 80, 800, 55.0, 7600.0)
 		audio_frames = extract_audio_frames(spectrogram, 80, 16, fps)
 		return audio_frames
 	return None