11from collections .abc import Generator
22from pathlib import Path
3- from typing import Any , Literal
3+ from typing import Any
44from uuid import uuid4
55
66from gradio import (
1717from kokoro import KPipeline
1818from numpy import dtype , float32 , ndarray
1919from soundfile import write
20- from torch import zeros
2120
2221from vocalizr .app .logger import logger
2322from vocalizr .app .settings import Settings , Voices
2423
2524
2625class App :
26+ """Main application class for the Vocalizr text-to-speech app."""
27+
2728 def __init__ (self , settings : Settings ) -> None :
2829 self .settings : Settings = settings
2930 logger .info ("Downloading Kokoro model checkpoint" )
@@ -37,14 +38,10 @@ def generate_audio_for_text(
3738 text : str ,
3839 voice : Voices = Voices .AMERICAN_FEMALE_HEART ,
3940 speed : float = 1.0 ,
40- ) -> Generator [
41- tuple [Literal [24000 ], ndarray [tuple [float32 ], dtype [float32 ]]]
42- | tuple [int , ndarray ],
43- Any ,
44- None ,
45- ]:
41+ ) -> Generator [Path , Any , None ]:
4642 """
47- Generates audio from the provided text using the specified voice and speed.
43+ Generate audio from the provided text using the specified voice and speed.
44+
4845 Each audio segment produced by the pipeline is saved to a WAV file in the
4946 results directory, and the function yields the path of each saved file
5047 rather than the raw audio data.
@@ -73,7 +70,7 @@ def generate_audio_for_text(
7370 _msg = "No text provided"
7471 logger .exception (_msg )
7572 raise ValueError (_msg )
76- elif len (text ) < 4 :
73+ if len (text ) < 4 :
7774 _msg = f"Text too short: { text } with length { len (text )} "
7875 logger .exception (_msg )
7976 raise ValueError (_msg )
@@ -84,21 +81,19 @@ def generate_audio_for_text(
8481 else text .strip ()[: self .settings .model .char_limit ]
8582 )
8683 generator : Generator [KPipeline .Result , None , None ] = self .pipeline (
87- text = text , voice = voice , speed = speed
84+ text = text ,
85+ voice = voice ,
86+ speed = speed ,
8887 )
89- first = True
9088 for _ , _ , audio in generator :
9189 if audio is None or isinstance (audio , str ):
92- logger .exception (f "Unexpected type (audio): { type (audio )} " )
90+ logger .exception ("Unexpected type (audio): %s" , type (audio ))
9391 raise Error (message = f"Unexpected type (audio): { type (audio )} " )
94- logger .info (f "Generating audio for '{ text } '" )
92+ logger .info ("Generating audio for '%s'" , text )
9593 audio_np : ndarray [tuple [float32 ], dtype [float32 ]] = audio .numpy ()
96- logger .info (f"Saving audio file at { self .settings .directory .results } " )
97- self ._save_file_wav (audio_np )
98- yield 24000 , audio_np
99- if first :
100- first = False
101- yield 24000 , zeros (1 ).numpy ()
94+ logger .info ("Saving audio file at %s" , self .settings .directory .results )
95+ file_result_path = self ._save_file_wav (audio_np )
96+ yield file_result_path
10297
10398 def gui (self ) -> Blocks :
10499 """Create the Gradio interface for the voice generation web app."""
@@ -126,19 +121,20 @@ def gui(self) -> Blocks:
126121 interactive = False ,
127122 streaming = True ,
128123 autoplay = True ,
124+ type = "filepath" ,
129125 )
130126 with Row ():
131127 stream_btn : Button = Button (value = "Generate" , variant = "primary" )
132128 stop_btn : Button = Button (value = "Stop" , variant = "stop" )
133129 stream_event = stream_btn .click (
134- fn = self .generate_audio_for_text ,
135- inputs = [text , voice , speed ],
136- outputs = [out_audio ],
130+ self .generate_audio_for_text ,
131+ [text , voice , speed ],
132+ [out_audio ],
137133 )
138134 stop_btn .click (fn = None , cancels = stream_event )
139135 return app
140136
141- def _save_file_wav (self , audio : ndarray [tuple [float32 ], dtype [float32 ]]) -> None :
137+ def _save_file_wav (self , audio : ndarray [tuple [float32 ], dtype [float32 ]]) -> Path :
142138 """
143139 Save an audio array to a WAV file at a fixed 24000 Hz sampling rate and return the file path.
144140
@@ -153,10 +149,11 @@ def _save_file_wav(self, audio: ndarray[tuple[float32], dtype[float32]]) -> None
153149 """
154150 file_result_path : Path = self .settings .directory .results / f"{ uuid4 ()} .wav"
155151 try :
156- logger .info (f "Saving audio to { file_result_path } " )
152+ logger .info ("Saving audio to %s" , file_result_path )
157153 write (file = file_result_path , data = audio , samplerate = 24000 )
158- logger .info (f "Audio saved to { file_result_path } " )
154+ logger .info ("Audio saved to %s" , file_result_path )
159155 except Exception as e :
160156 _msg = f"Failed to save audio to { file_result_path } : { e } "
161157 logger .exception (_msg )
162158 raise RuntimeError (_msg ) from e
159+ return file_result_path
0 commit comments