Skip to content

Commit 9f70a3d

Browse files
authored
Merge branch 'main' into renovate/pip-25.x
2 parents 49cbbdf + 55c9f03 commit 9f70a3d

File tree

5 files changed

+129
-109
lines changed

5 files changed

+129
-109
lines changed

.github/workflows/build.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ jobs:
4646
with:
4747
commit_message: "Version: ${{ github.ref_name }}"
4848
branch: main
49+
commit_options: '--no-verify'
4950
github_release:
5051
name: Create GitHub Release
5152
needs: setup_and_build

.trunk/configs/.yamllint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,6 @@ rules:
2323
line-length:
2424
level: warning
2525
allow-non-breakable-inline-mappings: true
26-
max: 140
26+
max: 145
2727
truthy: disable
2828
key-duplicates: enable

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "vocalizr"
3-
version = "0.0.1"
3+
version = "0.0.4"
44
description = "Voice Generator part of the Chatacter Backend"
55
readme = "README.md"
66
requires-python = ">=3.12, <3.14"

src/vocalizr/app/builder.py

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections.abc import Generator
22
from pathlib import Path
3-
from typing import Any, Literal
3+
from typing import Any
44
from uuid import uuid4
55

66
from gradio import (
@@ -17,13 +17,14 @@
1717
from kokoro import KPipeline
1818
from numpy import dtype, float32, ndarray
1919
from soundfile import write
20-
from torch import zeros
2120

2221
from vocalizr.app.logger import logger
2322
from vocalizr.app.settings import Settings, Voices
2423

2524

2625
class App:
26+
"""Main application class for the Vocalizr text-to-speech app."""
27+
2728
def __init__(self, settings: Settings) -> None:
2829
self.settings: Settings = settings
2930
logger.info("Downloading Kokoro model checkpoint")
@@ -37,14 +38,10 @@ def generate_audio_for_text(
3738
text: str,
3839
voice: Voices = Voices.AMERICAN_FEMALE_HEART,
3940
speed: float = 1.0,
40-
) -> Generator[
41-
tuple[Literal[24000], ndarray[tuple[float32], dtype[float32]]]
42-
| tuple[int, ndarray],
43-
Any,
44-
None,
45-
]:
41+
) -> Generator[Path, Any, None]:
4642
"""
47-
Generates audio from the provided text using the specified voice and speed.
43+
Generate audio from the provided text using the specified voice and speed.
44+
4845
It allows saving the generated audio to a file if required. The function
4946
yields tuples containing the audio sampling rate and the audio data as a
5047
NumPy array.
@@ -73,7 +70,7 @@ def generate_audio_for_text(
7370
_msg = "No text provided"
7471
logger.exception(_msg)
7572
raise ValueError(_msg)
76-
elif len(text) < 4:
73+
if len(text) < 4:
7774
_msg = f"Text too short: {text} with length {len(text)}"
7875
logger.exception(_msg)
7976
raise ValueError(_msg)
@@ -84,21 +81,19 @@ def generate_audio_for_text(
8481
else text.strip()[: self.settings.model.char_limit]
8582
)
8683
generator: Generator[KPipeline.Result, None, None] = self.pipeline(
87-
text=text, voice=voice, speed=speed
84+
text=text,
85+
voice=voice,
86+
speed=speed,
8887
)
89-
first = True
9088
for _, _, audio in generator:
9189
if audio is None or isinstance(audio, str):
92-
logger.exception(f"Unexpected type (audio): {type(audio)}")
90+
logger.exception("Unexpected type (audio): %s", type(audio))
9391
raise Error(message=f"Unexpected type (audio): {type(audio)}")
94-
logger.info(f"Generating audio for '{text}'")
92+
logger.info("Generating audio for '%s'", text)
9593
audio_np: ndarray[tuple[float32], dtype[float32]] = audio.numpy()
96-
logger.info(f"Saving audio file at {self.settings.directory.results}")
97-
self._save_file_wav(audio_np)
98-
yield 24000, audio_np
99-
if first:
100-
first = False
101-
yield 24000, zeros(1).numpy()
94+
logger.info("Saving audio file at %s", self.settings.directory.results)
95+
file_result_path = self._save_file_wav(audio_np)
96+
yield file_result_path
10297

10398
def gui(self) -> Blocks:
10499
"""Create the Gradio interface for the voice generation web app."""
@@ -126,19 +121,20 @@ def gui(self) -> Blocks:
126121
interactive=False,
127122
streaming=True,
128123
autoplay=True,
124+
type="filepath",
129125
)
130126
with Row():
131127
stream_btn: Button = Button(value="Generate", variant="primary")
132128
stop_btn: Button = Button(value="Stop", variant="stop")
133129
stream_event = stream_btn.click(
134-
fn=self.generate_audio_for_text,
135-
inputs=[text, voice, speed],
136-
outputs=[out_audio],
130+
self.generate_audio_for_text,
131+
[text, voice, speed],
132+
[out_audio],
137133
)
138134
stop_btn.click(fn=None, cancels=stream_event)
139135
return app
140136

141-
def _save_file_wav(self, audio: ndarray[tuple[float32], dtype[float32]]) -> None:
137+
def _save_file_wav(self, audio: ndarray[tuple[float32], dtype[float32]]) -> Path:
142138
"""
143139
Save an audio array to a WAV file using the specified sampling rate.
144140
@@ -153,10 +149,11 @@ def _save_file_wav(self, audio: ndarray[tuple[float32], dtype[float32]]) -> None
153149
"""
154150
file_result_path: Path = self.settings.directory.results / f"{uuid4()}.wav"
155151
try:
156-
logger.info(f"Saving audio to {file_result_path}")
152+
logger.info("Saving audio to %s", file_result_path)
157153
write(file=file_result_path, data=audio, samplerate=24000)
158-
logger.info(f"Audio saved to {file_result_path}")
154+
logger.info("Audio saved to %s", file_result_path)
159155
except Exception as e:
160156
_msg = f"Failed to save audio to {file_result_path}: {e}"
161157
logger.exception(_msg)
162158
raise RuntimeError(_msg) from e
159+
return file_result_path

0 commit comments

Comments
 (0)