Skip to content

Commit fd3c64f

Browse files
committed
✨ feat: integrate Kokoro TTS for language selection voice preview
Replace the previous simple espeak-ng playback in the language selector with a Kokoro TTS integration that provides high-quality neural voice previews.
- Add _parse_voice_map() to read locale-voice-map.conf and select the correct engine/voice/lang_code per locale.
- Pre-generate Kokoro WAVs in a background thread at startup (favorites en_US, pt_BR, es_ES prioritized).
- Use cached WAV playback (via paplay) when available, falling back to espeak-ng for instant, zero-latency speech on the first visit.
- Add a _kokoro_generate() background worker with tempfile caching.
- Add ensure_orca_disabled() to kill ORCA at startup — the user activates it manually via Super+Alt+S.
- Add _set_speechd_language() to update the speech-dispatcher language when the user selects a language, so ORCA uses the correct TTS voice.
- Cancel ORCA speech for ALL languages (not just non-pt_BR).
1 parent 88f6182 commit fd3c64f

4 files changed

Lines changed: 204 additions & 15 deletions

File tree

biglinux-livecd/usr/share/biglinux/calamares/src/utils/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# Application Information
1010
APP_NAME = "BigLinux Calamares Config"
1111
APP_ID = "com.biglinux.calamares-config"
12-
APP_VERSION = "1.1.4"
12+
APP_VERSION = "1.2.0"
1313

1414
# Paths and Directories
1515
BASE_DIR = Path(__file__).parent.parent.parent

biglinux-livecd/usr/share/biglinux/livecd/accessibility.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,19 @@ def start_orca() -> bool:
5858
except subprocess.TimeoutExpired:
5959
logger.warning("Timeout checking for ORCA process")
6060
return False
61+
62+
63+
def ensure_orca_disabled() -> None:
    """Kill any running ORCA and disable GNOME auto-start of screen reader.

    Two commands are launched without waiting for them to finish:
    ``pkill -x orca`` and a ``gsettings set`` that turns
    ``screen-reader-enabled`` off. A command that cannot be started (for
    example because the binary is not installed) is logged and skipped, so
    a missing tool never aborts the caller.
    """
    commands = (
        ["pkill", "-x", "orca"],
        [
            "gsettings", "set", "org.gnome.desktop.a11y.applications",
            "screen-reader-enabled", "false",
        ],
    )
    for argv in commands:
        try:
            # Fire-and-forget: the exit status is not needed here.
            subprocess.Popen(
                argv,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except OSError as e:
            # Popen raises OSError (e.g. FileNotFoundError) when the
            # executable cannot be launched; this is best-effort only.
            logger.warning(f"Could not run {argv[0]}: {e}")
    logger.info("Ensured ORCA is disabled at startup")

biglinux-livecd/usr/share/biglinux/livecd/ui/app_window.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from ui.keyboard_view import KeyboardView
1212
from ui.desktop_view import DesktopView
1313
from ui.theme_view import ThemeView
14-
from accessibility import announce, start_orca
14+
from accessibility import announce, start_orca, ensure_orca_disabled
1515
from logging_config import get_logger
1616
import os
1717

@@ -71,6 +71,8 @@ def __init__(self, system_service: SystemService, **kwargs):
7171
self.config = SetupConfig()
7272
self.completed_steps = set() # Track completed steps
7373
self.is_simplified_env = system_service.is_simplified_environment()
74+
# Ensure ORCA is not running — user activates it manually via Super+Alt+S
75+
ensure_orca_disabled()
7476
self.set_title(_("BigLinux Setup"))
7577
self.update_property(
7678
[Gtk.AccessibleProperty.DESCRIPTION],
@@ -403,6 +405,9 @@ def _on_language_selected(self, view, selection):
403405
self._retranslate_ui()
404406
# --- END DYNAMIC TRANSLATION ---
405407

408+
# Set speech-dispatcher language so ORCA uses correct TTS voice
409+
self._set_speechd_language(lang_code)
410+
406411
# Mark language step as completed
407412
self.completed_steps.add("language")
408413

@@ -569,3 +574,18 @@ def _on_key_press_event(self, controller, keyval, keycode, state):
569574
if isinstance(current_view, LanguageView):
570575
return current_view.handle_global_key_press(keyval)
571576
return False
577+
578+
def _set_speechd_language(self, lang_code: str) -> None:
    """Set speech-dispatcher language so ORCA speaks in the selected language.

    Opens a short-lived speech-dispatcher client, sets its language to
    ``lang_code`` and closes it again. Any failure there (module missing,
    server unreachable, ...) is ignored on purpose. Afterwards the ``LANG``
    environment variable of this process is updated.

    Args:
        lang_code: Language code handed to speech-dispatcher. When empty
            the method does nothing at all.
    """
    if not lang_code:
        return
    try:
        import speechd
        client = speechd.SSIPClient("biglinux-wizard-lang")
        try:
            client.set_language(lang_code)
        finally:
            # Always release the connection, even if set_language() fails.
            client.close()
    except Exception:
        # Best effort: speech-dispatcher is optional and must never
        # interrupt language selection.
        pass
    # Also update LANG for any newly spawned TTS processes. Fall back to
    # lang_code when the configured language has no usable ``code`` so we
    # never export something like "None.UTF-8".
    locale_code = getattr(self.config.language, "code", None) or lang_code
    os.environ["LANG"] = f"{locale_code}.UTF-8"

biglinux-livecd/usr/share/biglinux/livecd/ui/language_view.py

Lines changed: 166 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from gi.repository import Gtk, Adw, Gio, GObject, Gdk, GLib
66
import json
77
import subprocess
8+
import tempfile
9+
import threading
810
import unicodedata
911
from urllib.parse import parse_qs, urlparse
1012
from translations import _
@@ -51,6 +53,56 @@
5153
"zh": "中文",
5254
}
5355

56+
# ─── Kokoro TTS integration ───────────────────────────────────────────────
# Locale → voice mapping file read by _parse_voice_map().
_VOICE_MAP_PATH = "/usr/share/biglinux-kokoro-tts/locale-voice-map.conf"
# The koko executable plus the model and voices files it is invoked with.
_KOKO_BIN = "/usr/bin/koko"
_KOKO_MODEL = "/usr/share/biglinux-kokoro-tts/model/model.onnx"
_KOKO_VOICES = "/usr/share/biglinux-kokoro-tts/voices/voices.bin"
# Evaluated once at import: True only when all three files above exist.
_HAS_KOKO = all(map(os.path.isfile, (_KOKO_BIN, _KOKO_MODEL, _KOKO_VOICES)))
62+
63+
64+
def _parse_voice_map(path=None):
    """Parse locale-voice-map.conf → {locale: (engine, voice, lang_code)}.

    Each useful line has the form ``locale = engine:voice[:lang_code]``.
    Blank lines, lines starting with ``#``, lines without ``=`` and lines
    whose value has fewer than two ``:``-separated fields are skipped. A
    missing third field becomes ``""``.

    Args:
        path: File to read. Defaults to ``_VOICE_MAP_PATH``.

    Returns:
        The parsed mapping. If the file is missing, unreadable or not valid
        UTF-8, whatever was parsed before the error is returned (possibly
        an empty dict); no exception propagates.
    """
    if path is None:
        path = _VOICE_MAP_PATH
    result = {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if not line or line.startswith("#"):
                    continue
                # ``sep`` rather than ``_``: the latter is the name this
                # module uses for the translation function.
                locale, sep, val = line.partition("=")
                if not sep:
                    continue
                parts = [p.strip() for p in val.strip().split(":")]
                if len(parts) >= 2:
                    result[locale.strip()] = (
                        parts[0],
                        parts[1],
                        parts[2] if len(parts) > 2 else "",
                    )
    except (OSError, UnicodeDecodeError):
        # This runs at import time; an unreadable or corrupt map must
        # degrade to "no configured voices" instead of breaking the import.
        pass
    return result
87+
88+
89+
# Locale → (engine, voice, lang_code) table, parsed once at import time.
_VOICE_MAP = _parse_voice_map()
# Generated Kokoro WAVs: "voice:lang_code:text" cache key → temp WAV path.
_KOKORO_WAV_CACHE = {}
# Held while reading or writing _KOKORO_WAV_CACHE, which background
# generation threads also update.
_KOKORO_CACHE_LOCK = threading.Lock()
92+
93+
94+
def _voice_config_for_locale(locale_code):
    """Resolve the ``(engine, voice, lang_code)`` tuple for a locale.

    Resolution order: an exact match in ``_VOICE_MAP``; otherwise the first
    entry whose key shares the language prefix (the part before ``_``);
    otherwise the ``"*"`` entry; otherwise a built-in espeak English config.
    """
    try:
        return _VOICE_MAP[locale_code]
    except KeyError:
        pass
    # e.g. "pt_PT" → "pt_", so any other "pt_*" entry can stand in.
    prefix = locale_code.split("_")[0] + "_"
    for candidate in _VOICE_MAP:
        if candidate.startswith(prefix):
            return _VOICE_MAP[candidate]
    return _VOICE_MAP.get("*", ("espeak", "en", "en"))
105+
54106

55107
def normalize_string(s: str) -> str:
56108
"""Normalizes a string by converting to lowercase and removing diacritics."""
@@ -163,6 +215,8 @@ def _load_languages(self):
163215
language_data.sort(key=lambda x: (x.code not in favorites_order, favorites_order.get(x.code, 999), x.name))
164216
self._store.splice(0, 0, language_data)
165217
GLib.idle_add(self._post_load_setup)
218+
# Pre-generate Kokoro WAVs in background (thread-safe copy of data)
219+
self._start_kokoro_precache(language_data)
166220

167221
except (FileNotFoundError, json.JSONDecodeError) as e:
168222
logger.error(f"Error loading languages: {e}")
@@ -177,6 +231,7 @@ def _create_filtered_model(self):
177231
selection_model.connect("selection-changed", self._on_selection_changed)
178232
self._espeak_proc = None
179233
self._speak_timeout_id = 0
234+
self._tts_gen = 0
180235
# speech-dispatcher client for fast cancel (avoids subprocess overhead)
181236
self._spd_client = None
182237
self._spd_scope_all = None
@@ -198,41 +253,64 @@ def _cancel_orca(self):
198253
pass
199254

200255
def _on_selection_changed(self, selection_model, position, n_items):
201-
"""For pt_BR, let ORCA speak with the default Letícia voice.
202-
For other languages, cancel ORCA and use espeak-ng with the native voice."""
256+
"""Speak the selected language name using Kokoro TTS or espeak-ng fallback."""
203257
# Cancel any pending delayed speak
204258
if self._speak_timeout_id > 0:
205259
GLib.source_remove(self._speak_timeout_id)
206260
self._speak_timeout_id = 0
207-
# Kill any ongoing espeak-ng process
261+
# Kill any ongoing TTS process
208262
if self._espeak_proc and self._espeak_proc.poll() is None:
209263
self._espeak_proc.terminate()
210264
self._espeak_proc = None
265+
self._tts_gen += 1
211266
selected = selection_model.get_selected()
212267
if selected == Gtk.INVALID_LIST_POSITION:
213268
return
214269
item = selection_model.get_item(selected)
215270
if not item:
216271
return
217-
# For pt_BR: do nothing, let ORCA read with Letícia voice
218-
if item.code == "pt_BR":
219-
return
220-
# Immediately cancel ORCA speech via Python API (instant, no fork)
272+
# Cancel ORCA speech for ALL languages
221273
self._cancel_orca()
222-
# Schedule espeak-ng after a brief delay to also cancel any ORCA re-queue
274+
# Build text to speak
223275
parts = item.name.split(" - ", 1)
224276
country = parts[1] if len(parts) > 1 else ""
225277
native_name = _NATIVE_LANG_NAMES.get(item.code[:2], item.name_orig)
226278
text = f"{native_name}, {country}" if country else native_name
227-
voice = item.code.replace("_", "-") # "en_US" -> "en-US"
228-
self._speak_timeout_id = GLib.timeout_add(50, self._do_espeak, voice, text)
279+
# Look up voice config from locale-voice-map.conf
280+
engine, voice, lang_code = _voice_config_for_locale(item.code)
281+
if engine == "kokoro" and _HAS_KOKO:
282+
cache_key = f"{voice}:{lang_code}:{text}"
283+
with _KOKORO_CACHE_LOCK:
284+
cached_wav = _KOKORO_WAV_CACHE.get(cache_key)
285+
if cached_wav and os.path.isfile(cached_wav):
286+
# Kokoro WAV is cached — play it instantly
287+
self._speak_timeout_id = GLib.timeout_add(
288+
50, self._play_wav, cached_wav
289+
)
290+
else:
291+
# Not cached yet — play espeak immediately (zero latency),
292+
# and generate Kokoro WAV in background for next visit
293+
espeak_voice = lang_code if lang_code else item.code.replace("_", "-")
294+
self._speak_timeout_id = GLib.timeout_add(
295+
50, self._do_espeak, espeak_voice, text
296+
)
297+
threading.Thread(
298+
target=self._kokoro_generate,
299+
args=(voice, lang_code, text, cache_key),
300+
daemon=True,
301+
).start()
302+
else:
303+
espeak_voice = lang_code if engine == "kokoro" else voice
304+
if not espeak_voice:
305+
espeak_voice = item.code.replace("_", "-")
306+
self._speak_timeout_id = GLib.timeout_add(
307+
50, self._do_espeak, espeak_voice, text
308+
)
229309

230310
def _do_espeak(self, voice, text):
231-
"""Cancel ORCA speech and speak with espeak-ng in native voice."""
311+
"""Speak with espeak-ng in native voice."""
232312
self._speak_timeout_id = 0
233-
# Cancel any ORCA speech that was re-queued
234313
self._cancel_orca()
235-
# Speak with espeak-ng using the native voice
236314
try:
237315
self._espeak_proc = subprocess.Popen(
238316
["espeak-ng", "-v", voice, "--", text],
@@ -243,6 +321,81 @@ def _do_espeak(self, voice, text):
243321
logger.debug("espeak-ng not found")
244322
return GLib.SOURCE_REMOVE
245323

324+
def _play_wav(self, wav_path):
    """Play a cached WAV file with ``paplay``.

    Written as a GLib timeout callback: it clears the pending timeout id,
    cancels ORCA speech, starts playback and returns ``GLib.SOURCE_REMOVE``
    so it runs only once.

    Args:
        wav_path: Path of the WAV file to play.
    """
    self._speak_timeout_id = 0
    self._cancel_orca()
    try:
        # Kept in _espeak_proc so the next selection change can terminate
        # playback the same way it terminates espeak-ng.
        self._espeak_proc = subprocess.Popen(
            ["paplay", wav_path],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        # Same handling as a missing espeak-ng: note it and stay silent.
        logger.debug("paplay not found")
    return GLib.SOURCE_REMOVE
337+
338+
def _kokoro_generate(self, voice, lang_code, text, cache_key):
    """Background: generate WAV with koko and cache it (does not play).

    Runs the koko binary synchronously (30 s timeout), writing into a fresh
    temporary file. On success the file path is stored in
    ``_KOKORO_WAV_CACHE[cache_key]``. In every other case (koko failed,
    produced no data, raised, or another thread already cached a valid file
    for the same key) the temporary file is removed.

    Args:
        voice: Value passed to koko's ``--style`` option.
        lang_code: Value passed to koko's ``--lan`` option.
        text: Text to synthesize.
        cache_key: Key under which the WAV path is cached.
    """
    # Skip the expensive run when a usable file is already cached; several
    # threads can be asked to generate the same key.
    with _KOKORO_CACHE_LOCK:
        existing = _KOKORO_WAV_CACHE.get(cache_key)
    if existing and os.path.isfile(existing):
        return

    tmpwav = None
    cached = False
    try:
        fd, tmpwav = tempfile.mkstemp(prefix="bw-", suffix=".wav")
        os.close(fd)
        proc = subprocess.run(
            [
                _KOKO_BIN, "text", text,
                "-m", _KOKO_MODEL, "-d", _KOKO_VOICES,
                "--lan", lang_code, "--style", voice, "--force-style",
                "-o", tmpwav,
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=30,
        )
        if proc.returncode == 0 and os.path.isfile(tmpwav) and os.path.getsize(tmpwav) > 0:
            with _KOKORO_CACHE_LOCK:
                current = _KOKORO_WAV_CACHE.get(cache_key)
                # First valid writer wins; overwriting would orphan the
                # other thread's temp file. A stale entry is replaced.
                if not (current and os.path.isfile(current)):
                    _KOKORO_WAV_CACHE[cache_key] = tmpwav
                    cached = True
    except Exception as e:
        # Best effort: a failed preview must never affect the UI.
        logger.debug(f"Kokoro WAV generation failed: {e}")
    finally:
        # Remove the temp file unless it is now owned by the cache.
        if tmpwav and not cached:
            try:
                os.unlink(tmpwav)
            except OSError:
                pass
367+
368+
def _start_kokoro_precache(self, language_data):
    """Kick off background generation of Kokoro WAVs for Kokoro locales.

    Does nothing when the Kokoro files are not installed. Otherwise it
    builds one plain-tuple job per language whose configured engine is
    "kokoro", orders the jobs so en_US, pt_BR and es_ES come first, and
    hands the list to ``_precache_worker`` on a daemon thread.

    Args:
        language_data: Iterable of language items exposing ``code``,
            ``name`` and ``name_orig``.
    """
    if not _HAS_KOKO:
        return
    # Only plain values go into the jobs so the worker thread never
    # touches GObjects.
    rank_by_code = {"en_US": 0, "pt_BR": 1, "es_ES": 2}
    jobs = []
    for entry in language_data:
        engine, voice, lang_code = _voice_config_for_locale(entry.code)
        if engine == "kokoro":
            # Text after the first " - " in the display name is the country.
            _head, _sep, country = entry.name.partition(" - ")
            native_name = _NATIVE_LANG_NAMES.get(entry.code[:2], entry.name_orig)
            if country:
                text = f"{native_name}, {country}"
            else:
                text = native_name
            cache_key = f"{voice}:{lang_code}:{text}"
            rank = rank_by_code.get(entry.code, 999)
            jobs.append((rank, voice, lang_code, text, cache_key))
    # Stable sort: non-favorites keep their incoming order.
    ordered = sorted(jobs, key=lambda job: job[0])
    worker = threading.Thread(
        target=self._precache_worker, args=(ordered,), daemon=True
    )
    worker.start()
390+
391+
def _precache_worker(self, tasks):
    """Background: generate Kokoro WAVs one after another, in task order.

    Args:
        tasks: Sequence of ``(priority, voice, lang_code, text, cache_key)``
            tuples. Entries whose key is already cached are skipped.
    """
    for _priority, voice, lang_code, text, cache_key in tasks:
        # Hold the lock only for the lookup, not for the slow generation.
        with _KOKORO_CACHE_LOCK:
            already_cached = cache_key in _KOKORO_WAV_CACHE
        if not already_cached:
            self._kokoro_generate(voice, lang_code, text, cache_key)
398+
246399
def _activate_item(self, item):
247400
if not item:
248401
return

0 commit comments

Comments
 (0)