Skip to content

Commit c7018fc

Browse files
committed
fix(deepgram): sync missing STT/TTS API params, fix duplicate languages, add TTS model list
**models.py**
- Add `nova-2-atc`, `nova-3-multilingual` to `DeepgramModels`
  Ref: https://developers.deepgram.com/docs/models-languages-overview
- Promote `flux-general-en` into `DeepgramModels` (was only in `V2Models`)
- Remove duplicate entries in `DeepgramLanguages` (hi, pt, pt-BR, sv each appeared multiple times due to copy-paste artifact)
- Add `TTSModels` Literal with all Aura-2 and Aura-1 voice names
  Ref: https://developers.deepgram.com/docs/tts-models

**stt.py**
- Add `utterance_end_ms` (int | None): silence duration to emit UtteranceEnd; requires interim_results=True
  Ref: https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-utterance_end_ms
- Add `dictation` (bool): converts spoken punctuation commands into marks
  Ref: https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-dictation
- Add `redact` (list[str]): redact PCI/PII/SSN from transcripts
  Ref: https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-redact
- Add `replace` (dict[str, str]): term replacement in transcripts
  Ref: https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-replace
- Add `search` (list[str]): highlight search terms with confidence scores
  Ref: https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-search
- Handle `UtteranceEnd` event in `_process_stream_event` to emit END_OF_SPEECH

**tts.py**
- Add `bit_rate` (int | None): for compressed encodings (e.g. mp3)
  Ref: https://developers.deepgram.com/reference/text-to-speech-api#query-bit_rate
- Use `TTSModels | str` as model type
- Expand `update_options` to include `encoding`, `sample_rate`, `bit_rate`

**_utils.py**
- Handle `replace` dict in `_to_deepgram_url` by encoding as "term:replacement" pairs, consistent with how `keywords` are encoded
1 parent ed82dbd commit c7018fc

5 files changed

Lines changed: 209 additions & 31 deletions

File tree

livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/__init__.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,23 @@
1919
See https://docs.livekit.io/agents/integrations/stt/deepgram/ for more information.
2020
"""
2121

22+
from .models import DeepgramModels, DeepgramLanguages, TTSModels
2223
from .stt import STT, SpeechStream
2324
from .stt_v2 import SpeechStreamv2, STTv2
2425
from .tts import TTS
2526
from .version import __version__
2627

27-
__all__ = ["STT", "SpeechStream", "STTv2", "SpeechStreamv2", "__version__", "TTS"]
28+
__all__ = [
29+
"STT",
30+
"SpeechStream",
31+
"STTv2",
32+
"SpeechStreamv2",
33+
"TTS",
34+
"DeepgramModels",
35+
"DeepgramLanguages",
36+
"TTSModels",
37+
"__version__",
38+
]
2839

2940

3041
from livekit.agents import Plugin

livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/_utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ def _to_deepgram_url(opts: dict, base_url: str, *, websocket: bool) -> str:
4646
opts["keywords"] = [
4747
f"{keyword}:{intensifier}" for (keyword, intensifier) in opts["keywords"]
4848
]
49+
if opts.get("replace"):
50+
# convert replace dict to a list of "term:replacement"
51+
# https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-replace
52+
opts["replace"] = [f"{term}:{replacement}" for term, replacement in opts["replace"].items()]
4953

5054
# lowercase bools
5155
opts = {k: str(v).lower() if isinstance(v, bool) else v for k, v in opts.items()}

livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/models.py

Lines changed: 74 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@
1414
"nova-2-medical",
1515
"nova-2-drivethru",
1616
"nova-2-automotive",
17+
"nova-2-atc",
1718
"nova-3",
1819
"nova-3-general",
1920
"nova-3-medical",
21+
"nova-3-multilingual",
2022
"enhanced-general",
2123
"enhanced-meeting",
2224
"enhanced-phonecall",
@@ -33,10 +35,12 @@
3335
"whisper-small",
3436
"whisper-medium",
3537
"whisper-large",
38+
"flux-general-en",
3639
]
3740

3841
V2Models = Literal["flux-general-en"]
3942

43+
# https://developers.deepgram.com/docs/models-languages-overview
4044
DeepgramLanguages = Literal[
4145
"zh",
4246
"zh-CN",
@@ -54,30 +58,87 @@
5458
"de",
5559
"hi",
5660
"hi-Latn",
57-
"pt",
58-
"pt-BR",
59-
"es",
60-
"es-419",
61-
"hi",
62-
"hi-Latn",
61+
"id",
6362
"it",
6463
"ja",
6564
"ko",
6665
"no",
6766
"pl",
6867
"pt",
6968
"pt-BR",
69+
"ru",
70+
"es",
71+
"es-419",
7072
"es-LATAM",
7173
"sv",
7274
"ta",
7375
"taq",
74-
"uk",
75-
"tr",
76-
"sv",
77-
"id",
78-
"pt",
79-
"pt-BR",
80-
"ru",
8176
"th",
77+
"tr",
78+
"uk",
8279
"multi",
8380
]
81+
82+
# https://developers.deepgram.com/docs/tts-models
83+
TTSModels = Literal[
84+
# Aura-2 English
85+
"aura-2-andromeda-en",
86+
"aura-2-apollo-en",
87+
"aura-2-arcas-en",
88+
"aura-2-aries-en",
89+
"aura-2-artemis-en",
90+
"aura-2-asteria-en",
91+
"aura-2-atlas-en",
92+
"aura-2-aurora-en",
93+
"aura-2-callisto-en",
94+
"aura-2-cetus-en",
95+
"aura-2-chiron-en",
96+
"aura-2-columbia-en",
97+
"aura-2-cordelia-en",
98+
"aura-2-crina-en",
99+
"aura-2-draco-en",
100+
"aura-2-electra-en",
101+
"aura-2-eos-en",
102+
"aura-2-harmonia-en",
103+
"aura-2-helios-en",
104+
"aura-2-hera-en",
105+
"aura-2-hermes-en",
106+
"aura-2-hyperion-en",
107+
"aura-2-io-en",
108+
"aura-2-iris-en",
109+
"aura-2-janus-en",
110+
"aura-2-juno-en",
111+
"aura-2-jupiter-en",
112+
"aura-2-luna-en",
113+
"aura-2-mars-en",
114+
"aura-2-minerva-en",
115+
"aura-2-mira-en",
116+
"aura-2-neptune-en",
117+
"aura-2-odysseus-en",
118+
"aura-2-ophiuchus-en",
119+
"aura-2-orion-en",
120+
"aura-2-orpheus-en",
121+
"aura-2-phoebe-en",
122+
"aura-2-pluto-en",
123+
"aura-2-saturn-en",
124+
"aura-2-selene-en",
125+
"aura-2-theia-en",
126+
"aura-2-titan-en",
127+
"aura-2-triton-en",
128+
"aura-2-vega-en",
129+
"aura-2-venus-en",
130+
"aura-2-zeus-en",
131+
# Aura-1 English (legacy)
132+
"aura-asteria-en",
133+
"aura-luna-en",
134+
"aura-stella-en",
135+
"aura-athena-en",
136+
"aura-hera-en",
137+
"aura-orion-en",
138+
"aura-arcas-en",
139+
"aura-perseus-en",
140+
"aura-angus-en",
141+
"aura-orpheus-en",
142+
"aura-helios-en",
143+
"aura-zeus-en",
144+
]

livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/stt.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ class STTOptions:
7272
numerals: bool = False
7373
mip_opt_out: bool = False
7474
tags: NotGivenOr[list[str]] = NOT_GIVEN
75+
utterance_end_ms: int | None = None
76+
dictation: bool = False
77+
redact: list[str] | None = None
78+
replace: dict[str, str] | None = None
79+
search: list[str] | None = None
7580

7681

7782
class STT(stt.STT):
@@ -100,6 +105,11 @@ def __init__(
100105
numerals: bool = False,
101106
mip_opt_out: bool = False,
102107
vad_events: bool = True,
108+
utterance_end_ms: int | None = None,
109+
dictation: bool = False,
110+
redact: list[str] | None = None,
111+
replace: dict[str, str] | None = None,
112+
search: list[str] | None = None,
103113
# deprecated
104114
keyterms: NotGivenOr[list[str]] = NOT_GIVEN,
105115
) -> None:
@@ -130,6 +140,21 @@ def __init__(
130140
mip_opt_out: Whether to take part in the model improvement program
131141
vad_events: Whether to enable VAD (Voice Activity Detection) events.
132142
When enabled, SpeechStarted events are sent when speech is detected. Defaults to True.
143+
utterance_end_ms: Duration of silence in milliseconds to detect the end of an utterance
144+
and emit an UtteranceEnd event. Requires interim_results=True.
145+
See https://developers.deepgram.com/docs/understand-endpointing-interim-results
146+
dictation: Whether to enable dictation mode which converts spoken punctuation commands
147+
(e.g. "comma", "period") into punctuation marks. Defaults to False.
148+
See https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-dictation
149+
redact: List of sensitive information types to redact from the transcript
150+
(e.g. ["pci", "pii", "numbers", "ssn"]).
151+
See https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-redact
152+
replace: Dictionary of terms to replace in the transcript, where keys are the original
153+
terms and values are the replacements (e.g. {"hello": "hi"}).
154+
See https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-replace
155+
search: List of terms to search for in the transcript. Matched terms are returned with
156+
confidence scores in the response.
157+
See https://developers.deepgram.com/reference/speech-to-text/listen-streaming#query-search
133158
134159
Raises:
135160
ValueError: If no API key is provided or found in environment variables.
@@ -185,6 +210,11 @@ def __init__(
185210
vad_events=vad_events,
186211
tags=_validate_tags(tags) if is_given(tags) else [],
187212
endpoint_url=base_url,
213+
utterance_end_ms=utterance_end_ms,
214+
dictation=dictation,
215+
redact=redact,
216+
replace=replace,
217+
search=search,
188218
)
189219
self._session = http_session
190220
self._streams = weakref.WeakSet[SpeechStream]()
@@ -298,6 +328,11 @@ def update_options(
298328
vad_events: NotGivenOr[bool] = NOT_GIVEN,
299329
tags: NotGivenOr[list[str]] = NOT_GIVEN,
300330
endpoint_url: NotGivenOr[str] = NOT_GIVEN,
331+
utterance_end_ms: NotGivenOr[int | None] = NOT_GIVEN,
332+
dictation: NotGivenOr[bool] = NOT_GIVEN,
333+
redact: NotGivenOr[list[str] | None] = NOT_GIVEN,
334+
replace: NotGivenOr[dict[str, str] | None] = NOT_GIVEN,
335+
search: NotGivenOr[list[str] | None] = NOT_GIVEN,
301336
# deprecated
302337
keyterms: NotGivenOr[list[str]] = NOT_GIVEN,
303338
) -> None:
@@ -342,6 +377,16 @@ def update_options(
342377
self._opts.tags = _validate_tags(tags)
343378
if is_given(endpoint_url):
344379
self._opts.endpoint_url = endpoint_url
380+
if is_given(utterance_end_ms):
381+
self._opts.utterance_end_ms = utterance_end_ms
382+
if is_given(dictation):
383+
self._opts.dictation = dictation
384+
if is_given(redact):
385+
self._opts.redact = redact
386+
if is_given(replace):
387+
self._opts.replace = replace
388+
if is_given(search):
389+
self._opts.search = search
345390

346391
for stream in self._streams:
347392
stream.update_options(
@@ -361,6 +406,11 @@ def update_options(
361406
mip_opt_out=mip_opt_out,
362407
vad_events=vad_events,
363408
endpoint_url=endpoint_url,
409+
utterance_end_ms=utterance_end_ms,
410+
dictation=dictation,
411+
redact=redact,
412+
replace=replace,
413+
search=search,
364414
)
365415

366416
def _sanitize_options(
@@ -432,6 +482,11 @@ def update_options(
432482
vad_events: NotGivenOr[bool] = NOT_GIVEN,
433483
tags: NotGivenOr[list[str]] = NOT_GIVEN,
434484
endpoint_url: NotGivenOr[str] = NOT_GIVEN,
485+
utterance_end_ms: NotGivenOr[int | None] = NOT_GIVEN,
486+
dictation: NotGivenOr[bool] = NOT_GIVEN,
487+
redact: NotGivenOr[list[str] | None] = NOT_GIVEN,
488+
replace: NotGivenOr[dict[str, str] | None] = NOT_GIVEN,
489+
search: NotGivenOr[list[str] | None] = NOT_GIVEN,
435490
# deprecated
436491
keyterms: NotGivenOr[list[str]] = NOT_GIVEN,
437492
) -> None:
@@ -476,6 +531,16 @@ def update_options(
476531
self._opts.tags = _validate_tags(tags)
477532
if is_given(endpoint_url):
478533
self._opts.endpoint_url = endpoint_url
534+
if is_given(utterance_end_ms):
535+
self._opts.utterance_end_ms = utterance_end_ms
536+
if is_given(dictation):
537+
self._opts.dictation = dictation
538+
if is_given(redact):
539+
self._opts.redact = redact
540+
if is_given(replace):
541+
self._opts.replace = replace
542+
if is_given(search):
543+
self._opts.search = search
479544

480545
self._reconnect_event.set()
481546

@@ -617,6 +682,16 @@ async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
617682
live_config["keywords"] = self._opts.keywords
618683
if self._opts.keyterm:
619684
live_config["keyterm"] = self._opts.keyterm
685+
if self._opts.utterance_end_ms is not None:
686+
live_config["utterance_end_ms"] = self._opts.utterance_end_ms
687+
if self._opts.dictation:
688+
live_config["dictation"] = True
689+
if self._opts.redact:
690+
live_config["redact"] = self._opts.redact
691+
if self._opts.replace:
692+
live_config["replace"] = self._opts.replace
693+
if self._opts.search:
694+
live_config["search"] = self._opts.search
620695

621696
if self._opts.language:
622697
live_config["language"] = self._opts.language
@@ -716,6 +791,12 @@ def _process_stream_event(self, data: dict) -> None:
716791
self._speaking = False
717792
self._event_ch.send_nowait(stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH))
718793

794+
elif data["type"] == "UtteranceEnd":
795+
# Fired when utterance_end_ms is set and the configured silence duration has elapsed.
796+
# https://developers.deepgram.com/docs/understand-endpointing-interim-results
797+
if self._speaking:
798+
self._speaking = False
799+
self._event_ch.send_nowait(stt.SpeechEvent(type=stt.SpeechEventType.END_OF_SPEECH))
719800
elif data["type"] == "Metadata":
720801
pass # metadata is too noisy
721802
else:

0 commit comments

Comments (0)