Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
56116f2
Add livekit-plugins-avaz for dashboard WebSocket TTS streaming.
tamerrkanak Jun 17, 2026
e98c398
Fix Avaz TTS stream() NameError by using SynthesizeStream.
tamerrkanak Jun 17, 2026
c0a804b
Sort avaz optional dependency after asyncai alphabetically.
tamerrkanak Jun 17, 2026
2da663b
Call _mark_started when text is sent for meaningful TTFB metrics.
tamerrkanak Jun 17, 2026
b672ea5
Harden Avaz WS JSON handling and document batching tradeoffs.
tamerrkanak Jun 17, 2026
a974cb3
Use asyncio.wait_for for turn timeout on Python 3.10.
tamerrkanak Jun 17, 2026
edb0070
Fix _drain_audio WebSocket scope and add stream regression test.
tamerrkanak Jun 17, 2026
82a5a0b
Fix warmup retry and pcm_accum duration logging.
tamerrkanak Jun 17, 2026
3c3c16e
Avoid duplicate end_segment in Avaz SynthesizeStream.
tamerrkanak Jun 17, 2026
0f51f42
Document chunk_notation normalization with unit tests.
tamerrkanak Jun 17, 2026
bb0656a
Reduce Avaz payload logging and simplify stream model resolution.
tamerrkanak Jun 17, 2026
8354d9f
Add py.typed marker to livekit-plugins-avaz.
tamerrkanak Jun 17, 2026
420c4ca
Call super().aclose() in Avaz TTS cleanup.
tamerrkanak Jun 17, 2026
f77fefe
Wrap Avaz WebSocket connect failures as APIConnectionError.
tamerrkanak Jun 17, 2026
2d135b3
Merge upstream/main and resolve livekit-agents pyproject conflict.
tamerrkanak Jun 17, 2026
9b4eba0
Bump livekit-plugins-avaz to 1.6.1 for optional extra constraint.
tamerrkanak Jun 17, 2026
b21a66a
Replace fixed pre-flush sleeps with recv idle drains in Avaz TTS.
tamerrkanak Jun 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions examples/voice_agents/avaz_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import logging
import os

from dotenv import load_dotenv

from livekit.agents import (
Agent,
AgentServer,
AgentSession,
JobContext,
JobProcess,
MetricsCollectedEvent,
cli,
inference,
metrics,
room_io,
)
from livekit.plugins import avaz, silero
from livekit.plugins.turn_detector.multilingual import MultilingualModel

# Logger dedicated to this example script.
logger = logging.getLogger("avaz-agent")

# Load variables from a .env file (python-dotenv) before entrypoint() reads
# the AVAZ_* settings from os.environ.
load_dotenv()


class AvazAssistant(Agent):
    """Example voice agent whose instructions mention Avaz TTS."""

    def __init__(self) -> None:
        """Initialise the base ``Agent`` with a short, conversational system prompt."""
        system_prompt = (
            "You are a helpful voice assistant powered by Avaz TTS. "
            "Keep responses concise and conversational."
        )
        super().__init__(instructions=system_prompt)

    async def on_enter(self) -> None:
        """Ask the session to generate a brief greeting reply."""
        greeting_instructions = (
            "Greet the user briefly and mention that you are using Avaz TTS."
        )
        self.session.generate_reply(instructions=greeting_instructions)


server = AgentServer()


def prewarm(proc: JobProcess) -> None:
    """Load the Silero VAD and store it in ``proc.userdata`` under ``"vad"``.

    ``entrypoint`` later reads the same key from ``ctx.proc.userdata``.
    """
    vad_model = silero.VAD.load()
    proc.userdata["vad"] = vad_model


# Register prewarm as the server's setup function.
server.setup_fnc = prewarm


@server.rtc_session()
async def entrypoint(ctx: JobContext) -> None:
    """Start an ``AgentSession`` in the job's room using Avaz for TTS.

    The Avaz client is configured from ``AVAZ_API_KEY``, ``AVAZ_BASE_URL`` and
    ``AVAZ_AGENT_MODEL_ID``; a missing variable raises ``KeyError`` because the
    values are read with ``os.environ[...]``.
    """
    ctx.log_context_fields = {"room": ctx.room.name}

    # Build the pipeline components in the same order the session receives them.
    speech_to_text = inference.STT("deepgram/nova-3", language="multi")
    language_model = inference.LLM("openai/gpt-4.1-mini")
    env = os.environ
    avaz_tts = avaz.TTS(
        api_key=env["AVAZ_API_KEY"],
        base_url=env["AVAZ_BASE_URL"],
        model_id=env["AVAZ_AGENT_MODEL_ID"],
    )
    # VAD instance stored under "vad" by prewarm().
    vad_model = ctx.proc.userdata["vad"]
    turn_detector = MultilingualModel()

    session = AgentSession(
        stt=speech_to_text,
        llm=language_model,
        tts=avaz_tts,
        vad=vad_model,
        turn_detection=turn_detector,
    )

    @session.on("metrics_collected")
    def _on_metrics_collected(ev: MetricsCollectedEvent) -> None:
        # Forward every collected metrics event to the framework's metrics logger.
        metrics.log_metrics(ev.metrics)

    assistant = AvazAssistant()
    await session.start(
        agent=assistant,
        room=ctx.room,
        room_options=room_io.RoomOptions(),
    )


# Script entry point: hand the configured server to the LiveKit Agents CLI.
if __name__ == "__main__":
    cli.run_app(server)
1 change: 1 addition & 0 deletions livekit-agents/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ anam = ["livekit-plugins-anam>=1.6.1"]
anthropic = ["livekit-plugins-anthropic>=1.6.1"]
assemblyai = ["livekit-plugins-assemblyai>=1.6.1"]
asyncai = ["livekit-plugins-asyncai>=1.6.1"]
avatario = ["livekit-plugins-avatario>=1.6.1"]
avatartalk = ["livekit-plugins-avatartalk>=1.6.1"]
avaz = ["livekit-plugins-avaz>=1.6.1"]
aws = ["livekit-plugins-aws>=1.6.1"]
Expand Down
41 changes: 41 additions & 0 deletions livekit-plugins/livekit-plugins-avaz/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# LiveKit Avaz TTS Plugin

LiveKit Agents plugin for [Avaz](https://github.com/Mank-Technology) text-to-speech over the dashboard WebSocket `stream-input` protocol.

## Installation

```bash
pip install "livekit-agents[avaz]~=1.6"
```

## Usage

```python
import os
from livekit.plugins import avaz

tts = avaz.TTS(
api_key=os.environ["AVAZ_API_KEY"],
base_url=os.environ["AVAZ_BASE_URL"],
model_id=os.environ["AVAZ_AGENT_MODEL_ID"],
)
```

## Environment variables

| Variable | Required | Description |
|---|---|---|
| `AVAZ_API_KEY` | yes (dashboard) | Dashboard API token (`X-API-Key` / Bearer) |
| `AVAZ_BASE_URL` | yes (dashboard) | Dashboard API base URL (e.g. `https://your-dashboard.example.com/api`) |
| `AVAZ_AGENT_MODEL_ID` | yes (dashboard) | Agent model UUID from your dashboard TTS catalog |
| `AVAZ_STREAM_MODEL` | no | Upstream WebSocket model string (`avaz1`, `avaz2`, `avaz3`); default `avaz3` |
| `TTS_WS_URI` | no | Direct WebSocket override for local TTS-Service (no auth) |

## Protocol

1. Connect to the WebSocket endpoint derived from `{base_url}`: `wss://.../api/tts/stream-input`
2. Send `model_settings` + `voice_settings` (WebSocket `model_id` is the upstream string)
3. Stream `{"text": "..."}` chunks; receive base64 WAV in `{"audio": ...}`
4. Send `{"flush": true}` to finish the turn

HTTP synthesize (`POST /tts/synthesize`) uses the UUID `model_id` from the constructor; WebSocket init uses the upstream string (`stream_model`).
Comment thread
tamerrkanak marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright 2023 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Avaz TTS plugin for LiveKit Agents."""

from .tts import SynthesizeStream, TTS, build_auth_headers
from .version import __version__

__all__ = ["TTS", "SynthesizeStream", "build_auth_headers", "__version__"]

from livekit.agents import Plugin

from .log import logger


class AvazPlugin(Plugin):
    """LiveKit Agents ``Plugin`` subclass describing the Avaz package."""

    def __init__(self) -> None:
        """Pass this module's name, version, package and logger to ``Plugin``."""
        super().__init__(
            __name__,
            __version__,
            __package__,
            logger,
        )


# Register a plugin instance as an import-time side effect of this module.
Plugin.register_plugin(AvazPlugin())

# Snapshot the module namespace now, before __pdoc__ and the loop variable
# below are defined, so they are not part of the snapshot.
_module = dir()
# Every module-level name that is not exported through __all__.
NOT_IN_ALL = [m for m in _module if m not in __all__]

# pdoc reads the module-level __pdoc__ mapping; a False value hides that name
# from the generated documentation.
__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__[n] = False
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2023 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

logger = logging.getLogger("livekit.plugins.avaz")
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2023 LiveKit, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# --- Model / voice defaults -------------------------------------------------
# Default upstream stream model string.
DEFAULT_STREAM_MODEL = "avaz3"
DEFAULT_SPEAKER_ID = 0
DEFAULT_CFG_VALUE = 2.0
DEFAULT_INFERENCE_TIMESTEPS = 10
DEFAULT_CHUNK_NOTATION = "."

# --- Audio framing defaults -------------------------------------------------
# NOTE(review): units are inferred from the names (sample rate presumably in
# Hz, frame length in milliseconds) - confirm against the TTS implementation.
DEFAULT_SAMPLE_RATE = 48_000
DEFAULT_FRAME_MS = 20

# --- Timing defaults --------------------------------------------------------
# NOTE(review): the `_S` suffix presumably means seconds - confirm at the
# call sites.
DEFAULT_CONNECT_TIMEOUT_S = 10.0
DEFAULT_TURN_TIMEOUT_S = 120.0
DEFAULT_POST_TEXT_DRAIN_S = 0.15
DEFAULT_RECV_IDLE_TIMEOUT_S = 0.5
DEFAULT_FLUSH_RECV_TIMEOUT_S = 2.0
Empty file.
Loading