Skip to content

Commit 80fbc26

Browse files
authored
Merge pull request #1 from google-gemini/thor/add-command-line-examples
feat: Add command-line examples.
2 parents a69a830 + 515b42d commit 80fbc26

10 files changed

Lines changed: 1398 additions & 0 deletions

File tree

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
node_modules
2+
__pycache__
3+
.venv
4+
.env

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ Live API:
6060

6161
* **[Gen AI SDK Python example](./gemini-live-genai-python-sdk/README.md)**: Recommended for ease of use. Connect to the Gemini Live API using the Gen AI SDK to build a real-time multimodal application with a Python backend.
6262
* **[Ephemeral tokens and raw WebSocket example](./gemini-live-ephemeral-tokens-websocket/README.md)**: Raw protocol control. Connect to the Gemini Live API using WebSockets to build a real-time multimodal application with a JavaScript frontend and a Python backend.
63+
* **[Command-line Python example](./command-line/python/README.md)**: A minimal command-line app that streams microphone audio to the Gemini Live API and plays back the response in real time using Python.
64+
* **[Command-line Node.js example](./command-line/node/README.md)**: A minimal command-line app that streams microphone audio to the Gemini Live API and plays back the response in real time using Node.js.
65+
66+
> [!TIP]
67+
> Install the [Gemini Live API Dev](https://github.com/google-gemini/gemini-skills?tab=readme-ov-file#gemini-live-api-dev) skill for AI-assisted development with the Live API in your coding agents.
6368
6469
## Partner integrations
6570

command-line/node/README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Gemini Live API – Command Line (Node.js)
2+
3+
A minimal command-line app that streams microphone audio to the Gemini Live API and plays back the response in real time. This example is intended for local testing only, not for production use cases.
4+
5+
> **Note:** Use headphones. This script uses the system default audio input and output, which often won't include echo cancellation — without headphones, the model may hear its own output and interrupt itself.
6+
7+
## Prerequisites
8+
9+
- Node.js 20+
10+
- A Gemini API key ([get one here](https://aistudio.google.com/apikey))
11+
- SoX (`brew install sox` on macOS) — required by the `mic` package
12+
13+
## Setup
14+
15+
Install helpers for audio streaming. Additional system-level dependencies might be required (`sox` for Mac/Windows or ALSA for Linux). Refer to the [speaker](https://www.npmjs.com/package/speaker) and [mic](https://www.npmjs.com/package/mic) docs for detailed installation steps.
16+
17+
```bash
18+
npm install @google/genai mic speaker
19+
```
20+
21+
## Run
22+
23+
```bash
24+
export GEMINI_API_KEY="your-api-key"
25+
npx tsx main.mts
26+
```
27+
28+
You should see **"Connected to Gemini Live API"** and **"Microphone started. Speak now..."** — talk into your mic and Gemini will respond with audio. Press `Ctrl+C` to quit.

command-line/node/main.mts

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import { GoogleGenAI, Modality, type LiveServerMessage } from '@google/genai';
2+
import mic from 'mic';
3+
import Speaker from 'speaker';
4+
5+
const ai = new GoogleGenAI({});
6+
// WARNING: Do not use API keys in client-side (browser based) applications
7+
// Consider using Ephemeral Tokens instead
8+
// More information at: https://ai.google.dev/gemini-api/docs/ephemeral-tokens
9+
10+
// --- Live API config ---
11+
const model = 'gemini-2.5-flash-native-audio-preview-12-2025';
12+
const config = {
13+
responseModalities: [Modality.AUDIO],
14+
systemInstruction: "You are a helpful and friendly AI assistant.",
15+
};
16+
17+
async function live() {
18+
const responseQueue: LiveServerMessage[] = [];
19+
const audioQueue: Buffer[] = [];
20+
let speaker: Speaker | null = null;
21+
22+
async function waitMessage(): Promise<LiveServerMessage> {
23+
while (responseQueue.length === 0) {
24+
await new Promise<void>((resolve) => setImmediate(resolve));
25+
}
26+
return responseQueue.shift()!;
27+
}
28+
29+
function createSpeaker() {
30+
if (speaker) {
31+
process.stdin.unpipe(speaker);
32+
speaker.end();
33+
}
34+
speaker = new Speaker({
35+
channels: 1,
36+
bitDepth: 16,
37+
sampleRate: 24000,
38+
});
39+
speaker.on('error', (err: Error) => console.error('Speaker error:', err));
40+
process.stdin.pipe(speaker);
41+
}
42+
43+
async function messageLoop() {
44+
// Puts incoming messages in the audio queue.
45+
while (true) {
46+
const message = await waitMessage();
47+
if (message.serverContent && message.serverContent.interrupted) {
48+
// Empty the queue on interruption to stop playback
49+
audioQueue.length = 0;
50+
continue;
51+
}
52+
if (message.serverContent && message.serverContent.modelTurn && message.serverContent.modelTurn.parts) {
53+
for (const part of message.serverContent.modelTurn.parts) {
54+
if (part.inlineData && part.inlineData.data) {
55+
audioQueue.push(Buffer.from(part.inlineData.data, 'base64'));
56+
}
57+
}
58+
}
59+
}
60+
}
61+
62+
async function playbackLoop() {
63+
// Plays audio from the audio queue.
64+
while (true) {
65+
if (audioQueue.length === 0) {
66+
if (speaker) {
67+
// Destroy speaker if no more audio to avoid warnings from speaker library
68+
process.stdin.unpipe(speaker);
69+
speaker.end();
70+
speaker = null;
71+
}
72+
await new Promise<void>((resolve) => setImmediate(resolve));
73+
} else {
74+
if (!speaker) createSpeaker();
75+
const chunk = audioQueue.shift()!;
76+
await new Promise<void>((resolve) => {
77+
speaker!.write(chunk, () => resolve());
78+
});
79+
}
80+
}
81+
}
82+
83+
// Start loops
84+
messageLoop();
85+
playbackLoop();
86+
87+
// Connect to Gemini Live API
88+
const session = await ai.live.connect({
89+
model: model,
90+
config: config,
91+
callbacks: {
92+
onopen: () => console.log('Connected to Gemini Live API'),
93+
onmessage: (message: LiveServerMessage) => responseQueue.push(message),
94+
onerror: (e: ErrorEvent) => console.error('Error:', e.message),
95+
onclose: (e: CloseEvent) => console.log('Closed:', e.reason),
96+
},
97+
});
98+
99+
// Setup Microphone for input
100+
const micInstance = mic({
101+
rate: '16000',
102+
bitwidth: '16',
103+
channels: '1',
104+
});
105+
const micInputStream = micInstance.getAudioStream();
106+
107+
micInputStream.on('data', (data: Buffer) => {
108+
// API expects base64 encoded PCM data
109+
session.sendRealtimeInput({
110+
audio: {
111+
data: data.toString('base64'),
112+
mimeType: "audio/pcm;rate=16000"
113+
}
114+
});
115+
});
116+
117+
micInputStream.on('error', (err: Error) => {
118+
console.error('Microphone error:', err);
119+
});
120+
121+
micInstance.start();
122+
console.log('Microphone started. Speak now...');
123+
}
124+
125+
live().catch(console.error);

command-line/node/mic.d.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
declare module 'mic' {
2+
import { Transform } from 'stream';
3+
4+
interface MicOptions {
5+
rate?: string;
6+
bitwidth?: string;
7+
channels?: string;
8+
encoding?: string;
9+
endian?: string;
10+
device?: string;
11+
exitOnSilence?: number;
12+
fileType?: string;
13+
debug?: boolean;
14+
}
15+
16+
interface MicInstance {
17+
start(): void;
18+
stop(): void;
19+
pause(): void;
20+
resume(): void;
21+
getAudioStream(): Transform;
22+
}
23+
24+
function mic(options?: MicOptions): MicInstance;
25+
export = mic;
26+
}

0 commit comments

Comments
 (0)