Skip to content

Streaming Responses

Set stream: true in the request body (stream=True in the Python SDK) to receive tokens as they are generated instead of waiting for the full response. This significantly reduces perceived latency for long outputs.

Python SDK

import studiolm
# Build the SDK client; replace the placeholder with a real API key.
client = studiolm.Client(api_key="sk-...")

# With stream=True the call result is iterated chunk by chunk instead of
# being consumed as one complete response.
stream = client.chat.completions.create(
    model="gemma-3-12b-it-qat",
    messages=[{"role": "user", "content": "Write a short story about a robot."}],
    stream=True,
)
for chunk in stream:
    # A chunk may lack "delta" or "content"; fall back to an empty string so
    # nothing is printed for it.
    delta = chunk["choices"][0].get("delta", {})
    piece = delta.get("content", "")
    print(piece, end="", flush=True)
print()  # newline when done

Python (requests)

import requests
import json

def stream_chat(api_key, messages):
    """Stream a chat completion and print the generated text as it arrives.

    Posts ``messages`` to the chat completions endpoint with streaming
    enabled, reads the response line by line and writes each content delta
    to stdout immediately. A final newline is printed when the stream ends.

    Args:
        api_key: API key, sent as a ``Bearer`` token.
        messages: Chat messages to send, e.g.
            ``[{"role": "user", "content": "..."}]``.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    # Using the response as a context manager releases the connection even
    # when we stop reading early (the [DONE] break) or an exception escapes.
    with requests.post(
        "https://api.studiolm.dev/v1/chat/completions",
        headers={"Authorization": f"Bearer {api_key}"},
        json={"model": "gemma-3-12b-it-qat", "messages": messages, "stream": True},
        stream=True,
    ) as response:
        response.raise_for_status()

        for line in response.iter_lines():
            if not line:
                # Blank lines only separate events.
                continue
            text = line.decode("utf-8")
            if text.startswith("data: "):
                text = text[6:]
            if text == "[DONE]":
                break
            try:
                chunk = json.loads(text)
                # "delta" or "content" may be absent or null; treat both as
                # "no text" rather than printing the literal string "None".
                delta = chunk["choices"][0].get("delta") or {}
                content = delta.get("content") or ""
            except (
                json.JSONDecodeError,
                KeyError,
                IndexError,  # e.g. a chunk with an empty "choices" list
                TypeError,
                AttributeError,
            ):
                # Best effort: skip lines that are not well-formed chunks.
                continue
            print(content, end="", flush=True)
    print()

JavaScript (Fetch)

/**
 * Stream a chat completion and write the generated text to stdout as it
 * arrives.
 *
 * The response body is read incrementally. Bytes are decoded in streaming
 * mode and buffered until a full line is available, so an event that is
 * split across two network reads is still parsed correctly.
 *
 * @param {string} apiKey - API key, sent as a Bearer token.
 * @param {Array<{role: string, content: string}>} messages - Chat messages.
 * @returns {Promise<void>} Resolves when the stream has ended.
 * @throws {Error} If the server responds with a non-2xx status.
 */
async function streamChat(apiKey, messages) {
  const response = await fetch("https://api.studiolm.dev/v1/chat/completions", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ model: "gemma-3-12b-it-qat", messages, stream: true }),
  });

  // fetch() does not reject on HTTP errors; surface them explicitly.
  if (!response.ok) {
    throw new Error(`Request failed with status ${response.status}`);
  }

  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  // Handle one complete line of the event stream.
  const handleLine = (rawLine) => {
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
    if (!line.startsWith("data: ") || line === "data: [DONE]") return;
    let chunk;
    try {
      chunk = JSON.parse(line.slice(6));
    } catch {
      // Best effort: skip payloads that are not valid JSON.
      return;
    }
    process.stdout.write(chunk.choices?.[0]?.delta?.content ?? "");
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // stream: true keeps a multi-byte character that straddles two reads
    // intact instead of emitting replacement characters.
    buffer += decoder.decode(value, { stream: true });
    const lines = buffer.split("\n");
    // The last element is an incomplete line (or ""); keep it for the next read.
    buffer = lines.pop();
    for (const line of lines) handleLine(line);
  }

  // Flush any bytes still held by the decoder and process a final line that
  // was not terminated by a newline.
  buffer += decoder.decode();
  if (buffer) handleLine(buffer);
}

SSE chunk format

data: {"id":"chatcmpl-abc","choices":[{"delta":{"content":"Hello"},"index":0}]}

data: [DONE]