[{"data":1,"prerenderedAt":140},["ShallowReactive",2],{"term-s\u002Fstreaming":3,"related-s\u002Fstreaming":121},{"id":4,"title":5,"acronym":6,"body":7,"category":101,"description":102,"difficulty":103,"extension":104,"letter":105,"meta":106,"navigation":107,"path":108,"related":109,"seo":115,"sitemap":116,"stem":119,"subcategory":6,"__hash__":120},"terms\u002Fterms\u002Fs\u002Fstreaming.md","Streaming",null,{"type":8,"value":9,"toc":95},"minimark",[10,15,19,23,26,30,84,88,91],[11,12,14],"h2",{"id":13},"eli5-the-vibe-check","ELI5 — The Vibe Check",[16,17,18],"p",{},"Streaming is when the AI sends you its response word by word as it generates, instead of making you wait for the whole thing at once. You know that typing effect you see in ChatGPT and Claude? That's streaming. Without it, you'd stare at a loading spinner for 30 seconds, then get the whole essay dumped on you at once.",[11,20,22],{"id":21},"real-talk","Real Talk",[16,24,25],{},"Streaming in LLM APIs means the model's output is returned as a sequence of chunks (tokens or token groups) via server-sent events (SSE) or websockets rather than a single response after full generation. \nThis enables progressive UI rendering, faster time-to-first-token perception, and the ability to stop generation early.",[11,27,29],{"id":28},"show-me-the-code","Show Me The Code",[31,32,37],"pre",{"className":33,"code":34,"language":35,"meta":36,"style":36},"language-python shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","with client.messages.stream(\n    model=\"claude-opus-4-6\",\n    max_tokens=1024,\n    messages=[{\"role\": \"user\", \"content\": \"Explain recursion\"}]\n) as stream:\n    for text in stream.text_stream:\n        print(text, end=\"\", flush=True)\n","python","",[38,39,40,48,54,60,66,72,78],"code",{"__ignoreMap":36},[41,42,45],"span",{"class":43,"line":44},"line",1,[41,46,47],{},"with client.messages.stream(\n",[41,49,51],{"class":43,"line":50},2,[41,52,53],{},"    model=\"claude-opus-4-6\",\n",[41,55,57],{"class":43,"line":56},3,[41,58,59],{},"    max_tokens=1024,\n",[41,61,63],{"class":43,"line":62},4,[41,64,65],{},"    messages=[{\"role\": \"user\", \"content\": \"Explain recursion\"}]\n",[41,67,69],{"class":43,"line":68},5,[41,70,71],{},") as stream:\n",[41,73,75],{"class":43,"line":74},6,[41,76,77],{},"    for text in stream.text_stream:\n",[41,79,81],{"class":43,"line":80},7,[41,82,83],{},"        print(text, end=\"\", flush=True)\n",[11,85,87],{"id":86},"when-youll-hear-this","When You'll Hear This",[16,89,90],{},"\"Enable streaming so users see the response immediately.\" \u002F \"Streaming dropped latency perception by 80%.\"",[92,93,94],"style",{},"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default \n.shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",{"title":36,"searchDepth":50,"depth":50,"links":96},[97,98,99,100],{"id":13,"depth":50,"text":14},{"id":21,"depth":50,"text":22},{"id":28,"depth":50,"text":29},{"id":86,"depth":50,"text":87},"ai","Streaming is when the AI sends you its response word by word as it generates, instead of making you wait for the whole thing at once.","beginner","md","s",{},true,"\u002Fterms\u002Fs\u002Fstreaming",[110,111,112,113,114],"Chat Completion","API Key","Token","LLM","Inference",{"title":5,"description":102},{"changefreq":117,"priority":118},"weekly",0.7,"terms\u002Fs\u002Fstreaming","V2vtR6UHFcMDkD35MIG-BeYLmItdAw3nbmDd3jcY0BY",[122,125,128,132,136],{"title":111,"path":123,"acronym":6,"category":101,"difficulty":103,"description":124},"\u002Fterms\u002Fa\u002Fapi-key","An API key is your password to use an AI service. \nYou include it in every request to prove you're allowed to use the API and so they know who to charge.",{"title":110,"path":126,"acronym":6,"category":101,"difficulty":103,"description":127},"\u002Fterms\u002Fc\u002Fchat-completion","Chat Completion is the API pattern for having a back-and-forth conversation with an AI.",{"title":114,"path":129,"acronym":6,"category":101,"difficulty":130,"description":131},"\u002Fterms\u002Fi\u002Finference","intermediate","Inference is when the AI actually runs and generates output — as opposed to training, which is when it's learning.",{"title":113,"path":133,"acronym":134,"category":101,"difficulty":103,"description":135},"\u002Fterms\u002Fl\u002Fllm","Large Language Model","An LLM is a humongous AI that read basically the entire internet and learned to predict what words come next, really really well.",{"title":112,"path":137,"acronym":6,"category":138,"difficulty":103,"description":139},"\u002Fterms\u002Ft\u002Ftoken","vibecoding","In AI-land, a token is a chunk of text — roughly 3\u002F4 of a word.",1776518315806]