[{"data":1,"prerenderedAt":153},["ShallowReactive",2],{"term-r\u002Frate-limit":3,"related-r\u002Frate-limit":138},{"id":4,"title":5,"acronym":6,"body":7,"category":120,"description":121,"difficulty":122,"extension":123,"letter":124,"meta":125,"navigation":59,"path":126,"related":127,"seo":132,"sitemap":133,"stem":136,"subcategory":6,"__hash__":137},"terms\u002Fterms\u002Fr\u002Frate-limit.md","Rate Limit",null,{"type":8,"value":9,"toc":114},"minimark",[10,15,19,23,26,30,103,107,110],[11,12,14],"h2",{"id":13},"eli5-the-vibe-check","ELI5 — The Vibe Check",[16,17,18],"p",{},"A rate limit is the AI provider saying 'slow down, buddy.' You can only make a certain number of API calls per minute, or use a certain number of tokens per day, before you get a 429 error. It's how providers prevent one user from hogging all the compute. When you hit it, implement retry logic with exponential backoff.",[11,20,22],{"id":21},"real-talk","Real Talk",[16,24,25],{},"Rate limits are restrictions on API request frequency imposed by LLM providers. They are typically enforced per key, per organization, and per tier, measured in requests per minute (RPM), tokens per minute (TPM), or tokens per day (TPD). Hitting rate limits returns HTTP 429. Standard mitigation involves exponential backoff with jitter.",
[11,27,29],{"id":28},"show-me-the-code","Show Me The Code",[31,32,37],"pre",{"className":33,"code":34,"language":35,"meta":36,"style":36},"language-python shiki shiki-themes material-theme-lighter material-theme material-theme-palenight","import time\nimport anthropic\n\ndef call_with_retry(client, max_retries=3, **kwargs):\n    for attempt in range(max_retries):\n        try:\n            return client.messages.create(**kwargs)\n        except anthropic.RateLimitError:\n            time.sleep(2 ** attempt)  # exponential backoff\n    raise Exception(\"Max retries exceeded\")\n","python","",[38,39,40,48,54,61,67,73,79,85,91,97],"code",{"__ignoreMap":36},[41,42,45],"span",{"class":43,"line":44},"line",1,[41,46,47],{},"import time\n",[41,49,51],{"class":43,"line":50},2,[41,52,53],{},"import anthropic\n",[41,55,57],{"class":43,"line":56},3,[41,58,60],{"emptyLinePlaceholder":59},true,"\n",[41,62,64],{"class":43,"line":63},4,[41,65,66],{},"def call_with_retry(client, max_retries=3, **kwargs):\n",[41,68,70],{"class":43,"line":69},5,[41,71,72],{},"    for attempt in range(max_retries):\n",[41,74,76],{"class":43,"line":75},6,[41,77,78],{},"        try:\n",[41,80,82],{"class":43,"line":81},7,[41,83,84],{},"            return client.messages.create(**kwargs)\n",[41,86,88],{"class":43,"line":87},8,[41,89,90],{},"        except anthropic.RateLimitError:\n",[41,92,94],{"class":43,"line":93},9,[41,95,96],{},"            time.sleep(2 ** attempt)  # exponential backoff\n",[41,98,100],{"class":43,"line":99},10,[41,101,102],{},"    raise Exception(\"Max retries exceeded\")\n",[11,104,106],{"id":105},"when-youll-hear-this","When You'll Hear This",[16,108,109],{},"\"We're hitting the rate limit — add backoff logic.\" \u002F \"Upgrade the tier to increase rate limits.\"",[111,112,113],"style",{},
"html .light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html.light .shiki span {color: var(--shiki-light);background: var(--shiki-light-bg);font-style: var(--shiki-light-font-style);font-weight: var(--shiki-light-font-weight);text-decoration: var(--shiki-light-text-decoration);}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}",
{"title":36,"searchDepth":50,"depth":50,"links":115},[116,117,118,119],{"id":13,"depth":50,"text":14},{"id":21,"depth":50,"text":22},{"id":28,"depth":50,"text":29},{"id":105,"depth":50,"text":106},"ai","A rate limit is the AI provider saying 'slow down, buddy.'","beginner","md","r",{},"\u002Fterms\u002Fr\u002Frate-limit",[128,129,130,131],"API Key","Token","Chat Completion","LLM",{"title":5,"description":121},{"changefreq":134,"priority":135},"weekly",0.7,"terms\u002Fr\u002Frate-limit","UBX61vTfaUHoa-WFv8jb2rYSZ3A1v6d06vZ6gn7srtg",[139,142,145,149],{"title":128,"path":140,"acronym":6,"category":120,"difficulty":122,"description":141},"\u002Fterms\u002Fa\u002Fapi-key","An API key is your password to use an AI service. You include it in every request to prove you're allowed to use the API and so they know who to charge.",
{"title":130,"path":143,"acronym":6,"category":120,"difficulty":122,"description":144},"\u002Fterms\u002Fc\u002Fchat-completion","Chat Completion is the API pattern for having a back-and-forth conversation with an AI.",{"title":131,"path":146,"acronym":147,"category":120,"difficulty":122,"description":148},"\u002Fterms\u002Fl\u002Fllm","Large Language Model","An LLM is a humongous AI that read basically the entire internet and learned to predict what words come next, really really well.",{"title":129,"path":150,"acronym":6,"category":151,"difficulty":122,"description":152},"\u002Fterms\u002Ft\u002Ftoken","vibecoding","In AI-land, a token is a chunk of text — roughly 3\u002F4 of a word.",1776518305915]