Python · Advanced

Async AI Inference with Celery

Offload slow LLM inference to Celery background workers with Redis as broker and result backend.

python
from celery import Celery
from openai import OpenAI
import json

# Celery application named 'ai_tasks': Redis database 0 is the message broker,
# Redis database 1 is the result backend.
app = Celery(
    'ai_tasks',
    broker='redis://localhost:6379/0',
    backend='redis://localhost:6379/1',
)

# Module-level OpenAI client, constructed with no explicit arguments.
client = OpenAI()

@app.task(bind=True, max_retries=3, default_retry_delay=5)
def generate_summary(self, text: str, max_tokens: int = 150) -> dict:
    """Summarise *text* with ``gpt-4o-mini`` and report the tokens used.

    Registered as a bound Celery task, so ``self`` is the task instance and
    is used to schedule retries (at most 3, 5 seconds apart, per the
    decorator arguments).

    Args:
        text: The text to summarise; sent as the user message.
        max_tokens: Upper bound on completion tokens passed to the API.

    Returns:
        A dict with ``'summary'`` (the content of the first choice's message)
        and ``'tokens'`` (``usage.total_tokens``, or ``None`` when the
        response carries no usage information).

    Raises:
        celery.exceptions.Retry: When a transient API error occurs and a
            retry has been scheduled.
        Exception: Non-transient errors propagate immediately; a transient
            error is re-raised once the retry budget is exhausted.
    """
    # Local import keeps this block self-contained; the openai package is
    # already a dependency of this module.
    from openai import (
        APIConnectionError,
        APITimeoutError,
        InternalServerError,
        RateLimitError,
    )

    # Only the network call lives in the try body, so a failure while reading
    # the response below cannot trigger another (billed) completion request.
    try:
        resp = client.chat.completions.create(
            model='gpt-4o-mini',
            messages=[
                {'role': 'system', 'content': 'Summarise concisely.'},
                {'role': 'user', 'content': text},
            ],
            max_tokens=max_tokens,
        )
    except (
        APIConnectionError,
        APITimeoutError,
        RateLimitError,
        InternalServerError,
    ) as exc:
        # Retry transient failures only; errors such as bad credentials or an
        # invalid request would fail identically on every attempt.
        raise self.retry(exc=exc)

    usage = resp.usage
    return {
        'summary': resp.choices[0].message.content,
        'tokens': usage.total_tokens if usage is not None else None,
    }

# Send task and check result.
# Guarded so that merely importing this module (for example when a Celery
# worker loads it to register the task) does not enqueue a job and then block
# for up to 60 seconds waiting on its result.
if __name__ == '__main__':
    task = generate_summary.delay(
        'Python is a versatile programming language used in web development, '
        'data science, and AI. It is known for its readability.'
    )
    # Block until a worker has stored the result in the backend.
    result = task.get(timeout=60)
    print(json.dumps(result, indent=2))

Use Cases

  • async AI processing
  • background jobs
  • scalable inference

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.