python · intermediate

LangChain Streaming Callback Handler

Capture LLM tokens as they stream using a custom callback handler for real-time UI updates.

python
from langchain_core.callbacks import BaseCallbackHandler
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
import sys

class StreamHandler(BaseCallbackHandler):
    """Callback handler that echoes streamed tokens and keeps a transcript.

    Attributes:
        text: Concatenation of every token received so far. The handler
            never resets it, so reusing one instance across several runs
            keeps appending to the same string.
    """

    def __init__(self) -> None:
        """Initialise the base handler and start with an empty transcript."""
        # Cooperative init: make sure base-class/mixin setup is not skipped.
        super().__init__()
        self.text: str = ''

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Append one streamed token to ``text`` and print it immediately.

        Args:
            token: The newly received text fragment.
            **kwargs: Additional keyword arguments; accepted and ignored.
        """
        self.text += token
        # No trailing newline, and flush so the fragment is written at once
        # rather than sitting in the stdout buffer.
        print(token, end='', flush=True)

    def on_llm_end(self, *args, **kwargs) -> None:
        """Print a newline to terminate the line of streamed tokens.

        All positional and keyword arguments are accepted and ignored.
        """
        print()

# Attach the token-printing handler to a chat model with streaming enabled.
handler = StreamHandler()
llm = ChatOpenAI(
    model='gpt-4o-mini',
    streaming=True,
    callbacks=[handler],
)

# Pipe the prompt template into the model to form a single chain.
prompt = ChatPromptTemplate.from_template('Write a haiku about {topic}')
chain = prompt | llm

# The handler prints tokens as they arrive during the call; afterwards,
# report how many characters it accumulated in `handler.text`.
chain.invoke({'topic': 'data pipelines'})
print(f'Total chars: {len(handler.text)}')

Use Cases

  • streaming UI
  • token capture
  • real-time display

Tags

Related Snippets

Similar patterns you can reuse in the same workflow.