LangChain Integration
Use Korad.AI with LangChain as a drop-in replacement for OpenAI.
Overview
LangChain works seamlessly with Korad.AI since we're OpenAI-compatible. Just change the base URL and API key.
Installation
pip install langchain-openai
Basic Setup
from langchain_openai import ChatOpenAI

# Configure for Korad.AI
chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

# Use in LangChain
response = chat.invoke("Hello, world!")
print(response.content)
With Optimization Headers
from langchain_openai import ChatOpenAI

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    default_headers={
        "X-Savings-Level": "med"  # Balanced optimization
    }
)
LangChain Chains
Conversation Chain
from langchain.chains import ConversationChain
from langchain_openai import ChatOpenAI

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

chain = ConversationChain(llm=chat)
response = chain.run("Tell me a joke")
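Note that recent LangChain releases mark ConversationChain (and Chain.run) as legacy. If you are on langchain-core ≥ 0.2, a rough equivalent uses RunnableWithMessageHistory; the in-memory session store below is a sketch, not a production pattern:

from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# One chat history per session id, held in memory for this sketch
store = {}

def get_history(session_id: str) -> InMemoryChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]

conversational = RunnableWithMessageHistory(chat, get_history)

response = conversational.invoke(
    "Tell me a joke",
    config={"configurable": {"session_id": "demo"}},
)
print(response.content)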
Retrieval QA Chain
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# LLM
llm = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    default_headers={
        "X-Vanishing-Context": "true"  # Document optimization
    }
)

# Vector store (embeddings are requested from OpenAI directly
# and require OPENAI_API_KEY to be set)
vectorstore = Chroma(embedding_function=OpenAIEmbeddings())

# QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)
Agents
from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain.tools import Tool

llm = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

tools = [
    Tool(
        name="Search",
        func=lambda q: f"Results for {q}",
        description="Search the web"
    )
]

# The agent prompt must include an agent_scratchpad placeholder
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "{input}"),
    MessagesPlaceholder("agent_scratchpad"),
])

agent = create_openai_functions_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools)

result = agent_executor.invoke({"input": "Search for AI news"})
print(result["output"])
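create_openai_functions_agent relies on the legacy OpenAI functions format. If your model and gateway support standard tool calling, the newer create_tool_calling_agent (available in recent langchain releases) should be a drop-in swap, reusing the same prompt and tools:

from langchain.agents import create_tool_calling_agent

# Same llm, tools, and prompt as above; only the constructor changes
agent = create_tool_calling_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools)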
LangSmith Integration
import os
from langchain_openai import ChatOpenAI

# Enable LangSmith tracing
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "your-langsmith-key"
os.environ["LANGCHAIN_PROJECT"] = "korad-ai-app"

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)
Response Streaming
from langchain_openai import ChatOpenAI

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    streaming=True
)

# Stream response
for chunk in chat.stream("Tell me a story"):
    print(chunk.content, end="")
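In async code, the same model exposes astream; a minimal sketch:

import asyncio

async def main():
    # Tokens arrive as they are generated, same as the sync version
    async for chunk in chat.astream("Tell me a story"):
        print(chunk.content, end="")

asyncio.run(main())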
Cost Monitoring
from langchain_community.callbacks import get_openai_callback
from langchain_openai import ChatOpenAI

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

with get_openai_callback() as cb:
    response = chat.invoke("Explain quantum computing")
    # Note: cb.total_cost is computed from OpenAI's price table, so it
    # may read $0.00 for non-OpenAI model names; token counts still work.
    print(f"Total Cost: ${cb.total_cost}")
    print(f"Total Tokens: {cb.total_tokens}")
Best Practices
1. Use Appropriate Optimization
# For document QA
chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    default_headers={"X-Vanishing-Context": "true"}
)

# For cost-sensitive apps
chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    default_headers={"X-Savings-Level": "max"}
)
2. Handle Rate Limits
from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_openai import ChatOpenAI

rate_limiter = InMemoryRateLimiter(
    requests_per_second=10
)

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    rate_limiter=rate_limiter
)
3. Monitor Savings
# LangChain does not expose raw HTTP response headers, so savings
# headers cannot be read from the ChatOpenAI response itself.
# Check your optimizer logs or the Korad.AI dashboard instead,
# or drop down to the raw client as sketched below.
chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

response = chat.invoke("...")
# Check dashboard for savings on this request
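If you need the headers programmatically, one option is to call the same endpoint with the openai SDK's with_raw_response wrapper. The "x-savings-level" header name below is a placeholder assumption; substitute whatever headers your gateway actually returns:

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY"
)

raw = client.chat.completions.with_raw_response.create(
    model="anthropic/claude-sonnet-4-5-20250929",
    messages=[{"role": "user", "content": "Hello"}],
)
print(raw.headers.get("x-savings-level"))  # hypothetical header name
completion = raw.parse()                   # the usual ChatCompletion object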
LangChain + Korad.AI = powerful RAG with automatic cost savings.