LangChain Integration

Use Korad.AI with LangChain as a drop-in replacement for OpenAI.

Overview

LangChain works seamlessly with Korad.AI since we're OpenAI-compatible. Just change the base URL and API key.

Installation

pip install langchain langchain-openai

The chain and agent examples below use the core langchain package; the retrieval example additionally needs langchain-community and chromadb:

pip install langchain-community chromadb

Basic Setup

from langchain_openai import ChatOpenAI

# Configure for Korad.AI
chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

# Use in LangChain
response = chat.invoke("Hello, world!")
print(response.content)
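
If you prefer environment-based configuration, ChatOpenAI also reads the standard OpenAI environment variables; a minimal sketch, assuming your langchain-openai version honors OPENAI_API_KEY and OPENAI_API_BASE:

import os
from langchain_openai import ChatOpenAI

# Point the standard OpenAI env vars at Korad.AI
os.environ["OPENAI_API_KEY"] = "sk-bf-YOUR_VIRTUAL_KEY"
os.environ["OPENAI_API_BASE"] = "http://localhost:8084/v1"

chat = ChatOpenAI(model="anthropic/claude-sonnet-4-5-20250929")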

With Optimization Headers

from langchain_openai import ChatOpenAI

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    default_headers={
        "X-Savings-Level": "med"  # Balanced optimization
    }
)

LangChain Chains

Conversation Chain

from langchain.chains import ConversationChain
from langchain_openai import ChatOpenAI

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

chain = ConversationChain(llm=chat)
response = chain.run("Tell me a joke")
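
ConversationChain is deprecated in recent LangChain releases. A minimal sketch of the newer pattern, assuming langchain-core's RunnableWithMessageHistory with a simple in-memory session store:

from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import HumanMessage
from langchain_core.runnables.history import RunnableWithMessageHistory

# One chat history per session id
store = {}

def get_history(session_id):
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]

conversational = RunnableWithMessageHistory(chat, get_history)
response = conversational.invoke(
    [HumanMessage(content="Tell me a joke")],
    config={"configurable": {"session_id": "demo"}},
)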

Retrieval QA Chain

from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# LLM
llm = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    default_headers={
        "X-Vanishing-Context": "true"  # Document optimization
    }
)

# Vector store
vectorstore = Chroma(embedding_function=OpenAIEmbeddings())

# QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)
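
Run a query; RetrievalQA takes a "query" key and returns the answer under "result":

result = qa_chain.invoke({"query": "What do the indexed documents cover?"})
print(result["result"])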

Agents

from langchain.agents import AgentExecutor, create_openai_functions_agent
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai import ChatOpenAI
from langchain.tools import Tool

llm = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

tools = [
    Tool(
        name="Search",
        func=lambda q: f"Results for {q}",
        description="Search the web"
    )
]

# The agent prompt must include an agent_scratchpad placeholder
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "{input}"),
    MessagesPlaceholder("agent_scratchpad"),
])

agent = create_openai_functions_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools)

result = agent_executor.invoke({"input": "Search for AI news"})
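
The executor returns a dict; the final answer is under the "output" key:

print(result["output"])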

LangSmith Integration

import os
from langchain_openai import ChatOpenAI

# Enable LangSmith tracing
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "your-langsmith-key"
os.environ["LANGCHAIN_PROJECT"] = "korad-ai-app"

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

Response Streaming

from langchain_openai import ChatOpenAI

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    streaming=True
)

# Stream response
for chunk in chat.stream("Tell me a story"):
    print(chunk.content, end="")
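
For async applications, LangChain chat models also expose an astream counterpart; a minimal sketch:

import asyncio

async def main():
    # Tokens arrive as an async iterator
    async for chunk in chat.astream("Tell me a story"):
        print(chunk.content, end="")

asyncio.run(main())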

Cost Monitoring

from langchain_openai import ChatOpenAI
from langchain_community.callbacks import get_openai_callback

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

with get_openai_callback() as cb:
    response = chat.invoke("Explain quantum computing")
    print(f"Total Cost: ${cb.total_cost}")
    print(f"Total Tokens: {cb.total_tokens}")

# Note: the callback prices tokens from OpenAI's model table, so
# non-OpenAI model names may report $0; check the Korad.AI dashboard
# for actual spend.

Best Practices

1. Use Appropriate Optimization

# For document QA
chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    default_headers={"X-Vanishing-Context": "true"}
)

# For cost-sensitive apps
chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    default_headers={"X-Savings-Level": "max"}
)

2. Handle Rate Limits

from langchain_core.rate_limiters import InMemoryRateLimiter

rate_limiter = InMemoryRateLimiter(
    requests_per_second=10  # also accepts check_every_n_seconds and max_bucket_size
)

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929",
    rate_limiter=rate_limiter
)

3. Monitor Savings

def get_savings_headers(response):
    """Extract savings info from response."""
    # LangChain may not expose headers directly
    # Check your optimizer logs or dashboard
    pass

chat = ChatOpenAI(
    base_url="http://localhost:8084/v1",
    api_key="sk-bf-YOUR_VIRTUAL_KEY",
    model="anthropic/claude-sonnet-4-5-20250929"
)

response = chat.invoke("...")
# Check dashboard for savings on this request
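
If you need the savings headers themselves, one option is to call the endpoint directly and inspect the raw HTTP response; a minimal sketch using httpx, with a hypothetical X-Savings header name (check the Korad.AI docs for the actual response headers):

import httpx

resp = httpx.post(
    "http://localhost:8084/v1/chat/completions",
    headers={"Authorization": "Bearer sk-bf-YOUR_VIRTUAL_KEY"},
    json={
        "model": "anthropic/claude-sonnet-4-5-20250929",
        "messages": [{"role": "user", "content": "Hello"}],
    },
    timeout=60,
)
# "X-Savings" is a hypothetical header name; substitute the real one
print(resp.headers.get("X-Savings"))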

LangChain + Korad.AI = Powerful RAG with automatic cost savings.