Overview
Langfuse is an open-source LLM observability and analytics platform. It lets you trace, debug, and improve your RedPill AI applications with detailed traces, metrics, and cost analysis, making it well suited to production monitoring and optimization.
Installation
pip install langfuse openai
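Note: the examples on this page use the v2 Langfuse Python SDK (the langfuse.openai drop-in wrapper and langfuse.decorators); the decorator import path changed in later major versions, so check the Langfuse docs if you are on a different release. To pin v2:
pip install "langfuse>=2,<3" openai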
Quick Start
1. Setup Langfuse
import os
from langfuse.openai import OpenAI

# Set your Langfuse API keys
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."
os.environ["LANGFUSE_HOST"] = "https://cloud.langfuse.com"  # or self-hosted URL

# Initialize the OpenAI client with the Langfuse wrapper
client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)
2. Make Tracked API Calls
# All calls are automatically logged to Langfuse
response = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[
        {"role": "user", "content": "Explain quantum computing"}
    ],
)

print(response.choices[0].message.content)
That’s it! View traces in your Langfuse dashboard.
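Note: the SDK sends events to Langfuse asynchronously in batches. In short-lived scripts or serverless functions, flush before the process exits so no events are lost; a minimal sketch using the standard Langfuse client:
from langfuse import Langfuse

# Block until all queued events have been delivered to Langfuse
langfuse = Langfuse()
langfuse.flush()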
Advanced Tracking
Custom Trace Names
from langfuse.openai import OpenAI

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

# Add a custom trace name (an extra kwarg supported by the Langfuse wrapper)
response = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hello"}],
    name="customer_support_query",  # Custom trace name
)
Metadata and Tags
from langfuse.openai import OpenAI

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

# Attach metadata and tags for filtering in the Langfuse UI
response = client.chat.completions.create(
    model="anthropic/claude-3.5-sonnet",
    messages=[{"role": "user", "content": "Analyze this data"}],
    metadata={
        "user_id": "user_123",
        "session_id": "session_456",
        "environment": "production",
    },
    tags=["data-analysis", "high-priority"],
)
Function Tracing with @observe()
Track complex workflows with nested LLM calls:
from langfuse.decorators import observe
from langfuse.openai import OpenAI

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

@observe()
def summarize_article(article: str) -> str:
    """Summarize an article"""
    response = client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[
            {"role": "user", "content": f"Summarize this article:\n\n{article}"}
        ],
    )
    return response.choices[0].message.content

@observe()
def translate_summary(summary: str, language: str) -> str:
    """Translate a summary into another language"""
    response = client.chat.completions.create(
        model="anthropic/claude-3.5-sonnet",
        messages=[
            {"role": "user", "content": f"Translate to {language}:\n\n{summary}"}
        ],
    )
    return response.choices[0].message.content

@observe()
def process_article(article: str, target_language: str) -> dict:
    """Full article processing pipeline"""
    summary = summarize_article(article)
    translation = translate_summary(summary, target_language)
    return {
        "original": article,
        "summary": summary,
        "translation": translation,
    }

# Execute - creates nested traces in Langfuse
result = process_article(
    article="Long article text here...",
    target_language="Spanish",
)
Streaming with Langfuse
from langfuse.openai import OpenAI

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

stream = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Write a story"}],
    stream=True,
    metadata={"type": "creative_writing"},
)

for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
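Token usage for streamed responses is only reported if the final usage chunk is requested. If the upstream model supports the OpenAI stream_options parameter (provider support varies), enabling it lets Langfuse record exact token counts:
stream = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Write a story"}],
    stream=True,
    stream_options={"include_usage": True},  # final chunk carries usage stats
)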
Multi-Model Comparison
Track different models to compare performance:
from langfuse.decorators import observe
from langfuse.openai import OpenAI

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

@observe()
def compare_models(prompt: str) -> dict:
    """Compare responses from different models"""
    # GPT-4o response
    gpt4_response = client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        tags=["comparison", "gpt4"],
    )

    # Claude response
    claude_response = client.chat.completions.create(
        model="anthropic/claude-3.5-sonnet",
        messages=[{"role": "user", "content": prompt}],
        tags=["comparison", "claude"],
    )

    # DeepSeek response
    deepseek_response = client.chat.completions.create(
        model="deepseek/deepseek-chat",
        messages=[{"role": "user", "content": prompt}],
        tags=["comparison", "deepseek"],
    )

    return {
        "gpt4": gpt4_response.choices[0].message.content,
        "claude": claude_response.choices[0].message.content,
        "deepseek": deepseek_response.choices[0].message.content,
    }

results = compare_models("Explain quantum computing in 3 sentences")
Cost Tracking
Langfuse automatically tracks costs for RedPill API calls:
from langfuse.openai import OpenAI

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

# Make tracked calls
for i in range(10):
    response = client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[{"role": "user", "content": f"Query {i}"}],
        metadata={"batch": "morning_batch"},
    )

# View the cost breakdown in the Langfuse dashboard by:
# - Model
# - Time period
# - Tags/metadata
# - User/session
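Cost data can also be pulled programmatically. A minimal sketch using fetch_traces from the v2 Python SDK (exact response attribute names may vary by SDK version):
from langfuse import Langfuse

langfuse = Langfuse()

# Fetch recent traces for a tag; each trace includes latency and
# cost aggregates computed by Langfuse
traces = langfuse.fetch_traces(limit=50, tags=["data-analysis"])
for trace in traces.data:
    print(trace.id, trace.name)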
User Tracking
Associate requests with specific users:
from langfuse.openai import OpenAI

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

response = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Help me with this task"}],
    user_id="user_12345",  # Langfuse wrapper kwarg: attributes the trace to this user
    metadata={
        "user_tier": "premium",
        "feature": "task_assistant",
    },
)
Session Tracking
Group related requests into sessions:
from langfuse.openai import OpenAI
import uuid

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

session_id = str(uuid.uuid4())

# First message in the conversation
response1 = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "What is AI?"}],
    session_id=session_id,  # Langfuse wrapper kwarg: groups traces into a session
)

# Second message in the same conversation
response2 = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Tell me more"}],
    session_id=session_id,
)

# View the entire conversation in Langfuse by session_id
Scoring and Feedback
Add scores and feedback to traces:
import uuid

from langfuse import Langfuse
from langfuse.openai import OpenAI

langfuse = Langfuse()
client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

# Supply your own trace ID so you can attach a score to the trace later
trace_id = str(uuid.uuid4())

# Make a tracked call
response = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Explain AI"}],
    name="explanation_query",
    trace_id=trace_id,  # Langfuse wrapper kwarg (v2 SDK)
)

# Add a user feedback score to the trace
langfuse.score(
    trace_id=trace_id,
    name="user_satisfaction",
    value=5,  # 1-5 rating
    comment="Very helpful explanation",
)
What Gets Tracked
Langfuse automatically captures:
✅ Request/Response: Full messages and completions
✅ Tokens: Input, output, and total token counts
✅ Latency: API response times
✅ Cost: Estimated costs per request
✅ Model: Which model was used
✅ Metadata: Custom tags and metadata
✅ Errors: API errors and exceptions
✅ Users/Sessions: User and session tracking
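For reference, the token counts Langfuse records are the same ones returned on the OpenAI-compatible response object:
response = client.chat.completions.create(
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Hi"}],
)

# Token counts as recorded by Langfuse
print(response.usage.prompt_tokens)
print(response.usage.completion_tokens)
print(response.usage.total_tokens)
print(response.model)  # which model served the request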
Langfuse Dashboard Features
1. Traces View
See all API calls with:
Full request/response
Timing information
Cost breakdown
Nested function calls
2. Metrics Dashboard
Track:
Total requests
Average latency
Token usage
Cost trends
Error rates
3. User Analytics
Analyze:
Requests per user
Cost per user
User engagement
Session patterns
4. Model Comparison
Compare:
Performance across models
Cost efficiency
Response quality
Latency differences
Self-Hosted Langfuse
Use your own Langfuse instance:
import os
from langfuse.openai import OpenAI

# Point to your self-hosted instance
os.environ["LANGFUSE_HOST"] = "https://langfuse.your-domain.com"
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-..."

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)
Best Practices
Tag requests by feature, user type, or use case for better analytics.
Include metadata like user tier, pricing plan, or feature flags.
Use scoring to correlate costs with user satisfaction.
Set up alerts for unusual spending patterns.
Use traces to debug issues without reproducing them locally.
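One way to keep tags and metadata consistent is a small wrapper around the client call. A hypothetical sketch (tracked_create is not part of any SDK; it simply merges a tagging convention into each request):
def tracked_create(client, *, feature: str, user_tier: str, **kwargs):
    # Merge the convention into caller-supplied metadata/tags
    metadata = {"feature": feature, "user_tier": user_tier, **kwargs.pop("metadata", {})}
    tags = [feature, user_tier, *kwargs.pop("tags", [])]
    return client.chat.completions.create(metadata=metadata, tags=tags, **kwargs)

answer = tracked_create(
    client,
    feature="task_assistant",
    user_tier="premium",
    model="openai/gpt-4o",
    messages=[{"role": "user", "content": "Help me plan my week"}],
)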
Example: Production Chatbot
from langfuse.decorators import observe
from langfuse.openai import OpenAI

client = OpenAI(
    api_key="YOUR_REDPILL_API_KEY",
    base_url="https://api.redpill.ai/v1",
)

@observe()
def handle_customer_query(
    user_id: str,
    query: str,
    user_tier: str,
    session_id: str,
) -> str:
    """Handle a customer support query with full tracking"""
    response = client.chat.completions.create(
        model="openai/gpt-4o",
        messages=[
            {
                "role": "system",
                "content": "You are a helpful customer support agent.",
            },
            {
                "role": "user",
                "content": query,
            },
        ],
        user_id=user_id,  # Langfuse wrapper kwargs
        session_id=session_id,
        metadata={
            "user_tier": user_tier,
            "feature": "customer_support",
            "environment": "production",
        },
        tags=["customer-support", user_tier],
    )
    return response.choices[0].message.content

# Usage
answer = handle_customer_query(
    user_id="user_789",
    query="How do I upgrade my plan?",
    user_tier="free",
    session_id="session_123",
)