Python SDK
Use the Python SDK to integrate the LLM API quickly, with support for synchronous and asynchronous calls, streaming responses, automatic retries on errors, and other advanced features
Python 3.7+ · Latest Version: 1.0.0 · MIT License
Install
Install using pip
Terminal
pip install openai
Install using conda
Terminal
conda install -c conda-forge openai
Tip: We recommend using a virtual environment to manage project dependencies and avoid version conflicts.
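For example, a typical setup with Python's built-in venv module (the environment name .venv is just a convention):
Terminal
python -m venv .venv
source .venv/bin/activate   # On Windows: .venv\Scripts\activate
pip install openai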
Getting Started
Initialize client
from openai import OpenAI
# Create client instance
client = OpenAI(
    api_key="your-api-key",            # Your API key
    base_url="https://api.n1n.ai/v1"   # API endpoint
)
# or read from environment variables
# export OPENAI_API_KEY="your-api-key"
# export OPENAI_BASE_URL="https://api.n1n.ai/v1"
# client = OpenAI()
Basic Conversation Example
# Simple conversation
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "user", "content": "Hello, please introduce Python"}
    ]
)
print(response.choices[0].message.content)
Advanced Features
Streaming Response
Receive generated content in real time for a better user experience
# Streaming response
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write a long story"}],
    stream=True  # Enable streaming response
)
# Process response chunk by chunk
for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
Asynchronous Calls
Use asynchronous programming to improve performance when making concurrent requests
import asyncio
from openai import AsyncOpenAI
# Async client
async_client = AsyncOpenAI(
    api_key="your-api-key",
    base_url="https://api.n1n.ai/v1"
)
async def async_chat():
    response = await async_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Async Request Example"}]
    )
    return response.choices[0].message.content
# Run async function
result = asyncio.run(async_chat())
print(result)
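The async client pays off when you issue several requests concurrently; a minimal sketch reusing the async_chat() coroutine defined above:
async def main():
    # Run three requests concurrently instead of one after another
    results = await asyncio.gather(*(async_chat() for _ in range(3)))
    for r in results:
        print(r)

asyncio.run(main())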
Function Calling
Let the model call custom functions
import json
# Define function
def get_weather(location, unit="celsius"):
    """Get weather for the specified location"""
    # In a real application, this would call an actual weather API
    return json.dumps({
        "location": location,
        "temperature": 22,
        "unit": unit,
        "description": "Sunny"
    })
# Function schema describing get_weather to the model
functions = [
    {
        "name": "get_weather",
        "description": "Get weather for specified location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "City name, e.g.: Beijing"
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"]
                }
            },
            "required": ["location"]
        }
    }
]
# Call the model
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "What's the weather like in Beijing today?"}],
    functions=functions,
    function_call="auto"
)
# Handle function call
message = response.choices[0].message
if message.function_call:
    function_name = message.function_call.name
    function_args = json.loads(message.function_call.arguments)
    # Execute function
    if function_name == "get_weather":
        result = get_weather(**function_args)
        print(f"Function execution result: {result}")
Conversation Context Management
Maintain multi-turn conversation history
class ChatSession:
    def __init__(self, client, model="gpt-3.5-turbo"):
        self.client = client
        self.model = model
        self.messages = []

    def add_message(self, role, content):
        self.messages.append({"role": role, "content": content})

    def chat(self, user_input):
        # Add user message
        self.add_message("user", user_input)
        # Call the API
        response = self.client.chat.completions.create(
            model=self.model,
            messages=self.messages
        )
        # Add assistant response to history
        assistant_message = response.choices[0].message.content
        self.add_message("assistant", assistant_message)
        return assistant_message

    def clear_history(self):
        self.messages = []
# Using the session manager
session = ChatSession(client)
session.add_message("system", "You are a helpful assistant")
# Multi-turn conversation
print(session.chat("What is Machine Learning?"))
print(session.chat("Can you give an example?"))
print(session.chat("How to start learning?"))
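One caveat: self.messages grows with every turn and will eventually exceed the model's context window. A minimal trimming method you could add to ChatSession (the max_turns cutoff is an illustrative choice, not part of the SDK):
def trim_history(self, max_turns=10):
    """Keep the system message plus the last max_turns exchanges."""
    system = [m for m in self.messages if m["role"] == "system"]
    rest = [m for m in self.messages if m["role"] != "system"]
    self.messages = system + rest[-max_turns * 2:]  # 2 messages per turn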
Error Handling
Exception Handling Example
from openai import OpenAI, RateLimitError, APIStatusError
import time

def chat_with_retry(client, messages, max_retries=3):
    """Chat function with a retry mechanism"""
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=messages
            )
            return response
        except RateLimitError as e:
            # Rate limit error: wait and retry
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt  # Exponential backoff
                print(f"Rate limited, waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
            else:
                print("Maximum retries reached")
                raise
        except APIStatusError as e:
            # The API returned an error status code
            print(f"API error: {e}")
            if e.status_code >= 500:
                # Server error, safe to retry
                if attempt < max_retries - 1:
                    time.sleep(1)
                    continue
            raise
        except Exception as e:
            # Other errors
            print(f"Unexpected error: {e}")
            raise
# Using function with retry
try:
    response = chat_with_retry(
        client,
        [{"role": "user", "content": "Hello!"}]
    )
    print(response.choices[0].message.content)
except Exception as e:
    print(f"Request failed: {e}")
Common Error Types
- RateLimitError - Request rate exceeded the limit
- APIStatusError - The API returned an error status code (e.g., a 5xx server-side error)
- AuthenticationError - Authentication failed
- BadRequestError - Invalid request parameters (named InvalidRequestError in pre-1.0 SDKs)
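Errors like authentication failures should not be retried blindly; catching the specific class lets you respond appropriately. A minimal sketch:
from openai import AuthenticationError

try:
    client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}]
    )
except AuthenticationError:
    # Retrying will not help here; the API key itself is wrong
    print("Check that your API key is valid and not expired")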
Best Practices
API Key Management
- • Use environment variables to store API keys (see the sketch below)
- • Never hardcode keys in your code
- • Use a key management service (e.g., AWS Secrets Manager)
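A minimal sketch of loading the key from the environment and failing fast when it is missing:
import os
from openai import OpenAI

api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY is not set")

client = OpenAI(
    api_key=api_key,
    base_url="https://api.n1n.ai/v1"
)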
Performance Optimization
- • Reuse client instances
- • Use connection pooling
- • Implement a request caching strategy (see the sketch below)
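As one way to cache, an in-memory cache keyed on the prompt avoids paying for repeated identical requests; a minimal sketch (cached_completion is an illustrative helper, not part of the SDK):
from functools import lru_cache

@lru_cache(maxsize=256)
def cached_completion(prompt, model="gpt-3.5-turbo"):
    # Identical (prompt, model) pairs return the cached answer
    # instead of triggering a new API call
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content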
Cost Control
- • Set a max_tokens limit (see the sketch below)
- • Monitor token usage
- • Use the smallest model that meets your quality requirements
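The first two points in one small sketch:
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Summarize Python in one sentence"}],
    max_tokens=100  # Hard cap on the length of the completion
)
# Monitor what each call actually consumed
print(response.usage.prompt_tokens, response.usage.completion_tokens)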
Error Handling
- • Implement a retry mechanism
- • Log errors
- • Have a graceful degradation strategy (see the sketch below)
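Graceful degradation can be as simple as falling back to a cheaper model or a canned reply when the primary call fails; a minimal sketch (the model names are illustrative):
def answer_with_fallback(client, messages):
    try:
        response = client.chat.completions.create(
            model="gpt-4",
            messages=messages
        )
        return response.choices[0].message.content
    except Exception:
        # Degrade to a cheaper model rather than failing outright
        try:
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=messages
            )
            return response.choices[0].message.content
        except Exception:
            return "Sorry, the service is temporarily unavailable."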
Utility Functions
Token Calculation
import tiktoken
def count_tokens(messages, model="gpt-3.5-turbo"):
    """Calculate token count for messages"""
    encoding = tiktoken.encoding_for_model(model)
    tokens_per_message = 3
    tokens_per_name = 1
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # Reply start tokens
    return num_tokens
# Usage example
messages = [
    {"role": "user", "content": "Hello, how are you?"}
]
token_count = count_tokens(messages)
print(f"Token count: {token_count}")Response Logging
import logging
import json
from datetime import datetime
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='api_calls.log'
)
def log_api_call(messages, response, model="gpt-3.5-turbo"):
    """Log API call details"""
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "model": model,
        "messages": messages,
        "response": response.choices[0].message.content,
        "usage": {
            "prompt_tokens": response.usage.prompt_tokens,
            "completion_tokens": response.usage.completion_tokens,
            "total_tokens": response.usage.total_tokens
        }
    }
    logging.info(json.dumps(log_entry, ensure_ascii=False))
    return log_entry
# Usage example
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Test logging"}]
)
log_api_call([{"role": "user", "content": "Test logging"}], response)
Ready to Start Building?
You now understand the core features of the Python SDK and can start building powerful AI applications.