API 调用基础
本文介绍如何调用大语言模型 API,包括基本概念、请求格式、响应处理等内容。
API 基础概念
什么是 LLM API
LLM API 是大语言模型提供的编程接口,允许开发者:
- 发送文本/图片等输入
- 接收模型生成的回复
- 控制生成参数
- 使用各种高级功能
OpenAI 兼容格式
许多 LLM 服务商提供 OpenAI 兼容的接口(原生支持或通过兼容端点/网关),包括:
- OpenAI (GPT 系列)
- Anthropic (Claude)
- Google (Gemini)
- 国内各大模型
快速开始
安装 SDK
pip install openai
基础调用
from openai import OpenAI
# Initialize the client; base_url points at an OpenAI-compatible gateway.
client = OpenAI(
api_key="your-api-key",
base_url="https://api.weelinking.com/v1"
)
# Send a chat completion request.
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{"role": "system", "content": "你是一个有帮助的助手。"},
{"role": "user", "content": "你好,请介绍一下自己。"}
]
)
# Print the assistant's reply text (first choice).
print(response.choices[0].message.content)
请求参数详解
核心参数
| 参数 | 类型 | 说明 |
|---|---|---|
| model | string | 模型名称 |
| messages | array | 对话消息列表 |
| temperature | float | 随机性,0-2 |
| max_tokens | int | 最大输出长度 |
| stream | bool | 是否流式输出 |
messages 格式
messages = [
{
"role": "system", # system prompt: sets the assistant's behavior
"content": "你是一个专业的翻译。"
},
{
"role": "user", # the user's request
"content": "翻译这句话:Hello World"
},
{
"role": "assistant", # a prior AI reply (used in multi-turn conversations)
"content": "你好,世界"
}
]
高级参数
response = client.chat.completions.create(
model="gpt-4o",
messages=messages,
# Generation control
temperature=0.7, # randomness, range 0-2
top_p=0.9, # nucleus sampling
max_tokens=1000, # max output tokens
# Stop conditions
stop=["###", "END"], # stop sequences that end generation
# Misc
n=1, # number of completions to generate
presence_penalty=0, # penalize tokens that already appeared at all
frequency_penalty=0, # penalize tokens proportionally to their frequency
# Caller identification
user="user-123" # end-user id for tracking/abuse monitoring
)
响应处理
响应结构
{
"id": "chatcmpl-xxx",
"object": "chat.completion",
"created": 1234567890,
"model": "gpt-4o",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "回复内容"
},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 10,
"completion_tokens": 20,
"total_tokens": 30
}
}
提取内容
# Extract the reply text from the first choice.
content = response.choices[0].message.content
# Token accounting (useful for cost tracking).
usage = response.usage
print(f"输入: {usage.prompt_tokens}, 输出: {usage.completion_tokens}")
# Why generation ended:
finish_reason = response.choices[0].finish_reason
# stop: ended normally at a natural stop point or stop sequence
# length: hit max_tokens — the output may be truncated
# content_filter: blocked by content filtering
# NOTE: tool/function-calling responses report "tool_calls" here as well.
流式输出
基础流式
stream = client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "讲个故事"}],
stream=True
)
for chunk in stream:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)
异步流式
from openai import AsyncOpenAI
# Async variant of the client for use inside asyncio code.
async_client = AsyncOpenAI(
api_key="your-api-key",
base_url="https://api.weelinking.com/v1"
)
async def stream_chat():
stream = await async_client.chat.completions.create(
model="gpt-4o",
messages=[{"role": "user", "content": "讲个故事"}],
stream=True
)
async for chunk in stream:
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="")
多模态调用
图片理解
# Vision request: `content` becomes a list mixing text and image parts.
response = client.chat.completions.create(
model="gpt-4o",
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "描述这张图片"},
{
"type": "image_url",
"image_url": {
"url": "https://example.com/image.jpg"
# or inline base64: "data:image/jpeg;base64,..."
}
}
]
}
]
)
图片生成
# Image generation via the images endpoint.
response = client.images.generate(
model="dall-e-3",
prompt="一只可爱的猫咪在阳光下",
size="1024x1024",
quality="hd", # higher quality; "standard" is cheaper
n=1 # number of images to generate
)
# URL of the first generated image.
image_url = response.data[0].url
错误处理
常见错误
from openai import (
APIError,
RateLimitError,
APIConnectionError,
AuthenticationError
)
try:
# `...` is a placeholder for real request arguments.
response = client.chat.completions.create(...)
except AuthenticationError:
print("API Key 无效")
except RateLimitError:
print("请求过于频繁,请稍后重试")
except APIConnectionError:
print("网络连接失败")
except APIError as e:
# Catch-all for other API-side errors; keep it last so the more
# specific handlers above take precedence.
print(f"API 错误: {e}")
重试策略
import time
from tenacity import retry, stop_after_attempt, wait_exponential
# Up to 3 attempts with exponential backoff (waits 1s, then 2s, capped at 10s).
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=1, max=10)
)
def call_api_with_retry(messages):
# NOTE(review): this retries on ANY exception, including non-transient
# ones like AuthenticationError; consider tenacity's
# retry=retry_if_exception_type(RateLimitError) to restrict it.
return client.chat.completions.create(
model="gpt-4o",
messages=messages
)
最佳实践
1. 环境变量管理
import os
from openai import OpenAI
# Read credentials from the environment instead of hard-coding them.
client = OpenAI(
api_key=os.getenv("OPENAI_API_KEY"),
# Second argument is the fallback used when OPENAI_BASE_URL is unset.
base_url=os.getenv("OPENAI_BASE_URL", "https://api.weelinking.com/v1")
)
2. 超时设置
client = OpenAI(
api_key="your-api-key",
base_url="https://api.weelinking.com/v1",
timeout=30.0 # fail requests that take longer than 30 seconds
)
3. 日志记录
import logging
# Module-level logging setup; INFO level makes the request/usage logs visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def call_llm(messages):
logger.info(f"调用 LLM,消息数: {len(messages)}")
response = client.chat.completions.create(
model="gpt-4o",
messages=messages
)
logger.info(f"Token 使用: {response.usage.total_tokens}")
return response
4. 成本控制
def estimate_cost(response, model="gpt-4o"):
# 价格(示例,需要根据实际价格调整)
prices = {
"gpt-4o": {"input": 0.005, "output": 0.015},
"gpt-4o-mini": {"input": 0.00015, "output": 0.0006}
}
price = prices.get(model, {"input": 0, "output": 0})
input_cost = response.usage.prompt_tokens / 1000 * price["input"]
output_cost = response.usage.completion_tokens / 1000 * price["output"]
return input_cost + output_cost