huggingface模型下载,ollama+fastapi接口
模型下载
https://hf-mirror.com/
https://huggingface.co/
pip install -U huggingface_hub
import os

# Route all Hugging Face downloads through the hf-mirror proxy
# (useful where huggingface.co is unreachable).
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

# Download the model with resume support into a local directory.
# Raw string avoids invalid-escape warnings from the Windows path backslashes.
os.system(r'huggingface-cli download --resume-download deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B --local-dir E:\models\deepseek-r1-1.5B')
ollama运行
如果用ollama运行,则需要把模型转成gguf格式的。
API调用
import logging
import os

import requests
import uvicorn
from dotenv import load_dotenv, find_dotenv
from fastapi import FastAPI, HTTPException, Header, Depends
from fastapi.responses import StreamingResponse

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="Ollama API Service")

# Base URL of the locally running Ollama generate endpoint.
OLLAMA_API_URL = "http://localhost:11434/api/generate"
MODEL_NAME = "deepseek-r1:1.5b"load_dotenv(find_dotenv(".env"))API_KEY_CREDITS = {os.getenv("API_KEY"): 5}def verify_api_key(x_api_key: str = Header(None, alias="x_api_key")):credits = API_KEY_CREDITS.get(x_api_key, 0)if credits <= 0:raise HTTPException(status_code=401,detail="Invalid API key")return x_api_keydef generate_response(prompt: str):try:payload = {"model": MODEL_NAME,"prompt": prompt,"stream": False}response = requests.post(OLLAMA_API_URL,json=payload,timeout=600)if response.status_code == 200:return response.json()["response"]else:return f"Error: {response.text}"except requests.exceptions.ConnectionError:raise HTTPException(status_code=503,detail="Ollama service unavailable. Please ensure Ollama is running.")except Exception as e:raise HTTPException(status_code=500,detail=f"Internal server error: {str(e)}")@app.post("/generate")
async def generate(payload: dict, x_api_key: str = Depends(verify_api_key)):API_KEY_CREDITS[x_api_key] -= 1if "prompt" not in payload:raise HTTPException(status_code=400,detail="Missing 'prompt' in request body")result = generate_response(payload["prompt"])return {"response": result}@app.post("/generate-stream")
async def generate_stream(payload: dict):
    """Streaming generation endpoint, emitted as Server-Sent Events.

    Body: ``{"prompt": "..."}``. Each line of Ollama's line-delimited JSON
    stream is forwarded as one ``data:`` SSE event.

    NOTE(review): unlike /generate, this route performs no API-key check and
    charges no credits — confirm whether that is intentional.
    """
    if "prompt" not in payload:
        raise HTTPException(status_code=400, detail="Missing 'prompt' in request body")

    def event_source():
        # Proxy the Ollama stream; the context manager closes the connection
        # when the client disconnects or the stream ends.
        with requests.post(
            OLLAMA_API_URL,
            json={"model": MODEL_NAME, "prompt": payload["prompt"], "stream": True},
            stream=True,
        ) as r:
            for line in r.iter_lines():
                if line:
                    yield f"data: {line.decode()}\n\n"

    return StreamingResponse(event_source(), media_type="text/event-stream")


if __name__ == "__main__":
    logger.info("启动 Ollama API 服务...")
    uvicorn.run(app, host="0.0.0.0", port=8000)
.env文件
API_KEY = sk-123456
测试
curl --location 'http://localhost:8000/generate' \
--header 'Content-Type: application/json' \
--header 'x_api_key: sk-123456' \
--data '{"prompt": "1+2=?"}'