# general_chat / main.py
from fastapi import FastAPI, HTTPException, Depends, Security
from fastapi.security import APIKeyHeader
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from typing import Literal
import os
import secrets
from functools import lru_cache
from openai import OpenAI
app = FastAPI()
API_KEY_NAME = "X-API-Key"
API_KEY = os.environ.get("API_KEY", "default_secret_key") # Set this in your environment variables
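# e.g. export API_KEY="my-secret-key"; clients must send this exact value in the X-API-Key header.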
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
ModelID = Literal[
"meta-llama/llama-3-70b-instruct",
"anthropic/claude-3.5-sonnet",
"deepseek/deepseek-coder",
"anthropic/claude-3-haiku",
"openai/gpt-3.5-turbo-instruct",
"qwen/qwen-72b-chat",
"google/gemma-2-27b-it"
]
class QueryModel(BaseModel):
user_query: str = Field(..., description="User's coding query")
model_id: ModelID = Field(
default="meta-llama/llama-3-70b-instruct",
description="ID of the model to use for response generation"
)
    class Config:
        # NOTE: `schema_extra` is the Pydantic v1 name; Pydantic v2 renamed it
        # to `json_schema_extra` (set via `model_config`).
        schema_extra = {
            "example": {
                "user_query": "How do I implement a binary search in Python?",
                "model_id": "meta-llama/llama-3-70b-instruct"
            }
        }
@lru_cache()
def get_api_keys():
    # The "sk-or-v1-" prefix is prepended here, so OPENROUTER_API_KEY must be set
    # to the key *without* that prefix. Raises KeyError if the variable is unset.
    return {
        "OPENROUTER_API_KEY": f"sk-or-v1-{os.environ['OPENROUTER_API_KEY']}"
    }
api_keys = get_api_keys()
# OpenRouter exposes an OpenAI-compatible API, so the standard OpenAI client
# works once base_url points at it.
or_client = OpenAI(api_key=api_keys["OPENROUTER_API_KEY"], base_url="https://openrouter.ai/api/v1")
def chat_with_llama_stream(messages, model, max_output_tokens=2500):
    # Despite the name, this streams completions for any OpenRouter model ID.
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True
        )
        for chunk in response:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content
    except Exception as e:
        # NOTE: once streaming has begun, FastAPI can no longer convert this into
        # a proper 500 response; the client will instead see the stream cut off.
        raise HTTPException(status_code=500, detail=f"Error in model response: {str(e)}")
async def verify_api_key(api_key: str = Security(api_key_header)):
    # Constant-time comparison avoids leaking key contents via response timing.
    if api_key is None or not secrets.compare_digest(api_key, API_KEY):
        raise HTTPException(status_code=403, detail="Could not validate credentials")
    return api_key
@app.post("/coding-assistant")
async def coding_assistant(query: QueryModel, api_key: str = Depends(verify_api_key)):
"""
Coding assistant endpoint that provides programming help based on user queries.
Available models:
- meta-llama/llama-3-70b-instruct (default)
- anthropic/claude-3.5-sonnet
- deepseek/deepseek-coder
- anthropic/claude-3-haiku
- openai/gpt-3.5-turbo-instruct
- qwen/qwen-72b-chat
- google/gemma-2-27b-it
Requires API Key authentication via X-API-Key header.
"""
system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": query.user_query}
]
return StreamingResponse(
chat_with_llama_stream(messages, model=query.model_id),
media_type="text/event-stream"
)
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)
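
# --- Example client (illustrative sketch, not part of the app) ---
# Assuming the server runs locally on port 7860 and API_KEY is left at its
# default, the streamed response can be consumed with `requests` like this:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/coding-assistant",
#       headers={"X-API-Key": "default_secret_key"},
#       json={"user_query": "How do I implement a binary search in Python?"},
#       stream=True,
#   )
#   resp.raise_for_status()
#   for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
#       print(chunk, end="", flush=True)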