# Spaces: Running
import os
from functools import lru_cache
from typing import Literal

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from openai import OpenAI
from pydantic import BaseModel, Field
# Application instance that the middleware below and the uvicorn entry point attach to.
app = FastAPI()
# Closed set of model identifiers accepted in a request's `model_id` field.
# The strings are passed through unchanged as the `model` argument of the
# chat-completion call.
ModelID = Literal[
    "meta-llama/llama-3-70b-instruct",
    "anthropic/claude-3.5-sonnet",
    "deepseek/deepseek-coder",
    "anthropic/claude-3-haiku",
    "openai/gpt-3.5-turbo-instruct",
    "qwen/qwen-72b-chat",
    "google/gemma-2-27b-it",
]
class QueryModel(BaseModel):
    # Request payload for the coding assistant: the user's question plus the
    # model that should answer it. (Documented with comments rather than a
    # docstring so the generated schema description is left untouched.)

    # Required free-text query.
    user_query: str = Field(..., description="User's coding query")

    # Optional; restricted to the identifiers listed in ModelID.
    model_id: ModelID = Field(
        description="ID of the model to use for response generation",
        default="meta-llama/llama-3-70b-instruct",
    )

    class Config:
        # Example payload stored under the model's `schema_extra` setting.
        schema_extra = {
            "example": dict(
                user_query="How do I implement a binary search in Python?",
                model_id="meta-llama/llama-3-70b-instruct",
            ),
        }
def get_api_keys():
    """Return the API keys this service needs, read from the environment.

    The ``OPENROUTER_API_KEY`` environment variable is returned with the
    ``sk-or-v1-`` prefix. The prefix is added only when the value does not
    already start with it, so both a bare key suffix and a complete key
    produce a single, correctly prefixed result.

    Returns:
        dict: ``{"OPENROUTER_API_KEY": "<prefixed key>"}``.

    Raises:
        KeyError: If ``OPENROUTER_API_KEY`` is not set in the environment.
    """
    prefix = "sk-or-v1-"
    try:
        raw_key = os.environ["OPENROUTER_API_KEY"]
    except KeyError:
        # Same exception type as a plain lookup, but with an actionable message.
        raise KeyError("OPENROUTER_API_KEY environment variable is not set") from None

    # Avoid producing "sk-or-v1-sk-or-v1-..." when the full key was configured.
    if not raw_key.startswith(prefix):
        raw_key = f"{prefix}{raw_key}"
    return {"OPENROUTER_API_KEY": raw_key}
# Keys are resolved once at import time; the client below is shared by all requests.
api_keys = get_api_keys()

# OpenAI-compatible client pointed at the OpenRouter endpoint.
or_client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=api_keys["OPENROUTER_API_KEY"],
)
def chat_with_llama_stream(messages, model, max_output_tokens=4000):
    """Stream the text of a chat completion from the module-level client.

    This is a generator, so the completion request is only issued once the
    caller starts iterating.

    Args:
        messages: Chat messages, passed through to
            ``or_client.chat.completions.create``.
        model: Model identifier, passed through as ``model``.
        max_output_tokens: Passed as ``max_tokens``. Defaults to 4000.

    Yields:
        Every ``delta.content`` value of the stream that is not ``None``,
        in arrival order.

    Raises:
        HTTPException: Status 500, wrapping any exception raised while
            creating or consuming the stream.
    """
    try:
        response = or_client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_output_tokens,
            stream=True,
        )
        for chunk in response:
            # A chunk with an empty `choices` list carries no text; indexing
            # it would raise IndexError and abort the whole stream.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            if piece is not None:
                yield piece
    except Exception as e:
        # NOTE(review): when this generator backs a streaming response, the
        # error surfaces during iteration - presumably after the response has
        # started; confirm how the framework reports it to the client.
        raise HTTPException(
            status_code=500, detail=f"Error in model response: {str(e)}"
        ) from e
async def coding_assistant(query: QueryModel):
    """
    Coding assistant endpoint that provides programming help based on user queries.
    Available models:
    - meta-llama/llama-3-70b-instruct (default)
    - anthropic/claude-3.5-sonnet
    - deepseek/deepseek-coder
    - anthropic/claude-3-haiku
    - openai/gpt-3.5-turbo-instruct
    - qwen/qwen-72b-chat
    - google/gemma-2-27b-it
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view - confirm it is registered on `app`.
    system_prompt = "You are a helpful assistant proficient in coding tasks. Help the user in understanding and writing code."

    # Two-message conversation: fixed system prompt, then the caller's query.
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": query.user_query}
    conversation = [system_message, user_message]

    # The generator is handed to the response object, which iterates it.
    token_stream = chat_with_llama_stream(conversation, model=query.model_id)
    return StreamingResponse(token_stream, media_type="text/event-stream")
# Cross-origin policy for the whole application: any origin, method and
# header is accepted, and credentials are allowed.
# Requires `CORSMiddleware` from `fastapi.middleware.cors` to be imported at
# the top of the file; without that import this statement raises NameError.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive - confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run as a script.
    import uvicorn

    # Listen on all interfaces, port 7860.
    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )