Spaces:
Paused
Paused
File size: 3,323 Bytes
7d51224 1044c29 7d51224 acc58cf a7653ed 7d51224 c3fd9b2 1044c29 7d51224 acc58cf 7d51224 86f94f0 b7ec1ef 86f94f0 6218ec6 b7ec1ef 3151c18 7d973d2 6218ec6 7d973d2 6218ec6 7d973d2 6218ec6 b7ec1ef 86f94f0 b7ec1ef 86f94f0 b7ec1ef 86f94f0 7d51224 0d521c3 1044c29 86f94f0 7d51224 0d521c3 7d51224 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import fastapi
import json
import markdown
import uvicorn
from ctransformers import AutoModelForCausalLM
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from sse_starlette.sse import EventSourceResponse
from ctransformers.langchain import CTransformers
from pydantic import BaseModel, Field
from typing import List, Any
from typing_extensions import TypedDict, Literal
# Load the StarChat Alpha GGML model once, when the module is imported.
# The request handlers in this module all use this single `llm` instance.
llm = AutoModelForCausalLM.from_pretrained(
    "NeoDim/starchat-alpha-GGML",
    model_type="starcoder",
    model_file="starchat-alpha-ggml-q4_0.bin",
)
# FastAPI application object that the route decorators in this module attach to.
app = fastapi.FastAPI(title="Starchat Alpha")

# Allow browsers on any origin to call the API.
#
# Credentials are deliberately disabled: a wildcard origin cannot be combined
# with credentialed requests (FastAPI's CORS documentation states that
# `allow_origins` cannot be ["*"] when credentials are allowed), and none of
# the routes visible in this module read cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def index():
    """Render ``README.md`` as the HTML landing page.

    The file is opened on every request, using a path relative to the
    process's current working directory, and converted from Markdown to HTML.

    Returns:
        An ``HTMLResponse`` with status 200 containing the rendered README.
    """
    with open("README.md", mode="r", encoding="utf-8") as source_file:
        readme_markdown = source_file.read()
    return HTMLResponse(
        content=markdown.markdown(readme_markdown),
        status_code=200,
    )
class ChatCompletionRequest(BaseModel):
    """Request body accepted by the ``POST /v1/chat/completions`` endpoint."""

    # Text that the handler tokenizes and passes to the model for generation.
    prompt: str
@app.get("/demo")
async def demo():
    """Serve a small HTML page that renders the ``/stream`` output live.

    The page opens an ``EventSource`` on this server's ``/stream`` endpoint and
    renders the text received so far as Markdown, using the ``marked`` library
    loaded from jsDelivr.

    Returns:
        An ``HTMLResponse`` with status 200 containing the demo page.
    """
    html_content = """
<!DOCTYPE html>
<html>
  <head>
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
  </head>
  <body>
    <style>
      code {
        display: inline-block;
        background-color: lightgray;
      }
      #content {
        font-family: "SFMono-Regular",Consolas,"Liberation Mono",Menlo,Courier,monospace !important;
        box-sizing: border-box;
        min-width: 200px;
        max-width: 980px;
        margin: 0 auto;
        padding: 45px;
        font-size: 16px;
      }
      @media (max-width: 767px) {
        #content {
          padding: 15px;
        }
      }
    </style>
    <article id="content"></article>
    <script>
      var content = document.getElementById("content");
      // Raw text received so far. Markdown constructs span many chunks, so the
      // whole buffer is re-rendered on every event instead of parsing each
      // chunk in isolation (which would wrap every token in its own <p>).
      var buffer = "";
      // Relative URL: stream from whichever host served this page.
      var source = new EventSource("/stream");
      source.onmessage = function(event) {
        buffer += event.data;
        // NOTE: marked does not sanitize its output; the model's text is
        // inserted as HTML here.
        content.innerHTML = marked.parse(buffer);
      };
      // EventSource reconnects automatically when the connection drops, which
      // would start a new generation each time the server finishes a stream.
      source.onerror = function() {
        source.close();
      };
    </script>
  </body>
</html>
"""
    return HTMLResponse(content=html_content, status_code=200)
@app.get("/stream")
async def chat(prompt = "Write a simple express server in rust"):
    """Stream a completion for ``prompt`` as server-sent events.

    The first event carries the prompt itself, each following event carries
    the detokenized text of one generated chunk, and a final empty event is
    yielded once generation stops.

    Args:
        prompt: Text to complete; a sample prompt is used when it is omitted.

    Returns:
        An ``EventSourceResponse`` wrapping the event generator.
    """
    # Tokenize up front, before the streaming response is constructed.
    prompt_tokens = llm.tokenize(prompt)

    async def event_stream(token_ids, model):
        """Yield the prompt, then each generated chunk, then an empty string."""
        yield prompt
        for generated in model.generate(token_ids):
            yield model.detokenize(generated)
        yield ""

    return EventSourceResponse(event_stream(prompt_tokens, llm))
@app.post("/v1/chat/completions")
async def chat(request: ChatCompletionRequest, response_mode=None):
    """Stream a completion for ``request.prompt`` as server-sent events.

    Each event carries the detokenized text of one generated chunk; a final
    empty event is yielded once generation stops.

    NOTE: this definition rebinds the module-level name ``chat`` that the
    ``GET /stream`` handler also uses. Both routes are still registered,
    because each decorator runs when its function is defined.

    Args:
        request: Parsed request body holding the prompt.
        response_mode: Accepted but not used by this handler.

    Returns:
        An ``EventSourceResponse`` wrapping the event generator.
    """
    # Tokenize up front, before the streaming response is constructed.
    prompt_tokens = llm.tokenize(request.prompt)

    async def event_stream(token_ids, model):
        """Yield each generated chunk as text, then an empty string."""
        for generated in model.generate(token_ids):
            yield model.detokenize(generated)
        yield ""

    return EventSourceResponse(event_stream(prompt_tokens, llm))
if __name__ == "__main__":
    # Run as a script: serve the app with uvicorn on all interfaces, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")
|