Commit f2139e9
Parent(s): cb62aa5
Initial commit: Add Llama3-Papalia inference UI and API
Files changed:
- .DS_Store +0 -0
- Dockerfile +51 -0
- app.py +111 -0
- requirements.txt +7 -0
- templates/index.html +139 -0
.DS_Store
ADDED
Binary file (6.15 kB).
Dockerfile
ADDED
@@ -0,0 +1,51 @@
FROM python:3.9

# Install Ollama and required tools
RUN apt-get update && apt-get install -y curl netcat-traditional && \
    curl -fsSL https://ollama.com/install.sh | sh

# Create non-root user
RUN useradd -m -u 1000 user

WORKDIR /app

COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy application files including templates
COPY --chown=user . /app

# Create startup script
RUN echo '#!/bin/bash\n\
echo "Iniciando servidor Ollama..."\n\
ollama serve &\n\
\n\
# Wait until Ollama is available\n\
echo "Esperando a que Ollama esté listo..."\n\
timeout=60\n\
while ! nc -z localhost 11434; do\n\
    if [ "$timeout" -le "0" ]; then\n\
        echo "Tiempo de espera agotado para Ollama"\n\
        exit 1\n\
    fi\n\
    echo "Esperando a Ollama... $timeout segundos restantes"\n\
    timeout=$((timeout-1))\n\
    sleep 1\n\
done\n\
\n\
echo "Verificando modelo llama3-papalia-nuevo..."\n\
if ! ollama list | grep -q "llama3-papalia-nuevo"; then\n\
    echo "Iniciando modelo llama3-papalia-nuevo..."\n\
fi\n\
\n\
echo "Iniciando API..."\n\
uvicorn app:app --host 0.0.0.0 --port 7860\n\
' > /app/start.sh

RUN chmod +x /app/start.sh

USER user
ENV PATH="/home/user/.local/bin:$PATH"

# Start both Ollama and the FastAPI app
CMD ["/app/start.sh"]
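For local testing outside the container, the `nc -z` readiness loop in start.sh can be mirrored from Python. A minimal sketch, assuming Ollama's default port 11434; the helper `wait_for_port` is illustrative and not part of this commit:

import socket
import time

def wait_for_port(host="localhost", port=11434, timeout=60):
    # Poll the TCP port once per second, mirroring the nc -z loop in start.sh
    deadline = time.time() + timeout
    while time.time() < deadline:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)
            if sock.connect_ex((host, port)) == 0:
                return True
        time.sleep(1)
    return False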
app.py
ADDED
@@ -0,0 +1,111 @@
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import httpx
import os
import logging
from typing import Optional

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="Llama3-Papalia Inference API & UI",
    description="API y UI para interactuar con el modelo Llama3-Papalia especializado en Desarrollo Humano",
    version="1.0.0"
)

# Enable CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# HTML template configuration
templates = Jinja2Templates(directory="templates")

class QueryRequest(BaseModel):
    prompt: str
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 500

class QueryResponse(BaseModel):
    response: str
    model: str = "llama3-papalia-nuevo"

OLLAMA_API_URL = "http://localhost:11434/api/generate"

@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    return templates.TemplateResponse(
        "index.html",
        {"request": request, "title": "Llama3-Papalia Inference"}
    )

@app.post("/generate", response_model=QueryResponse)
async def generate_response(query: QueryRequest):
    logger.info(f"Recibida solicitud de generación con prompt: {query.prompt[:50]}...")

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            logger.info(f"Enviando solicitud a Ollama: {OLLAMA_API_URL}")
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": query.prompt,
                    # Ollama streams by default; request a single JSON response
                    "stream": False,
                    # Generation parameters belong under "options" in the Ollama API
                    "options": {
                        "temperature": query.temperature,
                        "num_predict": query.max_tokens,
                    },
                }
            )

            logger.info(f"Respuesta de Ollama recibida con status code: {response.status_code}")

            if response.status_code != 200:
                logger.error(f"Error en la respuesta de Ollama: {response.text}")
                raise HTTPException(
                    status_code=500,
                    detail=f"Error en la generación con Ollama: {response.text}"
                )

            result = response.json()
            logger.info("Respuesta procesada exitosamente")
            return QueryResponse(response=result["response"])

    except httpx.TimeoutException:
        logger.error("Timeout al conectar con Ollama")
        raise HTTPException(
            status_code=504,
            detail="Timeout al conectar con el servicio de Ollama"
        )
    except HTTPException:
        # Re-raise HTTP errors as-is instead of wrapping them in a generic 500
        raise
    except Exception as e:
        logger.error(f"Error inesperado: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error en el servidor: {str(e)}"
        )

@app.get("/health")
async def health_check():
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": "test",
                    "stream": False,
                    "options": {"num_predict": 1},
                }
            )
            if response.status_code == 200:
                return {"status": "healthy", "ollama_status": "connected"}
            # Non-200 answers previously fell through and returned None
            return {"status": "unhealthy", "ollama_status": f"HTTP {response.status_code}"}
    except Exception as e:
        logger.error(f"Error en health check: {str(e)}")
        return {"status": "unhealthy", "error": str(e)}
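Once the Space is up, the /generate endpoint defined above can be exercised directly. A minimal client sketch; the base URL and the example prompt are placeholders:

import httpx

BASE_URL = "http://localhost:7860"  # placeholder: substitute the deployed Space URL

payload = {
    "prompt": "¿Qué etapas del desarrollo humano describe Papalia?",
    "temperature": 0.7,
    "max_tokens": 500,
}

# POST the request and print the model's reply from the QueryResponse body
resp = httpx.post(f"{BASE_URL}/generate", json=payload, timeout=60.0)
resp.raise_for_status()
print(resp.json()["response"])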
requirements.txt
ADDED
@@ -0,0 +1,7 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
python-dotenv==1.0.0
httpx==0.25.2
pydantic==2.5.2
jinja2==3.1.2
python-multipart==0.0.6
templates/index.html
ADDED
@@ -0,0 +1,139 @@
<!DOCTYPE html>
<html>
<head>
    <title>Llama3-Papalia Inference</title>
    <script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="bg-gray-100 p-8">
    <div class="max-w-4xl mx-auto">
        <h1 class="text-3xl font-bold mb-8">Llama3-Papalia Inference</h1>

        <div class="bg-white rounded-lg shadow-md p-6">
            <div class="mb-4">
                <label class="block text-gray-700 text-sm font-bold mb-2" for="prompt">
                    Prompt
                </label>
                <textarea
                    id="prompt"
                    class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline"
                    rows="4"
                    placeholder="Escribe tu pregunta aquí..."></textarea>
            </div>

            <div class="grid grid-cols-2 gap-4 mb-4">
                <div>
                    <label class="block text-gray-700 text-sm font-bold mb-2" for="temperature">
                        Temperature
                    </label>
                    <input
                        type="number"
                        id="temperature"
                        value="0.7"
                        min="0"
                        max="1"
                        step="0.1"
                        class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
                </div>
                <div>
                    <label class="block text-gray-700 text-sm font-bold mb-2" for="max_tokens">
                        Max Tokens
                    </label>
                    <input
                        type="number"
                        id="max_tokens"
                        value="500"
                        min="1"
                        max="2000"
                        class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
                </div>
            </div>

            <button
                id="generate-button"
                onclick="generateResponse()"
                class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline">
                Generar Respuesta
            </button>

            <div class="mt-8">
                <label class="block text-gray-700 text-sm font-bold mb-2">
                    Respuesta
                </label>
                <div
                    id="response"
                    class="mt-2 p-4 bg-gray-100 rounded min-h-[200px] whitespace-pre-wrap">
                </div>
            </div>
        </div>
        <div id="service-status" class="text-gray-600 text-sm mt-2">Verificando estado del servicio...</div>
    </div>

    <script>
        async function checkHealth() {
            try {
                const response = await fetch('/health');
                const data = await response.json();
                const statusEl = document.getElementById('service-status');
                if (data.status === 'healthy') {
                    statusEl.textContent = '✅ Servicio activo';
                    statusEl.className = 'text-green-600';
                } else {
                    statusEl.textContent = '❌ Servicio no disponible';
                    statusEl.className = 'text-red-600';
                }
            } catch (error) {
                const statusEl = document.getElementById('service-status');
                statusEl.textContent = '❌ Error de conexión';
                statusEl.className = 'text-red-600';
            }
        }

        async function generateResponse() {
            const promptEl = document.getElementById('prompt');
            const temperatureEl = document.getElementById('temperature');
            const maxTokensEl = document.getElementById('max_tokens');
            const responseEl = document.getElementById('response');
            const buttonEl = document.getElementById('generate-button');

            if (!promptEl.value.trim()) {
                responseEl.textContent = 'Por favor, escribe una pregunta.';
                return;
            }

            buttonEl.disabled = true;
            responseEl.textContent = '⏳ Generando respuesta...';

            try {
                const response = await fetch('/generate', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        prompt: promptEl.value,
                        temperature: parseFloat(temperatureEl.value),
                        max_tokens: parseInt(maxTokensEl.value),
                    }),
                });

                const data = await response.json();

                if (!response.ok) {
                    throw new Error(data.detail || 'Error en la generación de respuesta');
                }

                responseEl.textContent = data.response || 'No se recibió respuesta del modelo';
                responseEl.className = 'mt-2 p-4 bg-gray-100 rounded min-h-[200px] whitespace-pre-wrap';
            } catch (error) {
                responseEl.innerHTML = `❌ Error: ${error.message}<br><br>Por favor, verifica que:<br>1. El modelo llama3-papalia-nuevo está instalado<br>2. El servicio Ollama está corriendo<br>3. El puerto 11434 está accesible`;
                responseEl.className = 'mt-2 p-4 bg-red-50 text-red-700 rounded min-h-[200px] whitespace-pre-wrap';
            } finally {
                buttonEl.disabled = false;
            }
        }

        checkHealth();
        setInterval(checkHealth, 30000);
    </script>
</body>
</html>
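As a quick smoke test of the routes above, a pytest sketch using FastAPI's TestClient; it assumes the test runs from the repository root so templates/ resolves, and that without a local Ollama /health reports unhealthy rather than crashing:

from fastapi.testclient import TestClient

from app import app

client = TestClient(app)

def test_root_serves_ui():
    # The index page renders from templates/index.html
    r = client.get("/")
    assert r.status_code == 200
    assert "Llama3-Papalia" in r.text

def test_health_reports_status():
    # With no Ollama running, /health should still return a status payload
    r = client.get("/health")
    assert r.status_code == 200
    assert "status" in r.json()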