andresdegante committed
Commit f2139e9 · 1 Parent(s): cb62aa5

Initial commit: Add Llama3-Papalia inference UI and API

Files changed (5)
  1. .DS_Store +0 -0
  2. Dockerfile +51 -0
  3. app.py +111 -0
  4. requirements.txt +7 -0
  5. templates/index.html +139 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
Dockerfile ADDED
@@ -0,0 +1,51 @@
+ FROM python:3.9
+
+ # Install Ollama and required tools
+ RUN apt-get update && apt-get install -y curl netcat-traditional && \
+     curl -fsSL https://ollama.com/install.sh | sh
+
+ # Create non-root user
+ RUN useradd -m -u 1000 user
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ # Copy application files including templates
+ COPY --chown=user . /app
+
+ # Create startup script
+ RUN echo '#!/bin/bash\n\
+ echo "Iniciando servidor Ollama..."\n\
+ ollama serve &\n\
+ \n\
+ # Wait until Ollama is available\n\
+ echo "Esperando a que Ollama esté listo..."\n\
+ timeout=60\n\
+ while ! nc -z localhost 11434; do\n\
+ if [ "$timeout" -le "0" ]; then\n\
+ echo "Tiempo de espera agotado para Ollama"\n\
+ exit 1\n\
+ fi\n\
+ echo "Esperando a Ollama... $timeout segundos restantes"\n\
+ timeout=$((timeout-1))\n\
+ sleep 1\n\
+ done\n\
+ \n\
+ echo "Verificando modelo llama3-papalia-nuevo..."\n\
+ if ! ollama list | grep -q "llama3-papalia-nuevo"; then\n\
+ echo "Iniciando modelo llama3-papalia-nuevo..."\n\
+ fi\n\
+ \n\
+ echo "Iniciando API..."\n\
+ uvicorn app:app --host 0.0.0.0 --port 7860\n\
+ ' > /app/start.sh
+
+ RUN chmod +x /app/start.sh
+
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ # Start both Ollama and the FastAPI app
+ CMD ["/app/start.sh"]
app.py ADDED
@@ -0,0 +1,111 @@
+ from fastapi import FastAPI, HTTPException, Request
+ from fastapi.responses import HTMLResponse, JSONResponse
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.templating import Jinja2Templates
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ import httpx
+ import os
+ import logging
+ from typing import Optional
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = FastAPI(
+     title="Llama3-Papalia Inference API & UI",
+     description="API y UI para interactuar con el modelo Llama3-Papalia especializado en Desarrollo Humano",
+     version="1.0.0"
+ )
+
+ # Enable CORS
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # HTML template configuration
+ templates = Jinja2Templates(directory="templates")
+
+ class QueryRequest(BaseModel):
+     prompt: str
+     temperature: Optional[float] = 0.7
+     max_tokens: Optional[int] = 500
+
+ class QueryResponse(BaseModel):
+     response: str
+     model: str = "llama3-papalia-nuevo"
+
+ OLLAMA_API_URL = "http://localhost:11434/api/generate"
+
+ @app.get("/", response_class=HTMLResponse)
+ async def read_root(request: Request):
+     return templates.TemplateResponse(
+         "index.html",
+         {"request": request, "title": "Llama3-Papalia Inference"}
+     )
+
+ @app.post("/generate", response_model=QueryResponse)
+ async def generate_response(query: QueryRequest):
+     logger.info(f"Recibida solicitud de generación con prompt: {query.prompt[:50]}...")
+
+     try:
+         async with httpx.AsyncClient(timeout=30.0) as client:
+             logger.info(f"Enviando solicitud a Ollama: {OLLAMA_API_URL}")
+             response = await client.post(
+                 OLLAMA_API_URL,
+                 json={
+                     "model": "llama3-papalia-nuevo",
+                     "prompt": query.prompt,
+                     # Disable streaming so Ollama returns a single JSON object
+                     "stream": False,
+                     # Ollama expects sampling parameters under "options"
+                     "options": {
+                         "temperature": query.temperature,
+                         "num_predict": query.max_tokens
+                     }
+                 }
+             )
+
+             logger.info(f"Respuesta de Ollama recibida con status code: {response.status_code}")
+
+             if response.status_code != 200:
+                 logger.error(f"Error en la respuesta de Ollama: {response.text}")
+                 raise HTTPException(
+                     status_code=500,
+                     detail=f"Error en la generación con Ollama: {response.text}"
+                 )
+
+             result = response.json()
+             logger.info("Respuesta procesada exitosamente")
+             return QueryResponse(response=result["response"])
+
+     except HTTPException:
+         # Re-raise HTTP errors as-is instead of wrapping them in a generic 500
+         raise
+     except httpx.TimeoutException:
+         logger.error("Timeout al conectar con Ollama")
+         raise HTTPException(
+             status_code=504,
+             detail="Timeout al conectar con el servicio de Ollama"
+         )
+     except Exception as e:
+         logger.error(f"Error inesperado: {str(e)}")
+         raise HTTPException(
+             status_code=500,
+             detail=f"Error en el servidor: {str(e)}"
+         )
+
+ @app.get("/health")
+ async def health_check():
+     try:
+         async with httpx.AsyncClient(timeout=5.0) as client:
+             response = await client.post(
+                 OLLAMA_API_URL,
+                 json={
+                     "model": "llama3-papalia-nuevo",
+                     "prompt": "test",
+                     "stream": False,
+                     "options": {"num_predict": 1}
+                 }
+             )
+             if response.status_code == 200:
+                 return {"status": "healthy", "ollama_status": "connected"}
+             # Report an unhealthy status instead of implicitly returning None
+             return {"status": "unhealthy", "ollama_status": f"HTTP {response.status_code}"}
+     except Exception as e:
+         logger.error(f"Error en health check: {str(e)}")
+         return {"status": "unhealthy", "error": str(e)}
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi==0.104.1
+ uvicorn[standard]==0.24.0
+ python-dotenv==1.0.0
+ httpx==0.25.2
+ pydantic==2.5.2
+ jinja2==3.1.2
+ python-multipart==0.0.6
templates/index.html ADDED
@@ -0,0 +1,139 @@
+ <!DOCTYPE html>
+ <html>
+ <head>
+     <title>Llama3-Papalia Inference</title>
+     <script src="https://cdn.tailwindcss.com"></script>
+ </head>
+ <body class="bg-gray-100 p-8">
+     <div class="max-w-4xl mx-auto">
+         <h1 class="text-3xl font-bold mb-8">Llama3-Papalia Inference</h1>
+
+         <div class="bg-white rounded-lg shadow-md p-6">
+             <div class="mb-4">
+                 <label class="block text-gray-700 text-sm font-bold mb-2" for="prompt">
+                     Prompt
+                 </label>
+                 <textarea
+                     id="prompt"
+                     class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline"
+                     rows="4"
+                     placeholder="Escribe tu pregunta aquí..."></textarea>
+             </div>
+
+             <div class="grid grid-cols-2 gap-4 mb-4">
+                 <div>
+                     <label class="block text-gray-700 text-sm font-bold mb-2" for="temperature">
+                         Temperature
+                     </label>
+                     <input
+                         type="number"
+                         id="temperature"
+                         value="0.7"
+                         min="0"
+                         max="1"
+                         step="0.1"
+                         class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
+                 </div>
+                 <div>
+                     <label class="block text-gray-700 text-sm font-bold mb-2" for="max_tokens">
+                         Max Tokens
+                     </label>
+                     <input
+                         type="number"
+                         id="max_tokens"
+                         value="500"
+                         min="1"
+                         max="2000"
+                         class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
+                 </div>
+             </div>
+
+             <button
+                 id="generate-button"
+                 onclick="generateResponse()"
+                 class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline">
+                 Generar Respuesta
+             </button>
+
+             <div class="mt-8">
+                 <label class="block text-gray-700 text-sm font-bold mb-2">
+                     Respuesta
+                 </label>
+                 <div
+                     id="response"
+                     class="mt-2 p-4 bg-gray-100 rounded min-h-[200px] whitespace-pre-wrap">
+                 </div>
+             </div>
+         </div>
+         <div id="service-status" class="text-gray-600 text-sm mt-2">Verificando estado del servicio...</div>
+     </div>
+
+     <script>
+         async function checkHealth() {
+             try {
+                 const response = await fetch('/health');
+                 const data = await response.json();
+                 const statusEl = document.getElementById('service-status');
+                 if (data.status === 'healthy') {
+                     statusEl.textContent = '✅ Servicio activo';
+                     statusEl.className = 'text-green-600';
+                 } else {
+                     statusEl.textContent = '❌ Servicio no disponible';
+                     statusEl.className = 'text-red-600';
+                 }
+             } catch (error) {
+                 const statusEl = document.getElementById('service-status');
+                 statusEl.textContent = '❌ Error de conexión';
+                 statusEl.className = 'text-red-600';
+             }
+         }
+
+         async function generateResponse() {
+             const promptEl = document.getElementById('prompt');
+             const temperatureEl = document.getElementById('temperature');
+             const maxTokensEl = document.getElementById('max_tokens');
+             const responseEl = document.getElementById('response');
+             const buttonEl = document.getElementById('generate-button');
+
+             if (!promptEl.value.trim()) {
+                 responseEl.textContent = 'Por favor, escribe una pregunta.';
+                 return;
+             }
+
+             buttonEl.disabled = true;
+             responseEl.textContent = '⏳ Generando respuesta...';
+
+             try {
+                 const response = await fetch('/generate', {
+                     method: 'POST',
+                     headers: {
+                         'Content-Type': 'application/json',
+                     },
+                     body: JSON.stringify({
+                         prompt: promptEl.value,
+                         temperature: parseFloat(temperatureEl.value),
+                         max_tokens: parseInt(maxTokensEl.value),
+                     }),
+                 });
+
+                 const data = await response.json();
+
+                 if (!response.ok) {
+                     throw new Error(data.detail || 'Error en la generación de respuesta');
+                 }
+
+                 responseEl.textContent = data.response || 'No se recibió respuesta del modelo';
+                 responseEl.className = 'mt-2 p-4 bg-gray-100 rounded min-h-[200px] whitespace-pre-wrap';
+             } catch (error) {
+                 responseEl.innerHTML = `❌ Error: ${error.message}<br><br>Por favor, verifica que:<br>1. El modelo llama3-papalia-nuevo está instalado<br>2. El servicio Ollama está corriendo<br>3. El puerto 11434 está accesible`;
+                 responseEl.className = 'mt-2 p-4 bg-red-50 text-red-700 rounded min-h-[200px] whitespace-pre-wrap';
+             } finally {
+                 buttonEl.disabled = false;
+             }
+         }
+
+         checkHealth();
+         setInterval(checkHealth, 30000);
+     </script>
+ </body>
+ </html>
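
For reference, a minimal client sketch showing how the /generate endpoint added in app.py could be exercised once the container built from this Dockerfile is running. It assumes the service is reachable at http://localhost:7860 (the port used by the uvicorn command in start.sh) and that the llama3-papalia-nuevo model is already available to Ollama; the prompt text is only an illustration.

# Illustrative client sketch; assumes the app runs locally on port 7860
# and that the llama3-papalia-nuevo model is loaded in Ollama.
import httpx

payload = {
    "prompt": "¿Qué etapas del desarrollo humano describe Papalia?",
    "temperature": 0.7,
    "max_tokens": 200,
}

# A generous timeout, since generation can take a while on CPU
response = httpx.post("http://localhost:7860/generate", json=payload, timeout=120.0)
response.raise_for_status()
print(response.json()["response"])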