Commit f2139e9
Parent(s): cb62aa5
Initial commit: Add Llama3-Papalia inference UI and API
Files changed:
- .DS_Store +0 -0
- Dockerfile +51 -0
- app.py +111 -0
- requirements.txt +7 -0
- templates/index.html +139 -0
.DS_Store
ADDED
Binary file (6.15 kB).
Dockerfile
ADDED
@@ -0,0 +1,51 @@
FROM python:3.9

# Install Ollama and required tools
RUN apt-get update && apt-get install -y curl netcat-traditional && \
    curl -fsSL https://ollama.com/install.sh | sh

# Create non-root user
RUN useradd -m -u 1000 user

WORKDIR /app

COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy application files including templates
COPY --chown=user . /app

# Create startup script
RUN echo '#!/bin/bash\n\
echo "Iniciando servidor Ollama..."\n\
ollama serve &\n\
\n\
# Wait until Ollama is available\n\
echo "Esperando a que Ollama esté listo..."\n\
timeout=60\n\
while ! nc -z localhost 11434; do\n\
    if [ "$timeout" -le "0" ]; then\n\
        echo "Tiempo de espera agotado para Ollama"\n\
        exit 1\n\
    fi\n\
    echo "Esperando a Ollama... $timeout segundos restantes"\n\
    timeout=$((timeout-1))\n\
    sleep 1\n\
done\n\
\n\
echo "Verificando modelo llama3-papalia-nuevo..."\n\
if ! ollama list | grep -q "llama3-papalia-nuevo"; then\n\
    echo "Iniciando modelo llama3-papalia-nuevo..."\n\
fi\n\
\n\
echo "Iniciando API..."\n\
uvicorn app:app --host 0.0.0.0 --port 7860\n\
' > /app/start.sh

RUN chmod +x /app/start.sh

USER user
ENV PATH="/home/user/.local/bin:$PATH"

# Start both Ollama and the FastAPI app
CMD ["/app/start.sh"]
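For local testing outside the container, the `nc -z` readiness loop in start.sh can be mirrored from Python. A minimal sketch, assuming Ollama's default port 11434; the helper `wait_for_port` is illustrative and not part of this commit:

import socket
import time

def wait_for_port(host="localhost", port=11434, timeout=60):
    # Poll the TCP port once per second, mirroring the nc -z loop in start.sh
    deadline = time.time() + timeout
    while time.time() < deadline:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)
            if sock.connect_ex((host, port)) == 0:
                return True
        time.sleep(1)
    return False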
app.py
ADDED
@@ -0,0 +1,111 @@
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import httpx
import os
import logging
from typing import Optional

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="Llama3-Papalia Inference API & UI",
    description="API y UI para interactuar con el modelo Llama3-Papalia especializado en Desarrollo Humano",
    version="1.0.0"
)

# Enable CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# HTML template configuration
templates = Jinja2Templates(directory="templates")

class QueryRequest(BaseModel):
    prompt: str
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = 500

class QueryResponse(BaseModel):
    response: str
    model: str = "llama3-papalia-nuevo"

OLLAMA_API_URL = "http://localhost:11434/api/generate"

@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    return templates.TemplateResponse(
        "index.html",
        {"request": request, "title": "Llama3-Papalia Inference"}
    )

@app.post("/generate", response_model=QueryResponse)
async def generate_response(query: QueryRequest):
    logger.info(f"Recibida solicitud de generación con prompt: {query.prompt[:50]}...")

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            logger.info(f"Enviando solicitud a Ollama: {OLLAMA_API_URL}")
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": query.prompt,
                    # Ollama streams by default; request a single JSON response
                    "stream": False,
                    # Generation parameters belong under "options" in the Ollama API
                    "options": {
                        "temperature": query.temperature,
                        "num_predict": query.max_tokens,
                    },
                }
            )

            logger.info(f"Respuesta de Ollama recibida con status code: {response.status_code}")

            if response.status_code != 200:
                logger.error(f"Error en la respuesta de Ollama: {response.text}")
                raise HTTPException(
                    status_code=500,
                    detail=f"Error en la generación con Ollama: {response.text}"
                )

            result = response.json()
            logger.info("Respuesta procesada exitosamente")
            return QueryResponse(response=result["response"])

    except httpx.TimeoutException:
        logger.error("Timeout al conectar con Ollama")
        raise HTTPException(
            status_code=504,
            detail="Timeout al conectar con el servicio de Ollama"
        )
    except HTTPException:
        # Re-raise HTTP errors as-is instead of wrapping them in a generic 500
        raise
    except Exception as e:
        logger.error(f"Error inesperado: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Error en el servidor: {str(e)}"
        )

@app.get("/health")
async def health_check():
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.post(
                OLLAMA_API_URL,
                json={
                    "model": "llama3-papalia-nuevo",
                    "prompt": "test",
                    "stream": False,
                    "options": {"num_predict": 1},
                }
            )
            if response.status_code == 200:
                return {"status": "healthy", "ollama_status": "connected"}
            # Non-200 answers previously fell through and returned None
            return {"status": "unhealthy", "ollama_status": f"HTTP {response.status_code}"}
    except Exception as e:
        logger.error(f"Error en health check: {str(e)}")
        return {"status": "unhealthy", "error": str(e)}
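Once the Space is up, the /generate endpoint defined above can be exercised directly. A minimal client sketch; the base URL and the example prompt are placeholders:

import httpx

BASE_URL = "http://localhost:7860"  # placeholder: substitute the deployed Space URL

payload = {
    "prompt": "¿Qué etapas del desarrollo humano describe Papalia?",
    "temperature": 0.7,
    "max_tokens": 500,
}

# POST the request and print the model's reply from the QueryResponse body
resp = httpx.post(f"{BASE_URL}/generate", json=payload, timeout=60.0)
resp.raise_for_status()
print(resp.json()["response"])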
requirements.txt
ADDED
@@ -0,0 +1,7 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
python-dotenv==1.0.0
httpx==0.25.2
pydantic==2.5.2
jinja2==3.1.2
python-multipart==0.0.6
templates/index.html
ADDED
@@ -0,0 +1,139 @@
<!DOCTYPE html>
<html>
<head>
    <title>Llama3-Papalia Inference</title>
    <script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="bg-gray-100 p-8">
    <div class="max-w-4xl mx-auto">
        <h1 class="text-3xl font-bold mb-8">Llama3-Papalia Inference</h1>

        <div class="bg-white rounded-lg shadow-md p-6">
            <div class="mb-4">
                <label class="block text-gray-700 text-sm font-bold mb-2" for="prompt">
                    Prompt
                </label>
                <textarea
                    id="prompt"
                    class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline"
                    rows="4"
                    placeholder="Escribe tu pregunta aquí..."></textarea>
            </div>

            <div class="grid grid-cols-2 gap-4 mb-4">
                <div>
                    <label class="block text-gray-700 text-sm font-bold mb-2" for="temperature">
                        Temperature
                    </label>
                    <input
                        type="number"
                        id="temperature"
                        value="0.7"
                        min="0"
                        max="1"
                        step="0.1"
                        class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
                </div>
                <div>
                    <label class="block text-gray-700 text-sm font-bold mb-2" for="max_tokens">
                        Max Tokens
                    </label>
                    <input
                        type="number"
                        id="max_tokens"
                        value="500"
                        min="1"
                        max="2000"
                        class="shadow appearance-none border rounded w-full py-2 px-3 text-gray-700 leading-tight focus:outline-none focus:shadow-outline">
                </div>
            </div>

            <button
                id="generate-button"
                onclick="generateResponse()"
                class="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded focus:outline-none focus:shadow-outline">
                Generar Respuesta
            </button>

            <div class="mt-8">
                <label class="block text-gray-700 text-sm font-bold mb-2">
                    Respuesta
                </label>
                <div
                    id="response"
                    class="mt-2 p-4 bg-gray-100 rounded min-h-[200px] whitespace-pre-wrap">
                </div>
            </div>
        </div>
        <div id="service-status" class="text-gray-600 text-sm mt-2">Verificando estado del servicio...</div>
    </div>

    <script>
        async function checkHealth() {
            try {
                const response = await fetch('/health');
                const data = await response.json();
                const statusEl = document.getElementById('service-status');
                if (data.status === 'healthy') {
                    statusEl.textContent = '✅ Servicio activo';
                    statusEl.className = 'text-green-600';
                } else {
                    statusEl.textContent = '❌ Servicio no disponible';
                    statusEl.className = 'text-red-600';
                }
            } catch (error) {
                const statusEl = document.getElementById('service-status');
                statusEl.textContent = '❌ Error de conexión';
                statusEl.className = 'text-red-600';
            }
        }

        async function generateResponse() {
            const promptEl = document.getElementById('prompt');
            const temperatureEl = document.getElementById('temperature');
            const maxTokensEl = document.getElementById('max_tokens');
            const responseEl = document.getElementById('response');
            const buttonEl = document.getElementById('generate-button');

            if (!promptEl.value.trim()) {
                responseEl.textContent = 'Por favor, escribe una pregunta.';
                return;
            }

            buttonEl.disabled = true;
            responseEl.textContent = '⏳ Generando respuesta...';

            try {
                const response = await fetch('/generate', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        prompt: promptEl.value,
                        temperature: parseFloat(temperatureEl.value),
                        max_tokens: parseInt(maxTokensEl.value),
                    }),
                });

                const data = await response.json();

                if (!response.ok) {
                    throw new Error(data.detail || 'Error en la generación de respuesta');
                }

                responseEl.textContent = data.response || 'No se recibió respuesta del modelo';
                responseEl.className = 'mt-2 p-4 bg-gray-100 rounded min-h-[200px] whitespace-pre-wrap';
            } catch (error) {
                responseEl.innerHTML = `❌ Error: ${error.message}<br><br>Por favor, verifica que:<br>1. El modelo llama3-papalia-nuevo está instalado<br>2. El servicio Ollama está corriendo<br>3. El puerto 11434 está accesible`;
                responseEl.className = 'mt-2 p-4 bg-red-50 text-red-700 rounded min-h-[200px] whitespace-pre-wrap';
            } finally {
                buttonEl.disabled = false;
            }
        }

        checkHealth();
        setInterval(checkHealth, 30000);
    </script>
</body>
</html>
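As a quick smoke test of the routes above, a pytest sketch using FastAPI's TestClient; it assumes the test runs from the repository root so templates/ resolves, and that without a local Ollama /health reports unhealthy rather than crashing:

from fastapi.testclient import TestClient

from app import app

client = TestClient(app)

def test_root_serves_ui():
    # The index page renders from templates/index.html
    r = client.get("/")
    assert r.status_code == 200
    assert "Llama3-Papalia" in r.text

def test_health_reports_status():
    # With no Ollama running, /health should still return a status payload
    r = client.get("/health")
    assert r.status_code == 200
    assert "status" in r.json()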