##############################################
# app.py - Near-Perfect Accuracy Farm AI Demo
##############################################

import streamlit as st
import numpy as np
import pandas as pd
import requests
import json
import os

from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error

################################################################################
# 1) SYNTHETIC DATA GENERATION WITH NEAR-PERFECT RELATIONSHIPS
################################################################################

@st.cache_data
def generate_pest_data(n=200):
    """
    Build a synthetic, deterministically seeded dataset for pest-outbreak
    classification.

    Features are drawn uniformly: ``temperature`` from [15, 40), ``humidity``
    from [30, 90) and ``leaf_wetness`` from [0, 100). The label is 1 when the
    noise-free linear score
    ``0.3*temperature + 0.5*humidity + 0.1*leaf_wetness`` is at or above the
    sample median, so the classes are roughly balanced and the decision
    boundary is exactly learnable.

    Args:
        n: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns ``temperature``, ``humidity``,
        ``leaf_wetness`` and the integer target ``pest_outbreak`` (0 or 1).
    """

    # Use a private generator rather than np.random.seed(): seeding the
    # process-global RNG would silently reseed every other NumPy consumer.
    # RandomState(42) yields the same stream the global seed used to produce,
    # so the generated data is unchanged.
    rng = np.random.RandomState(42)

    # Draw order matters for reproducibility: temperature, humidity, wetness.
    temperature = rng.uniform(15, 40, n)
    humidity = rng.uniform(30, 90, n)
    leaf_wetness = rng.uniform(0, 100, n)

    # Deterministic score thresholded at its own median -> 0/1 label.
    pest_score = 0.3 * temperature + 0.5 * humidity + 0.1 * leaf_wetness
    median_score = np.median(pest_score)
    pest_outbreak = (pest_score >= median_score).astype(int)

    return pd.DataFrame({
        "temperature": temperature,
        "humidity": humidity,
        "leaf_wetness": leaf_wetness,
        "pest_outbreak": pest_outbreak,
    })

@st.cache_data
def generate_disease_data(n=200):
    """
    Build a synthetic, deterministically seeded dataset for disease-presence
    classification.

    Features are drawn uniformly: ``soil_ph`` from [5.0, 8.0), ``rainfall``
    from [0, 200) and ``planting_density`` from [50, 200). The label is 1 when
    the noise-free score
    ``-(soil_ph - 6.5)**2 + 0.02*rainfall + 0.006*planting_density``
    is at or above the sample median. The score is quadratic in ``soil_ph``,
    which is why the downstream model needs polynomial features to separate
    the classes.

    Args:
        n: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns ``soil_ph``, ``rainfall``,
        ``planting_density`` and the integer target ``disease_present``
        (0 or 1).
    """

    # Private generator instead of np.random.seed(): avoids reseeding the
    # process-global RNG while reproducing the exact same draws.
    rng = np.random.RandomState(123)

    # Draw order matters for reproducibility: pH, rainfall, density.
    soil_ph = rng.uniform(5.0, 8.0, n)
    rainfall = rng.uniform(0, 200, n)
    planting_density = rng.uniform(50, 200, n)

    # Deterministic score thresholded at its own median -> 0/1 label.
    disease_score = (
        -1.0 * (soil_ph - 6.5)**2
        + 0.02 * rainfall
        + 0.006 * planting_density
    )
    median_score = np.median(disease_score)
    disease_present = (disease_score >= median_score).astype(int)

    return pd.DataFrame({
        "soil_ph": soil_ph,
        "rainfall": rainfall,
        "planting_density": planting_density,
        "disease_present": disease_present,
    })

@st.cache_data
def generate_yield_data(n=200):
    """
    Build a synthetic, deterministically seeded dataset for crop-yield
    regression.

    Features are drawn uniformly: ``soil_fertility`` from [0, 100),
    ``temperature`` from [15, 35), ``irrigation_freq`` from [0, 10) and
    ``fertilizer_score`` from [0, 100). The target is

        2.0*soil_fertility - 0.3*|temperature - 25| + 3.0*irrigation_freq
        + 0.8*fertilizer_score + N(0, 1) noise

    i.e. linear in every feature except temperature, which enters through its
    absolute distance from 25.

    Args:
        n: Number of rows to generate.

    Returns:
        pandas.DataFrame with the four feature columns and the float target
        ``crop_yield``.
    """

    # Private generator instead of np.random.seed(): avoids reseeding the
    # process-global RNG while reproducing the exact same draws.
    rng = np.random.RandomState(999)

    # Draw order matters for reproducibility: the four uniform features
    # first, then the Gaussian noise.
    soil_fertility = rng.uniform(0, 100, n)
    temperature = rng.uniform(15, 35, n)
    irrigation_freq = rng.uniform(0, 10, n)
    fertilizer_score = rng.uniform(0, 100, n)
    noise = rng.normal(0, 1, n)  # small relative to the signal's range

    yield_val = (
        2.0 * soil_fertility
        + (-0.3) * np.abs(temperature - 25)
        + 3.0 * irrigation_freq
        + 0.8 * fertilizer_score
        + noise
    )

    return pd.DataFrame({
        "soil_fertility": soil_fertility,
        "temperature": temperature,
        "irrigation_freq": irrigation_freq,
        "fertilizer_score": fertilizer_score,
        "crop_yield": yield_val,
    })

################################################################################
# 2) TRAINING MODELS (CACHED FOR PERFORMANCE)
################################################################################

@st.cache_resource
def train_all_models():
    """
    Train the three demo models on the synthetic datasets and collect metrics.

    Pipelines:
    - Pest outbreak: StandardScaler -> RandomForestClassifier
    - Disease presence: degree-2 PolynomialFeatures -> StandardScaler
      -> LogisticRegression
    - Crop yield: StandardScaler -> GradientBoostingRegressor

    The pest and disease accuracies are computed on the very rows the models
    were fitted on (training accuracy), so they are optimistic by
    construction. The yield model is fitted on an 80% split and scored on
    both that split and the held-out 20%.

    Returns:
        dict with keys ``pest_model``, ``pest_accuracy``, ``disease_model``,
        ``disease_accuracy``, ``yield_model``, ``yield_rmse_train`` and
        ``yield_rmse_test``.
    """

    # 2.1 Pest Model
    pest_df = generate_pest_data()
    X_pest = pest_df[["temperature", "humidity", "leaf_wetness"]]
    y_pest = pest_df["pest_outbreak"]

    pest_pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("rf", RandomForestClassifier(n_estimators=50, random_state=42))
    ])
    pest_pipeline.fit(X_pest, y_pest)
    # Training-set accuracy (no hold-out split for this model).
    pest_accuracy = accuracy_score(y_pest, pest_pipeline.predict(X_pest))

    # 2.2 Disease Model
    disease_df = generate_disease_data()
    X_dis = disease_df[["soil_ph", "rainfall", "planting_density"]]
    y_dis = disease_df["disease_present"]

    disease_pipeline = Pipeline([
        # Degree-2 terms let the linear classifier capture the quadratic
        # soil_ph effect baked into the synthetic labels.
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("scaler", StandardScaler()),
        ("lr", LogisticRegression(random_state=42, max_iter=2000))
    ])
    disease_pipeline.fit(X_dis, y_dis)
    # Training-set accuracy (no hold-out split for this model).
    disease_accuracy = accuracy_score(y_dis, disease_pipeline.predict(X_dis))

    # 2.3 Yield Model
    yield_df = generate_yield_data()
    X_yield = yield_df[["soil_fertility", "temperature", "irrigation_freq", "fertilizer_score"]]
    y_yield = yield_df["crop_yield"]

    X_train, X_test, y_train, y_test = train_test_split(
        X_yield, y_yield, test_size=0.2, random_state=42
    )

    yield_pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("gbr", GradientBoostingRegressor(n_estimators=50, learning_rate=0.1, random_state=42))
    ])
    yield_pipeline.fit(X_train, y_train)

    # RMSE is computed as sqrt(MSE) instead of passing ``squared=False``:
    # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
    # where it raises TypeError. The square root works on every version.
    yield_rmse_train = float(
        np.sqrt(mean_squared_error(y_train, yield_pipeline.predict(X_train)))
    )
    yield_rmse_test = float(
        np.sqrt(mean_squared_error(y_test, yield_pipeline.predict(X_test)))
    )

    return {
        "pest_model": pest_pipeline,
        "pest_accuracy": pest_accuracy,
        "disease_model": disease_pipeline,
        "disease_accuracy": disease_accuracy,
        "yield_model": yield_pipeline,
        "yield_rmse_train": yield_rmse_train,
        "yield_rmse_test": yield_rmse_test
    }

################################################################################
# 3) GPT CALLS (BYPASSING 'openai' LIB)
################################################################################

def call_gpt(prompt_text, openai_api_key):
    """
    Send ``prompt_text`` to the OpenAI Chat Completions endpoint and return
    the assistant's reply as text.

    This function never raises for request or response problems: every
    failure is reported as a human-readable string so the caller can display
    it directly.

    Args:
        prompt_text: The user message to send.
        openai_api_key: API key used for the ``Authorization`` header. A
            missing, empty or whitespace-only key short-circuits without any
            network traffic.

    Returns:
        The stripped reply text on success; otherwise a message starting with
        ``"ERROR:"`` / ``"Error calling GPT:"`` or the literal
        ``"No valid response from GPT."``.
    """

    # Pasted keys often carry stray whitespace or a trailing newline, which
    # would produce an invalid header; a blank key is treated as missing.
    api_key = str(openai_api_key).strip() if openai_api_key else ""
    if not api_key:
        return "ERROR: No OpenAI API key provided. Please enter it to get GPT advice."

    endpoint = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    data = {
        "model": "gpt-3.5-turbo",  # or "gpt-4" if you have access
        "messages": [
            {"role": "system", "content": "You are a highly knowledgeable, helpful farm AI assistant."},
            {"role": "user", "content": prompt_text}
        ],
        "max_tokens": 500,
        "temperature": 0.7
    }

    # Transport-level failures: DNS, connection, timeout, invalid header...
    try:
        response = requests.post(endpoint, headers=headers, json=data, timeout=30)
    except requests.RequestException as e:
        return f"Error calling GPT: {e}"

    # Decoded separately so a non-JSON body (e.g. a proxy's HTML error page)
    # is reported with its HTTP status rather than a raw decoder message.
    try:
        result = response.json()
    except ValueError:
        return f"Error calling GPT: HTTP {response.status_code} with a non-JSON response body."

    # Failed requests (bad key, rate limit, ...) carry an "error" payload;
    # surface its message instead of a generic "no valid response".
    api_error = result.get("error") if isinstance(result, dict) else None
    if api_error:
        detail = api_error.get("message") if isinstance(api_error, dict) else api_error
        return f"Error calling GPT: {detail}"

    # Walk the expected structure defensively; any missing level means the
    # payload is not a usable completion.
    try:
        content = result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        content = None

    if isinstance(content, str):
        return content.strip()
    return "No valid response from GPT."

################################################################################
# 4) STREAMLIT APP (NEAR-PERFECT ACCURACY, SCALABLE POC)
################################################################################

def main():
    """
    Render the Streamlit page: train (or fetch cached) models, show their
    metrics in the sidebar, collect farm conditions through a form and, on
    submit, display local rule/model results followed by GPT advice.
    """
    st.title("Near-Perfect Farm AI + GPT (Scalable PoC)")
    st.markdown("""
    **Features**:
    - Three local ML models (pest, disease, yield) with near-perfect synthetic data.
    - GPT for advanced explanations and advice.
    - Demonstrates a scalable approach for real production: replace synthetic data
      and model training with real data or pre-trained models.
    """)

    # 4.1 Load/Train Models
    with st.spinner("Training models on near-perfect synthetic data..."):
        models = train_all_models()

    # 4.2 Sidebar: Show training metrics
    st.sidebar.header("Local Model Performance")
    st.sidebar.write(f"**Pest Model Accuracy:** {models['pest_accuracy']*100:.2f}%")
    st.sidebar.write(f"**Disease Model Accuracy:** {models['disease_accuracy']*100:.2f}%")
    st.sidebar.write(f"**Yield RMSE (Train):** {models['yield_rmse_train']:.2f}")
    st.sidebar.write(f"**Yield RMSE (Test):** {models['yield_rmse_test']:.2f}")

    # 4.3 User Input Form
    st.subheader("Enter Your Farm Conditions")
    with st.form("farm_form"):
        soil_moisture = st.slider("Soil Moisture (%)", 0.0, 100.0, 25.0, 1.0)
        soil_ph = st.slider("Soil pH", 0.0, 14.0, 6.5, 0.1)
        temperature = st.slider("Temperature (°C)", 0.0, 50.0, 28.0, 1.0)
        humidity = st.slider("Humidity (%)", 0.0, 100.0, 60.0, 1.0)
        crop_health = st.slider("Crop Health (0-100)", 0.0, 100.0, 80.0, 1.0)
        leaf_wetness = st.slider("Leaf Wetness (0-100)", 0.0, 100.0, 40.0, 1.0)
        rainfall = st.number_input("Rainfall (mm)", 0.0, 500.0, 50.0, 5.0)
        planting_density = st.number_input("#Plants/Acre", 10.0, 500.0, 100.0, 10.0)
        irrigation_freq = st.number_input("Irrigation Frequency (times/week)", 0.0, 14.0, 3.0, 1.0)
        fertilizer_score = st.slider("Fertilizer Score (0-100)", 0.0, 100.0, 50.0, 1.0)

        st.markdown("### GPT Setup")
        openai_api_key = st.text_input("OpenAI API Key (for GPT)", type="password")

        submitted = st.form_submit_button("Analyze & Get GPT Advice")

    # Nothing below runs until the form has been submitted.
    if not submitted:
        return

    st.subheader("Local Analysis & Predictions")
    # 4.3.1 Basic rules
    irrigation_needed = (soil_moisture < 30)
    crop_health_status = "Healthy" if crop_health > 85 else "Needs Attention"

    # The pipelines were fitted on DataFrames, so inference rows are built as
    # DataFrames with the same column names. Passing bare arrays makes
    # scikit-learn warn about missing feature names and binds columns by
    # position only.

    # 4.3.2 Pest Prediction
    X_pest = pd.DataFrame(
        [[temperature, humidity, leaf_wetness]],
        columns=["temperature", "humidity", "leaf_wetness"],
    )
    pest_outbreak = bool(models["pest_model"].predict(X_pest)[0] == 1)

    # 4.3.3 Disease Detection
    X_dis = pd.DataFrame(
        [[soil_ph, rainfall, planting_density]],
        columns=["soil_ph", "rainfall", "planting_density"],
    )
    disease_present = bool(models["disease_model"].predict(X_dis)[0] == 1)

    # 4.3.4 Yield Regression
    # We treat "crop_health" as a stand-in for "soil_fertility" here.
    X_yield = pd.DataFrame(
        [[crop_health, temperature, irrigation_freq, fertilizer_score]],
        columns=["soil_fertility", "temperature", "irrigation_freq", "fertilizer_score"],
    )
    yield_prediction = float(models["yield_model"].predict(X_yield)[0])

    # Display local results
    st.write(f"- **Irrigation Needed?** {irrigation_needed}")
    st.write(f"- **Crop Health Status:** {crop_health_status}")
    st.write(f"- **Pest Outbreak Likely?** {pest_outbreak}")
    st.write(f"- **Disease Present?** {disease_present}")
    st.write(f"- **Predicted Yield (t/ha):** {yield_prediction:.2f}")

    # 4.3.5 GPT Explanation
    summary_prompt = (
        "These are the farm conditions:\n"
        f"  - Soil Moisture: {soil_moisture:.1f}%\n"
        f"  - Soil pH: {soil_ph:.1f}\n"
        f"  - Temperature: {temperature:.1f} °C\n"
        f"  - Humidity: {humidity:.1f}%\n"
        f"  - Crop Health: {crop_health:.1f}\n"
        f"  - Leaf Wetness: {leaf_wetness:.1f}\n"
        f"  - Rainfall: {rainfall:.1f} mm\n"
        f"  - Planting Density: {planting_density:.1f}\n"
        f"  - Irrigation Frequency: {irrigation_freq:.1f} times/week\n"
        f"  - Fertilizer Score: {fertilizer_score:.1f}\n\n"
        "Local ML Results:\n"
        f"  - Irrigation Needed: {irrigation_needed}\n"
        f"  - Crop Health Status: {crop_health_status}\n"
        f"  - Pest Outbreak: {pest_outbreak}\n"
        f"  - Disease Present: {disease_present}\n"
        f"  - Yield: {yield_prediction:.2f} t/ha\n\n"
        "Please provide a clear, helpful explanation in plain language, "
        "along with sustainable, cost-effective steps to improve or maintain these conditions."
    )

    st.subheader("GPT Advice")
    # call_gpt returns a displayable string for both replies and failures.
    gpt_response = call_gpt(summary_prompt, openai_api_key)
    st.write(gpt_response)

# Script entry point: build the UI only when this file is executed as the main
# module, not when it is imported.
if __name__ == "__main__":
    main()