farm-ai-gpt / app.py
emmunger's picture
Update app.py
6e2acb7 verified
raw
history blame
13.1 kB
##############################################
# app.py - Near-Perfect Accuracy Farm AI Demo
##############################################
import streamlit as st
import numpy as np
import pandas as pd
import requests
import json
import os
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
################################################################################
# 1) SYNTHETIC DATA GENERATION WITH NEAR-PERFECT RELATIONSHIPS
################################################################################
@st.cache_data
def generate_pest_data(n=200):
    """
    Build a synthetic, easily learnable pest-outbreak classification dataset.

    Three features are drawn uniformly at random and combined into a linear
    score; rows whose score is at or above the median score are labelled as
    an outbreak, so the classes are (almost exactly) balanced and separable.

    Args:
        n: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns ``temperature`` (uniform on [15, 40)),
        ``humidity`` (uniform on [30, 90)), ``leaf_wetness`` (uniform on
        [0, 100)) and the 0/1 target ``pest_outbreak``.
    """
    # A private RandomState seeded with 42 produces exactly the same stream
    # as `np.random.seed(42)` followed by `np.random.uniform(...)`, but it
    # does not reseed the process-wide NumPy RNG that other code may rely on.
    rng = np.random.RandomState(42)

    # Draw order matters for reproducibility: temperature, humidity, wetness.
    temperature = rng.uniform(15, 40, n)
    humidity = rng.uniform(30, 90, n)
    leaf_wetness = rng.uniform(0, 100, n)

    # Linear "ground truth" score, thresholded at its own median for 0/1.
    pest_score = 0.3 * temperature + 0.5 * humidity + 0.1 * leaf_wetness
    pest_outbreak = (pest_score >= np.median(pest_score)).astype(int)

    return pd.DataFrame({
        "temperature": temperature,
        "humidity": humidity,
        "leaf_wetness": leaf_wetness,
        "pest_outbreak": pest_outbreak,
    })
@st.cache_data
def generate_disease_data(n=200):
    """
    Build a synthetic disease-detection classification dataset.

    The label comes from a score that is quadratic in soil pH (peaking at
    pH 6.5) and linear in rainfall and planting density; rows whose score is
    at or above the median score are labelled as diseased.

    Args:
        n: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns ``soil_ph`` (uniform on [5.0, 8.0)),
        ``rainfall`` (uniform on [0, 200)), ``planting_density`` (uniform on
        [50, 200)) and the 0/1 target ``disease_present``.
    """
    # Same stream as `np.random.seed(123)` + `np.random.uniform(...)`, but
    # kept local so the process-wide NumPy RNG is not reseeded.
    rng = np.random.RandomState(123)

    # Draw order matters for reproducibility: pH, rainfall, density.
    soil_ph = rng.uniform(5.0, 8.0, n)
    rainfall = rng.uniform(0, 200, n)
    planting_density = rng.uniform(50, 200, n)

    # disease_score = -(soil_ph - 6.5)^2 + 0.02*rainfall + 0.006*planting_density
    disease_score = (
        -1.0 * (soil_ph - 6.5) ** 2
        + 0.02 * rainfall
        + 0.006 * planting_density
    )
    # Threshold at the median so roughly half the rows are positive.
    disease_present = (disease_score >= np.median(disease_score)).astype(int)

    return pd.DataFrame({
        "soil_ph": soil_ph,
        "rainfall": rainfall,
        "planting_density": planting_density,
        "disease_present": disease_present,
    })
@st.cache_data
def generate_yield_data(n=200):
    """
    Build a synthetic crop-yield regression dataset with very little noise.

    The target is a near-linear function of the features plus standard
    normal noise:

        crop_yield = 2.0*soil_fertility - 0.3*|temperature - 25|
                     + 3.0*irrigation_freq + 0.8*fertilizer_score + N(0, 1)

    Args:
        n: Number of rows to generate.

    Returns:
        pandas.DataFrame with columns ``soil_fertility`` (uniform on
        [0, 100)), ``temperature`` (uniform on [15, 35)), ``irrigation_freq``
        (uniform on [0, 10)), ``fertilizer_score`` (uniform on [0, 100)) and
        the continuous target ``crop_yield``.
    """
    # Same stream as `np.random.seed(999)` + the global np.random functions,
    # but kept local so the process-wide NumPy RNG is not reseeded.
    rng = np.random.RandomState(999)

    # Draw order matters for reproducibility: the four features first,
    # then the noise term.
    soil_fertility = rng.uniform(0, 100, n)
    temperature = rng.uniform(15, 35, n)
    irrigation_freq = rng.uniform(0, 10, n)
    fertilizer_score = rng.uniform(0, 100, n)
    noise = rng.normal(0, 1, n)  # small noise relative to the signal

    yield_val = (
        2.0 * soil_fertility
        + (-0.3) * np.abs(temperature - 25)  # penalty for straying from 25
        + 3.0 * irrigation_freq
        + 0.8 * fertilizer_score
        + noise
    )

    return pd.DataFrame({
        "soil_fertility": soil_fertility,
        "temperature": temperature,
        "irrigation_freq": irrigation_freq,
        "fertilizer_score": fertilizer_score,
        "crop_yield": yield_val,
    })
################################################################################
# 2) TRAINING MODELS (CACHED FOR PERFORMANCE)
################################################################################
@st.cache_resource
def train_all_models():
    """
    Train the three demo models on the synthetic datasets.

    Models:
      - Pest outbreak:    StandardScaler -> RandomForestClassifier
      - Disease presence: PolynomialFeatures(2) -> StandardScaler -> LogisticRegression
      - Crop yield:       StandardScaler -> GradientBoostingRegressor

    Returns:
        dict with the fitted pipelines and their metrics:
          ``pest_model``, ``pest_accuracy``,
          ``disease_model``, ``disease_accuracy``,
          ``yield_model``, ``yield_rmse_train``, ``yield_rmse_test``.

    Note:
        ``pest_accuracy`` and ``disease_accuracy`` are measured on the same
        rows the classifiers were fitted on, so they are training accuracies,
        not estimates of generalisation. Only the yield model is also scored
        on a held-out 20% split.
    """
    # 2.1 Pest model (fitted and scored on the full synthetic set)
    pest_df = generate_pest_data()
    X_pest = pest_df[["temperature", "humidity", "leaf_wetness"]]
    y_pest = pest_df["pest_outbreak"]
    pest_pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("rf", RandomForestClassifier(n_estimators=50, random_state=42)),
    ])
    pest_pipeline.fit(X_pest, y_pest)
    pest_accuracy = accuracy_score(y_pest, pest_pipeline.predict(X_pest))

    # 2.2 Disease model (degree-2 features match the quadratic pH term)
    disease_df = generate_disease_data()
    X_dis = disease_df[["soil_ph", "rainfall", "planting_density"]]
    y_dis = disease_df["disease_present"]
    disease_pipeline = Pipeline([
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("scaler", StandardScaler()),
        ("lr", LogisticRegression(random_state=42, max_iter=2000)),
    ])
    disease_pipeline.fit(X_dis, y_dis)
    disease_accuracy = accuracy_score(y_dis, disease_pipeline.predict(X_dis))

    # 2.3 Yield model (80/20 train/test split)
    yield_df = generate_yield_data()
    X_yield = yield_df[["soil_fertility", "temperature", "irrigation_freq", "fertilizer_score"]]
    y_yield = yield_df["crop_yield"]
    X_train, X_test, y_train, y_test = train_test_split(
        X_yield, y_yield, test_size=0.2, random_state=42
    )
    yield_pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("gbr", GradientBoostingRegressor(n_estimators=50, learning_rate=0.1, random_state=42)),
    ])
    yield_pipeline.fit(X_train, y_train)

    # RMSE is computed as sqrt(MSE) rather than via `squared=False`: that
    # keyword was deprecated in scikit-learn 1.4 and removed in 1.6, whereas
    # this form works on every version.
    yield_rmse_train = float(
        np.sqrt(mean_squared_error(y_train, yield_pipeline.predict(X_train)))
    )
    yield_rmse_test = float(
        np.sqrt(mean_squared_error(y_test, yield_pipeline.predict(X_test)))
    )

    return {
        "pest_model": pest_pipeline,
        "pest_accuracy": pest_accuracy,
        "disease_model": disease_pipeline,
        "disease_accuracy": disease_accuracy,
        "yield_model": yield_pipeline,
        "yield_rmse_train": yield_rmse_train,
        "yield_rmse_test": yield_rmse_test,
    }
################################################################################
# 3) GPT CALLS (BYPASSING 'openai' LIB)
################################################################################
def _extract_gpt_text(payload):
    """Return the assistant reply, or the API's own error message, from a decoded response body."""
    if not isinstance(payload, dict):
        return "No valid response from GPT."

    # Failed requests (bad key, quota, unknown model, ...) come back as an
    # "error" object; surface its message instead of a generic fallback.
    error = payload.get("error")
    if error:
        message = error.get("message") if isinstance(error, dict) else error
        return f"Error calling GPT: {message}"

    choices = payload.get("choices") or []
    if choices and isinstance(choices[0], dict):
        content = (choices[0].get("message") or {}).get("content")
        if content:
            return content.strip()
    return "No valid response from GPT."


def call_gpt(prompt_text, openai_api_key):
    """
    Send the prompt to the OpenAI Chat Completions endpoint over plain HTTP.

    The function never raises for network or API problems; it always returns
    a string suitable for showing to the user.

    Args:
        prompt_text: User message to send.
        openai_api_key: API key used as the Bearer token. Surrounding
            whitespace is ignored; an empty or missing key short-circuits
            without making a request.

    Returns:
        The assistant's reply text, or a human-readable message starting
        with "ERROR:" / "Error calling GPT:" / "No valid response" when the
        call could not produce one.
    """
    # Pasted keys often carry stray spaces or newlines; treat blank as missing.
    api_key = (openai_api_key or "").strip()
    if not api_key:
        return "ERROR: No OpenAI API key provided. Please enter it to get GPT advice."

    endpoint = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    data = {
        "model": "gpt-3.5-turbo",  # or "gpt-4" if you have access
        "messages": [
            {"role": "system", "content": "You are a highly knowledgeable, helpful farm AI assistant."},
            {"role": "user", "content": prompt_text},
        ],
        "max_tokens": 500,
        "temperature": 0.7,
    }

    try:
        response = requests.post(endpoint, headers=headers, json=data, timeout=30)
    except requests.RequestException as e:
        # Connection errors, timeouts, invalid URLs, ...
        return f"Error calling GPT: {e}"

    try:
        payload = response.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page from a proxy).
        return f"Error calling GPT: HTTP {response.status_code} with a non-JSON response body"

    return _extract_gpt_text(payload)
################################################################################
# 4) STREAMLIT APP (NEAR-PERFECT ACCURACY, SCALABLE POC)
################################################################################
def main():
    """
    Render the Streamlit page.

    Flow: show the intro, train (or fetch cached) models, display their
    metrics in the sidebar, collect farm conditions through a form, and on
    submit show rule-based flags, the three model predictions and GPT advice
    built from a summary of all of the above.
    """
    st.title("Near-Perfect Farm AI + GPT (Scalable PoC)")
    st.markdown("""
**Features**:
- Three local ML models (pest, disease, yield) with near-perfect synthetic data.
- GPT for advanced explanations and advice.
- Demonstrates a scalable approach for real production: replace synthetic data
and model training with real data or pre-trained models.
""")

    # 4.1 Load/Train Models
    with st.spinner("Training models on near-perfect synthetic data..."):
        models = train_all_models()

    # 4.2 Sidebar: Show training metrics
    st.sidebar.header("Local Model Performance")
    st.sidebar.write(f"**Pest Model Accuracy:** {models['pest_accuracy']*100:.2f}%")
    st.sidebar.write(f"**Disease Model Accuracy:** {models['disease_accuracy']*100:.2f}%")
    st.sidebar.write(f"**Yield RMSE (Train):** {models['yield_rmse_train']:.2f}")
    st.sidebar.write(f"**Yield RMSE (Test):** {models['yield_rmse_test']:.2f}")

    # 4.3 User Input Form
    st.subheader("Enter Your Farm Conditions")
    with st.form("farm_form"):
        soil_moisture = st.slider("Soil Moisture (%)", 0.0, 100.0, 25.0, 1.0)
        soil_ph = st.slider("Soil pH", 0.0, 14.0, 6.5, 0.1)
        temperature = st.slider("Temperature (°C)", 0.0, 50.0, 28.0, 1.0)
        humidity = st.slider("Humidity (%)", 0.0, 100.0, 60.0, 1.0)
        crop_health = st.slider("Crop Health (0-100)", 0.0, 100.0, 80.0, 1.0)
        leaf_wetness = st.slider("Leaf Wetness (0-100)", 0.0, 100.0, 40.0, 1.0)
        rainfall = st.number_input("Rainfall (mm)", 0.0, 500.0, 50.0, 5.0)
        planting_density = st.number_input("#Plants/Acre", 10.0, 500.0, 100.0, 10.0)
        irrigation_freq = st.number_input("Irrigation Frequency (times/week)", 0.0, 14.0, 3.0, 1.0)
        fertilizer_score = st.slider("Fertilizer Score (0-100)", 0.0, 100.0, 50.0, 1.0)
        st.markdown("### GPT Setup")
        openai_api_key = st.text_input("OpenAI API Key (for GPT)", type="password")
        submitted = st.form_submit_button("Analyze & Get GPT Advice")

    if submitted:
        st.subheader("Local Analysis & Predictions")

        # 4.3.1 Basic rules
        irrigation_needed = (soil_moisture < 30)
        crop_health_status = "Healthy" if crop_health > 85 else "Needs Attention"

        # The pipelines were fitted on DataFrames with named columns, so the
        # single-row inputs below use the same column names in the same
        # order. Passing bare arrays makes scikit-learn warn about missing
        # feature names on every prediction.

        # 4.3.2 Pest Prediction
        X_pest = pd.DataFrame(
            [[temperature, humidity, leaf_wetness]],
            columns=["temperature", "humidity", "leaf_wetness"],
        )
        pest_outbreak = bool(models["pest_model"].predict(X_pest)[0] == 1)

        # 4.3.3 Disease Detection
        X_dis = pd.DataFrame(
            [[soil_ph, rainfall, planting_density]],
            columns=["soil_ph", "rainfall", "planting_density"],
        )
        disease_present = bool(models["disease_model"].predict(X_dis)[0] == 1)

        # 4.3.4 Yield Regression
        # We treat "crop_health" as a stand-in for "soil_fertility" here.
        X_yield = pd.DataFrame(
            [[crop_health, temperature, irrigation_freq, fertilizer_score]],
            columns=["soil_fertility", "temperature", "irrigation_freq", "fertilizer_score"],
        )
        yield_prediction = models["yield_model"].predict(X_yield)[0]

        # Display local results
        st.write(f"- **Irrigation Needed?** {irrigation_needed}")
        st.write(f"- **Crop Health Status:** {crop_health_status}")
        st.write(f"- **Pest Outbreak Likely?** {pest_outbreak}")
        st.write(f"- **Disease Present?** {disease_present}")
        st.write(f"- **Predicted Yield (t/ha):** {yield_prediction:.2f}")

        # 4.3.5 GPT Explanation
        summary_prompt = (
            "These are the farm conditions:\n"
            f" - Soil Moisture: {soil_moisture:.1f}%\n"
            f" - Soil pH: {soil_ph:.1f}\n"
            f" - Temperature: {temperature:.1f} °C\n"
            f" - Humidity: {humidity:.1f}%\n"
            f" - Crop Health: {crop_health:.1f}\n"
            f" - Leaf Wetness: {leaf_wetness:.1f}\n"
            f" - Rainfall: {rainfall:.1f} mm\n"
            f" - Planting Density: {planting_density:.1f}\n"
            f" - Irrigation Frequency: {irrigation_freq:.1f} times/week\n"
            f" - Fertilizer Score: {fertilizer_score:.1f}\n\n"
            "Local ML Results:\n"
            f" - Irrigation Needed: {irrigation_needed}\n"
            f" - Crop Health Status: {crop_health_status}\n"
            f" - Pest Outbreak: {pest_outbreak}\n"
            f" - Disease Present: {disease_present}\n"
            f" - Yield: {yield_prediction:.2f} t/ha\n\n"
            "Please provide a clear, helpful explanation in plain language, "
            "along with sustainable, cost-effective steps to improve or maintain these conditions."
        )
        st.subheader("GPT Advice")
        gpt_response = call_gpt(summary_prompt, openai_api_key)
        st.write(gpt_response)
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()