# Spaces: Running -- page-status text captured above the app.py listing; not Python code.
############################################## | |
# app.py - Near-Perfect Accuracy Farm AI Demo | |
############################################## | |
import streamlit as st | |
import numpy as np | |
import pandas as pd | |
import requests | |
import json | |
import os | |
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.preprocessing import PolynomialFeatures, StandardScaler | |
from sklearn.pipeline import Pipeline | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import accuracy_score, mean_squared_error | |
################################################################################ | |
# 1) SYNTHETIC DATA GENERATION WITH NEAR-PERFECT RELATIONSHIPS | |
################################################################################ | |
def generate_pest_data(n=200, seed=42):
    """
    Generate synthetic data for pest-outbreak classification.

    Three features are drawn uniformly at random and combined into a
    deterministic score:

        pest_score = 0.3*temperature + 0.5*humidity + 0.1*leaf_wetness

    A row is labelled 1 when its score is at or above the median score of
    the generated sample, so the classes come out (almost exactly) balanced
    and the label is a noise-free function of the features.

    Args:
        n: Number of rows to generate.
        seed: Seed for the private random generator. The default (42)
            reproduces the values the app has always used.

    Returns:
        DataFrame with columns ``temperature`` (15-40), ``humidity``
        (30-90), ``leaf_wetness`` (0-100) and ``pest_outbreak`` (0 or 1).
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.* calls, but does not reseed the process-wide
    # generator that other code may be relying on.
    rng = np.random.RandomState(seed)

    # Draw order matters for reproducibility: temperature, humidity, wetness.
    temperature = rng.uniform(15, 40, n)
    humidity = rng.uniform(30, 90, n)
    leaf_wetness = rng.uniform(0, 100, n)

    pest_score = 0.3 * temperature + 0.5 * humidity + 0.1 * leaf_wetness
    # Thresholding at the sample median splits the rows roughly in half.
    median_score = np.median(pest_score)
    pest_outbreak = (pest_score >= median_score).astype(int)

    return pd.DataFrame({
        "temperature": temperature,
        "humidity": humidity,
        "leaf_wetness": leaf_wetness,
        "pest_outbreak": pest_outbreak,
    })
def generate_disease_data(n=200, seed=123):
    """
    Generate synthetic data for disease-presence classification.

    The label comes from a score that is quadratic in soil pH and linear in
    the other two features:

        disease_score = -(soil_ph - 6.5)**2 + 0.02*rainfall
                        + 0.006*planting_density

    A row is labelled 1 when its score is at or above the median score of
    the generated sample, so the label is a noise-free function of the
    features.

    Args:
        n: Number of rows to generate.
        seed: Seed for the private random generator. The default (123)
            reproduces the values the app has always used.

    Returns:
        DataFrame with columns ``soil_ph`` (5.0-8.0), ``rainfall`` (0-200),
        ``planting_density`` (50-200) and ``disease_present`` (0 or 1).
    """
    # Same stream as np.random.seed(seed) + np.random.*, without reseeding
    # the process-wide generator.
    rng = np.random.RandomState(seed)

    # Draw order matters for reproducibility: pH, rainfall, density.
    soil_ph = rng.uniform(5.0, 8.0, n)
    rainfall = rng.uniform(0, 200, n)
    planting_density = rng.uniform(50, 200, n)

    disease_score = (
        -1.0 * (soil_ph - 6.5) ** 2
        + 0.02 * rainfall
        + 0.006 * planting_density
    )
    # Thresholding at the sample median splits the rows roughly in half.
    median_score = np.median(disease_score)
    disease_present = (disease_score >= median_score).astype(int)

    return pd.DataFrame({
        "soil_ph": soil_ph,
        "rainfall": rainfall,
        "planting_density": planting_density,
        "disease_present": disease_present,
    })
def generate_yield_data(n=200, seed=999):
    """
    Generate synthetic data for crop-yield regression.

    The target is an almost deterministic function of the features:

        crop_yield = 2.0*soil_fertility - 0.3*|temperature - 25|
                     + 3.0*irrigation_freq + 0.8*fertilizer_score
                     + N(0, 1) noise

    The relationship is linear in every feature except temperature, which
    enters through its absolute distance from 25.

    Args:
        n: Number of rows to generate.
        seed: Seed for the private random generator. The default (999)
            reproduces the values the app has always used.

    Returns:
        DataFrame with columns ``soil_fertility`` (0-100), ``temperature``
        (15-35), ``irrigation_freq`` (0-10), ``fertilizer_score`` (0-100)
        and ``crop_yield``.
    """
    # Same stream as np.random.seed(seed) + np.random.*, without reseeding
    # the process-wide generator.
    rng = np.random.RandomState(seed)

    # Draw order matters for reproducibility: the four features first, then
    # the noise term.
    soil_fertility = rng.uniform(0, 100, n)
    temperature = rng.uniform(15, 35, n)
    irrigation_freq = rng.uniform(0, 10, n)
    fertilizer_score = rng.uniform(0, 100, n)
    noise = rng.normal(0, 1, n)  # small relative to the signal's range

    yield_val = (
        2.0 * soil_fertility
        + (-0.3) * np.abs(temperature - 25)
        + 3.0 * irrigation_freq
        + 0.8 * fertilizer_score
        + noise
    )

    return pd.DataFrame({
        "soil_fertility": soil_fertility,
        "temperature": temperature,
        "irrigation_freq": irrigation_freq,
        "fertilizer_score": fertilizer_score,
        "crop_yield": yield_val,
    })
################################################################################ | |
# 2) TRAINING MODELS (CACHED FOR PERFORMANCE) | |
################################################################################ | |
def _rmse(y_true, y_pred):
    """Return the root-mean-squared error between two sequences."""
    # mean_squared_error(..., squared=False) was deprecated in scikit-learn
    # 1.4 and removed in 1.6; taking the square root ourselves works on
    # every version.
    return float(np.sqrt(mean_squared_error(y_true, y_pred)))


def train_all_models():
    """
    Train the three demo models on freshly generated synthetic data.

    Models:
      - Pest outbreak: StandardScaler -> RandomForestClassifier
      - Disease presence: PolynomialFeatures(2) -> StandardScaler
        -> LogisticRegression
      - Yield: StandardScaler -> GradientBoostingRegressor

    Both classifiers are fitted on their full dataset and their accuracy is
    measured on that same training data, so those figures show how well the
    model memorised the data, not how well it generalises. Only the yield
    model uses a held-out split (80/20).

    Returns:
        dict with keys ``pest_model``, ``pest_accuracy``, ``disease_model``,
        ``disease_accuracy``, ``yield_model``, ``yield_rmse_train`` and
        ``yield_rmse_test``.
    """
    # 2.1 Pest model
    pest_df = generate_pest_data()
    X_pest = pest_df[["temperature", "humidity", "leaf_wetness"]]
    y_pest = pest_df["pest_outbreak"]
    pest_pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("rf", RandomForestClassifier(n_estimators=50, random_state=42)),
    ])
    pest_pipeline.fit(X_pest, y_pest)
    # Training-set accuracy (no held-out data for this model).
    pest_accuracy = accuracy_score(y_pest, pest_pipeline.predict(X_pest))

    # 2.2 Disease model
    disease_df = generate_disease_data()
    X_dis = disease_df[["soil_ph", "rainfall", "planting_density"]]
    y_dis = disease_df["disease_present"]
    disease_pipeline = Pipeline([
        # Degree-2 terms let a linear classifier represent the quadratic
        # soil-pH effect used to generate the labels.
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("scaler", StandardScaler()),
        ("lr", LogisticRegression(random_state=42, max_iter=2000)),
    ])
    disease_pipeline.fit(X_dis, y_dis)
    # Training-set accuracy (no held-out data for this model).
    disease_accuracy = accuracy_score(y_dis, disease_pipeline.predict(X_dis))

    # 2.3 Yield model
    yield_df = generate_yield_data()
    X_yield = yield_df[
        ["soil_fertility", "temperature", "irrigation_freq", "fertilizer_score"]
    ]
    y_yield = yield_df["crop_yield"]
    X_train, X_test, y_train, y_test = train_test_split(
        X_yield, y_yield, test_size=0.2, random_state=42
    )
    yield_pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("gbr", GradientBoostingRegressor(
            n_estimators=50, learning_rate=0.1, random_state=42
        )),
    ])
    yield_pipeline.fit(X_train, y_train)
    # Report both the fit on the training rows and the held-out error.
    yield_rmse_train = _rmse(y_train, yield_pipeline.predict(X_train))
    yield_rmse_test = _rmse(y_test, yield_pipeline.predict(X_test))

    return {
        "pest_model": pest_pipeline,
        "pest_accuracy": pest_accuracy,
        "disease_model": disease_pipeline,
        "disease_accuracy": disease_accuracy,
        "yield_model": yield_pipeline,
        "yield_rmse_train": yield_rmse_train,
        "yield_rmse_test": yield_rmse_test,
    }
################################################################################ | |
# 3) GPT CALLS (BYPASSING 'openai' LIB) | |
################################################################################ | |
def call_gpt(prompt_text, openai_api_key):
    """
    Send ``prompt_text`` to the OpenAI Chat Completions endpoint.

    The request is made directly with ``requests`` rather than through the
    ``openai`` client library. This function never raises for transport or
    API problems: every failure is reported as a human-readable string so
    the caller can display it as-is.

    Args:
        prompt_text: The user message to send.
        openai_api_key: API key used as the bearer token. An empty, missing
            or whitespace-only key short-circuits without any network call.

    Returns:
        The assistant's reply text, or a message starting with ``ERROR:`` /
        ``Error calling GPT:`` / ``No valid response`` describing what went
        wrong.
    """
    # Strip so a key pasted with stray whitespace still forms a valid header,
    # and so a whitespace-only value is treated as "no key".
    api_key = (openai_api_key or "").strip()
    if not api_key:
        return "ERROR: No OpenAI API key provided. Please enter it to get GPT advice."

    endpoint = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": "gpt-3.5-turbo",  # or "gpt-4" if you have access
        "messages": [
            {"role": "system", "content": "You are a highly knowledgeable, helpful farm AI assistant."},
            {"role": "user", "content": prompt_text},
        ],
        "max_tokens": 500,
        "temperature": 0.7,
    }

    try:
        response = requests.post(endpoint, headers=headers, json=payload, timeout=30)
        # Parsed regardless of status code: failed requests are expected to
        # carry a JSON body describing the error, which is inspected below.
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        # RequestException: connection/timeout problems.
        # ValueError: the body was not valid JSON.
        return f"Error calling GPT: {exc}"

    try:
        # Surface the API's own explanation (bad key, rate limit, ...) rather
        # than a generic "no valid response".
        api_error = result.get("error")
        if api_error:
            detail = api_error.get("message") if isinstance(api_error, dict) else api_error
            return f"Error calling GPT: {detail}"

        choices = result.get("choices") or []
        if choices:
            return choices[0]["message"]["content"].strip()
    except (AttributeError, IndexError, KeyError, TypeError) as exc:
        # The payload did not have the expected shape.
        return f"Error calling GPT: {exc}"

    return "No valid response from GPT."
################################################################################ | |
# 4) STREAMLIT APP (NEAR-PERFECT ACCURACY, SCALABLE POC) | |
################################################################################ | |
def main():
    """
    Render the Streamlit page.

    Flow: obtain the trained models, show their metrics in the sidebar,
    collect farm conditions through a form and, once the form is submitted,
    display the rule-based and model-based results followed by GPT's advice
    on them.
    """
    st.title("Near-Perfect Farm AI + GPT (Scalable PoC)")
    st.markdown(
        "\n"
        "**Features**:\n"
        "- Three local ML models (pest, disease, yield) with near-perfect synthetic data.\n"
        "- GPT for advanced explanations and advice.\n"
        "- Demonstrates a scalable approach for real production: replace synthetic data\n"
        "and model training with real data or pre-trained models.\n"
    )

    # 4.1 Load/Train Models
    # Streamlit re-executes the script on every widget interaction, so without
    # caching all three models would be retrained on each form submission.
    # st.cache_resource is looked up defensively so older Streamlit releases
    # that lack it still work (uncached).
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        load_models = cache_resource(show_spinner=False)(train_all_models)
    else:
        load_models = train_all_models
    with st.spinner("Training models on near-perfect synthetic data..."):
        models = load_models()

    # 4.2 Sidebar: Show training metrics
    st.sidebar.header("Local Model Performance")
    st.sidebar.write(f"**Pest Model Accuracy:** {models['pest_accuracy']*100:.2f}%")
    st.sidebar.write(f"**Disease Model Accuracy:** {models['disease_accuracy']*100:.2f}%")
    st.sidebar.write(f"**Yield RMSE (Train):** {models['yield_rmse_train']:.2f}")
    st.sidebar.write(f"**Yield RMSE (Test):** {models['yield_rmse_test']:.2f}")

    # 4.3 User Input Form
    st.subheader("Enter Your Farm Conditions")
    with st.form("farm_form"):
        soil_moisture = st.slider("Soil Moisture (%)", 0.0, 100.0, 25.0, 1.0)
        soil_ph = st.slider("Soil pH", 0.0, 14.0, 6.5, 0.1)
        temperature = st.slider("Temperature (°C)", 0.0, 50.0, 28.0, 1.0)
        humidity = st.slider("Humidity (%)", 0.0, 100.0, 60.0, 1.0)
        crop_health = st.slider("Crop Health (0-100)", 0.0, 100.0, 80.0, 1.0)
        leaf_wetness = st.slider("Leaf Wetness (0-100)", 0.0, 100.0, 40.0, 1.0)
        rainfall = st.number_input("Rainfall (mm)", 0.0, 500.0, 50.0, 5.0)
        planting_density = st.number_input("#Plants/Acre", 10.0, 500.0, 100.0, 10.0)
        irrigation_freq = st.number_input("Irrigation Frequency (times/week)", 0.0, 14.0, 3.0, 1.0)
        fertilizer_score = st.slider("Fertilizer Score (0-100)", 0.0, 100.0, 50.0, 1.0)
        st.markdown("### GPT Setup")
        openai_api_key = st.text_input("OpenAI API Key (for GPT)", type="password")
        submitted = st.form_submit_button("Analyze & Get GPT Advice")

    if submitted:
        st.subheader("Local Analysis & Predictions")

        # 4.3.1 Basic rules
        irrigation_needed = (soil_moisture < 30)
        crop_health_status = "Healthy" if crop_health > 85 else "Needs Attention"

        # The pipelines are fitted on DataFrames with named columns, so the
        # single-row inputs below use the same names. That keeps the column
        # order explicit and avoids scikit-learn's missing-feature-names
        # warning.

        # 4.3.2 Pest Prediction
        pest_features = pd.DataFrame(
            [[temperature, humidity, leaf_wetness]],
            columns=["temperature", "humidity", "leaf_wetness"],
        )
        pest_outbreak = bool(models["pest_model"].predict(pest_features)[0] == 1)

        # 4.3.3 Disease Detection
        disease_features = pd.DataFrame(
            [[soil_ph, rainfall, planting_density]],
            columns=["soil_ph", "rainfall", "planting_density"],
        )
        disease_present = bool(models["disease_model"].predict(disease_features)[0] == 1)

        # 4.3.4 Yield Regression
        # We treat "crop_health" as a stand-in for "soil_fertility" here.
        yield_features = pd.DataFrame(
            [[crop_health, temperature, irrigation_freq, fertilizer_score]],
            columns=["soil_fertility", "temperature", "irrigation_freq", "fertilizer_score"],
        )
        yield_prediction = models["yield_model"].predict(yield_features)[0]

        # Display local results
        st.write(f"- **Irrigation Needed?** {irrigation_needed}")
        st.write(f"- **Crop Health Status:** {crop_health_status}")
        st.write(f"- **Pest Outbreak Likely?** {pest_outbreak}")
        st.write(f"- **Disease Present?** {disease_present}")
        st.write(f"- **Predicted Yield (t/ha):** {yield_prediction:.2f}")

        # 4.3.5 GPT Explanation
        summary_prompt = (
            "These are the farm conditions:\n"
            f" - Soil Moisture: {soil_moisture:.1f}%\n"
            f" - Soil pH: {soil_ph:.1f}\n"
            f" - Temperature: {temperature:.1f} °C\n"
            f" - Humidity: {humidity:.1f}%\n"
            f" - Crop Health: {crop_health:.1f}\n"
            f" - Leaf Wetness: {leaf_wetness:.1f}\n"
            f" - Rainfall: {rainfall:.1f} mm\n"
            f" - Planting Density: {planting_density:.1f}\n"
            f" - Irrigation Frequency: {irrigation_freq:.1f} times/week\n"
            f" - Fertilizer Score: {fertilizer_score:.1f}\n\n"
            "Local ML Results:\n"
            f" - Irrigation Needed: {irrigation_needed}\n"
            f" - Crop Health Status: {crop_health_status}\n"
            f" - Pest Outbreak: {pest_outbreak}\n"
            f" - Disease Present: {disease_present}\n"
            f" - Yield: {yield_prediction:.2f} t/ha\n\n"
            "Please provide a clear, helpful explanation in plain language, "
            "along with sustainable, cost-effective steps to improve or maintain these conditions."
        )
        st.subheader("GPT Advice")
        # call_gpt returns a displayable string even on failure (e.g. no key).
        gpt_response = call_gpt(summary_prompt, openai_api_key)
        st.write(gpt_response)
# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()