##############################################
# app.py - Near-Perfect Accuracy Farm AI Demo
##############################################

import json
import os

import numpy as np
import pandas as pd
import requests
import streamlit as st
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

# Feature column names, shared by training and prediction so that the frames
# handed to each fitted pipeline always carry the names it was trained on.
PEST_FEATURES = ["temperature", "humidity", "leaf_wetness"]
DISEASE_FEATURES = ["soil_ph", "rainfall", "planting_density"]
YIELD_FEATURES = [
    "soil_fertility",
    "temperature",
    "irrigation_freq",
    "fertilizer_score",
]

OPENAI_CHAT_ENDPOINT = "https://api.openai.com/v1/chat/completions"
DEFAULT_GPT_MODEL = "gpt-3.5-turbo"  # or "gpt-4" if you have access


################################################################################
# 1) SYNTHETIC DATA GENERATION WITH NEAR-PERFECT RELATIONSHIPS
################################################################################

@st.cache_data
def generate_pest_data(n=200):
    """Generate synthetic data for pest outbreak classification.

    The label is a deterministic function of the features
    (``0.3*temperature + 0.5*humidity + 0.1*leaf_wetness`` thresholded at its
    median), so a model can learn it almost perfectly.

    Args:
        n: Number of rows to generate.

    Returns:
        DataFrame with columns ``temperature``, ``humidity``, ``leaf_wetness``
        and the 0/1 target ``pest_outbreak``.
    """
    # A local legacy RandomState yields the same stream that
    # np.random.seed(42) would, without reseeding the process-global RNG.
    rng = np.random.RandomState(42)

    temperature = rng.uniform(15, 40, n)
    humidity = rng.uniform(30, 90, n)
    leaf_wetness = rng.uniform(0, 100, n)

    pest_score = 0.3 * temperature + 0.5 * humidity + 0.1 * leaf_wetness
    # Thresholding at the median splits the rows into two roughly equal classes.
    pest_outbreak = (pest_score >= np.median(pest_score)).astype(int)

    return pd.DataFrame({
        "temperature": temperature,
        "humidity": humidity,
        "leaf_wetness": leaf_wetness,
        "pest_outbreak": pest_outbreak,
    })


@st.cache_data
def generate_disease_data(n=200):
    """Generate synthetic data for disease detection classification.

    The label comes from a quadratic score,
    ``-(soil_ph - 6.5)**2 + 0.02*rainfall + 0.006*planting_density``,
    thresholded at its median.

    Args:
        n: Number of rows to generate.

    Returns:
        DataFrame with columns ``soil_ph``, ``rainfall``, ``planting_density``
        and the 0/1 target ``disease_present``.
    """
    # Local RandomState: same values as np.random.seed(123), no global reseed.
    rng = np.random.RandomState(123)

    soil_ph = rng.uniform(5.0, 8.0, n)
    rainfall = rng.uniform(0, 200, n)
    planting_density = rng.uniform(50, 200, n)

    disease_score = (
        -1.0 * (soil_ph - 6.5) ** 2
        + 0.02 * rainfall
        + 0.006 * planting_density
    )
    disease_present = (disease_score >= np.median(disease_score)).astype(int)

    return pd.DataFrame({
        "soil_ph": soil_ph,
        "rainfall": rainfall,
        "planting_density": planting_density,
        "disease_present": disease_present,
    })


@st.cache_data
def generate_yield_data(n=200):
    """Generate synthetic data for yield regression.

    The target is
    ``2.0*soil_fertility - 0.3*|temperature - 25| + 3.0*irrigation_freq
    + 0.8*fertilizer_score`` plus standard-normal noise.

    Args:
        n: Number of rows to generate.

    Returns:
        DataFrame with columns ``soil_fertility``, ``temperature``,
        ``irrigation_freq``, ``fertilizer_score`` and the target ``crop_yield``.
    """
    # Local RandomState: same values as np.random.seed(999), no global reseed.
    rng = np.random.RandomState(999)

    # Draw order matters for reproducibility: features first, noise last.
    soil_fertility = rng.uniform(0, 100, n)
    temperature = rng.uniform(15, 35, n)
    irrigation_freq = rng.uniform(0, 10, n)
    fertilizer_score = rng.uniform(0, 100, n)
    noise = rng.normal(0, 1, n)  # small noise

    yield_val = (
        2.0 * soil_fertility
        + (-0.3) * np.abs(temperature - 25)
        + 3.0 * irrigation_freq
        + 0.8 * fertilizer_score
        + noise
    )

    return pd.DataFrame({
        "soil_fertility": soil_fertility,
        "temperature": temperature,
        "irrigation_freq": irrigation_freq,
        "fertilizer_score": fertilizer_score,
        "crop_yield": yield_val,
    })


################################################################################
# 2) TRAINING MODELS (CACHED FOR PERFORMANCE)
################################################################################

def _rmse(y_true, y_pred):
    """Return the root-mean-squared error between two target vectors."""
    # mean_squared_error(..., squared=False) was deprecated in scikit-learn 1.4
    # and removed in 1.6; taking the square root works on every version.
    return float(np.sqrt(mean_squared_error(y_true, y_pred)))


@st.cache_resource
def train_all_models():
    """Train the three demo models on the synthetic data sets.

    - Pest outbreak: ``StandardScaler`` + ``RandomForestClassifier``
    - Disease presence: degree-2 ``PolynomialFeatures`` + ``StandardScaler``
      + ``LogisticRegression``
    - Yield: ``StandardScaler`` + ``GradientBoostingRegressor``

    The pest and disease accuracies are computed on the same rows the models
    were fitted on (training accuracy). The yield model is fitted on an 80%
    split and scored on both the training and the held-out 20% split.

    Returns:
        dict with keys ``pest_model``, ``pest_accuracy``, ``disease_model``,
        ``disease_accuracy``, ``yield_model``, ``yield_rmse_train`` and
        ``yield_rmse_test``.
    """
    # 2.1 Pest Model
    pest_df = generate_pest_data()
    X_pest = pest_df[PEST_FEATURES]
    y_pest = pest_df["pest_outbreak"]

    pest_pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("rf", RandomForestClassifier(n_estimators=50, random_state=42)),
    ])
    pest_pipeline.fit(X_pest, y_pest)
    pest_accuracy = accuracy_score(y_pest, pest_pipeline.predict(X_pest))

    # 2.2 Disease Model
    disease_df = generate_disease_data()
    X_dis = disease_df[DISEASE_FEATURES]
    y_dis = disease_df["disease_present"]

    disease_pipeline = Pipeline([
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("scaler", StandardScaler()),
        ("lr", LogisticRegression(random_state=42, max_iter=2000)),
    ])
    disease_pipeline.fit(X_dis, y_dis)
    disease_accuracy = accuracy_score(y_dis, disease_pipeline.predict(X_dis))

    # 2.3 Yield Model
    yield_df = generate_yield_data()
    X_yield = yield_df[YIELD_FEATURES]
    y_yield = yield_df["crop_yield"]

    X_train, X_test, y_train, y_test = train_test_split(
        X_yield, y_yield, test_size=0.2, random_state=42
    )

    yield_pipeline = Pipeline([
        ("scaler", StandardScaler()),
        ("gbr", GradientBoostingRegressor(
            n_estimators=50, learning_rate=0.1, random_state=42
        )),
    ])
    yield_pipeline.fit(X_train, y_train)

    # Train error shows how closely the data were learned; test error shows
    # how well that carries over to the held-out rows.
    yield_rmse_train = _rmse(y_train, yield_pipeline.predict(X_train))
    yield_rmse_test = _rmse(y_test, yield_pipeline.predict(X_test))

    return {
        "pest_model": pest_pipeline,
        "pest_accuracy": pest_accuracy,
        "disease_model": disease_pipeline,
        "disease_accuracy": disease_accuracy,
        "yield_model": yield_pipeline,
        "yield_rmse_train": yield_rmse_train,
        "yield_rmse_test": yield_rmse_test,
    }


################################################################################
# 3) GPT CALLS (BYPASSING 'openai' LIB)
################################################################################

def _extract_gpt_text(result):
    """Return the first choice's message text from a decoded body, or None."""
    if not isinstance(result, dict):
        return None
    choices = result.get("choices")
    if not isinstance(choices, list) or not choices:
        return None
    first = choices[0]
    message = first.get("message") if isinstance(first, dict) else None
    content = message.get("content") if isinstance(message, dict) else None
    return content.strip() if isinstance(content, str) else None


def _extract_gpt_error(result):
    """Return the ``error.message`` string from a decoded body, or None."""
    if not isinstance(result, dict):
        return None
    error = result.get("error")
    if not isinstance(error, dict):
        return None
    message = error.get("message")
    return message if isinstance(message, str) and message else None


def call_gpt(prompt_text, openai_api_key, model=DEFAULT_GPT_MODEL):
    """Post a prompt to the Chat Completions endpoint and return the reply.

    This function never raises for transport or API failures; it returns a
    human-readable message instead so the UI can display it directly.

    Args:
        prompt_text: Text sent as the user message.
        openai_api_key: Bearer token for the API. If empty, no request is made.
        model: Chat model name sent in the request body.

    Returns:
        The reply text of the first choice, or an error/diagnostic string.
    """
    if not openai_api_key:
        return "ERROR: No OpenAI API key provided. Please enter it to get GPT advice."

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "system",
                "content": "You are a highly knowledgeable, helpful farm AI assistant.",
            },
            {"role": "user", "content": prompt_text},
        ],
        "max_tokens": 500,
        "temperature": 0.7,
    }

    try:
        response = requests.post(
            OPENAI_CHAT_ENDPOINT, headers=headers, json=payload, timeout=30
        )
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        # RequestException covers network/timeout failures; ValueError covers
        # a body that is not valid JSON.
        return f"Error calling GPT: {exc}"

    text = _extract_gpt_text(result)
    if text is not None:
        return text

    # Surface the API's own explanation (bad key, quota, unknown model, ...)
    # rather than hiding it behind a generic message.
    api_error = _extract_gpt_error(result)
    if api_error is not None:
        return f"Error calling GPT: {api_error} (HTTP {response.status_code})"

    return "No valid response from GPT."


################################################################################
# 4) STREAMLIT APP (NEAR-PERFECT ACCURACY, SCALABLE POC)
################################################################################

def main():
    """Render the Streamlit page: metrics, input form, predictions, GPT advice."""
    st.title("Near-Perfect Farm AI + GPT (Scalable PoC)")
    st.markdown(
        "**Features**:\n"
        "- Three local ML models (pest, disease, yield) with near-perfect synthetic data.\n"
        "- GPT for advanced explanations and advice.\n"
        "- Demonstrates a scalable approach for real production: replace synthetic data "
        "and model training with real data or pre-trained models.\n"
    )

    # 4.1 Load/Train Models
    with st.spinner("Training models on near-perfect synthetic data..."):
        models = train_all_models()

    # 4.2 Sidebar: Show training metrics
    st.sidebar.header("Local Model Performance")
    st.sidebar.write(f"**Pest Model Accuracy:** {models['pest_accuracy']*100:.2f}%")
    st.sidebar.write(f"**Disease Model Accuracy:** {models['disease_accuracy']*100:.2f}%")
    st.sidebar.write(f"**Yield RMSE (Train):** {models['yield_rmse_train']:.2f}")
    st.sidebar.write(f"**Yield RMSE (Test):** {models['yield_rmse_test']:.2f}")

    # 4.3 User Input Form
    st.subheader("Enter Your Farm Conditions")
    with st.form("farm_form"):
        soil_moisture = st.slider("Soil Moisture (%)", 0.0, 100.0, 25.0, 1.0)
        soil_ph = st.slider("Soil pH", 0.0, 14.0, 6.5, 0.1)
        temperature = st.slider("Temperature (°C)", 0.0, 50.0, 28.0, 1.0)
        humidity = st.slider("Humidity (%)", 0.0, 100.0, 60.0, 1.0)
        crop_health = st.slider("Crop Health (0-100)", 0.0, 100.0, 80.0, 1.0)
        leaf_wetness = st.slider("Leaf Wetness (0-100)", 0.0, 100.0, 40.0, 1.0)
        rainfall = st.number_input("Rainfall (mm)", 0.0, 500.0, 50.0, 5.0)
        planting_density = st.number_input("#Plants/Acre", 10.0, 500.0, 100.0, 10.0)
        irrigation_freq = st.number_input(
            "Irrigation Frequency (times/week)", 0.0, 14.0, 3.0, 1.0
        )
        fertilizer_score = st.slider("Fertilizer Score (0-100)", 0.0, 100.0, 50.0, 1.0)

        st.markdown("### GPT Setup")
        openai_api_key = st.text_input("OpenAI API Key (for GPT)", type="password")

        submitted = st.form_submit_button("Analyze & Get GPT Advice")

    if not submitted:
        return

    st.subheader("Local Analysis & Predictions")

    # 4.3.1 Basic rules
    irrigation_needed = soil_moisture < 30
    crop_health_status = "Healthy" if crop_health > 85 else "Needs Attention"

    # The pipelines were fitted on named DataFrames, so predict with
    # single-row frames carrying the same column names. This avoids
    # scikit-learn's feature-name warning and ties each value to its column
    # by name rather than by position.

    # 4.3.2 Pest Prediction
    pest_input = pd.DataFrame(
        [[temperature, humidity, leaf_wetness]], columns=PEST_FEATURES
    )
    pest_outbreak = bool(models["pest_model"].predict(pest_input)[0] == 1)

    # 4.3.3 Disease Detection
    disease_input = pd.DataFrame(
        [[soil_ph, rainfall, planting_density]], columns=DISEASE_FEATURES
    )
    disease_present = bool(models["disease_model"].predict(disease_input)[0] == 1)

    # 4.3.4 Yield Regression
    # We treat "crop_health" as a stand-in for "soil_fertility" here.
    yield_input = pd.DataFrame(
        [[crop_health, temperature, irrigation_freq, fertilizer_score]],
        columns=YIELD_FEATURES,
    )
    yield_prediction = models["yield_model"].predict(yield_input)[0]

    # Display local results
    st.write(f"- **Irrigation Needed?** {irrigation_needed}")
    st.write(f"- **Crop Health Status:** {crop_health_status}")
    st.write(f"- **Pest Outbreak Likely?** {pest_outbreak}")
    st.write(f"- **Disease Present?** {disease_present}")
    st.write(f"- **Predicted Yield (t/ha):** {yield_prediction:.2f}")

    # 4.3.5 GPT Explanation
    summary_prompt = (
        "These are the farm conditions:\n"
        f" - Soil Moisture: {soil_moisture:.1f}%\n"
        f" - Soil pH: {soil_ph:.1f}\n"
        f" - Temperature: {temperature:.1f} °C\n"
        f" - Humidity: {humidity:.1f}%\n"
        f" - Crop Health: {crop_health:.1f}\n"
        f" - Leaf Wetness: {leaf_wetness:.1f}\n"
        f" - Rainfall: {rainfall:.1f} mm\n"
        f" - Planting Density: {planting_density:.1f}\n"
        f" - Irrigation Frequency: {irrigation_freq:.1f} times/week\n"
        f" - Fertilizer Score: {fertilizer_score:.1f}\n\n"
        "Local ML Results:\n"
        f" - Irrigation Needed: {irrigation_needed}\n"
        f" - Crop Health Status: {crop_health_status}\n"
        f" - Pest Outbreak: {pest_outbreak}\n"
        f" - Disease Present: {disease_present}\n"
        f" - Yield: {yield_prediction:.2f} t/ha\n\n"
        "Please provide a clear, helpful explanation in plain language, "
        "along with sustainable, cost-effective steps to improve or maintain these conditions."
    )

    st.subheader("GPT Advice")
    gpt_response = call_gpt(summary_prompt, openai_api_key)
    st.write(gpt_response)


if __name__ == "__main__":
    main()