Spaces:

emmunger
/

farm-ai-gpt

Running

App Files Files Community

farm-ai-gpt / app.py

emmunger

Update app.py

6e2acb7 verified about 1 month ago

raw

history blame

13.1 kB

	##############################################
	# app.py - Near-Perfect Accuracy Farm AI Demo
	##############################################

	import streamlit as st
	import numpy as np
	import pandas as pd
	import requests
	import json
	import os

	from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor
	from sklearn.linear_model import LogisticRegression
	from sklearn.preprocessing import PolynomialFeatures, StandardScaler
	from sklearn.pipeline import Pipeline
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import accuracy_score, mean_squared_error

	################################################################################
	# 1) SYNTHETIC DATA GENERATION WITH NEAR-PERFECT RELATIONSHIPS
	################################################################################

	@st.cache_data
	def generate_pest_data(n=200):
	    """
	    Generate a synthetic dataset for pest-outbreak classification.

	    The label is a deterministic function of the features, so a model can
	    learn it almost perfectly:

	        pest_score = 0.3 * temperature + 0.5 * humidity + 0.1 * leaf_wetness
	        pest_outbreak = 1 if pest_score >= median(pest_score) else 0

	    Args:
	        n: Number of rows to generate.

	    Returns:
	        pandas.DataFrame with columns ``temperature`` (uniform 15-40),
	        ``humidity`` (uniform 30-90), ``leaf_wetness`` (uniform 0-100) and
	        the 0/1 target ``pest_outbreak``.
	    """
	    # Use a private generator instead of np.random.seed(): it produces the
	    # same stream for seed 42 but does not reseed the process-wide NumPy RNG
	    # that other code in the app may be relying on.
	    rng = np.random.RandomState(42)

	    # Draw order matters for reproducibility: temperature, humidity, wetness.
	    temperature = rng.uniform(15, 40, n)
	    humidity = rng.uniform(30, 90, n)
	    leaf_wetness = rng.uniform(0, 100, n)

	    pest_score = 0.3 * temperature + 0.5 * humidity + 0.1 * leaf_wetness
	    # Thresholding at the median gives two roughly balanced classes.
	    pest_outbreak = (pest_score >= np.median(pest_score)).astype(int)

	    return pd.DataFrame({
	        "temperature": temperature,
	        "humidity": humidity,
	        "leaf_wetness": leaf_wetness,
	        "pest_outbreak": pest_outbreak,
	    })

	@st.cache_data
	def generate_disease_data(n=200):
	    """
	    Generate a synthetic dataset for disease-presence classification.

	    The label follows a deterministic, quadratic-in-pH score so that a
	    degree-2 polynomial model can separate the classes:

	        disease_score = -(soil_ph - 6.5)**2 + 0.02 * rainfall
	                        + 0.006 * planting_density
	        disease_present = 1 if disease_score >= median(disease_score) else 0

	    Args:
	        n: Number of rows to generate.

	    Returns:
	        pandas.DataFrame with columns ``soil_ph`` (uniform 5.0-8.0),
	        ``rainfall`` (uniform 0-200), ``planting_density`` (uniform 50-200)
	        and the 0/1 target ``disease_present``.
	    """
	    # Private generator: same stream as np.random.seed(123) followed by the
	    # module-level draws, without clobbering the global NumPy RNG state.
	    rng = np.random.RandomState(123)

	    # Draw order is part of the reproducibility contract.
	    soil_ph = rng.uniform(5.0, 8.0, n)
	    rainfall = rng.uniform(0, 200, n)
	    planting_density = rng.uniform(50, 200, n)

	    disease_score = (
	        -1.0 * (soil_ph - 6.5) ** 2
	        + 0.02 * rainfall
	        + 0.006 * planting_density
	    )
	    # Median threshold -> roughly balanced 0/1 labels.
	    disease_present = (disease_score >= np.median(disease_score)).astype(int)

	    return pd.DataFrame({
	        "soil_ph": soil_ph,
	        "rainfall": rainfall,
	        "planting_density": planting_density,
	        "disease_present": disease_present,
	    })

	@st.cache_data
	def generate_yield_data(n=200):
	    """
	    Generate a synthetic dataset for crop-yield regression.

	    The target is an almost noise-free function of the features:

	        crop_yield = 2.0 * soil_fertility
	                     - 0.3 * abs(temperature - 25)
	                     + 3.0 * irrigation_freq
	                     + 0.8 * fertilizer_score
	                     + N(0, 1) noise

	    Args:
	        n: Number of rows to generate.

	    Returns:
	        pandas.DataFrame with columns ``soil_fertility`` (uniform 0-100),
	        ``temperature`` (uniform 15-35), ``irrigation_freq`` (uniform 0-10),
	        ``fertilizer_score`` (uniform 0-100) and the target ``crop_yield``
	        (tons/ha).
	    """
	    # Private generator: reproduces the seed-999 stream without reseeding
	    # the process-wide NumPy RNG as a side effect.
	    rng = np.random.RandomState(999)

	    # Keep this draw order (features first, noise last) so the generated
	    # values stay identical to the globally seeded version.
	    soil_fertility = rng.uniform(0, 100, n)
	    temperature = rng.uniform(15, 35, n)
	    irrigation_freq = rng.uniform(0, 10, n)
	    fertilizer_score = rng.uniform(0, 100, n)
	    noise = rng.normal(0, 1, n)  # unit-variance noise, small vs. the signal

	    yield_val = (
	        2.0 * soil_fertility
	        + (-0.3) * np.abs(temperature - 25)
	        + 3.0 * irrigation_freq
	        + 0.8 * fertilizer_score
	        + noise
	    )

	    return pd.DataFrame({
	        "soil_fertility": soil_fertility,
	        "temperature": temperature,
	        "irrigation_freq": irrigation_freq,
	        "fertilizer_score": fertilizer_score,
	        "crop_yield": yield_val,
	    })

	################################################################################
	# 2) TRAINING MODELS (CACHED FOR PERFORMANCE)
	################################################################################

	@st.cache_resource
	def train_all_models():
	    """
	    Train the three demo models on the synthetic datasets.

	    Models:
	      - Pest outbreak: StandardScaler + RandomForestClassifier.
	      - Disease presence: degree-2 PolynomialFeatures + StandardScaler
	        + LogisticRegression.
	      - Yield: StandardScaler + GradientBoostingRegressor, fitted on an
	        80% train split.

	    Returns:
	        dict with keys ``pest_model``, ``pest_accuracy``, ``disease_model``,
	        ``disease_accuracy``, ``yield_model``, ``yield_rmse_train`` and
	        ``yield_rmse_test``.

	    Note:
	        ``pest_accuracy`` and ``disease_accuracy`` are measured on the same
	        rows the classifiers were fitted on, so they are training-set
	        accuracies, not estimates of generalization.
	    """
	    # 2.1 Pest model
	    pest_df = generate_pest_data()
	    X_pest = pest_df[["temperature", "humidity", "leaf_wetness"]]
	    y_pest = pest_df["pest_outbreak"]

	    pest_pipeline = Pipeline([
	        ("scaler", StandardScaler()),
	        ("rf", RandomForestClassifier(n_estimators=50, random_state=42)),
	    ])
	    pest_pipeline.fit(X_pest, y_pest)
	    pest_accuracy = accuracy_score(y_pest, pest_pipeline.predict(X_pest))

	    # 2.2 Disease model
	    disease_df = generate_disease_data()
	    X_dis = disease_df[["soil_ph", "rainfall", "planting_density"]]
	    y_dis = disease_df["disease_present"]

	    disease_pipeline = Pipeline([
	        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
	        ("scaler", StandardScaler()),
	        ("lr", LogisticRegression(random_state=42, max_iter=2000)),
	    ])
	    disease_pipeline.fit(X_dis, y_dis)
	    disease_accuracy = accuracy_score(y_dis, disease_pipeline.predict(X_dis))

	    # 2.3 Yield model
	    yield_df = generate_yield_data()
	    X_yield = yield_df[
	        ["soil_fertility", "temperature", "irrigation_freq", "fertilizer_score"]
	    ]
	    y_yield = yield_df["crop_yield"]

	    X_train, X_test, y_train, y_test = train_test_split(
	        X_yield, y_yield, test_size=0.2, random_state=42
	    )

	    yield_pipeline = Pipeline([
	        ("scaler", StandardScaler()),
	        ("gbr", GradientBoostingRegressor(
	            n_estimators=50, learning_rate=0.1, random_state=42
	        )),
	    ])
	    yield_pipeline.fit(X_train, y_train)

	    # RMSE is computed as sqrt(MSE) rather than via the `squared=False`
	    # keyword: that keyword was deprecated in scikit-learn 1.4 and removed
	    # in 1.6, where it raises TypeError. sqrt(MSE) works on every version.
	    yield_rmse_train = float(
	        np.sqrt(mean_squared_error(y_train, yield_pipeline.predict(X_train)))
	    )
	    yield_rmse_test = float(
	        np.sqrt(mean_squared_error(y_test, yield_pipeline.predict(X_test)))
	    )

	    return {
	        "pest_model": pest_pipeline,
	        "pest_accuracy": pest_accuracy,
	        "disease_model": disease_pipeline,
	        "disease_accuracy": disease_accuracy,
	        "yield_model": yield_pipeline,
	        "yield_rmse_train": yield_rmse_train,
	        "yield_rmse_test": yield_rmse_test,
	    }

	################################################################################
	# 3) GPT CALLS (BYPASSING 'openai' LIB)
	################################################################################

	def call_gpt(prompt_text, openai_api_key):
	    """
	    Send a prompt to the OpenAI Chat Completions endpoint and return the reply.

	    This function never raises for the handled failure modes; it returns a
	    human-readable string instead so the caller can display it directly.

	    Args:
	        prompt_text: The user message to send.
	        openai_api_key: API key used as the Bearer token. Surrounding
	            whitespace is ignored; an empty or whitespace-only key is
	            treated as missing.

	    Returns:
	        The assistant's reply text (stripped), or one of:
	          - "ERROR: No OpenAI API key provided. ..." when no key is given,
	          - "Error calling GPT: <detail>" on a network/JSON failure or when
	            the API returns an error payload,
	          - "No valid response from GPT." when the payload has no usable
	            choice.
	    """
	    # A key pasted with stray whitespace would produce an invalid
	    # Authorization header, so normalize before the emptiness check.
	    api_key = str(openai_api_key).strip() if openai_api_key else ""
	    if not api_key:
	        return "ERROR: No OpenAI API key provided. Please enter it to get GPT advice."

	    endpoint = "https://api.openai.com/v1/chat/completions"
	    headers = {
	        "Content-Type": "application/json",
	        "Authorization": f"Bearer {api_key}",
	    }
	    payload = {
	        "model": "gpt-3.5-turbo",  # or "gpt-4" if you have access
	        "messages": [
	            {"role": "system", "content": "You are a highly knowledgeable, helpful farm AI assistant."},
	            {"role": "user", "content": prompt_text},
	        ],
	        "max_tokens": 500,
	        "temperature": 0.7,
	    }

	    # Only the network call and JSON decoding are expected to fail here;
	    # keep the try body minimal so unrelated bugs are not masked.
	    try:
	        response = requests.post(endpoint, headers=headers, json=payload, timeout=30)
	        result = response.json()
	    except (requests.RequestException, ValueError) as e:
	        return f"Error calling GPT: {e}"

	    if not isinstance(result, dict):
	        return "No valid response from GPT."

	    # Failed requests (bad key, quota, ...) come back as an "error" object;
	    # surface its message instead of a generic "no valid response".
	    error = result.get("error")
	    if error:
	        detail = error.get("message") if isinstance(error, dict) else error
	        return f"Error calling GPT: {detail}"

	    choices = result.get("choices")
	    if isinstance(choices, list) and choices:
	        first = choices[0]
	        message = first.get("message") if isinstance(first, dict) else None
	        content = message.get("content") if isinstance(message, dict) else None
	        if isinstance(content, str):
	            return content.strip()

	    return "No valid response from GPT."

	################################################################################
	# 4) STREAMLIT APP (NEAR-PERFECT ACCURACY, SCALABLE POC)
	################################################################################

	def main():
	    """
	    Render the Streamlit page: train/load the models, collect farm
	    conditions from a form, show local predictions, and request GPT advice.

	    The models are fitted on DataFrames with named columns, so the
	    single-row inputs built here are DataFrames with the same column names
	    and order. Passing bare NumPy arrays instead makes scikit-learn emit a
	    feature-name mismatch warning on every prediction and depends on the
	    positional order silently matching.
	    """
	    st.title("Near-Perfect Farm AI + GPT (Scalable PoC)")
	    st.markdown("""
	Features:
	- Three local ML models (pest, disease, yield) with near-perfect synthetic data.
	- GPT for advanced explanations and advice.
	- Demonstrates a scalable approach for real production: replace synthetic data
	and model training with real data or pre-trained models.
	""")

	    # 4.1 Load/Train Models
	    with st.spinner("Training models on near-perfect synthetic data..."):
	        models = train_all_models()

	    # 4.2 Sidebar: Show training metrics
	    st.sidebar.header("Local Model Performance")
	    st.sidebar.write(f"Pest Model Accuracy: {models['pest_accuracy']*100:.2f}%")
	    st.sidebar.write(f"Disease Model Accuracy: {models['disease_accuracy']*100:.2f}%")
	    st.sidebar.write(f"Yield RMSE (Train): {models['yield_rmse_train']:.2f}")
	    st.sidebar.write(f"Yield RMSE (Test): {models['yield_rmse_test']:.2f}")

	    # 4.3 User Input Form
	    st.subheader("Enter Your Farm Conditions")
	    with st.form("farm_form"):
	        soil_moisture = st.slider("Soil Moisture (%)", 0.0, 100.0, 25.0, 1.0)
	        soil_ph = st.slider("Soil pH", 0.0, 14.0, 6.5, 0.1)
	        temperature = st.slider("Temperature (°C)", 0.0, 50.0, 28.0, 1.0)
	        humidity = st.slider("Humidity (%)", 0.0, 100.0, 60.0, 1.0)
	        crop_health = st.slider("Crop Health (0-100)", 0.0, 100.0, 80.0, 1.0)
	        leaf_wetness = st.slider("Leaf Wetness (0-100)", 0.0, 100.0, 40.0, 1.0)
	        rainfall = st.number_input("Rainfall (mm)", 0.0, 500.0, 50.0, 5.0)
	        planting_density = st.number_input("#Plants/Acre", 10.0, 500.0, 100.0, 10.0)
	        irrigation_freq = st.number_input("Irrigation Frequency (times/week)", 0.0, 14.0, 3.0, 1.0)
	        fertilizer_score = st.slider("Fertilizer Score (0-100)", 0.0, 100.0, 50.0, 1.0)

	        st.markdown("### GPT Setup")
	        openai_api_key = st.text_input("OpenAI API Key (for GPT)", type="password")

	        submitted = st.form_submit_button("Analyze & Get GPT Advice")

	    if submitted:
	        st.subheader("Local Analysis & Predictions")

	        # 4.3.1 Basic rules
	        irrigation_needed = (soil_moisture < 30)
	        crop_health_status = "Healthy" if crop_health > 85 else "Needs Attention"

	        # 4.3.2 Pest Prediction (columns mirror the training frame)
	        pest_features = pd.DataFrame(
	            [[temperature, humidity, leaf_wetness]],
	            columns=["temperature", "humidity", "leaf_wetness"],
	        )
	        pest_outbreak = bool(models["pest_model"].predict(pest_features)[0] == 1)

	        # 4.3.3 Disease Detection
	        disease_features = pd.DataFrame(
	            [[soil_ph, rainfall, planting_density]],
	            columns=["soil_ph", "rainfall", "planting_density"],
	        )
	        disease_present = bool(models["disease_model"].predict(disease_features)[0] == 1)

	        # 4.3.4 Yield Regression
	        # The form has no soil-fertility input; "crop_health" is used as a
	        # stand-in and fed into the model's "soil_fertility" column.
	        yield_features = pd.DataFrame(
	            [[crop_health, temperature, irrigation_freq, fertilizer_score]],
	            columns=["soil_fertility", "temperature", "irrigation_freq", "fertilizer_score"],
	        )
	        yield_prediction = models["yield_model"].predict(yield_features)[0]

	        # Display local results
	        st.write(f"- Irrigation Needed? {irrigation_needed}")
	        st.write(f"- Crop Health Status: {crop_health_status}")
	        st.write(f"- Pest Outbreak Likely? {pest_outbreak}")
	        st.write(f"- Disease Present? {disease_present}")
	        st.write(f"- Predicted Yield (t/ha): {yield_prediction:.2f}")

	        # 4.3.5 GPT Explanation: pass both the raw inputs and the local
	        # model outputs so the advice can refer to either.
	        summary_prompt = (
	            "These are the farm conditions:\n"
	            f" - Soil Moisture: {soil_moisture:.1f}%\n"
	            f" - Soil pH: {soil_ph:.1f}\n"
	            f" - Temperature: {temperature:.1f} °C\n"
	            f" - Humidity: {humidity:.1f}%\n"
	            f" - Crop Health: {crop_health:.1f}\n"
	            f" - Leaf Wetness: {leaf_wetness:.1f}\n"
	            f" - Rainfall: {rainfall:.1f} mm\n"
	            f" - Planting Density: {planting_density:.1f}\n"
	            f" - Irrigation Frequency: {irrigation_freq:.1f} times/week\n"
	            f" - Fertilizer Score: {fertilizer_score:.1f}\n\n"
	            "Local ML Results:\n"
	            f" - Irrigation Needed: {irrigation_needed}\n"
	            f" - Crop Health Status: {crop_health_status}\n"
	            f" - Pest Outbreak: {pest_outbreak}\n"
	            f" - Disease Present: {disease_present}\n"
	            f" - Yield: {yield_prediction:.2f} t/ha\n\n"
	            "Please provide a clear, helpful explanation in plain language, "
	            "along with sustainable, cost-effective steps to improve or maintain these conditions."
	        )

	        st.subheader("GPT Advice")
	        gpt_response = call_gpt(summary_prompt, openai_api_key)
	        st.write(gpt_response)

	# Entry point: build the page only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()