import gradio as gr

longitude = gr.Slider(-124.00, -114.00, step=0.01, label="longitude")
latitude = gr.Slider(32.00, 42.00, step=0.01, label="latitude")
housing_median_age = gr.Slider(1, 100, step=1, label="housing_median_age")
total_rooms = gr.Slider(1, 50000, step=1, label="total_rooms")
total_bedrooms = gr.Slider(1, 10000, step=1, label="total_bedrooms")
population = gr.Slider(1, 40000, step=1, label="population")
households = gr.Slider(1, 6100, step=1, label="households")
median_income = gr.Slider(0.1, 50, step=0.15, label="median_income")

pred_house_price = gr.Textbox(label="predicted housing price")
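# Note: Gradio passes each slider's value positionally to housing_predict, in the
# order the components are listed in the `inputs` argument of gr.Interface below.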


import os

import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

MODEL_PATH = "sample_data/LinearRegressionHousing.pkl"

def housing_predict(longitude, latitude, housing_median_age, total_rooms, total_bedrooms,
                    population, households, median_income):
  """Predict the median house value for a district described by the features
  [longitude, latitude, housing_median_age, total_rooms, total_bedrooms,
  population, households, median_income].
  """
  item = [longitude, latitude, housing_median_age, total_rooms, total_bedrooms,
          population, households, median_income]
  print(f"passing parameters: {item}")

  if os.path.exists(MODEL_PATH):
    # Reuse the scaler and model fitted on an earlier run.
    scaler, regr = joblib.load(MODEL_PATH)
    print("loaded scaler and model directly from pkl file")
  else:
    print("loading training data...")
    # housing.csv is assumed to contain only the eight numeric input columns
    # above plus the median_house_value target.
    housing = pd.read_csv("sample_data/housing.csv")
    train_set, test_set = train_test_split(housing, test_size=0.2, random_state=10)

    ## 1. clean the missing values
    train_set_clean = train_set.dropna(subset=["total_bedrooms"])

    ## 2. derive training features and training labels
    train_labels = train_set_clean["median_house_value"].copy()  # output label Y
    train_features = train_set_clean.drop("median_house_value", axis=1)  # drop the label to get features X

    ## 3. scale the numeric features in the training set
    # MinMaxScaler rescales each column to [0, 1]: x' = (x - min) / (max - min)
    scaler = MinMaxScaler()  # define the transformer
    scaler.fit(train_features)  # learn the min and max of each column
    train_features_normalized = scaler.transform(train_features)

    ## 4. fit the model
    lin_reg = LinearRegression()
    regr = lin_reg.fit(train_features_normalized, train_labels)  # feed training data X and labels Y

    # Persist the scaler together with the model: predictions must be made on
    # features scaled exactly as the training data was.
    joblib.dump((scaler, regr), MODEL_PATH)

  # Apply the same min-max scaling to the user input before predicting.
  res = regr.predict(scaler.transform([item]))
  print(f"res={res}")
  return str(res[0])

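# Optional smoke test with hypothetical feature values (not drawn from the
# dataset): call the function directly to verify the train-or-load-and-predict
# path before starting the UI.
# print(housing_predict(-122.23, 37.88, 41, 880, 129, 322, 126, 8.33))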
gr.Interface(
    fn=housing_predict,
    inputs=[longitude, latitude, housing_median_age, total_rooms, total_bedrooms,
            population, households, median_income],
    outputs=pred_house_price,
).launch(debug=True)
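# launch(debug=True) blocks the script and surfaces exceptions in the console;
# passing share=True to launch() would additionally create a temporary public URL.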