import io
import os
import zipfile

import numpy as np
import pandas as pd
import requests
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
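
# Download the resized TrashNet archive (six classes of waste images)
# and unpack it under ./data.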
output_dir = "./data"
url = "https://huggingface.co/datasets/garythung/trashnet/resolve/main/dataset-resized.zip"

response = requests.get(url)
response.raise_for_status()  # stop early if the download failed
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
    zip_ref.extractall(output_dir)
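
# Each class sits in its own sub-folder; index every image into a
# (filepath, label) dataframe, one row per image.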
data_dir = './data/dataset-resized'
garbage_types = os.listdir(data_dir)

data = []
for garbage_type in garbage_types:
    garbage_type_path = os.path.join(data_dir, garbage_type)
    if os.path.isdir(garbage_type_path):
        for file in os.listdir(garbage_type_path):
            data.append((os.path.join(garbage_type_path, file), garbage_type))

df = pd.DataFrame(data, columns=['filepath', 'label'])
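
# Stratified 80/20 split keeps each class's proportion the same in the
# train and validation sets.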
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['label'])
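
# Aggressive augmentation for training (rotations, shifts, flips, zoom,
# brightness/channel shifts); validation images only get the ResNet50
# preprocessing.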
train_datagen = ImageDataGenerator(
    rotation_range=60,
    width_shift_range=0.15,
    height_shift_range=0.15,
    zoom_range=0.20,
    horizontal_flip=True,
    vertical_flip=True,
    shear_range=0.05,
    brightness_range=[0.9, 1.1],
    channel_shift_range=10,
    fill_mode='nearest',
    preprocessing_function=preprocess_input
)

val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
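
# Stream batches of 384x384 images straight from disk via the dataframes.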
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col="filepath",
    y_col="label",
    target_size=(384, 384),
    batch_size=32,
    class_mode='categorical',
    shuffle=True  # reshuffle training batches every epoch
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col="filepath",
    y_col="label",
    target_size=(384, 384),
    batch_size=32,
    class_mode='categorical',
    shuffle=False  # keep validation order fixed
)
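
# Class weights offset TrashNet's class imbalance. compute_class_weight
# returns weights in the order of `classes`, and flow_from_dataframe maps
# labels to indices alphabetically, so the labels are sorted to match.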
class_labels = np.array(sorted(train_df['label'].unique()))
print(train_generator.class_indices)

weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=train_df['label'])
class_weights = dict(zip(train_generator.class_indices.values(), weights))
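
# Transfer learning: ImageNet-pretrained ResNet50 with a fresh 6-way
# softmax head. In Keras's ResNet50, index 143 is where the conv5 blocks
# begin, so layers[:143] freezes everything through conv4 and fine-tunes
# only conv5 plus the new head.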
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(384, 384, 3))

for layer in base_model.layers[:143]:
    layer.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)  # regularise the new head
x = Dense(6, activation='softmax')(x)  # six TrashNet classes

model = Model(inputs=base_model.input, outputs=x)
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])
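
# Halve the learning rate after 2 stagnant epochs, stop after 8 and roll
# back to the best weights, and checkpoint the best model to disk.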
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.00001)
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=8, restore_best_weights=True, verbose=1)
model_checkpoint = ModelCheckpoint(filepath="best_model.keras", monitor="val_loss", save_best_only=True, verbose=1)

callbacks = [reduce_lr, early_stopping, model_checkpoint]
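
# Train for up to 50 epochs; class weights rebalance the loss, and early
# stopping normally ends the run sooner with the best weights restored.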
history = model.fit(
    train_generator,
    epochs=50,
    validation_data=val_generator,
    class_weight=class_weights,
    callbacks=callbacks
)
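
# If early stopping fired, `model` already holds the best weights; the
# same weights are on disk as best_model.keras. A quick validation check:
val_loss, val_acc = model.evaluate(val_generator)
print(f"val_loss={val_loss:.4f}  val_acc={val_acc:.4f}")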