import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from matplotlib.patches import Patch
from ipywidgets import IntSlider, interact, Layout
from IPython.display import display
import zipfile
Introduction
In this week’s discussion section, we will use a dataset containing images of different plant diseases, and classify these images into different clusters. We will create a widget to see how our model classified a few of the images, as well as see how our classification changes when we change the value of K.
Data
The dataset this week is a zipped file containing many different folders of plant images. Each folder represents a different plant disease, and every image in that folder shows the corresponding disease. The dataset can be found here.
Exercise
Load in libraries and data
Function to unzip the zipped plant data
def unzip(zip_path, extract_to):
    """Extract every member of a zip archive into a directory.

    Args:
        zip_path: Path to the ``.zip`` file to read.
        extract_to: Directory that receives the extracted contents. It is
            created (including missing parent directories) if it does not
            already exist.

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If ``zip_path`` is not a valid zip archive.
    """
    # Ensure the extraction directory exists; exist_ok avoids the separate
    # "check, then create" step and is a no-op when the folder is present.
    os.makedirs(extract_to, exist_ok=True)

    # Open the zip file
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # Extract all the contents into the directory
        zip_ref.extractall(extract_to)
        print(f"Files extracted to {extract_to}")
# Unzip the plant disease archive into the data folder used by the rest of the exercise
unzip("../data/plant_disease.zip", "../data/plant_disease")
Files extracted to ../data/plant_disease
Use the function above to unzip your data folder. The first argument is the path to your zip file, and the second is the location and name of the new folder that will hold the extracted files.
unzip('/path/to/zipped/file.zip', 'path/to/unzipped/folder')
Now that we have our data in the correct format (unzipped!), let’s preprocess our data.
Preprocess data
Function to load image data
# Function to open and standardize images used in model
def load_images(base_path, max_per_folder=20, image_size=(100, 100)):
    """Load a capped number of images from every class subfolder.

    Each subdirectory of ``base_path`` is treated as one class. Subfolders are
    visited in sorted name order; inside each one, files whose names end in
    ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are opened, converted
    to RGB, and resized to ``image_size`` with Lanczos resampling.

    Args:
        base_path: Directory containing one subfolder per class.
        max_per_folder: Maximum number of images loaded from each subfolder.
        image_size: ``(width, height)`` every image is resized to.

    Returns:
        A tuple ``(images, labels, class_names)``:
        ``images`` is a NumPy array stacking the loaded image arrays,
        ``labels`` is a NumPy array holding, for each image, the index of its
        class in ``class_names``, and ``class_names`` is the list of subfolder
        names in the order they were visited.
    """
    images = []       # Image arrays, one per successfully loaded file
    labels = []       # Class index of each image (position in class_names)
    class_names = []  # Names of the class folders, in visiting order

    for folder in sorted(os.listdir(base_path)):
        # Join base path with the entry name; skip anything that is not a folder
        folder_path = os.path.join(base_path, folder)
        if not os.path.isdir(folder_path):
            continue

        # Use the position in class_names as the label so that
        # class_names[label] is always valid, even when base_path also
        # contains stray non-directory entries that are skipped above.
        label = len(class_names)
        class_names.append(folder)
        print(f"Loading from {folder}...")

        count = 0
        # NOTE: os.listdir returns entries in arbitrary order, so which files
        # make the cut can differ between machines.
        for img_file in os.listdir(folder_path):
            if count >= max_per_folder:  # Stop once the per-folder cap is hit
                break
            if not img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue  # Not a supported image format

            try:
                img_path = os.path.join(folder_path, img_file)
                with Image.open(img_path) as img:
                    # Standardize color channels, then standardize dimensions
                    rgb = img.convert('RGB')
                    resized = rgb.resize(image_size, Image.Resampling.LANCZOS)
                    pixels = np.array(resized)
            except Exception as e:
                # Best effort: report the unreadable file and keep going
                print(f"Error with {img_file}: {e}")
                continue

            images.append(pixels)
            labels.append(label)
            count += 1

    return np.array(images), np.array(labels), class_names
# Load the standardized images, their numeric labels, and the class folder names
data_path = "../data/plant_disease"
images, labels, class_names = load_images(data_path)
print(f"Loaded {len(images)} images from {len(class_names)} disease classes")
Loading from Apple___Apple_scab...
Loading from Apple___Black_rot...
Loading from Apple___Cedar_apple_rust...
Loading from Apple___healthy...
Loading from Background_without_leaves...
Loading from Blueberry___healthy...
Loading from Cherry___Powdery_mildew...
Loading from Cherry___healthy...
Loading from Corn___Cercospora_leaf_spot Gray_leaf_spot...
Loading from Corn___Common_rust...
Loading from Corn___Northern_Leaf_Blight...
Loading from Corn___healthy...
Loading from Grape___Black_rot...
Loading from Grape___Esca_(Black_Measles)...
Loading from Grape___Leaf_blight_(Isariopsis_Leaf_Spot)...
Loading from Grape___healthy...
Loading from Orange___Haunglongbing_(Citrus_greening)...
Loading from Peach___Bacterial_spot...
Loading from Peach___healthy...
Loading from Pepper,_bell___Bacterial_spot...
Loading from Pepper,_bell___healthy...
Loading from Potato___Early_blight...
Loading from Potato___Late_blight...
Loading from Potato___healthy...
Loading from Raspberry___healthy...
Loading from Soybean___healthy...
Loading from Squash___Powdery_mildew...
Loading from Strawberry___Leaf_scorch...
Loading from Strawberry___healthy...
Loading from Tomato___Bacterial_spot...
Loading from Tomato___Early_blight...
Loading from Tomato___Late_blight...
Loading from Tomato___Leaf_Mold...
Loading from Tomato___Septoria_leaf_spot...
Loading from Tomato___Spider_mites Two-spotted_spider_mite...
Loading from Tomato___Target_Spot...
Loading from Tomato___Tomato_Yellow_Leaf_Curl_Virus...
Loading from Tomato___Tomato_mosaic_virus...
Loading from Tomato___healthy...
Loaded 780 images from 39 disease classes