# PlantDiseaseTreatmentAssistant/download_huggingface_dataset.py
import json
import os

from datasets import load_dataset
from tqdm import tqdm


def download_plantvillage_from_huggingface():
    """
    Downloads the PlantVillage dataset from Hugging Face and organizes it on disk
    as one folder per class, ready for training.
    """
    print("Downloading PlantVillage dataset from Hugging Face...")

    # Create the root directory for the dataset
    os.makedirs('PlantVillage', exist_ok=True)

    try:
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset("GVJahnavi/PlantVillage_dataset")
        print(f"Dataset loaded successfully with {len(dataset['train'])} training samples")

        # Get the class names from the label feature
        labels = dataset['train'].features['label'].names
        print(f"Found {len(labels)} classes: {labels}")

        # Create a directory for each class and save its images
        for label_idx, label_name in enumerate(labels):
            label_dir = os.path.join('PlantVillage', label_name)
            os.makedirs(label_dir, exist_ok=True)

            # Select only the samples belonging to this class
            class_samples = dataset['train'].filter(lambda example: example['label'] == label_idx)
            print(f"Processing class {label_name} with {len(class_samples)} samples")

            # Save each image as a JPEG; convert to RGB first so palette or
            # alpha-channel images do not break JPEG encoding
            for i, sample in enumerate(tqdm(class_samples, desc=f"Saving {label_name}")):
                img = sample['image'].convert('RGB')
                img_path = os.path.join(label_dir, f"{label_name}_{i}.jpg")
                img.save(img_path)

        # Save the class names so training/inference code can map indices back to labels
        with open('class_names.json', 'w') as f:
            json.dump(labels, f)

        print("Dataset downloaded and organized successfully")
        return True
    except Exception as e:
        print(f"Error downloading dataset from Hugging Face: {e}")
        return False


if __name__ == "__main__":
    download_plantvillage_from_huggingface()
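
# A minimal sketch of how the resulting layout (PlantVillage/<class_name>/<image>.jpg)
# could be consumed for training, assuming torchvision is installed (it is not a
# dependency of this script); the folder-per-class convention maps directly onto
# torchvision's ImageFolder loader:
#
#     from torchvision import datasets, transforms
#
#     train_ds = datasets.ImageFolder(
#         'PlantVillage',
#         transform=transforms.Compose([
#             transforms.Resize((224, 224)),
#             transforms.ToTensor(),
#         ]),
#     )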