import os
import json

from datasets import load_dataset
from tqdm import tqdm


def download_plantvillage_from_huggingface():
    """
    Downloads the PlantVillage dataset from Hugging Face and organizes it for training.
    """
    print("Downloading PlantVillage dataset from Hugging Face...")

    # Create the root directory for the dataset
    os.makedirs('PlantVillage', exist_ok=True)

    try:
        # Load the dataset from Hugging Face
        dataset = load_dataset("GVJahnavi/PlantVillage_dataset")
        print(f"Dataset loaded successfully with {len(dataset['train'])} training samples")

        # Get the class names
        labels = dataset['train'].features['label'].names
        print(f"Found {len(labels)} classes: {labels}")

        # Create a directory for each class and fill it with that class's images
        for label_idx, label_name in enumerate(labels):
            label_dir = os.path.join('PlantVillage', label_name)
            os.makedirs(label_dir, exist_ok=True)

            # Select the samples belonging to this class
            class_samples = dataset['train'].filter(lambda example: example['label'] == label_idx)
            print(f"Processing class {label_name} with {len(class_samples)} samples")

            # Save each image as a JPEG in the class directory
            for i, sample in enumerate(tqdm(class_samples, desc=f"Saving {label_name}")):
                img = sample['image']
                img_path = os.path.join(label_dir, f"{label_name}_{i}.jpg")
                # Convert to RGB so palette/alpha images can be written as JPEG
                img.convert('RGB').save(img_path)

        # Save the class names to a file for use at training/inference time
        with open('class_names.json', 'w') as f:
            json.dump(labels, f)

        print("Dataset downloaded and organized successfully")
        return True
    except Exception as e:
        print(f"Error downloading dataset from Hugging Face: {e}")
        return False


if __name__ == "__main__":
    download_plantvillage_from_huggingface()
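
# --- Usage sketch (not part of the original script; torchvision dependency and
# --- transform/batch-size values below are illustrative assumptions) ---
# Once the script has populated PlantVillage/<class_name>/ folders, the result is a
# standard class-per-directory layout, so it can be consumed with a directory-based
# loader such as torchvision's ImageFolder, e.g.:
#
#     from torch.utils.data import DataLoader
#     from torchvision import datasets, transforms
#
#     transform = transforms.Compose([
#         transforms.Resize((224, 224)),
#         transforms.ToTensor(),
#     ])
#     train_set = datasets.ImageFolder("PlantVillage", transform=transform)
#     train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
#
# The class index assigned by ImageFolder follows the sorted folder names, which can
# be cross-checked against class_names.json written above.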