File size: 1,969 Bytes
78446ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os
import requests
from tqdm import tqdm
from datasets import load_dataset
import shutil

def download_plantvillage_from_huggingface():
    """
    Download the PlantVillage dataset from Hugging Face and organize it for training.

    The ``train`` split of ``GVJahnavi/PlantVillage_dataset`` is written to disk as
    ``PlantVillage/<class name>/<class name>_<i>.jpg``, where ``i`` counts the
    samples of that class in dataset order. The list of class names is also
    written to ``class_names.json`` in the current working directory.

    Returns:
        bool: True when every image and the class-name file were written,
        False if any step raised (the error is printed, not re-raised).
    """
    # json is not imported at module level, so it is brought in locally.
    import json

    print("Downloading PlantVillage dataset from Hugging Face...")

    # Create directory for the dataset
    os.makedirs('PlantVillage', exist_ok=True)

    try:
        # Load the dataset from Hugging Face
        dataset = load_dataset("GVJahnavi/PlantVillage_dataset")
        train_split = dataset['train']
        print(f"Dataset loaded successfully with {len(train_split)} training samples")

        # Get unique labels
        labels = train_split.features['label'].names
        print(f"Found {len(labels)} classes: {labels}")

        # Group row indices by class in a single pass over the label column.
        # This replaces one full `filter` scan of the split per class.
        indices_by_label = {}
        for row_idx, label in enumerate(train_split['label']):
            indices_by_label.setdefault(label, []).append(row_idx)

        # Create directories for each class and export its images
        for label_idx, label_name in enumerate(labels):
            label_dir = os.path.join('PlantVillage', label_name)
            os.makedirs(label_dir, exist_ok=True)

            class_indices = indices_by_label.get(label_idx, [])
            print(f"Processing class {label_name} with {len(class_indices)} samples")
            if not class_indices:
                # Nothing to export; the (empty) class directory is still created.
                continue

            # Indices are in ascending row order, so `i` below numbers the
            # samples of this class in dataset order.
            class_samples = train_split.select(class_indices)

            # Save images for this class
            for i, sample in enumerate(tqdm(class_samples, desc=f"Saving {label_name}")):
                img = sample['image']
                # JPEG cannot store alpha or palette modes (e.g. RGBA, P);
                # saving those raises OSError, so convert them to RGB first.
                if img.mode not in ('L', 'RGB', 'CMYK'):
                    img = img.convert('RGB')
                img_path = os.path.join(label_dir, f"{label_name}_{i}.jpg")
                img.save(img_path)

        # Save class names to a file
        with open('class_names.json', 'w', encoding='utf-8') as f:
            json.dump(labels, f)

        print("Dataset downloaded and organized successfully")
        return True

    except Exception as e:
        # Script-level boundary: report the failure and signal it via the
        # return value instead of propagating.
        print(f"Error downloading dataset from Hugging Face: {e}")
        return False

if __name__ == "__main__":
    # The downloader reports failure through its boolean return value;
    # turn that into a non-zero exit status so shells and CI can detect it.
    if not download_plantvillage_from_huggingface():
        raise SystemExit(1)