import os
import shutil
import zipfile

import requests
from tqdm import tqdm


def _save_response_body(response, dest_path, chunk_size=64 * 1024):
    """Stream the body of ``response`` into ``dest_path`` with a progress bar.

    The progress bar counts bytes. When the server sends no
    ``content-length`` header the total is unknown and the bar only shows
    the running byte count.
    """
    total_size = int(response.headers.get('content-length', 0))
    with open(dest_path, 'wb') as f, tqdm(
        total=total_size or None,
        unit='B',
        unit_scale=True,
        unit_divisor=1024,
    ) as progress:
        for chunk in response.iter_content(chunk_size=chunk_size):
            f.write(chunk)
            progress.update(len(chunk))


def _find_extracted_dir(download_dir):
    """Return the first directory in ``download_dir`` whose name contains
    'PlantVillage', or None when there is no such directory."""
    for item in os.listdir(download_dir):
        candidate = os.path.join(download_dir, item)
        if os.path.isdir(candidate) and 'PlantVillage' in item:
            return candidate
    return None


def _find_color_dir(extracted_dir):
    """Return the path of the first directory named 'color' found while
    walking ``extracted_dir``, or None when there is none."""
    for root, dirs, _files in os.walk(extracted_dir):
        if 'color' in dirs:
            return os.path.join(root, 'color')
    return None


def download_plantvillage_dataset():
    """
    Download the PlantVillage archive and install its colour images.

    The archive at the hard-coded URL is downloaded into the scratch
    directory ``dataset_download``, extracted, and the first directory named
    ``color`` found inside it is copied to ``PlantVillage`` (replacing any
    existing directory of that name). The scratch directory is removed
    afterwards, whether or not the download succeeded.

    Returns:
        bool: True when ``PlantVillage`` was populated; False on any failure
        (URL not reachable, unexpected archive layout, I/O error, ...).
        Failures are reported on stdout rather than raised, so the caller
        can fall back to another data source.
    """
    print("Downloading PlantVillage dataset sample...")

    download_dir = 'dataset_download'
    target_dir = 'PlantVillage'
    # (connect timeout, read timeout) in seconds, so a stalled server
    # cannot hang the script forever.
    request_timeout = (10, 60)

    # URL for a small subset of PlantVillage dataset
    # Note: This is a placeholder URL. In a real scenario, you would use the actual dataset URL.
    dataset_url = "https://github.com/spMohanty/PlantVillage-Dataset/archive/master.zip"

    try:
        # Create directory for dataset
        os.makedirs(download_dir, exist_ok=True)
        zip_path = os.path.join(download_dir, 'plantvillage.zip')

        # The accessibility check is done on the streamed GET itself rather
        # than on a separate HEAD request: requests.head() does not follow
        # redirects by default, so a URL that answers with a redirect would
        # be reported as inaccessible even though the download would work.
        with requests.get(
            dataset_url, stream=True, timeout=request_timeout
        ) as response:
            if response.status_code != 200:
                print(f"Cannot access dataset URL (status code: {response.status_code})")
                print("Creating a synthetic dataset instead...")
                return False

            # Download the dataset
            print(f"Downloading dataset to {zip_path}...")
            _save_response_body(response, zip_path)

        # Extract the dataset
        print("Extracting dataset...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(download_dir)

        # Find the extracted directory
        extracted_dir = _find_extracted_dir(download_dir)
        if not extracted_dir:
            print("Could not find extracted dataset directory")
            return False

        print("Organizing dataset...")

        # Locate the colour images BEFORE touching an existing PlantVillage
        # directory, so a malformed archive cannot destroy a good dataset.
        color_dir = _find_color_dir(extracted_dir)
        if not color_dir:
            print("Could not find color images directory")
            return False

        # Replace any previous dataset with the freshly downloaded one.
        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)
        shutil.copytree(color_dir, target_dir)

        print("Dataset downloaded and organized successfully")
        return True

    except Exception as e:
        # Deliberately broad: this is the script's top-level boundary and
        # every failure is turned into a False return for the caller.
        print(f"Error downloading dataset: {e}")
        print("Creating a synthetic dataset instead...")
        return False

    finally:
        # Clean up the scratch directory on success and on failure alike;
        # a cleanup problem must not mask the real outcome.
        shutil.rmtree(download_dir, ignore_errors=True)


if __name__ == "__main__":
    download_plantvillage_dataset()