# PlantDiseaseTreatmentAssistant / download_plantvillage.py
# Uploaded by iqramukhtiar ("Upload 3 files"), commit 4382bbc (verified), 3.1 kB.
import os
import requests
import zipfile
from tqdm import tqdm
import shutil
def _find_color_dir(search_root):
    """Return the first directory named ``color`` below *search_root*, or None."""
    for root, dirs, _files in os.walk(search_root):
        if 'color' in dirs:
            return os.path.join(root, 'color')
    return None


def download_plantvillage_dataset():
    """
    Download the PlantVillage dataset archive and install its colour images.

    The archive is streamed into a temporary ``dataset_download`` directory,
    extracted, and the first directory named ``color`` found inside the
    extracted tree is copied to ``PlantVillage`` in the current working
    directory (replacing any existing ``PlantVillage`` directory). The
    temporary directory is removed afterwards, whether or not the download
    succeeded.

    Returns:
        bool: True when ``PlantVillage`` was populated; False on any failure
        (unreachable URL, network error, bad archive, unexpected archive
        layout). Failures are reported on stdout and never raised, so callers
        can fall back to a synthetic dataset.
    """
    print("Downloading PlantVillage dataset sample...")

    download_dir = 'dataset_download'
    target_dir = 'PlantVillage'
    # NOTE(review): this is the master-branch archive of the repository, not a
    # reduced sample — swap in a smaller URL if only a subset is wanted.
    dataset_url = "https://github.com/spMohanty/PlantVillage-Dataset/archive/master.zip"
    # (connect, read) timeouts in seconds so a stalled server cannot hang us.
    timeout = (10, 60)

    # Create directory for dataset
    os.makedirs(download_dir, exist_ok=True)

    try:
        zip_path = os.path.join(download_dir, 'plantvillage.zip')

        # A single streamed GET replaces the former HEAD pre-check: GET follows
        # redirects by default (HEAD does not), and checking the status of the
        # request that actually carries the body avoids saving an error page.
        with requests.get(dataset_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Cannot access dataset URL (status code: {response.status_code})")
                print("Creating a synthetic dataset instead...")
                return False

            print(f"Downloading dataset to {zip_path}...")
            total_size = int(response.headers.get('content-length', 0))
            # total=None lets tqdm show a plain counter when the server sends
            # no content-length.
            with open(zip_path, 'wb') as f, tqdm(
                total=total_size or None,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
            ) as progress:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
                    progress.update(len(chunk))

        # Extract the dataset
        print("Extracting dataset...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(download_dir)

        # Find the extracted directory
        extracted_dir = None
        for item in os.listdir(download_dir):
            candidate = os.path.join(download_dir, item)
            if os.path.isdir(candidate) and 'PlantVillage' in item:
                extracted_dir = candidate
                break

        if not extracted_dir:
            print("Could not find extracted dataset directory")
            return False

        print("Organizing dataset...")

        # Locate the colour images BEFORE touching any existing target, so a
        # failed run never destroys a previously installed dataset.
        color_dir = _find_color_dir(extracted_dir)
        if not color_dir:
            print("Could not find color images directory")
            return False

        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)

        # Copy the color images to PlantVillage directory
        shutil.copytree(color_dir, target_dir)

        print("Dataset downloaded and organized successfully")
        return True

    except Exception as e:
        # Deliberate best-effort boundary: any failure means "use the fallback".
        print(f"Error downloading dataset: {e}")
        print("Creating a synthetic dataset instead...")
        return False

    finally:
        # Clean up the archive and extracted tree on every exit path.
        shutil.rmtree(download_dir, ignore_errors=True)
if __name__ == "__main__":
    # Script entry point: run the download; the boolean result is not used here.
    download_plantvillage_dataset()