|
|
import os |
|
|
import requests |
|
|
import zipfile |
|
|
from tqdm import tqdm |
|
|
import shutil |
|
|
|
|
|
def _download_archive(url, dest_path, timeout):
    """Stream the file at *url* into *dest_path*, showing a byte-based progress bar."""
    # The context manager releases the streamed connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail on 4xx/5xx instead of saving an error page as the zip file.
        response.raise_for_status()

        # Content-Length may be missing; with total=None tqdm shows a running
        # byte count instead of a bogus 0-length bar.
        total_bytes = int(response.headers.get('content-length', 0)) or None

        with open(dest_path, 'wb') as f, tqdm(
            total=total_bytes, unit='B', unit_scale=True, unit_divisor=1024
        ) as progress:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
                progress.update(len(chunk))


def _find_color_dir(extracted_dir):
    """Return the first directory named 'color' under *extracted_dir*, or None."""
    for root, dirs, _files in os.walk(extracted_dir):
        if 'color' in dirs:
            return os.path.join(root, 'color')
    return None


def download_plantvillage_dataset():
    """
    Download the PlantVillage repository archive and keep its color images.

    The master-branch zip is fetched from GitHub into the temporary
    ``dataset_download`` directory and extracted there. The first directory
    named ``color`` found inside the extracted tree is copied to
    ``PlantVillage`` in the current working directory, replacing any existing
    ``PlantVillage`` directory. The ``dataset_download`` directory is removed
    before returning, whether or not the download succeeded.

    Returns:
        bool: True if ``PlantVillage`` was populated; False if the URL was
        unreachable, the expected directories were not found, or any error
        occurred. On False a message is printed; this function does not
        itself create a fallback dataset.
    """
    print("Downloading PlantVillage dataset sample...")

    download_dir = 'dataset_download'
    target_dir = 'PlantVillage'
    dataset_url = "https://github.com/spMohanty/PlantVillage-Dataset/archive/master.zip"
    # (connect, read) timeouts in seconds so a stalled connection cannot hang forever.
    timeout = (10, 60)

    try:
        os.makedirs(download_dir, exist_ok=True)

        # requests.head() does not follow redirects by default, and this
        # archive URL answers with a redirect, so without allow_redirects the
        # status check below would reject a perfectly reachable URL.
        response = requests.head(dataset_url, allow_redirects=True, timeout=timeout)
        if response.status_code != 200:
            print(f"Cannot access dataset URL (status code: {response.status_code})")
            print("Creating a synthetic dataset instead...")
            return False

        zip_path = os.path.join(download_dir, 'plantvillage.zip')
        print(f"Downloading dataset to {zip_path}...")
        _download_archive(dataset_url, zip_path, timeout)

        print("Extracting dataset...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(download_dir)

        # The archive unpacks into a single top-level folder whose name
        # contains the repository name.
        extracted_dir = None
        for item in os.listdir(download_dir):
            candidate = os.path.join(download_dir, item)
            if os.path.isdir(candidate) and 'PlantVillage' in item:
                extracted_dir = candidate
                break

        if not extracted_dir:
            print("Could not find extracted dataset directory")
            return False

        print("Organizing dataset...")
        color_dir = _find_color_dir(extracted_dir)
        if not color_dir:
            print("Could not find color images directory")
            return False

        # Only discard a previous dataset once its replacement is known to
        # exist, so a failed run never destroys a good copy.
        if os.path.exists(target_dir):
            shutil.rmtree(target_dir)
        shutil.copytree(color_dir, target_dir)

        print("Dataset downloaded and organized successfully")
        return True

    except Exception as e:
        # Deliberate best-effort boundary: any failure is reported and turned
        # into a False result so the caller can fall back.
        print(f"Error downloading dataset: {e}")
        print("Creating a synthetic dataset instead...")
        return False

    finally:
        # Remove the temporary archive and extraction tree on every exit path.
        shutil.rmtree(download_dir, ignore_errors=True)
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the download; the boolean result is not used.
    download_plantvillage_dataset()
|
|
|