Spaces:

iqramukhtiar
/

PlantDiseaseTreatmentAssistant

Sleeping

App Files Files Community

PlantDiseaseTreatmentAssistant / download_plantvillage.py

iqramukhtiar

Upload 3 files

4382bbc verified 6 months ago

raw

history blame

3.1 kB

	import os
	import requests
	import zipfile
	from tqdm import tqdm
	import shutil

	def download_plantvillage_dataset():
	    """
	    Download the PlantVillage dataset and copy its color images into ``PlantVillage/``.

	    The archive of the repository's master branch is streamed into a scratch
	    directory (``dataset_download``), extracted, searched for a sub-directory
	    named ``color``, and that directory is copied to ``PlantVillage`` in the
	    current working directory, replacing any previous copy. The URL points at
	    the whole repository archive, so the download may be large.

	    Every failure (network error, non-200 response, bad archive, missing
	    directories, filesystem error) is reported on stdout and turned into a
	    ``False`` return value so the caller can fall back to another data source;
	    no exception escapes this function.

	    Returns:
	        bool: ``True`` when ``PlantVillage/`` was (re)created from the
	        downloaded archive, ``False`` otherwise.
	    """
	    print("Downloading PlantVillage dataset sample...")

	    download_dir = 'dataset_download'
	    target_dir = 'PlantVillage'
	    dataset_url = "https://github.com/spMohanty/PlantVillage-Dataset/archive/master.zip"
	    # (connect, read) timeouts in seconds: without them a stalled connection
	    # would block the caller forever.
	    timeout = (10, 60)

	    # Scratch directory for the archive and its extracted contents
	    os.makedirs(download_dir, exist_ok=True)

	    try:
	        # A single streamed GET both checks availability and downloads.
	        # requests.head() does not follow redirects by default, and the
	        # GitHub archive URL answers with a redirect, so a separate HEAD
	        # probe compared against 200 would reject a perfectly reachable URL.
	        with requests.get(dataset_url, stream=True, timeout=timeout) as response:
	            if response.status_code != 200:
	                print(f"Cannot access dataset URL (status code: {response.status_code})")
	                print("Creating a synthetic dataset instead...")
	                return False

	            zip_path = os.path.join(download_dir, 'plantvillage.zip')
	            print(f"Downloading dataset to {zip_path}...")

	            # The server may omit content-length; total=None makes tqdm show
	            # a running byte count instead of a bogus 0-length bar.
	            total_size = int(response.headers.get('content-length', 0)) or None

	            with open(zip_path, 'wb') as f, tqdm(
	                total=total_size, unit='B', unit_scale=True, unit_divisor=1024
	            ) as progress:
	                for chunk in response.iter_content(chunk_size=64 * 1024):
	                    f.write(chunk)
	                    # Count real bytes: the last chunk is usually short.
	                    progress.update(len(chunk))

	        # Extract the dataset
	        print("Extracting dataset...")
	        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
	            zip_ref.extractall(download_dir)

	        # Find the extracted top-level directory
	        extracted_dir = next(
	            (
	                os.path.join(download_dir, item)
	                for item in os.listdir(download_dir)
	                if 'PlantVillage' in item
	                and os.path.isdir(os.path.join(download_dir, item))
	            ),
	            None,
	        )
	        if not extracted_dir:
	            print("Could not find extracted dataset directory")
	            return False

	        print("Organizing dataset...")

	        # Find the color images directory
	        color_dir = next(
	            (
	                os.path.join(root, 'color')
	                for root, dirs, _files in os.walk(extracted_dir)
	                if 'color' in dirs
	            ),
	            None,
	        )
	        if not color_dir:
	            print("Could not find color images directory")
	            return False

	        # Only discard a previous dataset once its replacement is known to
	        # exist, so a failed run does not destroy a good local copy.
	        if os.path.exists(target_dir):
	            shutil.rmtree(target_dir)

	        # Copy the color images to the PlantVillage directory
	        shutil.copytree(color_dir, target_dir)

	        print("Dataset downloaded and organized successfully")
	        return True

	    except Exception as e:
	        # Deliberately broad: this is the boundary where any download,
	        # archive or filesystem problem becomes a False return value.
	        print(f"Error downloading dataset: {e}")
	        print("Creating a synthetic dataset instead...")
	        return False

	    finally:
	        # Clean up the scratch directory on success and on every failure
	        # path, so a partial download or extraction is not left on disk.
	        shutil.rmtree(download_dir, ignore_errors=True)

	# Script entry point: fetch the dataset when this file is run directly
	# rather than imported. The boolean result is intentionally not used here.
	if __name__ == "__main__":
	    download_plantvillage_dataset()