Spaces:

iqramukhtiar
/

PlantDiseaseTreatmentAssistant

Sleeping

App Files Files Community

iqramukhtiar committed on Apr 24

Commit

287f451

verified ·

1 Parent(s): dfbb97c

Update data_utils.py

Browse files

Files changed (1) hide show

data_utils.py +75 -158

data_utils.py CHANGED Viewed

@@ -1,185 +1,102 @@
 import pandas as pd
-import requests
-import io
-from typing import Dict, List, Optional, Tuple, Union
def load_csv_data(url: str, timeout: float = 10.0) -> pd.DataFrame:
    """
    Load CSV data from a URL

    Args:
        url: URL to the CSV file
        timeout: Seconds to wait for the server before giving up

    Returns:
        DataFrame containing the CSV data. If the download or parsing
        fails, an empty DataFrame with the expected columns is returned
        instead of raising.
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on an
        # unresponsive host and would block the whole app.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return pd.read_csv(io.StringIO(response.text))
    except Exception as e:
        # Deliberate best-effort: any failure degrades to an empty table so
        # callers can keep running with the expected column layout.
        print(f"Error loading CSV: {e}")
        return pd.DataFrame(columns=["Crop", "Disease", "Symptoms", "Treatment", "Medicine/Chemical Control"])
def analyze_csv_data(df: pd.DataFrame) -> Dict:
    """
    Analyze the CSV data to extract useful information

    Args:
        df: DataFrame containing the CSV data; must have the columns
            'Crop', 'Disease', 'Treatment' and 'Medicine/Chemical Control'

    Returns:
        Dictionary containing analysis results:
            unique_crops: distinct crop names in order of first appearance
            unique_diseases: distinct disease names in order of first appearance
            diseases_by_crop: number of non-missing disease entries per crop
            common_treatments: the five most frequent treatments with counts
            common_chemicals: the five most frequent chemical controls with counts
    """
    # Missing cells are dropped before listing unique names so these lists
    # agree with groupby/value_counts below, which skip NaN by default.
    unique_crops = df['Crop'].dropna().unique().tolist()
    unique_diseases = df['Disease'].dropna().unique().tolist()

    # Count disease rows per crop
    diseases_by_crop = df.groupby('Crop')['Disease'].count().to_dict()

    # Five most frequent treatments and chemical controls
    common_treatments = df['Treatment'].value_counts().head(5).to_dict()
    common_chemicals = df['Medicine/Chemical Control'].value_counts().head(5).to_dict()

    return {
        'unique_crops': unique_crops,
        'unique_diseases': unique_diseases,
        'diseases_by_crop': diseases_by_crop,
        'common_treatments': common_treatments,
        'common_chemicals': common_chemicals,
    }
def search_disease_info(df: pd.DataFrame, query: str) -> List[Dict]:
    """
    Search for disease information in the CSV data

    The query is matched case-insensitively as a literal substring of the
    'Crop', 'Disease' and 'Symptoms' columns.

    Args:
        df: DataFrame containing the CSV data
        query: Search query

    Returns:
        List of dictionaries (one per matching row, duplicates removed),
        with crop matches first, then disease matches, then symptom matches
    """
    query_lower = query.lower()

    # regex=False: user text such as "(" or "." must be matched literally,
    # not compiled as a pattern. na=False: missing cells count as
    # "no match" instead of producing NaN, which cannot be used as a mask.
    matches_per_column = [
        df[df[column].str.lower().str.contains(query_lower, regex=False, na=False)]
        for column in ('Crop', 'Disease', 'Symptoms')
    ]

    # Combine all matches and remove rows found through more than one column
    all_matches = pd.concat(matches_per_column).drop_duplicates()
    return all_matches.to_dict('records')
def extract_entities_from_question(question: str, df: pd.DataFrame) -> Tuple[Optional[str], Optional[str]]:
    """
    Extract crop and disease entities from a question

    Args:
        question: Question text
        df: DataFrame containing the CSV data ('Crop' and 'Disease' columns)

    Returns:
        Tuple of (crop, disease) extracted from the question, each the
        lower-cased name from the data or None when nothing is mentioned
    """
    question_lower = question.lower()

    def _longest_mention(column: str) -> Optional[str]:
        """Return the longest lower-cased name from *column* found in the question."""
        # dropna: a missing cell is a float NaN and `nan in "text"` raises.
        names = {str(value).lower() for value in df[column].dropna()}
        # Blank names are skipped because "" is a substring of everything.
        mentioned = [name for name in names if name.strip() and name in question_lower]
        if not mentioned:
            return None
        # Prefer the longest name ("pineapple" over "apple"); break ties
        # alphabetically so the result does not depend on set ordering.
        return max(mentioned, key=lambda name: (len(name), name))

    return _longest_mention('Crop'), _longest_mention('Disease')
def get_treatment_for_disease(df: pd.DataFrame, crop: Optional[str], disease: str) -> Dict:
    """
    Get treatment information for a specific disease

    Lookup order: exact crop + disease, then exact disease on any crop,
    then the first disease whose name contains the given text. All
    comparisons are case-insensitive.

    Args:
        df: DataFrame containing the CSV data
        crop: Optional crop name
        disease: Disease name

    Returns:
        Dictionary containing treatment information with the keys 'crop',
        'disease', 'symptoms', 'treatment', 'medicine' and 'exact_match'.
        A partial-name hit additionally carries 'partial_match': True; when
        nothing matches the data fields are None and 'found' is False.
    """
    # Columns are compared lower-cased, so the arguments must be too.
    disease_key = disease.lower() if disease else ''
    disease_names = df['Disease'].str.lower()

    def _first_record(mask, **flags) -> Optional[Dict]:
        """Build the result dict from the first row selected by *mask*, if any."""
        hits = df[mask]
        if hits.empty:
            return None
        row = hits.iloc[0]
        return {
            'crop': row['Crop'],
            'disease': row['Disease'],
            'symptoms': row['Symptoms'],
            'treatment': row['Treatment'],
            'medicine': row['Medicine/Chemical Control'],
            **flags,
        }

    same_disease = disease_names == disease_key

    if crop:
        # Look for specific crop-disease combination
        record = _first_record((df['Crop'].str.lower() == crop.lower()) & same_disease, exact_match=True)
        if record is not None:
            return record

    # Look for disease in any crop
    record = _first_record(same_disease, exact_match=False)
    if record is not None:
        return record

    # Try partial match on disease name. Skipped for an empty name, which
    # would otherwise "match" every row. regex=False keeps characters such
    # as "(" literal; na=False treats missing names as non-matching.
    if disease_key:
        partial = disease_names.str.contains(disease_key, regex=False, na=False)
        record = _first_record(partial, exact_match=False, partial_match=True)
        if record is not None:
            return record

    return {
        'crop': None,
        'disease': disease,
        'symptoms': None,
        'treatment': None,
        'medicine': None,
        'exact_match': False,
        'found': False,
    }

 import pandas as pd
+import re
def load_csv_data(file_path):
    """Read a CSV into a DataFrame, falling back to demo data on failure.

    Args:
        file_path: Anything ``pd.read_csv`` accepts as its first argument.

    Returns:
        The parsed DataFrame. If reading raises, the error is printed and a
        two-row demonstration DataFrame with the expected columns is
        returned instead.
    """
    try:
        frame = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error loading CSV: {str(e)}")
        # Minimal stand-in table so the rest of the app still has data
        demo_columns = ['Crop', 'Disease', 'Symptoms', 'Treatment', 'Medicine/Chemical Control']
        demo_rows = [
            ['Tomato', 'Early Blight', 'Yellow spots on leaves', 'Remove affected leaves', 'Copper fungicide'],
            ['Apple', 'Apple Scab', 'Dark scab-like lesions', 'Prune affected branches', 'Sulfur spray'],
        ]
        return pd.DataFrame(demo_rows, columns=demo_columns)
    else:
        return frame
def analyze_csv_data(df):
    """Summarise the dataset.

    Args:
        df: DataFrame with at least 'Crop' and 'Disease' columns.

    Returns:
        Dict with the row count ("total_entries"), the number of distinct
        crops and diseases ("unique_crops", "unique_diseases") and the list
        of distinct crop values ("crops").
    """
    crop_column = df['Crop']
    disease_column = df['Disease']
    return {
        "total_entries": len(df),
        "unique_crops": crop_column.nunique(),
        "unique_diseases": disease_column.nunique(),
        "crops": crop_column.unique().tolist(),
    }
def search_disease_info(df, query):
    """Search every column of the dataframe for a query and rank the hits.

    The query is matched case-insensitively as a substring. Each column a
    row matches in adds to that row's relevance score: Disease 5, Crop 3,
    Symptoms 2, Treatment 1, Medicine/Chemical Control 1.

    Args:
        df: DataFrame with the columns 'Crop', 'Disease', 'Symptoms',
            'Treatment' and 'Medicine/Chemical Control'.
        query: Text to look for. Surrounding whitespace is ignored.

    Returns:
        List of dicts (the five columns plus 'score'), highest score first.
        Empty when the query is blank or nothing matches.
    """
    needle = (query or "").strip().lower()
    if not needle:
        # A blank query is a substring of every cell and would return the
        # whole table with the maximum score.
        return []

    column_weights = (
        ('Crop', 3),
        ('Disease', 5),
        ('Symptoms', 2),
        ('Treatment', 1),
        ('Medicine/Chemical Control', 1),
    )

    results = []
    for _, row in df.iterrows():
        score = 0
        for column, weight in column_weights:
            cell = row[column]
            # Missing cells stringify to "nan", which would wrongly match
            # queries such as "nan" or "na".
            if pd.isna(cell):
                continue
            if needle in str(cell).lower():
                score += weight
        if score > 0:
            results.append({
                'score': score,
                'Crop': row['Crop'],
                'Disease': row['Disease'],
                'Symptoms': row['Symptoms'],
                'Treatment': row['Treatment'],
                'Medicine/Chemical Control': row['Medicine/Chemical Control'],
            })

    # Sort by relevance score; the sort is stable, so ties keep table order
    results.sort(key=lambda item: item['score'], reverse=True)
    return results
def extract_entities_from_question(question, df):
    """Extract crop and disease entities from a question.

    Args:
        question: Free-text question from the user.
        df: DataFrame with 'Crop' and 'Disease' columns listing known names.

    Returns:
        Tuple ``(crop, disease)``. Each element is the lower-cased name from
        the dataframe that appears in the question, or None if none does.
        When several names appear, the longest one is returned.
    """
    question = question.lower()

    def _longest_mention(column):
        """Return the longest lower-cased name from *column* found in the question."""
        # dropna: a missing cell is a float NaN and `nan in "text"` raises.
        names = {str(value).lower() for value in df[column].dropna()}
        # Blank names are skipped because "" is a substring of everything.
        mentioned = [name for name in names if name.strip() and name in question]
        if not mentioned:
            return None
        # Prefer the longest name ("pineapple" over "apple"); break ties
        # alphabetically so the result does not depend on set ordering.
        return max(mentioned, key=lambda name: (len(name), name))

    found_crop = _longest_mention('Crop')
    found_disease = _longest_mention('Disease')
    return found_crop, found_disease
def get_treatment_for_disease(df, crop, disease):
    """Get treatment information for a specific crop and disease.

    Names are compared case-insensitively and ignoring surrounding
    whitespace.

    Args:
        df: DataFrame with the columns 'Crop', 'Disease', 'Symptoms',
            'Treatment' and 'Medicine/Chemical Control'.
        crop: Crop name, or None/empty if unknown.
        disease: Disease name, or None/empty if unknown.

    Returns:
        Dict with the keys 'crop', 'disease', 'symptoms', 'treatment' and
        'medicine' taken from the first matching row, or None when either
        name is missing or no row matches both.
    """
    if not (crop and disease):
        return None

    def _normalized(column):
        # Only real strings are normalised; missing or numeric cells become
        # None so they never match. This also avoids the `.str` accessor,
        # which raises on columns that are not string-typed.
        return df[column].map(
            lambda cell: cell.strip().lower() if isinstance(cell, str) else None
        )

    crop_key = crop.strip().lower()
    disease_key = disease.strip().lower()
    matches = df[(_normalized('Crop') == crop_key) &
                 (_normalized('Disease') == disease_key)]
    if matches.empty:
        return None

    row = matches.iloc[0]
    return {
        'crop': row['Crop'],
        'disease': row['Disease'],
        'symptoms': row['Symptoms'],
        'treatment': row['Treatment'],
        'medicine': row['Medicine/Chemical Control'],
    }