iqramukhtiar committed on
Commit
287f451
·
verified ·
1 Parent(s): dfbb97c

Update data_utils.py

Browse files
Files changed (1) hide show
  1. data_utils.py +75 -158
data_utils.py CHANGED
@@ -1,185 +1,102 @@
1
  import pandas as pd
2
- import requests
3
- import io
4
- from typing import Dict, List, Optional, Tuple, Union
5
 
6
- def load_csv_data(url: str) -> pd.DataFrame:
7
- """
8
- Load CSV data from a URL
9
-
10
- Args:
11
- url: URL to the CSV file
12
-
13
- Returns:
14
- DataFrame containing the CSV data
15
- """
16
  try:
17
- response = requests.get(url)
18
- response.raise_for_status()
19
- df = pd.read_csv(io.StringIO(response.text))
20
- return df
21
  except Exception as e:
22
- print(f"Error loading CSV: {e}")
23
- # Create a fallback DataFrame if loading fails
24
- return pd.DataFrame(columns=["Crop", "Disease", "Symptoms", "Treatment", "Medicine/Chemical Control"])
 
 
 
 
 
 
25
 
26
- def analyze_csv_data(df: pd.DataFrame) -> Dict:
27
- """
28
- Analyze the CSV data to extract useful information
29
-
30
- Args:
31
- df: DataFrame containing the CSV data
32
-
33
- Returns:
34
- Dictionary containing analysis results
35
- """
36
- # Get unique crops and diseases
37
- unique_crops = df['Crop'].unique().tolist()
38
- unique_diseases = df['Disease'].unique().tolist()
39
-
40
- # Count diseases by crop
41
- diseases_by_crop = df.groupby('Crop')['Disease'].count().to_dict()
42
-
43
- # Get common treatments
44
- common_treatments = df['Treatment'].value_counts().head(5).to_dict()
45
-
46
- # Get common chemicals
47
- common_chemicals = df['Medicine/Chemical Control'].value_counts().head(5).to_dict()
48
-
49
- return {
50
- 'unique_crops': unique_crops,
51
- 'unique_diseases': unique_diseases,
52
- 'diseases_by_crop': diseases_by_crop,
53
- 'common_treatments': common_treatments,
54
- 'common_chemicals': common_chemicals
55
  }
 
56
 
57
- def search_disease_info(df: pd.DataFrame, query: str) -> List[Dict]:
58
- """
59
- Search for disease information in the CSV data
60
-
61
- Args:
62
- df: DataFrame containing the CSV data
63
- query: Search query
64
-
65
- Returns:
66
- List of dictionaries containing matching disease information
67
- """
68
- query_lower = query.lower()
69
-
70
- # Search in crop names
71
- crop_matches = df[df['Crop'].str.lower().str.contains(query_lower)]
72
-
73
- # Search in disease names
74
- disease_matches = df[df['Disease'].str.lower().str.contains(query_lower)]
75
-
76
- # Search in symptoms
77
- symptom_matches = df[df['Symptoms'].str.lower().str.contains(query_lower)]
78
-
79
- # Combine all matches and remove duplicates
80
- all_matches = pd.concat([crop_matches, disease_matches, symptom_matches]).drop_duplicates()
81
-
82
- # Convert to list of dictionaries
83
- results = all_matches.to_dict('records')
84
-
 
 
 
85
  return results
86
 
87
- def extract_entities_from_question(question: str, df: pd.DataFrame) -> Tuple[Optional[str], Optional[str]]:
88
- """
89
- Extract crop and disease entities from a question
90
-
91
- Args:
92
- question: Question text
93
- df: DataFrame containing the CSV data
94
-
95
- Returns:
96
- Tuple of (crop, disease) extracted from the question
97
- """
98
- question_lower = question.lower()
99
 
100
  # Get all crops and diseases from the dataframe
101
- crops = list(set(df['Crop'].str.lower()))
102
- diseases = list(set(df['Disease'].str.lower()))
103
 
104
- # Find crop mentions
105
  found_crop = None
 
 
106
  for crop in crops:
107
- if crop in question_lower:
108
  found_crop = crop
109
  break
110
-
111
- # Find disease mentions
112
- found_disease = None
113
  for disease in diseases:
114
- if disease in question_lower:
115
  found_disease = disease
116
  break
117
 
118
  return found_crop, found_disease
119
 
120
- def get_treatment_for_disease(df: pd.DataFrame, crop: Optional[str], disease: str) -> Dict:
121
- """
122
- Get treatment information for a specific disease
123
-
124
- Args:
125
- df: DataFrame containing the CSV data
126
- crop: Optional crop name
127
- disease: Disease name
128
-
129
- Returns:
130
- Dictionary containing treatment information
131
- """
132
- if crop:
133
- # Look for specific crop-disease combination
134
- matches = df[(df['Crop'].str.lower() == crop) &
135
- (df['Disease'].str.lower() == disease)]
136
 
137
  if not matches.empty:
138
- match = matches.iloc[0]
139
  return {
140
- 'crop': match['Crop'],
141
- 'disease': match['Disease'],
142
- 'symptoms': match['Symptoms'],
143
- 'treatment': match['Treatment'],
144
- 'medicine': match['Medicine/Chemical Control'],
145
- 'exact_match': True
146
  }
147
 
148
- # Look for disease in any crop
149
- matches = df[df['Disease'].str.lower() == disease]
150
-
151
- if not matches.empty:
152
- match = matches.iloc[0]
153
- return {
154
- 'crop': match['Crop'],
155
- 'disease': match['Disease'],
156
- 'symptoms': match['Symptoms'],
157
- 'treatment': match['Treatment'],
158
- 'medicine': match['Medicine/Chemical Control'],
159
- 'exact_match': False
160
- }
161
-
162
- # Try partial match on disease name
163
- matches = df[df['Disease'].str.lower().str.contains(disease)]
164
-
165
- if not matches.empty:
166
- match = matches.iloc[0]
167
- return {
168
- 'crop': match['Crop'],
169
- 'disease': match['Disease'],
170
- 'symptoms': match['Symptoms'],
171
- 'treatment': match['Treatment'],
172
- 'medicine': match['Medicine/Chemical Control'],
173
- 'exact_match': False,
174
- 'partial_match': True
175
- }
176
-
177
- return {
178
- 'crop': None,
179
- 'disease': disease,
180
- 'symptoms': None,
181
- 'treatment': None,
182
- 'medicine': None,
183
- 'exact_match': False,
184
- 'found': False
185
- }
 
1
  import pandas as pd
2
+ import re
 
 
3
 
4
def load_csv_data(file_path):
    """Load the crop-disease CSV into a DataFrame.

    Args:
        file_path: Anything ``pandas.read_csv`` accepts (path string,
            path-like object, or open text buffer).

    Returns:
        The parsed DataFrame with surrounding whitespace removed from the
        column names. If reading fails for any reason, the error is printed
        and a two-row demonstration DataFrame is returned instead, so the
        caller always receives a frame with the expected columns.
    """
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        # Deliberate best-effort: any read/parse failure falls back to
        # demonstration data rather than propagating to the caller.
        print(f"Error loading CSV: {e}")
        return pd.DataFrame({
            'Crop': ['Tomato', 'Apple'],
            'Disease': ['Early Blight', 'Apple Scab'],
            'Symptoms': ['Yellow spots on leaves', 'Dark scab-like lesions'],
            'Treatment': ['Remove affected leaves', 'Prune affected branches'],
            'Medicine/Chemical Control': ['Copper fungicide', 'Sulfur spray']
        })

    # Headers such as "Crop " or " Disease" would otherwise not be found
    # under the plain names ('Crop', 'Disease', ...) used to index the frame.
    return df.rename(
        columns=lambda name: name.strip() if isinstance(name, str) else name
    )
18
 
19
def analyze_csv_data(df: pd.DataFrame) -> dict:
    """Summarise the crop-disease table.

    Args:
        df: DataFrame with at least the 'Crop' and 'Disease' columns.

    Returns:
        A dict with:
            total_entries: number of rows in ``df``.
            unique_crops: count of distinct non-missing crop names.
            unique_diseases: count of distinct non-missing disease names.
            crops: the distinct non-missing crop names, in order of first
                appearance.
    """
    crop_column = df['Crop']
    return {
        "total_entries": len(df),
        "unique_crops": crop_column.nunique(),
        "unique_diseases": df['Disease'].nunique(),
        # nunique() ignores missing values, so drop them here as well;
        # otherwise the list could contain NaN and be longer than the count.
        "crops": crop_column.dropna().unique().tolist(),
    }
28
 
29
def search_disease_info(df: pd.DataFrame, query: str) -> list:
    """Search every text column for ``query`` and rank the matching rows.

    The match is a case-insensitive substring test. Each column that contains
    the query adds its weight to the row's score: Disease 5, Crop 3,
    Symptoms 2, Treatment 1, Medicine/Chemical Control 1.

    Args:
        df: DataFrame with the columns 'Crop', 'Disease', 'Symptoms',
            'Treatment' and 'Medicine/Chemical Control'.
        query: Text to look for. Leading/trailing whitespace is ignored.

    Returns:
        A list of dicts, one per matching row, holding 'score' plus the five
        column values, sorted by descending score (ties keep row order).
        A blank query returns an empty list.
    """
    needle = query.strip().lower()
    if not needle:
        # "" is a substring of every string, so a blank query would
        # otherwise "match" every column of every row.
        return []

    weighted_columns = (
        ('Crop', 3),
        ('Disease', 5),
        ('Symptoms', 2),
        ('Treatment', 1),
        ('Medicine/Chemical Control', 1),
    )

    results = []
    for _, row in df.iterrows():
        score = 0
        for column, weight in weighted_columns:
            value = row[column]
            # Missing cells must not be stringified: str(NaN) is "nan",
            # which would match queries such as "na" or "nan".
            if pd.isna(value):
                continue
            if needle in str(value).lower():
                score += weight

        if score > 0:
            results.append({
                'score': score,
                'Crop': row['Crop'],
                'Disease': row['Disease'],
                'Symptoms': row['Symptoms'],
                'Treatment': row['Treatment'],
                'Medicine/Chemical Control': row['Medicine/Chemical Control']
            })

    # Most relevant rows first; list.sort is stable, so ties keep row order.
    results.sort(key=lambda item: item['score'], reverse=True)
    return results
61
 
62
def extract_entities_from_question(question: str, df: pd.DataFrame) -> tuple:
    """Find a crop name and a disease name mentioned in a question.

    A name counts as mentioned when its lower-cased form is a substring of
    the lower-cased question. When several names from the same column match,
    the longest one wins (ties are broken alphabetically), so "pineapple" is
    preferred over "apple" and the result does not depend on iteration order.

    Args:
        question: Free-text question.
        df: DataFrame with 'Crop' and 'Disease' columns.

    Returns:
        A ``(crop, disease)`` tuple of lower-cased names; either element is
        ``None`` when no name from that column appears in the question.
    """
    text = question.lower()

    def longest_mention(column):
        # Missing cells are dropped first: a NaN on the left-hand side of
        # ``in`` raises TypeError when the right-hand side is a string.
        names = df[column].dropna().astype(str).str.lower().unique().tolist()
        # Blank names are skipped because "" is a substring of everything.
        hits = [name for name in names if name.strip() and name in text]
        return min(hits, key=lambda name: (-len(name), name), default=None)

    found_crop = longest_mention('Crop')
    found_disease = longest_mention('Disease')

    return found_crop, found_disease
85
 
86
def get_treatment_for_disease(df: pd.DataFrame, crop, disease):
    """Look up the treatment record for a specific crop and disease.

    Names are compared case-insensitively and with surrounding whitespace
    ignored, both in the table and in the arguments.

    Args:
        df: DataFrame with the columns 'Crop', 'Disease', 'Symptoms',
            'Treatment' and 'Medicine/Chemical Control'.
        crop: Crop name, or ``None``/empty when unknown.
        disease: Disease name, or ``None``/empty when unknown.

    Returns:
        A dict with the keys 'crop', 'disease', 'symptoms', 'treatment' and
        'medicine' taken from the first matching row, or ``None`` when either
        argument is missing or no row matches.
    """
    if not (crop and disease):
        return None

    wanted_crop = crop.strip().lower()
    wanted_disease = disease.strip().lower()

    # Normalise the table the same way as the arguments so that values like
    # " Tomato" or "Early Blight " still match. Missing cells stay NaN and
    # compare as False.
    crop_names = df['Crop'].str.strip().str.lower()
    disease_names = df['Disease'].str.strip().str.lower()
    matches = df[(crop_names == wanted_crop) & (disease_names == wanted_disease)]

    if matches.empty:
        return None

    row = matches.iloc[0]
    return {
        'crop': row['Crop'],
        'disease': row['Disease'],
        'symptoms': row['Symptoms'],
        'treatment': row['Treatment'],
        'medicine': row['Medicine/Chemical Control']
    }