Jeypi159 commited on
Commit
d2952ce
·
verified ·
1 Parent(s): aa1beec

Upload 4 files

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. app.py +275 -0
  3. divesites.csv +25 -0
  4. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ venv/
2
+ .env
app.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from flask import Flask, request, jsonify
6
+ from flask_cors import CORS
7
+ import pandas as pd
8
+ import re
9
+ import nltk
10
+ from nltk.tokenize import word_tokenize
11
+ from nltk.corpus import stopwords
12
+ from nltk.stem import WordNetLemmatizer
13
+ from string import punctuation
14
+
# Fetch the NLTK corpora needed by the preprocessing pipeline. A failure here
# is non-fatal (the endpoints degrade gracefully), so we only print a warning.
try:
    for resource in ('punkt', 'stopwords', 'wordnet'):
        nltk.download(resource, quiet=True)
    print("NLTK resources downloaded successfully")
except Exception as e:
    print(f"Warning: Failed to download NLTK resources: {e}")
23
+
# Load environment variables from .env file
load_dotenv()

# Create Flask app
app = Flask(__name__)
# Enable CORS for all routes and origins, so browser front-ends served from
# any host can call this API directly.
CORS(app, resources={r"/*": {"origins": "*"}})

# Hugging Face API configuration
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")  # Store your token as an environment variable
HF_API_URL = "https://api-inference.huggingface.co/models/"
ZERO_SHOT_MODEL = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"  # Multilingual model that supports Korean
36
+
# Function to call Hugging Face API
def query_huggingface(payload, model, timeout=30):
    """POST *payload* to the Hugging Face inference API for *model*.

    Args:
        payload: JSON-serializable request body (inputs + parameters).
        model: Model id appended to ``HF_API_URL``.
        timeout: Seconds before the HTTP request is aborted (default 30).
            The original call had no timeout, so a stalled connection could
            block the Flask worker indefinitely.

    Returns:
        The decoded JSON response, or ``None`` on any network/decoding error
        (callers treat ``None`` as "use the fallback").
    """
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    API_URL = HF_API_URL + model
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        return response.json()
    except Exception as e:
        print(f"Error calling Hugging Face API for {model}: {e}")
        return None
47
+
# Precompiled Hangul-syllable range (U+AC00..U+D7A3). Compiling once at import
# time avoids re-compiling the pattern on every request; raw string avoids
# relying on Python's escape handling inside a normal string literal.
_KOREAN_RE = re.compile(r'[\uac00-\ud7a3]')


# Simple Korean detection function
def is_korean(text):
    """Return True if *text* contains at least one Hangul syllable."""
    return bool(_KOREAN_RE.search(text))
52
+
# Zero-shot classification using Hugging Face API
def zero_shot_classify(text, candidate_labels):
    """Rank *candidate_labels* for *text* via the zero-shot model.

    Args:
        text: Input text (English or Korean). Empty/falsy text yields a
            uniform distribution.
        candidate_labels: Labels to score. An empty list returns empty
            results (previously this raised ZeroDivisionError on the
            empty-text path).

    Returns:
        dict with ``"labels"`` and ``"scores"`` lists (highest score first
        when a ranking is available). On API failure, a random-but-normalized
        ranking is fabricated; on any other error, a uniform distribution.
    """
    # Guard: with no labels, 1/len(candidate_labels) below would divide by zero.
    if not candidate_labels:
        return {"labels": [], "scores": []}
    if not text:
        # Nothing to classify: every label is equally likely.
        return {"labels": candidate_labels, "scores": [1/len(candidate_labels)] * len(candidate_labels)}
    try:
        payload = {
            "inputs": text,
            "parameters": {"candidate_labels": candidate_labels}
        }
        result = query_huggingface(payload, ZERO_SHOT_MODEL)
        if result and "labels" in result and "scores" in result:
            return result
        # Fallback if API call fails: fabricate a normalized random ranking so
        # downstream filtering still has something to work with.
        print("Zero-shot API call failed, using fallback")
        import random
        scores = [random.uniform(0.1, 0.9) for _ in candidate_labels]
        total = sum(scores)
        normalized = [s/total for s in scores]
        sorted_indices = sorted(range(len(normalized)), key=lambda i: normalized[i], reverse=True)
        return {
            'labels': [candidate_labels[i] for i in sorted_indices],
            'scores': [normalized[i] for i in sorted_indices]
        }
    except Exception as e:
        print(f"Zero-shot classification error: {e}")
        # Return fallback response: uniform distribution over the labels.
        return {"labels": candidate_labels, "scores": [1/len(candidate_labels)] * len(candidate_labels)}
80
+
# Dive-site database: if the CSV is missing (e.g. fresh deployment), seed a
# small demo dataset on disk so the service can still answer requests.
csv_path = "divesites.csv"
if not os.path.exists(csv_path):
    # Create default data for demo purposes; columns mirror the real CSV
    # (Marine Life is a comma-separated species string per site).
    default_data = {
        "Dive Site": ["Dry Dock", "Lalaguna Point", "Shark Cave", "Fish Bowl", "Sabang Reef"],
        "Experience Level": ["beginner", "beginner", "advanced", "advanced", "intermediate"],
        "Marine Life": [
            "sweetlips,batfish,surgeonfish,grouper,snapper,lionfish,pufferfish,octopus,seahorse",
            "coral,angelfish,butterflyfish,parrotfish",
            "shark,stingray,octopus,coral",
            "shark,sweetlips,tuna,jack",
            "coral,angelfish,butterflyfish,parrotfish,damselfish,clownfish"
        ]
    }
    pd.DataFrame(default_data).to_csv(csv_path, index=False)
    print(f"Created default {csv_path}")
97
+
# Load dive sites into the module-level DataFrame used by /recommend.
try:
    df = pd.read_csv(csv_path)
    print(f"Loaded {len(df)} dive sites from {csv_path}")
except Exception as e:
    print(f"Error loading CSV: {e}")
    # Create an empty dataframe as fallback so the /recommend route still
    # runs (it will simply return no results).
    df = pd.DataFrame({"Dive Site": [], "Experience Level": [], "Marine Life": []})
106
+
# Define candidate labels used by the zero-shot classifier in /recommend.
# Experience levels a request can be classified into.
experience_labels = ["beginner", "advanced", "intermediate"]
# Marine-life terms; also reused as the 'marine_life' keyword list for
# explicit-mention extraction from English input.
marine_labels = [
    "turtle", "shark", "clownfish", "ray", "seahorse", "manta ray", "lionfish", "batfish",
    "grouper", "snapper", "octopus", "nudibranch", "frogfish", "sweetlips", "scorpionfish",
    "moray", "angelfish", "parrotfish", "butterflyfish", "damselfish", "trevally", "mackerel",
    "stingray", "flatworm", "drummer", "jack", "crustacean", "snake", "coral"
]
115
+
# Core endpoint: turn a free-text dive request (English or Korean) into a
# ranked list of matching dive sites.
@app.route('/recommend', methods=['POST', 'OPTIONS'])
def recommend():
    """Recommend dive sites from the loaded CSV.

    Expects JSON ``{"message": <free text>}``; returns up to 6 rows of the
    dive-site table as a JSON list, or ``{"error": ...}`` with 400/500.
    Pipeline: NLTK preprocessing -> keyword extraction (English only) ->
    zero-shot classification of experience level and marine-life interest ->
    successive DataFrame filters (experience, marine life, site type), each
    falling back to the previous result set when it matches nothing.
    """
    # Handle preflight request (CORS) without running the pipeline.
    if request.method == 'OPTIONS':
        return '', 204

    try:
        # Get user input from request
        data = request.get_json()
        if not data or 'message' not in data:
            return jsonify({"error": "Invalid request. 'message' field required"}), 400

        user_input = data['message']
        if not user_input or len(user_input) < 3:
            return jsonify({"error": "Message too short"}), 400

        # No need to translate - the multilingual model handles Korean directly
        original_input = user_input

        # Preprocess text with NLTK
        def preprocess_text(text):
            """Lower-case, de-stopword and lemmatize English text; Korean text
            is passed through untouched (the English pipeline would mangle it)."""
            # For Korean text, skip tokenization and just return the original
            if is_korean(text):
                return text

            # For English text, apply standard NLP preprocessing
            # Tokenize
            tokens = word_tokenize(text.lower())
            # Remove stopwords and punctuation
            stop_words = set(stopwords.words('english'))
            tokens = [token for token in tokens if token not in stop_words and token not in punctuation]
            # Lemmatize
            lemmatizer = WordNetLemmatizer()
            tokens = [lemmatizer.lemmatize(token) for token in tokens]
            return " ".join(tokens)

        processed_input = preprocess_text(user_input)
        print(f"Processed input: {processed_input}")

        # Extract keywords relevant to diving
        diving_keywords = {
            'experience': ['beginner', 'intermediate', 'advanced', 'expert', 'novice', 'technical', 'professional'],
            'marine_life': marine_labels,
            'site_types': ['reef', 'wreck', 'wall', 'cave', 'drift', 'slope']
        }

        # Extract keywords relevant to diving (no embeddings)
        found_keywords = {}
        # Only apply keyword extraction for English input
        if not is_korean(user_input):
            for category, words in diving_keywords.items():
                # Exact whole-word matches against the preprocessed tokens.
                matches = [word for word in words if word in processed_input.lower().split()]
                if matches:
                    found_keywords[category] = matches
        print(f"Extracted keywords: {found_keywords}")

        # Predict experience level with API
        exp_result = zero_shot_classify(processed_input, experience_labels)
        top_experience = exp_result['labels'][0]  # Highest scored experience level

        # Consider explicitly mentioned experience levels (they override the
        # classifier's guess).
        if 'experience' in found_keywords:
            for exp in found_keywords['experience']:
                if exp in experience_labels:
                    top_experience = exp
                    break

        # Predict marine life preferences with API
        marine_result = zero_shot_classify(processed_input, marine_labels)
        desired_marine_life = [
            label for label, score in zip(marine_result['labels'], marine_result['scores'])
            if score > 0.3
        ][:5]  # Limit to top 5 marine life with score > 0.3

        if not desired_marine_life:
            desired_marine_life = marine_result['labels'][:3]  # If no high scores, take top 3

        # Add explicitly mentioned marine life
        if 'marine_life' in found_keywords:
            for life in found_keywords['marine_life']:
                if life not in desired_marine_life:
                    desired_marine_life.append(life)

        print(f"User input: {original_input}")
        if is_korean(original_input):
            print(f"Input detected as Korean")
        print(f"Detected experience level: {top_experience}")
        print(f"Detected marine life interests: {desired_marine_life}")

        # Filter dive sites - first by experience level
        # Handle multi-level experience entries like "Beginner/Intermediate"
        # via case-insensitive substring containment.
        filtered = df[df["Experience Level"].apply(
            lambda x: top_experience.lower() in x.lower()
        )]

        # If no experience level matches, try all sites
        if len(filtered) == 0:
            filtered = df

        print(f"After experience filter: {len(filtered)} sites")

        # Marine life matching without embeddings
        def contains_marine_life(marine_list_str, desired_terms):
            """Return True if any desired term (lemmatized) is a substring of
            any lemmatized species name in the site's comma-separated list."""
            # Split the comma-separated string and normalize each term
            marine_species = [species.lower().strip() for species in marine_list_str.split(',')]
            lemmatizer = WordNetLemmatizer()
            marine_species = [lemmatizer.lemmatize(species) for species in marine_species]
            desired_lemmas = [lemmatizer.lemmatize(term.lower()) for term in desired_terms]
            # Score each site based on how many desired species it contains
            matches = 0
            for desired in desired_lemmas:
                for species in marine_species:
                    # Check for exact match or if desired term is part of the species name
                    if desired in species:
                        matches += 1
                        break
            return matches > 0

        marine_filtered = filtered[filtered["Marine Life"].apply(
            lambda x: contains_marine_life(x, desired_marine_life)
        )]

        print(f"After marine life filter: {len(marine_filtered)} sites")

        # If no marine life matches, return experience-level filtered sites
        if len(marine_filtered) == 0:
            marine_filtered = filtered

        # Add site type filtering if mentioned in input (the 'Site Type'
        # column only exists in the on-disk CSV, not the seeded demo data).
        if 'site_types' in found_keywords and 'Site Type' in df.columns:
            site_type_filtered = marine_filtered[marine_filtered["Site Type"].apply(
                lambda x: any(site_type.lower() in x.lower() for site_type in found_keywords['site_types'])
            )]

            if len(site_type_filtered) > 0:
                marine_filtered = site_type_filtered
                print(f"After site type filter: {len(marine_filtered)} sites")

        # Convert to list of dictionaries for JSON response
        results = marine_filtered.head(6).to_dict(orient='records')

        # Debug info
        print(f"Found {len(marine_filtered)} matching dive sites")
        if len(marine_filtered) > 0:
            print(f"Sample match: {marine_filtered['Dive Site'].iloc[0]} with marine life: {marine_filtered['Marine Life'].iloc[0]}")

        return jsonify(results)

    except Exception as e:
        print(f"Error processing recommendation: {e}")
        return jsonify({"error": str(e)}), 500
267
+
# Lightweight health-check route for verifying the API is reachable
# (useful when debugging CORS / deployment issues from a front-end).
@app.route('/test', methods=['GET'])
def test():
    """Return a static JSON payload confirming the server is up."""
    payload = {"status": "ok", "message": "API is working!"}
    return jsonify(payload)
272
+
273
+
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (presumably chosen for Hugging Face
    # Spaces, which expects that port — confirm for other deployments).
    app.run(host="0.0.0.0", port=7860)
divesites.csv ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Dive Site,Experience Level,Marine Life,Site Type
2
+ Dry Dock,Beginner/Intermediate/Advanced,"sweetlips, batfish, surgeonfish, grouper, snapper, lionfish, pufferfish, octopus, seahorse, anemonefish",Reef
3
+ Lalaguna Point,Beginner,"coral, angelfish, butterflyfish, parrotfish",Reef
4
+ Alma Jane Wreck,Intermediate,"snapper, grouper, batfish, lionfish",Wreck
5
+ St. Christopher Wreck,Intermediate,"batfish, grouper, snapper, lionfish",Wreck
6
+ Sabang Reef,Advanced,"coral, angelfish, butterflyfish, parrotfish, damselfish, clownfish",Reef
7
+ Sabang Wreck,Intermediate,"grouper, snapper, lionfish, batfish",Wreck
8
+ Sabang Point,Beginner,"coral, angelfish, butterflyfish, parrotfish, damselfish",Reef
9
+ Monkey Wreck,Advanced,"grouper, snapper, batfish, lionfish",Wreck
10
+ Monkey Beach,Beginner/Intermediate,"coral, damselfish, angelfish, butterflyfish, ray",Slope
11
+ Ernie's Point,Intermediate,"grouper, mantis, trevally, mackerel",Reef
12
+ Dungon Beach/Wall,Beginner/Intermediate,"lionfish, scorpionfish, nudibranch, moray, frogfish",Wall
13
+ West Escarceo,Beginner/Intermediate,"stingray, scorpionfish, octopus, grouper, mackerel, tuna, trevally",Drift
14
+ Fish Bowl,Advanced/Technical,"shark, sweetlips, tuna, jack",Deep Reef
15
+ Canyons,Advanced,"drums, batfish, trevally, sweetlips, octopus, ray, shark, manta",Drift
16
+ Hole in the Wall,Beginner/Intermediate,"drums, sweetlips, trevally, snapper, lionfish, scorpionfish, moray, pufferfish, octopus, frogfish",Tunnel
17
+ Pink Wall,Beginner,"moray, scorpionfish, octopus, snake, nudibranch, crustacean",Wall
18
+ Shark Cave,Advanced,"shark, stingray, octopus, coral",Cave
19
+ Atoll,Advanced,"angelfish, sweetlips, scorpionfish, grouper, moray, frogfish, flatworm, nudibranch, lionfish",Rock
20
+ Kilima Beach/Steps,Beginner/Intermediate/Advanced,"anthias, surgeonfish, angelfish, butterflyfish, parrotfish, moray, frogfish, snake, turtle, octopus, batfish, seahorse",Ridge
21
+ Sinandigan Wall,Beginner/Intermediate/Advanced,"nudibranch, sweetlips, leaffish, frogfish, crocodilefish, clownfish, lionfish, cuttlefish, cucumber",Wall
22
+ Turtle Rock,Advanced/Technical,"sweetlips, nudibranch, angelfish, turtle, shark, coral",Deep Reef
23
+ Pink Wall (Intermediate),Intermediate,"nudibranch, coral, angelfish, butterflyfish, invertebrates",Wall
24
+ The Atoll,Intermediate,"coral, angelfish, butterflyfish, parrotfish, damselfish, surgeonfish",Reef
25
+ Coral Garden,Beginner,"coral, clownfish, angelfish, butterflyfish, parrotfish, damselfish",Reef
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ flask==2.3.3
2
+ flask-cors==4.0.0
3
+ pandas==2.1.0
4
+ nltk==3.8.1
5
+ requests==2.31.0
6
+ python-dotenv==1.0.0
7
+ gunicorn==21.2.0