Jeypi159 commited on
Commit
d2952ce
·
verified ·
1 Parent(s): aa1beec

Upload 4 files

Browse files
Files changed (4) hide show
  1. .gitignore +2 -0
  2. app.py +275 -0
  3. divesites.csv +25 -0
  4. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ venv/
2
+ .env
app.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from flask import Flask, request, jsonify
6
+ from flask_cors import CORS
7
+ import pandas as pd
8
+ import re
9
+ import nltk
10
+ from nltk.tokenize import word_tokenize
11
+ from nltk.corpus import stopwords
12
+ from nltk.stem import WordNetLemmatizer
13
+ from string import punctuation
14
+
# Fetch the NLTK corpora needed by the preprocessing pipeline. A failure here
# is non-fatal (the endpoints degrade gracefully), so we only print a warning.
try:
    for resource in ('punkt', 'stopwords', 'wordnet'):
        nltk.download(resource, quiet=True)
    print("NLTK resources downloaded successfully")
except Exception as e:
    print(f"Warning: Failed to download NLTK resources: {e}")
23
+
# Load environment variables from .env file
load_dotenv()

# Create Flask app
app = Flask(__name__)
# Enable CORS for all routes and origins, so browser front-ends served from
# any host can call this API directly.
CORS(app, resources={r"/*": {"origins": "*"}})

# Hugging Face API configuration
HF_API_TOKEN = os.environ.get("HF_API_TOKEN")  # Store your token as an environment variable
HF_API_URL = "https://api-inference.huggingface.co/models/"
ZERO_SHOT_MODEL = "MoritzLaurer/mDeBERTa-v3-base-mnli-xnli"  # Multilingual model that supports Korean
36
+
# Function to call Hugging Face API
def query_huggingface(payload, model, timeout=30):
    """POST *payload* to the Hugging Face inference API for *model*.

    Args:
        payload: JSON-serializable request body (inputs + parameters).
        model: Model id appended to ``HF_API_URL``.
        timeout: Seconds before the HTTP request is aborted (default 30).
            The original call had no timeout, so a stalled connection could
            block the Flask worker indefinitely.

    Returns:
        The decoded JSON response, or ``None`` on any network/decoding error
        (callers treat ``None`` as "use the fallback").
    """
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    API_URL = HF_API_URL + model
    try:
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        return response.json()
    except Exception as e:
        print(f"Error calling Hugging Face API for {model}: {e}")
        return None
47
+
# Precompiled Hangul-syllable range (U+AC00..U+D7A3). Compiling once at import
# time avoids re-compiling the pattern on every request; raw string avoids
# relying on Python's escape handling inside a normal string literal.
_KOREAN_RE = re.compile(r'[\uac00-\ud7a3]')


# Simple Korean detection function
def is_korean(text):
    """Return True if *text* contains at least one Hangul syllable."""
    return bool(_KOREAN_RE.search(text))
52
+
# Zero-shot classification using Hugging Face API
def zero_shot_classify(text, candidate_labels):
    """Rank *candidate_labels* for *text* via the zero-shot model.

    Args:
        text: Input text (English or Korean). Empty/falsy text yields a
            uniform distribution.
        candidate_labels: Labels to score. An empty list returns empty
            results (previously this raised ZeroDivisionError on the
            empty-text path).

    Returns:
        dict with ``"labels"`` and ``"scores"`` lists (highest score first
        when a ranking is available). On API failure, a random-but-normalized
        ranking is fabricated; on any other error, a uniform distribution.
    """
    # Guard: with no labels, 1/len(candidate_labels) below would divide by zero.
    if not candidate_labels:
        return {"labels": [], "scores": []}
    if not text:
        # Nothing to classify: every label is equally likely.
        return {"labels": candidate_labels, "scores": [1/len(candidate_labels)] * len(candidate_labels)}
    try:
        payload = {
            "inputs": text,
            "parameters": {"candidate_labels": candidate_labels}
        }
        result = query_huggingface(payload, ZERO_SHOT_MODEL)
        if result and "labels" in result and "scores" in result:
            return result
        # Fallback if API call fails: fabricate a normalized random ranking so
        # downstream filtering still has something to work with.
        print("Zero-shot API call failed, using fallback")
        import random
        scores = [random.uniform(0.1, 0.9) for _ in candidate_labels]
        total = sum(scores)
        normalized = [s/total for s in scores]
        sorted_indices = sorted(range(len(normalized)), key=lambda i: normalized[i], reverse=True)
        return {
            'labels': [candidate_labels[i] for i in sorted_indices],
            'scores': [normalized[i] for i in sorted_indices]
        }
    except Exception as e:
        print(f"Zero-shot classification error: {e}")
        # Return fallback response: uniform distribution over the labels.
        return {"labels": candidate_labels, "scores": [1/len(candidate_labels)] * len(candidate_labels)}
80
+
# Dive-site database: if the CSV is missing (e.g. fresh deployment), seed a
# small demo dataset on disk so the service can still answer requests.
csv_path = "divesites.csv"
if not os.path.exists(csv_path):
    # Create default data for demo purposes; columns mirror the real CSV
    # (Marine Life is a comma-separated species string per site).
    default_data = {
        "Dive Site": ["Dry Dock", "Lalaguna Point", "Shark Cave", "Fish Bowl", "Sabang Reef"],
        "Experience Level": ["beginner", "beginner", "advanced", "advanced", "intermediate"],
        "Marine Life": [
            "sweetlips,batfish,surgeonfish,grouper,snapper,lionfish,pufferfish,octopus,seahorse",
            "coral,angelfish,butterflyfish,parrotfish",
            "shark,stingray,octopus,coral",
            "shark,sweetlips,tuna,jack",
            "coral,angelfish,butterflyfish,parrotfish,damselfish,clownfish"
        ]
    }
    pd.DataFrame(default_data).to_csv(csv_path, index=False)
    print(f"Created default {csv_path}")
97
+
# Load dive sites into the module-level DataFrame used by /recommend.
try:
    df = pd.read_csv(csv_path)
    print(f"Loaded {len(df)} dive sites from {csv_path}")
except Exception as e:
    print(f"Error loading CSV: {e}")
    # Create an empty dataframe as fallback so the /recommend route still
    # runs (it will simply return no results).
    df = pd.DataFrame({"Dive Site": [], "Experience Level": [], "Marine Life": []})
106
+
# Define candidate labels used by the zero-shot classifier in /recommend.
# Experience levels a request can be classified into.
experience_labels = ["beginner", "advanced", "intermediate"]
# Marine-life terms; also reused as the 'marine_life' keyword list for
# explicit-mention extraction from English input.
marine_labels = [
    "turtle", "shark", "clownfish", "ray", "seahorse", "manta ray", "lionfish", "batfish",
    "grouper", "snapper", "octopus", "nudibranch", "frogfish", "sweetlips", "scorpionfish",
    "moray", "angelfish", "parrotfish", "butterflyfish", "damselfish", "trevally", "mackerel",
    "stingray", "flatworm", "drummer", "jack", "crustacean", "snake", "coral"
]
115
+
# Core endpoint: turn a free-text dive request (English or Korean) into a
# ranked list of matching dive sites.
@app.route('/recommend', methods=['POST', 'OPTIONS'])
def recommend():
    """Recommend dive sites from the loaded CSV.

    Expects JSON ``{"message": <free text>}``; returns up to 6 rows of the
    dive-site table as a JSON list, or ``{"error": ...}`` with 400/500.
    Pipeline: NLTK preprocessing -> keyword extraction (English only) ->
    zero-shot classification of experience level and marine-life interest ->
    successive DataFrame filters (experience, marine life, site type), each
    falling back to the previous result set when it matches nothing.
    """
    # Handle preflight request (CORS) without running the pipeline.
    if request.method == 'OPTIONS':
        return '', 204

    try:
        # Get user input from request
        data = request.get_json()
        if not data or 'message' not in data:
            return jsonify({"error": "Invalid request. 'message' field required"}), 400

        user_input = data['message']
        if not user_input or len(user_input) < 3:
            return jsonify({"error": "Message too short"}), 400

        # No need to translate - the multilingual model handles Korean directly
        original_input = user_input

        # Preprocess text with NLTK
        def preprocess_text(text):
            """Lower-case, de-stopword and lemmatize English text; Korean text
            is passed through untouched (the English pipeline would mangle it)."""
            # For Korean text, skip tokenization and just return the original
            if is_korean(text):
                return text

            # For English text, apply standard NLP preprocessing
            # Tokenize
            tokens = word_tokenize(text.lower())
            # Remove stopwords and punctuation
            stop_words = set(stopwords.words('english'))
            tokens = [token for token in tokens if token not in stop_words and token not in punctuation]
            # Lemmatize
            lemmatizer = WordNetLemmatizer()
            tokens = [lemmatizer.lemmatize(token) for token in tokens]
            return " ".join(tokens)

        processed_input = preprocess_text(user_input)
        print(f"Processed input: {processed_input}")

        # Extract keywords relevant to diving
        diving_keywords = {
            'experience': ['beginner', 'intermediate', 'advanced', 'expert', 'novice', 'technical', 'professional'],
            'marine_life': marine_labels,
            'site_types': ['reef', 'wreck', 'wall', 'cave', 'drift', 'slope']
        }

        # Extract keywords relevant to diving (no embeddings)
        found_keywords = {}
        # Only apply keyword extraction for English input
        if not is_korean(user_input):
            for category, words in diving_keywords.items():
                # Exact whole-word matches against the preprocessed tokens.
                matches = [word for word in words if word in processed_input.lower().split()]
                if matches:
                    found_keywords[category] = matches
        print(f"Extracted keywords: {found_keywords}")

        # Predict experience level with API
        exp_result = zero_shot_classify(processed_input, experience_labels)
        top_experience = exp_result['labels'][0]  # Highest scored experience level

        # Consider explicitly mentioned experience levels (they override the
        # classifier's guess).
        if 'experience' in found_keywords:
            for exp in found_keywords['experience']:
                if exp in experience_labels:
                    top_experience = exp
                    break

        # Predict marine life preferences with API
        marine_result = zero_shot_classify(processed_input, marine_labels)
        desired_marine_life = [
            label for label, score in zip(marine_result['labels'], marine_result['scores'])
            if score > 0.3
        ][:5]  # Limit to top 5 marine life with score > 0.3

        if not desired_marine_life:
            desired_marine_life = marine_result['labels'][:3]  # If no high scores, take top 3

        # Add explicitly mentioned marine life
        if 'marine_life' in found_keywords:
            for life in found_keywords['marine_life']:
                if life not in desired_marine_life:
                    desired_marine_life.append(life)

        print(f"User input: {original_input}")
        if is_korean(original_input):
            print(f"Input detected as Korean")
        print(f"Detected experience level: {top_experience}")
        print(f"Detected marine life interests: {desired_marine_life}")

        # Filter dive sites - first by experience level
        # Handle multi-level experience entries like "Beginner/Intermediate"
        # via case-insensitive substring containment.
        filtered = df[df["Experience Level"].apply(
            lambda x: top_experience.lower() in x.lower()
        )]

        # If no experience level matches, try all sites
        if len(filtered) == 0:
            filtered = df

        print(f"After experience filter: {len(filtered)} sites")

        # Marine life matching without embeddings
        def contains_marine_life(marine_list_str, desired_terms):
            """Return True if any desired term (lemmatized) is a substring of
            any lemmatized species name in the site's comma-separated list."""
            # Split the comma-separated string and normalize each term
            marine_species = [species.lower().strip() for species in marine_list_str.split(',')]
            lemmatizer = WordNetLemmatizer()
            marine_species = [lemmatizer.lemmatize(species) for species in marine_species]
            desired_lemmas = [lemmatizer.lemmatize(term.lower()) for term in desired_terms]
            # Score each site based on how many desired species it contains
            matches = 0
            for desired in desired_lemmas:
                for species in marine_species:
                    # Check for exact match or if desired term is part of the species name
                    if desired in species:
                        matches += 1
                        break
            return matches > 0

        marine_filtered = filtered[filtered["Marine Life"].apply(
            lambda x: contains_marine_life(x, desired_marine_life)
        )]

        print(f"After marine life filter: {len(marine_filtered)} sites")

        # If no marine life matches, return experience-level filtered sites
        if len(marine_filtered) == 0:
            marine_filtered = filtered

        # Add site type filtering if mentioned in input (the 'Site Type'
        # column only exists in the on-disk CSV, not the seeded demo data).
        if 'site_types' in found_keywords and 'Site Type' in df.columns:
            site_type_filtered = marine_filtered[marine_filtered["Site Type"].apply(
                lambda x: any(site_type.lower() in x.lower() for site_type in found_keywords['site_types'])
            )]

            if len(site_type_filtered) > 0:
                marine_filtered = site_type_filtered
                print(f"After site type filter: {len(marine_filtered)} sites")

        # Convert to list of dictionaries for JSON response
        results = marine_filtered.head(6).to_dict(orient='records')

        # Debug info
        print(f"Found {len(marine_filtered)} matching dive sites")
        if len(marine_filtered) > 0:
            print(f"Sample match: {marine_filtered['Dive Site'].iloc[0]} with marine life: {marine_filtered['Marine Life'].iloc[0]}")

        return jsonify(results)

    except Exception as e:
        print(f"Error processing recommendation: {e}")
        return jsonify({"error": str(e)}), 500
267
+
# Lightweight health-check route for verifying the API is reachable
# (useful when debugging CORS / deployment issues from a front-end).
@app.route('/test', methods=['GET'])
def test():
    """Return a static JSON payload confirming the server is up."""
    payload = {"status": "ok", "message": "API is working!"}
    return jsonify(payload)
272
+
273
+
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (presumably chosen for Hugging Face
    # Spaces, which expects that port — confirm for other deployments).
    app.run(host="0.0.0.0", port=7860)
divesites.csv ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Dive Site,Experience Level,Marine Life,Site Type
2
+ Dry Dock,Beginner/Intermediate/Advanced,"sweetlips, batfish, surgeonfish, grouper, snapper, lionfish, pufferfish, octopus, seahorse, anemonefish",Reef
3
+ Lalaguna Point,Beginner,"coral, angelfish, butterflyfish, parrotfish",Reef
4
+ Alma Jane Wreck,Intermediate,"snapper, grouper, batfish, lionfish",Wreck
5
+ St. Christopher Wreck,Intermediate,"batfish, grouper, snapper, lionfish",Wreck
6
+ Sabang Reef,Advanced,"coral, angelfish, butterflyfish, parrotfish, damselfish, clownfish",Reef
7
+ Sabang Wreck,Intermediate,"grouper, snapper, lionfish, batfish",Wreck
8
+ Sabang Point,Beginner,"coral, angelfish, butterflyfish, parrotfish, damselfish",Reef
9
+ Monkey Wreck,Advanced,"grouper, snapper, batfish, lionfish",Wreck
10
+ Monkey Beach,Beginner/Intermediate,"coral, damselfish, angelfish, butterflyfish, ray",Slope
11
+ Ernie's Point,Intermediate,"grouper, mantis, trevally, mackerel",Reef
12
+ Dungon Beach/Wall,Beginner/Intermediate,"lionfish, scorpionfish, nudibranch, moray, frogfish",Wall
13
+ West Escarceo,Beginner/Intermediate,"stingray, scorpionfish, octopus, grouper, mackerel, tuna, trevally",Drift
14
+ Fish Bowl,Advanced/Technical,"shark, sweetlips, tuna, jack",Deep Reef
15
+ Canyons,Advanced,"drums, batfish, trevally, sweetlips, octopus, ray, shark, manta",Drift
16
+ Hole in the Wall,Beginner/Intermediate,"drums, sweetlips, trevally, snapper, lionfish, scorpionfish, moray, pufferfish, octopus, frogfish",Tunnel
17
+ Pink Wall,Beginner,"moray, scorpionfish, octopus, snake, nudibranch, crustacean",Wall
18
+ Shark Cave,Advanced,"shark, stingray, octopus, coral",Cave
19
+ Atoll,Advanced,"angelfish, sweetlips, scorpionfish, grouper, moray, frogfish, flatworm, nudibranch, lionfish",Rock
20
+ Kilima Beach/Steps,Beginner/Intermediate/Advanced,"anthias, surgeonfish, angelfish, butterflyfish, parrotfish, moray, frogfish, snake, turtle, octopus, batfish, seahorse",Ridge
21
+ Sinandigan Wall,Beginner/Intermediate/Advanced,"nudibranch, sweetlips, leaffish, frogfish, crocodilefish, clownfish, lionfish, cuttlefish, cucumber",Wall
22
+ Turtle Rock,Advanced/Technical,"sweetlips, nudibranch, angelfish, turtle, shark, coral",Deep Reef
23
+ Pink Wall (Intermediate),Intermediate,"nudibranch, coral, angelfish, butterflyfish, invertebrates",Wall
24
+ The Atoll,Intermediate,"coral, angelfish, butterflyfish, parrotfish, damselfish, surgeonfish",Reef
25
+ Coral Garden,Beginner,"coral, clownfish, angelfish, butterflyfish, parrotfish, damselfish",Reef
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ flask==2.3.3
2
+ flask-cors==4.0.0
3
+ pandas==2.1.0
4
+ nltk==3.8.1
5
+ requests==2.31.0
6
+ python-dotenv==1.0.0
7
+ gunicorn==21.2.0