import logging
import os

import nltk
import spacy
from flask import Flask, jsonify, request
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

# BUG FIX: nltk builds its data search path (nltk.data.path) from the
# NLTK_DATA env var when the package is first imported, so mutating
# os.environ *after* `import nltk` has no effect on corpus lookups.
# Append the directory to the search path directly; the env var is still
# set for the benefit of any child processes.
os.environ["NLTK_DATA"] = "/usr/local/nltk_data"
if "/usr/local/nltk_data" not in nltk.data.path:
    nltk.data.path.append("/usr/local/nltk_data")

# Fail fast at startup if the WordNet corpus is missing.
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    raise RuntimeError("NLTK wordnet data not found. Make sure it's downloaded in Docker build.")

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Flask app and NLP tools
app = Flask(__name__)
nlp = spacy.load("en_core_web_sm")
lemmatizer = WordNetLemmatizer()


# --- NLP Helper Functions ---

def extract_main_noun(text):
    """Return the last NOUN token in *text*, assuming it is the primary object.

    In English noun phrases the head usually comes last
    (e.g. 'leather wallet' -> 'wallet').  Falls back to the full input
    string when spaCy finds no noun at all.
    """
    doc = nlp(text)
    nouns = [token.text for token in doc if token.pos_ == "NOUN"]
    return nouns[-1] if nouns else text


def check_singular_form(word1, word2):
    """Return True if both words lemmatize to the same singular noun
    (e.g. 'glasses' vs 'glass')."""
    lemma1 = lemmatizer.lemmatize(word1.lower(), pos='n')
    lemma2 = lemmatizer.lemmatize(word2.lower(), pos='n')
    return lemma1 == lemma2


def check_synonyms(word1, word2):
    """Return True if the words share any WordNet noun lemma
    (e.g. 'wallet' vs 'billfold')."""
    lemmas1 = {lemma.name()
               for synset in wn.synsets(word1, pos=wn.NOUN)
               for lemma in synset.lemmas()}
    lemmas2 = {lemma.name()
               for synset in wn.synsets(word2, pos=wn.NOUN)
               for lemma in synset.lemmas()}
    # isdisjoint avoids materializing the intersection.
    return not lemmas1.isdisjoint(lemmas2)


def check_hyponym(word1, word2):
    """Return True if either word is a *direct* WordNet hyponym of the other.

    Only one level of hypernyms is inspected (e.g. 'bottle' -> 'vessel').
    Distant relations are deliberately NOT matched: walking the full
    hypernym closure would collapse nearly every noun into top-level
    concepts such as 'entity'.
    """
    syns1 = wn.synsets(word1, pos=wn.NOUN)
    syns2 = wn.synsets(word2, pos=wn.NOUN)
    if not syns1 or not syns2:
        return False
    # Set lookups instead of list membership: O(1) per hypernym test.
    targets2 = set(syns2)
    if any(hyper in targets2 for s in syns1 for hyper in s.hypernyms()):
        return True
    targets1 = set(syns1)
    return any(hyper in targets1 for s in syns2 for hyper in s.hypernyms())


# --- Core Logic for Label Assignment ---

def find_canonical_label(object_name, existing_labels):
    """
    Assigns a canonical label to a new object by checking against existing labels.
    If no match is found, it returns the lemmatized (singular) noun of the new object.
    """
    noun = extract_main_noun(object_name)
    for label in existing_labels:
        if (check_singular_form(noun, label)
                or check_synonyms(noun, label)
                or check_hyponym(noun, label)):
            logger.info("Match found: '%s' -> Existing label '%s'", noun, label)
            return label
    new_label = lemmatizer.lemmatize(noun.lower(), pos='n')
    logger.info("No match found for '%s'. Creating new canonical label: '%s'", noun, new_label)
    return new_label


# --- API Endpoint ---

@app.route('/get-canonical-label', methods=['POST'])
def assign_label_endpoint():
    """
    API endpoint to receive an object name and existing labels,
    and return a canonical label.

    Expects JSON: {"object_name": str, "existing_labels": [str, ...]}
    Returns 400 with a JSON error on malformed payloads.
    """
    # silent=True: malformed/absent JSON yields None instead of Flask's
    # default HTML error page, so every failure mode gets our JSON 400.
    data = request.get_json(silent=True)
    if not data or 'object_name' not in data or 'existing_labels' not in data:
        return jsonify({"error": "Request must include 'object_name' and 'existing_labels'"}), 400
    object_name = data['object_name']
    existing_labels = data['existing_labels']
    # Validate payload types so bad input yields a 400 rather than a 500.
    if not isinstance(object_name, str) or not isinstance(existing_labels, list):
        return jsonify({"error": "'object_name' must be a string and 'existing_labels' a list"}), 400
    canonical_label = find_canonical_label(object_name, existing_labels)
    return jsonify({"canonical_label": canonical_label})


if __name__ == '__main__':
    # Run the service on port 7860, the default for Hugging Face Spaces.
    # SECURITY FIX: debug=True exposes the Werkzeug interactive debugger
    # (arbitrary code execution) on 0.0.0.0 -- never enable it on a
    # deployed service.
    app.run(host='0.0.0.0', port=7860, debug=False)