Spaces:
Sleeping
Sleeping
Commit
·
a755cd8
1
Parent(s):
f751254
remove debug message
Browse files
.ipynb_checkpoints/README-checkpoint.md
CHANGED
|
@@ -24,7 +24,7 @@ Chinese METEOR Implementation
|
|
| 24 |
|
| 25 |
```python
|
| 26 |
import evaluate
|
| 27 |
-
meteor = evaluate.load("raptorkwok/
|
| 28 |
results = meteor.compute(
|
| 29 |
predictions=["ζε¨ι裑ει£―"],
|
| 30 |
references=["ζε¨ι裑ει£―"]
|
|
|
|
| 24 |
|
| 25 |
```python
|
| 26 |
import evaluate
|
| 27 |
+
meteor = evaluate.load("raptorkwok/chinesemeteor")
|
| 28 |
results = meteor.compute(
|
| 29 |
predictions=["ζε¨ι裑ει£―"],
|
| 30 |
references=["ζε¨ι裑ει£―"]
|
.ipynb_checkpoints/chinesemeteor-checkpoint.py
CHANGED
|
@@ -136,10 +136,10 @@ class ChineseMETEOR(evaluate.Metric):
|
|
| 136 |
pred_seg = [" ".join(jieba.cut(p.strip())) for p in predictions]
|
| 137 |
ref_seg = [" ".join(jieba.cut(r.strip())) for r in references]
|
| 138 |
|
| 139 |
-
# ---
|
| 140 |
def _cwn_synsets(self, word, pos=None): # Matches NLTK method call
|
| 141 |
if not isinstance(word, str) or not word.strip():
|
| 142 |
-
print(f"DEBUG: Skipping non-string input: {type(word)}")
|
| 143 |
return []
|
| 144 |
cwn = _load_cwn()
|
| 145 |
try:
|
|
@@ -147,12 +147,12 @@ class ChineseMETEOR(evaluate.Metric):
|
|
| 147 |
pattern = f"^{re.escape(word)}$"
|
| 148 |
lemmas = cwn.find_lemma(pattern)
|
| 149 |
except Exception as e:
|
| 150 |
-
print(f"DEBUG: Error querying CWN for '{word}': {e}")
|
| 151 |
return []
|
| 152 |
-
|
| 153 |
exact_lemmas = [l for l in lemmas if _get_lemma_name(l) == word]
|
| 154 |
if not exact_lemmas:
|
| 155 |
-
print(f"DEBUG: No exact lemma found for '{word}'")
|
| 156 |
return []
|
| 157 |
synsets_list = []
|
| 158 |
seen_synset_ids = set()
|
|
@@ -181,13 +181,13 @@ class ChineseMETEOR(evaluate.Metric):
|
|
| 181 |
lemma = s.lemma
|
| 182 |
synset_lemmas.append(lemma)
|
| 183 |
except AttributeError:
|
| 184 |
-
print(f"DEBUG: Could not extract lemma from sense {s}")
|
| 185 |
continue
|
| 186 |
syn_lemma_names = [_get_lemma_name(l) for l in synset_lemmas]
|
| 187 |
syn_lemmas_set = set(syn_lemma_names)
|
| 188 |
if syn_lemmas_set:
|
| 189 |
synsets_list.append(_CwnSynset(list(syn_lemmas_set), synset_id))
|
| 190 |
-
print(f"DEBUG: Found {len(synsets_list)} synsets for '{word}': {synsets_list[0]._lemmas if synsets_list else []}")
|
| 191 |
return synsets_list
|
| 192 |
|
| 193 |
# Use class for proper method binding
|
|
|
|
| 136 |
pred_seg = [" ".join(jieba.cut(p.strip())) for p in predictions]
|
| 137 |
ref_seg = [" ".join(jieba.cut(r.strip())) for r in references]
|
| 138 |
|
| 139 |
+
# --- Apply Real Chinese WordNet into METEOR algorithm ---
|
| 140 |
def _cwn_synsets(self, word, pos=None): # Matches NLTK method call
|
| 141 |
if not isinstance(word, str) or not word.strip():
|
| 142 |
+
#print(f"DEBUG: Skipping non-string input: {type(word)}")
|
| 143 |
return []
|
| 144 |
cwn = _load_cwn()
|
| 145 |
try:
|
|
|
|
| 147 |
pattern = f"^{re.escape(word)}$"
|
| 148 |
lemmas = cwn.find_lemma(pattern)
|
| 149 |
except Exception as e:
|
| 150 |
+
#print(f"DEBUG: Error querying CWN for '{word}': {e}")
|
| 151 |
return []
|
| 152 |
+
|
| 153 |
exact_lemmas = [l for l in lemmas if _get_lemma_name(l) == word]
|
| 154 |
if not exact_lemmas:
|
| 155 |
+
#print(f"DEBUG: No exact lemma found for '{word}'")
|
| 156 |
return []
|
| 157 |
synsets_list = []
|
| 158 |
seen_synset_ids = set()
|
|
|
|
| 181 |
lemma = s.lemma
|
| 182 |
synset_lemmas.append(lemma)
|
| 183 |
except AttributeError:
|
| 184 |
+
#print(f"DEBUG: Could not extract lemma from sense {s}")
|
| 185 |
continue
|
| 186 |
syn_lemma_names = [_get_lemma_name(l) for l in synset_lemmas]
|
| 187 |
syn_lemmas_set = set(syn_lemma_names)
|
| 188 |
if syn_lemmas_set:
|
| 189 |
synsets_list.append(_CwnSynset(list(syn_lemmas_set), synset_id))
|
| 190 |
+
#print(f"DEBUG: Found {len(synsets_list)} synsets for '{word}': {synsets_list[0]._lemmas if synsets_list else []}")
|
| 191 |
return synsets_list
|
| 192 |
|
| 193 |
# Use class for proper method binding
|
README.md
CHANGED
|
@@ -24,7 +24,7 @@ Chinese METEOR Implementation
|
|
| 24 |
|
| 25 |
```python
|
| 26 |
import evaluate
|
| 27 |
-
meteor = evaluate.load("raptorkwok/
|
| 28 |
results = meteor.compute(
|
| 29 |
predictions=["ζε¨ι裑ει£―"],
|
| 30 |
references=["ζε¨ι裑ει£―"]
|
|
|
|
| 24 |
|
| 25 |
```python
|
| 26 |
import evaluate
|
| 27 |
+
meteor = evaluate.load("raptorkwok/chinesemeteor")
|
| 28 |
results = meteor.compute(
|
| 29 |
predictions=["ζε¨ι裑ει£―"],
|
| 30 |
references=["ζε¨ι裑ει£―"]
|
chinesemeteor.py
CHANGED
|
@@ -136,10 +136,10 @@ class ChineseMETEOR(evaluate.Metric):
|
|
| 136 |
pred_seg = [" ".join(jieba.cut(p.strip())) for p in predictions]
|
| 137 |
ref_seg = [" ".join(jieba.cut(r.strip())) for r in references]
|
| 138 |
|
| 139 |
-
# ---
|
| 140 |
def _cwn_synsets(self, word, pos=None): # Matches NLTK method call
|
| 141 |
if not isinstance(word, str) or not word.strip():
|
| 142 |
-
print(f"DEBUG: Skipping non-string input: {type(word)}")
|
| 143 |
return []
|
| 144 |
cwn = _load_cwn()
|
| 145 |
try:
|
|
@@ -147,12 +147,12 @@ class ChineseMETEOR(evaluate.Metric):
|
|
| 147 |
pattern = f"^{re.escape(word)}$"
|
| 148 |
lemmas = cwn.find_lemma(pattern)
|
| 149 |
except Exception as e:
|
| 150 |
-
print(f"DEBUG: Error querying CWN for '{word}': {e}")
|
| 151 |
return []
|
| 152 |
-
|
| 153 |
exact_lemmas = [l for l in lemmas if _get_lemma_name(l) == word]
|
| 154 |
if not exact_lemmas:
|
| 155 |
-
print(f"DEBUG: No exact lemma found for '{word}'")
|
| 156 |
return []
|
| 157 |
synsets_list = []
|
| 158 |
seen_synset_ids = set()
|
|
@@ -181,13 +181,13 @@ class ChineseMETEOR(evaluate.Metric):
|
|
| 181 |
lemma = s.lemma
|
| 182 |
synset_lemmas.append(lemma)
|
| 183 |
except AttributeError:
|
| 184 |
-
print(f"DEBUG: Could not extract lemma from sense {s}")
|
| 185 |
continue
|
| 186 |
syn_lemma_names = [_get_lemma_name(l) for l in synset_lemmas]
|
| 187 |
syn_lemmas_set = set(syn_lemma_names)
|
| 188 |
if syn_lemmas_set:
|
| 189 |
synsets_list.append(_CwnSynset(list(syn_lemmas_set), synset_id))
|
| 190 |
-
print(f"DEBUG: Found {len(synsets_list)} synsets for '{word}': {synsets_list[0]._lemmas if synsets_list else []}")
|
| 191 |
return synsets_list
|
| 192 |
|
| 193 |
# Use class for proper method binding
|
|
|
|
| 136 |
pred_seg = [" ".join(jieba.cut(p.strip())) for p in predictions]
|
| 137 |
ref_seg = [" ".join(jieba.cut(r.strip())) for r in references]
|
| 138 |
|
| 139 |
+
# --- Apply Real Chinese WordNet into METEOR algorithm ---
|
| 140 |
def _cwn_synsets(self, word, pos=None): # Matches NLTK method call
|
| 141 |
if not isinstance(word, str) or not word.strip():
|
| 142 |
+
#print(f"DEBUG: Skipping non-string input: {type(word)}")
|
| 143 |
return []
|
| 144 |
cwn = _load_cwn()
|
| 145 |
try:
|
|
|
|
| 147 |
pattern = f"^{re.escape(word)}$"
|
| 148 |
lemmas = cwn.find_lemma(pattern)
|
| 149 |
except Exception as e:
|
| 150 |
+
#print(f"DEBUG: Error querying CWN for '{word}': {e}")
|
| 151 |
return []
|
| 152 |
+
|
| 153 |
exact_lemmas = [l for l in lemmas if _get_lemma_name(l) == word]
|
| 154 |
if not exact_lemmas:
|
| 155 |
+
#print(f"DEBUG: No exact lemma found for '{word}'")
|
| 156 |
return []
|
| 157 |
synsets_list = []
|
| 158 |
seen_synset_ids = set()
|
|
|
|
| 181 |
lemma = s.lemma
|
| 182 |
synset_lemmas.append(lemma)
|
| 183 |
except AttributeError:
|
| 184 |
+
#print(f"DEBUG: Could not extract lemma from sense {s}")
|
| 185 |
continue
|
| 186 |
syn_lemma_names = [_get_lemma_name(l) for l in synset_lemmas]
|
| 187 |
syn_lemmas_set = set(syn_lemma_names)
|
| 188 |
if syn_lemmas_set:
|
| 189 |
synsets_list.append(_CwnSynset(list(syn_lemmas_set), synset_id))
|
| 190 |
+
#print(f"DEBUG: Found {len(synsets_list)} synsets for '{word}': {synsets_list[0]._lemmas if synsets_list else []}")
|
| 191 |
return synsets_list
|
| 192 |
|
| 193 |
# Use class for proper method binding
|