| |
| |
|
|
| |
| |
| |
| |
| |
|
|
|
|
| import sys |
| import spacy |
| from pathlib import Path |
| import json |
|
|
|
|
# --- Tunables -------------------------------------------------------------
RoundTo = 2
Encoding = "utf8"
# recognize() passes the next two values to filterDoc() as the score cut-off
# and as the maximum number of labels to keep.
ScoreThreshold = 0.2
MaxResults = 3
ParagraphMinLetters = 10
ListMinLetters = 10

# NOTE(review): presumably meant to hold loaded sub-models -- confirm.
SubModels = dict()

# --- Command line ---------------------------------------------------------
# argv[1]: name or path of the main spaCy model
# argv[2]: sub-model directory, resolved against this file's directory
# argv[3]: the text to classify
Nlp = spacy.load(sys.argv[1])
SubModelDir = (Path(__file__).parent / sys.argv[2]).absolute()
# NOTE(review): this name shadows the builtin ``input``; it is kept because
# the script's final statement reads it.
input = sys.argv[3]
|
|
|
|
def filterDoc(doc, scoreThreshold, maxResults, roundTo=2):
    """Pick the highest-scoring categories of a classified document.

    Args:
        doc: Object exposing a ``cats`` mapping of label -> score.
        scoreThreshold: Only labels scoring strictly above this are kept.
        maxResults: Upper bound on the number of labels returned.
        roundTo: Number of decimals the returned scores are rounded to.

    Returns:
        A dict mapping label to rounded score, ordered from the highest
        score to the lowest.
    """
    # Drop every label at or below the threshold.
    candidates = [
        (label, score)
        for label, score in doc.cats.items()
        if score > scoreThreshold
    ]
    # Best first; the sort is stable, so ties keep their order in doc.cats.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    return {
        label: round(score, roundTo)
        for label, score in candidates[:maxResults]
    }
|
|
|
|
|
|
def recognize(text):
    """Classify *text* with the main model, then refine each hit with a sub-model.

    The main model's categories are filtered through ``filterDoc`` using
    ``ScoreThreshold`` and ``MaxResults``. For each surviving label, a
    directory of the same (stripped) name is looked up under ``SubModelDir``;
    when it exists it is loaded as a spaCy model and run on the same text.

    Args:
        text: The text to classify.

    Returns:
        A dict keyed by stripped label. Each value holds ``'score'`` (the main
        model's rounded score) and, when a sub-model directory exists,
        ``'subs'`` (the filtered categories of that sub-model). If a
        sub-model directory is missing, a top-level ``"messages"`` entry
        names the missing path.
    """
    labels = filterDoc(Nlp(text), ScoreThreshold, MaxResults)

    relabels = {}
    for label, score in labels.items():
        label2 = label.strip()
        SubModelPath = SubModelDir.joinpath(label2).absolute()
        if SubModelPath.exists():
            # Keep the sub-model in a local: rebinding the module-level Nlp
            # here would replace the main model, so a later recognize() call
            # in the same process would classify with a sub-model.
            subNlp = spacy.load(SubModelPath)
            relabels[label2] = {
                'score': score,
                'subs': filterDoc(subNlp(text), ScoreThreshold, MaxResults),
            }
        else:
            relabels[label2] = {'score': score}
            # NOTE(review): "messages" shares the namespace with labels and is
            # overwritten on every miss, so only the last missing path is
            # reported. Left unchanged to preserve the output shape.
            relabels["messages"] = "Submodel path \"" + str(SubModelPath) + "\" not found"

    return relabels
|
|
# Classify the command-line text and emit the result as one JSON line on stdout.
sys.stdout.write(json.dumps(recognize(input)) + "\n")
|
|