Spaces:
Sleeping
Sleeping
| import re | |
# Matches the literal abbreviation "No." (capital N, no IGNORECASE flag), but
# only when a digit follows, optionally after a single space. The digit and
# the space sit in a lookahead, so they are not part of the match.
_no_period_re = re.compile(r"(No[.])(?=[ ]?[0-9])")
# Matches a percent sign together with at most one space directly before it.
_percent_re = re.compile(r"([ ]?[%])")
# Group 1: a single digit immediately followed by "½" (e.g. "2½").
# Group 2: a "½" that is not directly preceded by a digit.
_half_re = re.compile("([0-9]½)|(½)")
# Abbreviation table: a list of (compiled pattern, replacement) pairs.
# Each pattern matches the abbreviation at a word boundary followed by a
# literal period, case-insensitively.
_abbreviations = [
    (re.compile(r"\b" + abbreviation + r"\.", re.IGNORECASE), expansion)
    for abbreviation, expansion in (
        ("mrs", "misess"),
        ("ms", "miss"),
        ("mr", "mister"),
        ("dr", "doctor"),
        ("st", "saint"),
        ("co", "company"),
        ("jr", "junior"),
        ("maj", "major"),
        ("gen", "general"),
        ("drs", "doctors"),
        ("rev", "reverend"),
        ("lt", "lieutenant"),
        ("hon", "honorable"),
        ("sgt", "sergeant"),
        ("capt", "captain"),
        ("esq", "esquire"),
        ("ltd", "limited"),
        ("col", "colonel"),
        ("ft", "fort"),
    )
]
def _expand_no_period(m):
    """Return the spoken form of a matched "No." abbreviation.

    Intended as the replacement callback for ``_no_period_re``.

    Args:
        m: The regex match object; ``m.group(0)`` is the matched abbreviation.

    Returns:
        "Number" when the match starts with a capital "N", otherwise
        "number". A trailing space is appended when the character right
        after the match is a digit, so "No.5" becomes "Number 5" rather
        than "Number5".
    """
    word = "Number" if m.group(0)[0] == "N" else "number"
    # The pattern's lookahead accepts a digit with no space in between; in
    # that case the expansion would be glued to the numeral. Slicing (rather
    # than indexing) keeps this safe when the match ends the string.
    following = m.string[m.end():m.end() + 1]
    if following.isdigit():
        return word + " "
    return word
def _expand_percent(m):
    """Return the spoken replacement for a matched percent sign.

    The match object ``m`` is not inspected; the result is always the
    word "percent" preceded by a single space.
    """
    spoken = " percent"
    return spoken
def _expand_half(m):
    """Return the spoken form of a matched "½".

    Args:
        m: The regex match object. Group 1, when present, holds a digit
            followed by "½".

    Returns:
        "<digit> and a half" when group 1 matched, otherwise "half".
    """
    digit_with_half = m.group(1)
    if digit_with_half is not None:
        # Keep only the leading digit; the "½" itself is replaced by words.
        return "%s and a half" % digit_with_half[0]
    return "half"
def normalize_abbreviations(text):
    """Expand "No.", "%" and "½" in ``text`` into words.

    The substitutions run in a fixed order: "No." before a number, then
    percent signs, then the half symbol.

    Args:
        text: The input string.

    Returns:
        The string with the three substitutions applied.
    """
    # NOTE(review): the _abbreviations table is not applied here; confirm
    # whether it is meant to be applied elsewhere.
    substitutions = (
        (_no_period_re, _expand_no_period),
        (_percent_re, _expand_percent),
        (_half_re, _expand_half),
    )
    for pattern, expander in substitutions:
        text = pattern.sub(expander, text)
    return text