d254
/

Kikuyu
Not-For-All-Audiences

import re  # Import the regular expression module
import unicodedata
import warnings

# Install a process-wide filter that ignores warnings; no category or module
# restriction is passed, so it applies to every warning raised after this line.
# NOTE(review): nothing visible in this file emits warnings — confirm this
# blanket filter is still needed.
warnings.filterwarnings('ignore')

class OfflineTranslator:
    """Translate a fixed set of English phrases to Kikuyu by dictionary lookup.

    A built-in phrase table is loaded when the instance is created;
    ``translate`` then does an exact match that ignores case, punctuation
    and extra whitespace. This is much more reliable for a small, defined
    set of phrases than a complex ML model.

    Attributes are documented inline below; ``translation_dictionary`` maps
    a lowercase English phrase to its NFC-normalized Kikuyu translation.
    """

    # Returned by ``translate`` when the cleaned-up query has no entry.
    _NOT_FOUND = "Translation not found in dictionary."

    # Matches everything that is neither an ASCII letter nor whitespace.
    _NON_LETTER_RE = re.compile(r'[^a-zA-Z\s]')

    # ASCII stand-ins (base letter + ~, ^ or `) and the tilde letter each
    # one is rewritten to. Keys never overlap, so replacement order is
    # irrelevant.
    _DIACRITIC_MAP = {
        'i~': 'ĩ', 'i^': 'ĩ', 'i`': 'ĩ', 'u~': 'ũ', 'u^': 'ũ', 'u`': 'ũ',
        'I~': 'Ĩ', 'I^': 'Ĩ', 'I`': 'Ĩ', 'U~': 'Ũ', 'U^': 'Ũ', 'U`': 'Ũ',
    }

    # (English, Kikuyu) pairs. Kept as explicit pairs rather than two
    # parallel lists so an added or removed phrase can never shift every
    # following translation by one.
    _PHRASE_PAIRS = (
        # Greetings & Basic Conversation
        ("hello", "wĩ mwega"),
        ("how are you", "ũhoro waku"),
        ("i am fine thank you", "ndĩ mwega nĩ wega"),
        ("what is your name", "wĩtagwo atĩa"),
        ("my name is", "njitagwo"),
        ("good morning", "wĩra mwega"),
        ("good afternoon", "wabucha"),
        ("good night", "ũkire wega"),
        ("goodbye", "tigwo na wega"),
        ("i love you", "nĩngwendete"),
        ("god bless you", "Ngaĩ akũrathime"),
        ("yes", "ĩĩ"),
        ("no", "aaca"),
        ("please", "ndakũthaitha"),
        ("thank you", "nĩ wega muno"),
        ("you are welcome", "karibu"),
        # NOTE(review): this value contains Devanagari script, so it is
        # almost certainly not valid Kikuyu. Left unchanged pending a
        # verified translation.
        ("excuse me", "nĩm सॉरी"),

        # Basic Questions
        ("where are you going", "ũrothiĩ kũ"),
        ("what are you doing", "ũreka atĩa"),
        ("what is this", "gĩkĩ nĩ kĩĩ"),
        ("do you speak english", "nĩ ũraria gĩthũngũ"),
        ("how much is this", "gĩkĩ nĩ mbeca cigana"),

        # Common Nouns
        ("water", "maĩ"),
        ("food", "irio"),
        ("house", "nyũmba"),
        ("car", "ngari"),
        ("money", "mbeca"),
        ("friend", "mũrata"),
        ("child", "mwana"),
        ("man", "mũndũrũme"),
        ("woman", "mũtumia"),

        # Biblical & General Phrases
        ("in the beginning god created the heaven and the earth",
         "hapo mwandĩko-inĩ Ngaĩ ombire igũrũ na thĩ"),
        ("god said let there be light and there was light",
         "Ngaĩ akiuga nĩ kurore na kũgĩa ũtheri na ũtheri ũkĩgĩa"),
        ("for god so loved the world",
         "nĩgũkorwo Ngai nĩendete thĩ o ũguo"),
        ("the lord is my shepherd",
         "Jehova nĩwe mũrĩithi wakwa"),
        ("your word is a lamp for my feet",
         "kiugo gĩaku nĩ tawa wa magũrũ makwa"),
        ("trust in the lord with all your heart",
         "mwĩhoke Jehova na ngoro yaku yothe"),
        ("be still and know that i am god",
         "hoorerai na mũmenye atĩ niĩ nĩ niĩ Ngai"),
        ("i can do all things through christ who strengthens me",
         "nĩngĩhota maũndũ mothe thĩinĩ wa Kristo ũrĩa ũheaga hinya"),
        ("our father who is in heaven",
         "Baba witũ wĩ igũrũ"),
        ("give us this day our daily bread",
         "tũhe ũmũthĩ irio ciitũ cia o mũthenya"),
        ("forgive us our sins",
         "na ũtũrekere mehia maitũ"),
        ("lead us not into temptation",
         "na ndũgatũtware kũgerio-inĩ"),
    )

    def __init__(self):
        """Create the translator and fill its phrase dictionary."""
        # Lowercase English phrase -> NFC-normalized Kikuyu phrase.
        self.translation_dictionary = {}
        # Load the translations into the dictionary when the class is created
        self._load_translations()

    def _normalize_kikuyu_text(self, text):
        """Normalize Kikuyu text so special characters are represented consistently.

        Args:
            text: Kikuyu text, possibly using ASCII stand-ins such as
                ``i~`` or ``u^`` for the tilde vowels.

        Returns:
            The text with stand-ins replaced and Unicode-normalized to NFC;
            an empty string when ``text`` is empty or ``None``.
        """
        if not text:
            return ""
        for ascii_form, accented in self._DIACRITIC_MAP.items():
            text = text.replace(ascii_form, accented)
        # NFC guarantees one canonical code-point sequence per character,
        # whether the source used precomposed or combining forms.
        return unicodedata.normalize('NFC', text)

    def _load_translations(self):
        """Load the built-in English-Kikuyu phrase pairs into the dictionary.

        Prints a one-line progress message, then stores every pair in
        ``self.translation_dictionary`` with a lowercase English key and a
        normalized Kikuyu value.
        """
        print("Loading built-in translation dictionary...")

        # Keys are lowercased so lookups in ``translate`` can be
        # case-insensitive.
        for english, kikuyu in self._PHRASE_PAIRS:
            self.translation_dictionary[english.lower()] = (
                self._normalize_kikuyu_text(kikuyu)
            )

    def translate(self, english_text):
        """Translate an English phrase by looking it up in the dictionary.

        Punctuation and any other non-letter characters are ignored, the
        match is case-insensitive, and runs of whitespace (including
        leading/trailing) are collapsed to single spaces before lookup.

        Args:
            english_text: The English phrase to translate.

        Returns:
            The Kikuyu translation, or
            ``"Translation not found in dictionary."`` when the phrase is
            unknown, empty or ``None``.
        """
        if not english_text:
            return self._NOT_FOUND

        # Keep only letters and whitespace (drops ?, !, ., digits, ...).
        letters_only = self._NON_LETTER_RE.sub('', english_text)

        # Lowercase and collapse whitespace so "How   are you?" still
        # matches the key "how are you".
        query = " ".join(letters_only.lower().split())

        return self.translation_dictionary.get(query, self._NOT_FOUND)

# --- Main Execution Block ---

if __name__ == "__main__":
    # Interactive console front end: build the translator once, then
    # translate each line the user types until they enter 'quit'.
    print("Initializing English to Kikuyu Translator (Dictionary Mode)")
    print("=" * 60)

    # Create an instance of our simple translator
    translator = OfflineTranslator()

    # --- Interactive Translation Mode ---
    print("\n" + "=" * 60)
    print("Interactive Translation Mode (Type 'quit' to exit)")
    print("-" * 45)

    while True:
        try:
            english_input = input("\nEnter English text to translate: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of piped/closed stdin or Ctrl-C: leave the loop cleanly
            # instead of dying with a traceback.
            break

        if english_input.lower() == 'quit':
            break

        if english_input:
            translation = translator.translate(english_input)
            print(f"Kikuyu translation: {translation}")
        else:
            print("Please enter some text.")

    print("\n Thengĩu muno! (Thank you very much!)")
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for d254/harambee_gpt

Base model

echo840/MonkeyOCR
Finetuned
(1)
this model

Dataset used to train d254/harambee_gpt