Spaces:

impresso-project
/

solr-normalization-demo

Running

App Files Files Community

maslionok committed 3 days ago

Commit

8e796ef

1 Parent(s): ac40fb1

changes

Browse files

Files changed (3) hide show

README.md +14 -1
app.py +58 -11
logo.jpeg +0 -0

README.md CHANGED Viewed

@@ -5,7 +5,20 @@ colorFrom: blue
 colorTo: indigo
 sdk: docker
 pinned: false
-short_description: Solr Normalization Demo
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 colorTo: indigo
 sdk: docker
 pinned: false
+short_description: Demonstrate text normalization in the Impresso project using Solr functionality
 ---
+# Solr Normalization Demo
+This space demonstrates how text is normalized in the **Impresso** project, replicating Solr's text processing functionality.
+The demo runs text through a pipeline of analyzers (tokenization, stopword removal, and language-specific transformations) that prepares it for search and analysis, mirroring how text is normalized in the Impresso project.
+## Features
+- Multi-language support (German, French, Spanish, Italian, Portuguese, Dutch, English)
+- Automatic language detection
+- Detailed analyzer pipeline visualization
+- Stopword detection and removal
+- Token normalization
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -10,6 +10,10 @@ pipeline = SolrNormalizationPipeline()
 LANGUAGES = ["de", "fr", "es", "it", "pt", "nl", "en", "general"]
 def normalize(text, lang_choice):
     try:
         lang = None if lang_choice == "Auto-detect" else lang_choice
@@ -30,16 +34,59 @@ def normalize(text, lang_choice):
         print("❌ Pipeline error:", e)
         return f"Error: {e}"
-demo = gr.Interface(
-    fn=normalize,
-    inputs=[
-        gr.Textbox(label="Enter Text"),
-        gr.Dropdown(choices=LANGUAGES, value="Auto-detect", label="Language")
-    ],
-    outputs=gr.Textbox(label="Normalized Output"),
-    title="Solr Normalization Pipeline",
-    description="Text normalization replicating Solr functionality.",
-    allow_flagging="never"
-)
 demo.launch(server_name="0.0.0.0", server_port=7860)

 LANGUAGES = ["de", "fr", "es", "it", "pt", "nl", "en", "general"]
+# Example text and default language
+EXAMPLE_TEXT = "The quick brown fox jumps over the lazy dog. This is a sample text for demonstration purposes."
+DEFAULT_LANGUAGE = "en"
 def normalize(text, lang_choice):
     try:
         lang = None if lang_choice == "Auto-detect" else lang_choice
         print("❌ Pipeline error:", e)
         return f"Error: {e}"
+# Create the interface with logo and improved description
+with gr.Blocks(title="Solr Normalization Demo") as demo:
+    # Add logo at the top
+    gr.Image("logo.jpeg", label=None, show_label=False, container=False, height=100)
+    gr.Markdown(
+        """
+        # 🔥 Solr Normalization Pipeline Demo
+        **Solr normalization** is meant to demonstrate how text is normalized in the **Impresso** project.
+        This pipeline replicates Solr's text processing functionality, showing how text goes through various
+        analyzers including tokenization, stopword removal, and language-specific transformations.
+        Try the example below or enter your own text to see how it gets processed!
+        """
+    )
+    with gr.Row():
+        with gr.Column():
+            text_input = gr.Textbox(
+                label="Enter Text",
+                value=EXAMPLE_TEXT,
+                lines=3,
+                placeholder="Enter your text here..."
+            )
+            lang_dropdown = gr.Dropdown(
+                choices=["Auto-detect"] + LANGUAGES,
+                value=DEFAULT_LANGUAGE,
+                label="Language"
+            )
+            submit_btn = gr.Button("🚀 Normalize Text", variant="primary")
+        with gr.Column():
+            output = gr.Textbox(
+                label="Normalized Output",
+                lines=15,
+                placeholder="Results will appear here..."
+            )
+    submit_btn.click(
+        fn=normalize,
+        inputs=[text_input, lang_dropdown],
+        outputs=output
+    )
+    gr.Markdown(
+        """
+        ### 📝 About the Pipeline
+        - **Tokenization**: Splits text into individual tokens
+        - **Stopword Removal**: Identifies and removes common words
+        - **Language Detection**: Automatically detects text language
+        - **Normalization**: Applies language-specific text transformations
+        """
+    )
 demo.launch(server_name="0.0.0.0", server_port=7860)

logo.jpeg ADDED Viewed