Spaces:

pszemraj
/

FLAN-grammar-correction

Running on Zero

App Files Community

pszemraj committed on Nov 3, 2024

Commit

e741efb

verified ·

1 Parent(s): de56fbd

move to ZeroGPU

Browse files

Files changed (1) hide show

app.py +32 -39

app.py CHANGED Viewed

@@ -3,6 +3,9 @@ import logging
 import os
 import re
 import torch
 from cleantext import clean
 import gradio as gr
@@ -13,45 +16,34 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 logging.basicConfig(level=logging.INFO)
 logging.info(f"torch version:\t{torch.__version__}")
 checker_model_name = "textattack/roberta-base-CoLA"
 corrector_model_name = "pszemraj/flan-t5-large-grammar-synthesis"
-# pipelines
-checker = pipeline(
-        "text-classification",
-        checker_model_name,
-    )
-# checker.model = torch.compile(checker.model)
-gc.collect()
-if os.environ.get("HF_DEMO_NO_USE_ONNX") is None:
-    # load onnx runtime unless HF_DEMO_NO_USE_ONNX is set
-    from optimum.pipelines import pipeline
-    corrector = pipeline(
-        "text2text-generation", model=corrector_model_name, accelerator="ort"
-    )
-else:
-    corrector = pipeline("text2text-generation", corrector_model_name)
 def split_text(text: str) -> list:
     # Split the text into sentences using regex
     sentences = re.split(r"(?<=[^A-Z].[.?]) +(?=[A-Z])", text)
-    # Initialize a list to store the sentence batches
     sentence_batches = []
-    # Initialize a temporary list to store the current batch of sentences
     temp_batch = []
-    # Iterate through the sentences
     for sentence in sentences:
-        # Add the sentence to the temporary batch
         temp_batch.append(sentence)
-        # If the length of the temporary batch is between 2 and 3 sentences, or if it is the last batch, add it to the list of sentence batches
         if len(temp_batch) >= 2 and len(temp_batch) <= 3 or sentence == sentences[-1]:
             sentence_batches.append(temp_batch)
             temp_batch = []
@@ -59,44 +51,44 @@ def split_text(text: str) -> list:
     return sentence_batches
-def correct_text(text: str, checker, corrector, separator: str = " ") -> str:
     # Split the text into sentence batches
     sentence_batches = split_text(text)
     # Initialize a list to store the corrected text
     corrected_text = []
-    # Iterate through the sentence batches
     for batch in tqdm(
         sentence_batches, total=len(sentence_batches), desc="correcting text.."
     ):
-        # Join the sentences in the batch into a single string
         raw_text = " ".join(batch)
-        # Check the grammar quality of the text using the text-classification pipeline
         results = checker(raw_text)
-        # Only correct the text if the results of the text-classification are not LABEL_1 or are LABEL_1 with a score below 0.9
         if results[0]["label"] != "LABEL_1" or (
             results[0]["label"] == "LABEL_1" and results[0]["score"] < 0.9
         ):
-            # Correct the text using the text-generation pipeline
             corrected_batch = corrector(raw_text)
             corrected_text.append(corrected_batch[0]["generated_text"])
         else:
             corrected_text.append(raw_text)
-    # Join the corrected text into a single string
-    corrected_text = separator.join(corrected_text)
-    return corrected_text
 def update(text: str):
     text = clean(text[:4000], lower=False)
-    return correct_text(text, checker, corrector)
 with gr.Blocks() as demo:
     gr.Markdown("# <center>Robust Grammar Correction with FLAN-T5</center>")
     gr.Markdown(
@@ -111,7 +103,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         inp = gr.Textbox(
             label="input",
-            placeholder="PUT TEXT TO CHECK & CORRECT BROSKI",
             value="I wen to the store yesturday to bye some food. I needd milk, bread, and a few otter things. The store was really crowed and I had a hard time finding everyting I needed. I finaly made it to the check out line and payed for my stuff.",
         )
         out = gr.Textbox(label="output", interactive=False)
@@ -119,7 +111,8 @@ with gr.Blocks() as demo:
     btn.click(fn=update, inputs=inp, outputs=out)
     gr.Markdown("---")
     gr.Markdown(
-        "- see the [model card](https://huggingface.co/pszemraj/flan-t5-large-grammar-synthesis) for more info"
     )
-    gr.Markdown("- if experiencing long wait times, feel free to duplicate the space!")
-demo.launch(debug=True)

 import os
 import re
+import spaces
 import torch
 from cleantext import clean
 import gradio as gr
 logging.basicConfig(level=logging.INFO)
 logging.info(f"torch version:\t{torch.__version__}")
+# Model names
 checker_model_name = "textattack/roberta-base-CoLA"
 corrector_model_name = "pszemraj/flan-t5-large-grammar-synthesis"
+checker = pipeline(
+    "text-classification",
+    checker_model_name,
+    device_map="cuda",
+)
+corrector = pipeline(
+    "text2text-generation",
+    corrector_model_name,
+    device_map="cuda",
+)
 def split_text(text: str) -> list:
     # Split the text into sentences using regex
     sentences = re.split(r"(?<=[^A-Z].[.?]) +(?=[A-Z])", text)
+    # Initialize lists for batching
     sentence_batches = []
     temp_batch = []
+    # Create batches of 2-3 sentences
     for sentence in sentences:
         temp_batch.append(sentence)
         if len(temp_batch) >= 2 and len(temp_batch) <= 3 or sentence == sentences[-1]:
             sentence_batches.append(temp_batch)
             temp_batch = []
     return sentence_batches
+@spaces.GPU(duration=60)
+def correct_text(text: str, separator: str = " ") -> str:
     # Split the text into sentence batches
     sentence_batches = split_text(text)
     # Initialize a list to store the corrected text
     corrected_text = []
+    # Process each batch
     for batch in tqdm(
         sentence_batches, total=len(sentence_batches), desc="correcting text.."
     ):
         raw_text = " ".join(batch)
+        # Check grammar quality
         results = checker(raw_text)
+        # Correct text if needed
         if results[0]["label"] != "LABEL_1" or (
             results[0]["label"] == "LABEL_1" and results[0]["score"] < 0.9
         ):
             corrected_batch = corrector(raw_text)
             corrected_text.append(corrected_batch[0]["generated_text"])
         else:
             corrected_text.append(raw_text)
+    # Join the corrected text
+    return separator.join(corrected_text)
 def update(text: str):
+    # Clean and truncate input text
     text = clean(text[:4000], lower=False)
+    return correct_text(text)
+# Create the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# <center>Robust Grammar Correction with FLAN-T5</center>")
     gr.Markdown(
     with gr.Row():
         inp = gr.Textbox(
             label="input",
+            placeholder="Enter text to check & correct",
             value="I wen to the store yesturday to bye some food. I needd milk, bread, and a few otter things. The store was really crowed and I had a hard time finding everyting I needed. I finaly made it to the check out line and payed for my stuff.",
         )
         out = gr.Textbox(label="output", interactive=False)
     btn.click(fn=update, inputs=inp, outputs=out)
     gr.Markdown("---")
     gr.Markdown(
+        "- See the [model card](https://huggingface.co/pszemraj/flan-t5-large-grammar-synthesis) for more info"
     )
+# Launch the demo
+demo.launch(debug=True)