Spaces:
Sleeping
Sleeping
| # paraphraser.py | |
| from model_loader import paraphraser_model | |
def paraphrase_comment(comment):
    """
    Paraphrase a toxic comment using the Granite 3.2-2B-Instruct model.

    A few-shot prompt (guidelines plus toxic/neutral example pairs) is built
    around ``comment`` and passed to the model and tokenizer exposed by
    ``paraphraser_model``.

    Args:
        comment: The text to rewrite. Any falsy value (``None``, ``""``)
            short-circuits without touching the model.

    Returns:
        ``None`` when ``comment`` is falsy; otherwise the paraphrased text.
        If anything raises while building inputs, generating or decoding,
        the exception is swallowed and the string
        ``"Error paraphrasing comment: <message>"`` is returned instead, so
        callers that need to distinguish failures must check for that prefix.
    """
    if not comment:
        return None

    try:
        model = paraphraser_model.model
        tokenizer = paraphraser_model.tokenizer

        # Create a detailed prompt with guidelines and examples
        prompt = (
            "You are a content moderator tasked with rewriting toxic comments into neutral and constructive ones while maintaining the original meaning. "
            "Follow these guidelines:\n"
            "- Remove explicit hate speech, personal attacks, or offensive language.\n"
            "- Keep the response neutral and professional.\n"
            "- Ensure the rewritten comment retains the original intent but in a constructive tone.\n"
            "- Match the length and brevity of the original toxic comment whenever possible. Keep the response short and to the point.\n\n"
            "Examples:\n"
            "Toxic: \"You're so dumb! You never understand anything!\"\n"
            "Neutral: \"You might be misunderstanding this.\"\n"
            "Toxic: \"This is the worst idea ever. Only an idiot would suggest this.\"\n"
            "Neutral: \"I don’t think this idea works well.\"\n"
            "Toxic: \"You’re useless.\"\n"
            "Neutral: \"This isn’t helping much.\"\n"
            "Toxic: \"Shut up.\"\n"
            "Neutral: \"Let’s take a break from this.\"\n\n"
            f"Now, rewrite this comment: \"{comment}\""
        )

        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True, max_length=512)

        # Deterministic beam-search decoding.
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,    # Cap on generated tokens (excludes the prompt length)
            num_beams=4,          # Keep 4 candidate sequences while searching
            early_stopping=True,  # Stop as soon as num_beams finished candidates exist
            do_sample=False,      # No sampling: pure beam search
        )

        # Strip the prompt at the token level rather than with a string
        # replace: decoding does not always reproduce the prompt text exactly
        # (tokenizer normalization, truncation at max_length), in which case
        # a string replace silently leaves the whole prompt in the result.
        config = getattr(model, "config", None)
        if getattr(config, "is_encoder_decoder", False):
            # Encoder-decoder outputs hold only the generated sequence.
            generated_tokens = outputs[0]
        else:
            # Decoder-only outputs are the prompt tokens followed by the
            # continuation, so drop the first `prompt_length` positions.
            prompt_length = inputs["input_ids"].shape[-1]
            generated_tokens = outputs[0][prompt_length:]

        paraphrased_comment = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        # Remove unwanted prefixes like "Neutral: " that mirror the few-shot format
        if paraphrased_comment.startswith("Neutral: "):
            paraphrased_comment = paraphrased_comment[len("Neutral: "):].strip()

        return paraphrased_comment
    except Exception as e:
        # Existing contract: failures are reported as a string, never raised.
        return f"Error paraphrasing comment: {str(e)}"