Spaces:

rwillats
/

Contextual-Policy-Engine-Hate-Speech-Classification

Sleeping

App Files Files Community

rwillats committed on Apr 21

Commit

ed3a6ea

verified ·

1 Parent(s): 5f3fe93

Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

.DS_Store +0 -0
hate_speech_demo.py +224 -279
hatespeech/hate_speech_demo.py +224 -279
test/.DS_Store +0 -0
test/Hate Speech Policy.pdf +3 -0
test/README.md +6 -0
test/test.py +96 -0

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

hate_speech_demo.py CHANGED Viewed

@@ -10,11 +10,33 @@ from dotenv import load_dotenv
 from together import Together
 import openai
 import json
 # Load environment variables (for local development)
 load_dotenv()
-# Process retrieval text to highlight relevant parts - IMPROVED FORMATTING
 def process_retrieval_text(retrieval_text, user_input):
     """
     Process the retrieval text by identifying proper document boundaries
@@ -45,17 +67,22 @@ def process_retrieval_text(retrieval_text, user_input):
                 content_match = re.search(r'Content:\s*(.*)', doc_info, re.DOTALL)
                 content = content_match.group(1).strip() if content_match else "No content available"
-                # Format with clear section headers
                 formatted_html = f"""
                 <div class='doc-section'>
-                    <strong>Evidence Document {i+1}</strong>
-                    <br>
-                    <strong>Document Title:</strong> {doc_name}
-                    <br>
-                    <strong>Section:</strong> Page {page}
-                    <br>
-                    <strong>Content:</strong>
-                    <div class='doc-content'>{content}</div>
                 </div>
                 """
                 chunks.append(formatted_html)
@@ -86,8 +113,12 @@ def process_retrieval_text(retrieval_text, user_input):
             chunks.append(current_chunk)
         # Format each chunk with better section styling
-        chunks = [f"<div class='doc-section'><div class='doc-title'>Evidence Document {i+1}</div><div class='doc-content'>{chunk.strip()}</div></div>"
-                  for i, chunk in enumerate(chunks)]
     # Extract keywords from user input (longer than 3 chars)
     keywords = re.findall(r'\b\w{4,}\b', user_input.lower())
@@ -106,7 +137,85 @@ def process_retrieval_text(retrieval_text, user_input):
         highlighted_chunks.append(highlighted_chunk)
-    return "<div class='knowledge-sections'>" + "".join(highlighted_chunks) + "</div>"
 # API Keys - hardcoded for convenience
 # Replace these with your actual API keys
@@ -388,6 +497,55 @@ h1, h2, h3, h4, h5, h6, p, span, div, button, input, textarea, label {
     box-shadow: 0 5px 15px rgba(0,0,0,0.2);
     transform: translateY(-2px);
 }
 """
 # Contextual API class - UPDATED WITH NEW MODEL ID
@@ -437,6 +595,7 @@ class ContextualAPIUtils:
         return response_content, response_time, retrieval_text, error_message
 # Contextual AI rating - UPDATED PROMPT FORMAT
 def get_contextual_rating(contextual_api, user_input):
     rating_prompt = f"Content to rate: {user_input}"
     response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
@@ -450,7 +609,38 @@ def get_contextual_rating(contextual_api, user_input):
     elif "caution" in response_text.lower() or "warning" in response_text.lower():
         safety_level = "warning"
-    return response_text, retrieval_text, safety_level
 # LlamaGuard rating
 LLAMA_HARM_CATEGORIES = {
@@ -516,7 +706,6 @@ def get_openai_moderation(openai_client, user_input):
         return f"Safety Status: Error\nError: {str(e)}", "unsafe"
 # Perspective API
 def get_perspective_rating(api_key, user_input):
     url = 'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze'
     params = {'key': api_key}
@@ -552,7 +741,6 @@ def get_perspective_rating(api_key, user_input):
     except Exception as e:
         return f"Safety Status: Error\nError: {str(e)}", "unsafe"
 # Updated to only require one input
 def rate_user_input(user_input):
     # Initialize APIs with hardcoded keys
@@ -566,7 +754,6 @@ def rate_user_input(user_input):
     openai_rating, openai_safety = get_openai_moderation(openai_client, user_input)
     perspective_rating, perspective_safety = get_perspective_rating(PERSPECTIVE_API_KEY, user_input)
     # Format responses carefully to avoid random line breaks
     llama_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', llama_rating)
     # Don't add line breaks to contextual rating
@@ -651,257 +838,15 @@ def create_gradio_app():
         border_color_primary="#E0E0E0"
     )
-    # Add CSS for the policy popup and custom button color
-    custom_css = CUSTOM_CSS + """
-    /* Policy preview popup styles */
-    .policy-popup {
-        display: none;
-        position: fixed;
-        top: 0;
-        left: 0;
-        width: 100%;
-        height: 100%;
-        background-color: rgba(0,0,0,0.7);
-        z-index: 1000;
-        justify-content: center;
-        align-items: center;
-    }
-    .policy-popup-content {
-        background-color: white;
-        width: 80%;
-        height: 80%;
-        border-radius: 8px;
-        padding: 20px;
-        position: relative;
-        box-shadow: 0 5px 20px rgba(0,0,0,0.3);
-        display: flex;
-        flex-direction: column;
-    }
-    .policy-popup-header {
-        display: flex;
-        justify-content: space-between;
-        align-items: center;
-        margin-bottom: 15px;
-        border-bottom: 1px solid #eee;
-        padding-bottom: 10px;
-    }
-    .policy-popup-title {
-        font-weight: bold;
-        font-size: 18px;
-    }
-    .policy-popup-close {
-        background-color: #222222;
-        color: white;
-        border: none;
-        border-radius: 4px;
-        padding: 5px 10px;
-        cursor: pointer;
-    }
-    .policy-popup-close:hover {
-        background-color: #000000;
-    }
-    .policy-iframe-container {
-        flex: 1;
-        overflow: hidden;
-    }
-    .policy-iframe {
-        width: 100%;
-        height: 100%;
-        border: 1px solid #eee;
-    }
-    /* Fallback for when PDF can't be displayed in iframe */
-    .policy-fallback {
-        padding: 20px;
-        text-align: center;
-    }
-    .policy-fallback a {
-        display: inline-block;
-        margin-top: 15px;
-        padding: 10px 15px;
-        background-color: #FCA539;
-        color: #000000;
-        text-decoration: none;
-        border-radius: 4px;
-        font-weight: bold;
-    }
-    """
     with gr.Blocks(title="Hate Speech Rating Oracle", theme=theme, css=custom_css) as app:
         # Add loading spinner
         loading_spinner = gr.HTML('<div id="loading-spinner"></div>')
-        # Create a file component to serve the PDF (hidden from UI)
-        pdf_file = gr.File("Policy.pdf", visible=False, label="Policy PDF")
-        # Add policy popup HTML with reliable PDF loading that works on first click
-        policy_popup_html = """
-        <div id="policy-popup" class="policy-popup">
-            <div class="policy-popup-content">
-                <div class="policy-popup-header">
-                    <div class="policy-popup-title">Hate Speech Policy</div>
-                    <button class="policy-popup-close" onclick="document.getElementById('policy-popup').style.display='none';">Close</button>
-                </div>
-                <div class="policy-iframe-container">
-                    <!-- PDF viewer iframe -->
-                    <iframe class="policy-iframe" id="policy-iframe"></iframe>
-                    <!-- Loading indicator while PDF loads -->
-                    <div id="pdf-loading" style="display: flex; justify-content: center; align-items: center; position: absolute; top: 0; left: 0; right: 0; bottom: 0; background: rgba(255,255,255,0.8);">
-                        <div style="text-align: center;">
-                            <div style="width: 50px; height: 50px; border: 5px solid #f3f3f3; border-top: 5px solid #FCA539; border-radius: 50%; margin: 0 auto 15px auto; animation: spin 1s linear infinite;"></div>
-                            <p>Loading policy document...</p>
-                        </div>
-                    </div>
-                    <style>
-                        @keyframes spin {
-                            0% { transform: rotate(0deg); }
-                            100% { transform: rotate(360deg); }
-                        }
-                    </style>
-                    <!-- Fallback content if iframe fails -->
-                    <div class="policy-fallback" id="policy-fallback" style="display:none;">
-                        <p>The policy document couldn't be displayed in the preview.</p>
-                        <a href="#" id="policy-download-link" target="_blank">Download Policy PDF</a>
-                        <button onclick="retryPdfLoad()" style="display: block; margin-top: 15px; padding: 8px 15px; background: #FCA539; border: none; border-radius: 4px; cursor: pointer;">Try Again</button>
-                    </div>
-                </div>
-            </div>
-        </div>
-        <script>
-        // Store the PDF URL globally once found
-        let pdfPolicyUrl = null;
-        let pdfLoadAttempted = false;
-        // Find the PDF URL as soon as possible
-        document.addEventListener('DOMContentLoaded', function() {
-            findPdfUrl();
-        });
-        // Function to find the PDF URL in various locations
-        function findPdfUrl() {
-            // Try to find the file component
-            const fileComponents = document.querySelectorAll("div.file-component");
-            for (const comp of fileComponents) {
-                const downloadBtn = comp.querySelector("a[download]");
-                if (downloadBtn && downloadBtn.href && downloadBtn.href.includes("file=")) {
-                    pdfPolicyUrl = downloadBtn.href;
-                    console.log("Found PDF URL:", pdfPolicyUrl);
-                    return pdfPolicyUrl;
-                }
-            }
-            // Try to find any link with the PDF name
-            const allLinks = document.querySelectorAll("a");
-            for (const link of allLinks) {
-                if (link.href && (link.href.includes("Policy.pdf") ||
-                                  link.href.includes("file=") && link.textContent.includes("Download"))) {
-                    pdfPolicyUrl = link.href;
-                    console.log("Found PDF URL from links:", pdfPolicyUrl);
-                    return pdfPolicyUrl;
-                }
-            }
-            // If we didn't find it, try again in a second
-            setTimeout(findPdfUrl, 1000);
-            return null;
-        }
-        // Function to actually load the PDF
-        function loadPolicyPdf(url) {
-            const iframe = document.getElementById("policy-iframe");
-            const loadingIndicator = document.getElementById("pdf-loading");
-            const fallback = document.getElementById("policy-fallback");
-            const downloadLink = document.getElementById("policy-download-link");
-            // Show loading, hide fallback
-            loadingIndicator.style.display = "flex";
-            fallback.style.display = "none";
-            iframe.style.display = "block";
-            // Set the download link
-            downloadLink.href = url;
-            // Set iframe source to PDF
-            iframe.src = url;
-            // Set a timeout to hide loading regardless of outcome
-            setTimeout(function() {
-                loadingIndicator.style.display = "none";
-            }, 3000);
-            // When iframe loads, hide loading indicator
-            iframe.onload = function() {
-                loadingIndicator.style.display = "none";
-                pdfLoadAttempted = true;
-            };
-            // Handle errors
-            iframe.onerror = function() {
-                loadingIndicator.style.display = "none";
-                fallback.style.display = "block";
-                iframe.style.display = "none";
-                pdfLoadAttempted = true;
-            };
-        }
-        // Function to retry loading the PDF
-        function retryPdfLoad() {
-            if (pdfPolicyUrl) {
-                loadPolicyPdf(pdfPolicyUrl);
-            } else {
-                // Try to find the URL first
-                const url = findPdfUrl();
-                if (url) {
-                    loadPolicyPdf(url);
-                }
-            }
-        }
-        // Function to handle opening the policy popup
-        function openPolicyPopup() {
-            // Display the popup immediately
-            document.getElementById('policy-popup').style.display = 'flex';
-            // If we've never tried to load the PDF, or it failed previously, try loading it
-            if (!pdfLoadAttempted) {
-                if (pdfPolicyUrl) {
-                    // We already have the URL, load the PDF
-                    loadPolicyPdf(pdfPolicyUrl);
-                } else {
-                    // Try to find the URL first
-                    const url = findPdfUrl();
-                    if (url) {
-                        loadPolicyPdf(url);
-                    } else {
-                        // Show fallback if we can't find the URL
-                        document.getElementById("pdf-loading").style.display = "none";
-                        document.getElementById("policy-iframe").style.display = "none";
-                        document.getElementById("policy-fallback").style.display = "block";
-                    }
-                }
-            }
-        }
-        // Make functions globally available
-        window.openPolicyPopup = openPolicyPopup;
-        window.retryPdfLoad = retryPdfLoad;
-        </script>
-        """
-        gr.HTML(policy_popup_html)
         gr.Markdown("# Safety Oracle for Rating Hate Speech [BETA]")
         gr.HTML("""
@@ -950,10 +895,10 @@ def create_gradio_app():
                 rate_btn = gr.Button("Rate Content", elem_classes=["gray-button"], scale=1)
             user_input = gr.Textbox(
-    placeholder="Type content to evaluate here...",
-    lines=6,
-    label=""
-)
             # 🌟 Contextual Safety Oracle
             gr.HTML("""
@@ -1025,18 +970,18 @@ def create_gradio_app():
         # Bind rating button with loading indicator
         rate_btn.click(
-    show_loading,
-    inputs=None,
-    outputs=loading_spinner
-).then(
-    rate_user_input,
-    inputs=[user_input],
-    outputs=[contextual_results, llama_results, openai_results, perspective_results]
-).then(
-    hide_loading,
-    inputs=None,
-    outputs=loading_spinner
-)
     return app

 from together import Together
 import openai
 import json
+from pathlib import Path
 # Load environment variables (for local development)
 load_dotenv()
+# Google Drive Embed URL for PDF
+PDF_EMBED_URL = "https://drive.google.com/file/d/1WZCQpIWfelbxgBr8dNiW2rqVGaDyf-Gi/preview"
+# Custom PDF Viewer Modal (injected HTML)
+PDF_MODAL_HTML = f"""
+<div id='policy-popup' class='pdf-modal' style='display:none;'>
+  <div class='pdf-modal-content'>
+    <button class='close-btn' onclick="document.getElementById('policy-popup').style.display='none'; document.body.style.overflow='auto';">×</button>
+    <div class='pdf-container'>
+      <iframe class='pdf-embed' src='{PDF_EMBED_URL}' allow="autoplay"></iframe>
+    </div>
+  </div>
+</div>
+<script>
+function openPolicyPopup() {{
+  document.getElementById('policy-popup').style.display = 'flex';
+  document.body.style.overflow = 'hidden';
+}}
+</script>
+"""
+# Process retrieval text to highlight relevant parts and format it as styled HTML sections
 def process_retrieval_text(retrieval_text, user_input):
     """
     Process the retrieval text by identifying proper document boundaries
                 content_match = re.search(r'Content:\s*(.*)', doc_info, re.DOTALL)
                 content = content_match.group(1).strip() if content_match else "No content available"
+                # Format with clear section headers and better spacing
                 formatted_html = f"""
                 <div class='doc-section'>
+                    <h3 class="doc-number">Evidence Section {i+1}</h3>
+                    <div class="doc-section-info">
+                        <p><strong>Document Title:</strong> {doc_name}</p>
+                        <div class="subsection-info">
+                            <p><strong>Page Number:</strong> Page {page}</p>
+                        </div>
+                    </div>
+                    <div class="doc-content-container">
+                        <h4>Content:</h4>
+                        <div class='doc-content'>{content}</div>
+                    </div>
                 </div>
                 """
                 chunks.append(formatted_html)
             chunks.append(current_chunk)
         # Format each chunk with better section styling
+        chunks = [f"""
+        <div class='doc-section'>
+            <h3 class="doc-title">Evidence Section {i+1}</h3>
+            <div class='doc-content'>{chunk.strip()}</div>
+        </div>
+        """ for i, chunk in enumerate(chunks)]
     # Extract keywords from user input (longer than 3 chars)
     keywords = re.findall(r'\b\w{4,}\b', user_input.lower())
         highlighted_chunks.append(highlighted_chunk)
+    # Add some additional CSS for the knowledge sections
+    additional_css = """
+    <style>
+    .knowledge-sections {
+        border-radius: 8px;
+        background: #f9f9f9;
+        padding: 15px;
+        font-family: 'All Round Gothic Demi', 'Poppins', sans-serif !important;
+    }
+    .doc-section {
+        margin-bottom: 25px;
+        padding: 15px;
+        background: white;
+        border-radius: 8px;
+        box-shadow: 0 2px 5px rgba(0,0,0,0.05);
+    }
+    .doc-number, .doc-title {
+        margin-top: 0;
+        padding-bottom: 10px;
+        border-bottom: 1px solid #eee;
+        color: #222;
+        font-size: 18px;
+    }
+    .doc-section-info {
+        margin: 10px 0;
+        padding: 8px;
+        background: #f5f5f5;
+        border-radius: 4px;
+    }
+    .doc-section-info p {
+        margin: 5px 0;
+        font-size: 16px;
+    }
+    .subsection-info {
+        margin-left: 15px;
+        padding-left: 10px;
+        border-left: 2px solid #ddd;
+        margin-top: 5px;
+    }
+    .subsection-info p {
+        font-size: 14px;
+        color: #555;
+    }
+    .doc-content-container {
+        margin-top: 15px;
+    }
+    .doc-content-container h4 {
+        margin-bottom: 8px;
+        font-size: 16px;
+    }
+    .doc-content {
+        padding: 12px;
+        background: #f9f9f9;
+        border-left: 3px solid #FCA539;
+        line-height: 1.6;
+        border-radius: 4px;
+        white-space: pre-line;
+    }
+    .highlight-match {
+        background-color: #FCA539;
+        color: black;
+        font-weight: bold;
+        padding: 0 2px;
+        border-radius: 2px;
+    }
+    </style>
+    """
+    return additional_css + "<div class='knowledge-sections'>" + "".join(highlighted_chunks) + "</div>"
 # API Keys - hardcoded for convenience
 # Replace these with your actual API keys
     box-shadow: 0 5px 15px rgba(0,0,0,0.2);
     transform: translateY(-2px);
 }
+/* PDF Modal Styling */
+.pdf-modal {
+    display: none;
+    position: fixed;
+    top: 0;
+    left: 0;
+    width: 100%;
+    height: 100%;
+    background-color: rgba(0,0,0,0.7);
+    z-index: 1000;
+    justify-content: center;
+    align-items: center;
+}
+.pdf-modal-content {
+    background-color: white;
+    width: 80%;
+    height: 80%;
+    border-radius: 8px;
+    padding: 20px;
+    position: relative;
+    box-shadow: 0 5px 20px rgba(0,0,0,0.3);
+}
+.close-btn {
+    position: absolute;
+    right: 15px;
+    top: 15px;
+    font-size: 24px;
+    cursor: pointer;
+    background: #222;
+    color: white;
+    border: none;
+    border-radius: 4px;
+    padding: 5px 15px;
+}
+.pdf-container {
+    width: 100%;
+    height: calc(100% - 40px);
+    margin-top: 40px;
+}
+.pdf-embed {
+    width: 100%;
+    height: 100%;
+    border: 1px solid #eee;
+}
 """
 # Contextual API class - UPDATED WITH NEW MODEL ID
         return response_content, response_time, retrieval_text, error_message
 # Contextual AI rating - UPDATED PROMPT FORMAT
+# Also parses the response into policy rating, policy category, and explanation, formatted as HTML
 def get_contextual_rating(contextual_api, user_input):
     rating_prompt = f"Content to rate: {user_input}"
     response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
     elif "caution" in response_text.lower() or "warning" in response_text.lower():
         safety_level = "warning"
+    # Parse specific parts of the response
+    try:
+        # Look for policy rating
+        rating_match = re.search(r'Policy Rating: (.*?)(?:Policy|$)', response_text, re.IGNORECASE)
+        policy_rating = rating_match.group(1).strip() if rating_match else ""
+        # Look for policy category
+        category_match = re.search(r'Policy Category: (.*?)(?:Explanation|$)', response_text, re.IGNORECASE)
+        policy_category = category_match.group(1).strip() if category_match else ""
+        # Look for explanation
+        explanation_match = re.search(r'Explanation: (.*)', response_text, re.IGNORECASE)
+        explanation = explanation_match.group(1).strip() if explanation_match else response_text
+        # Format with HTML
+        formatted_text = ""
+        if policy_rating:
+            formatted_text += f"<strong>Policy Rating:</strong> {policy_rating}<br><br>"
+        if policy_category:
+            formatted_text += f"<strong>Policy Category:</strong> {policy_category}<br><br>"
+        if explanation:
+            formatted_text += f"<strong>Explanation:</strong> {explanation}"
+        # If we couldn't extract anything, use the original
+        if not formatted_text:
+            formatted_text = response_text
+    except Exception as e:
+        print(f"Error formatting rating: {e}")
+        formatted_text = response_text
+    return formatted_text, retrieval_text, safety_level
 # LlamaGuard rating
 LLAMA_HARM_CATEGORIES = {
         return f"Safety Status: Error\nError: {str(e)}", "unsafe"
 # Perspective API
 def get_perspective_rating(api_key, user_input):
     url = 'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze'
     params = {'key': api_key}
     except Exception as e:
         return f"Safety Status: Error\nError: {str(e)}", "unsafe"
 # Updated to only require one input
 def rate_user_input(user_input):
     # Initialize APIs with hardcoded keys
     openai_rating, openai_safety = get_openai_moderation(openai_client, user_input)
     perspective_rating, perspective_safety = get_perspective_rating(PERSPECTIVE_API_KEY, user_input)
     # Format responses carefully to avoid random line breaks
     llama_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', llama_rating)
     # Don't add line breaks to contextual rating
         border_color_primary="#E0E0E0"
     )
+    # Use the custom CSS with PDF modal styling
+    custom_css = CUSTOM_CSS
     with gr.Blocks(title="Hate Speech Rating Oracle", theme=theme, css=custom_css) as app:
         # Add loading spinner
         loading_spinner = gr.HTML('<div id="loading-spinner"></div>')
+        # Add the PDF modal HTML directly (defined at the top of the file)
+        gr.HTML(PDF_MODAL_HTML)
         gr.Markdown("# Safety Oracle for Rating Hate Speech [BETA]")
         gr.HTML("""
                 rate_btn = gr.Button("Rate Content", elem_classes=["gray-button"], scale=1)
             user_input = gr.Textbox(
+                placeholder="Type content to evaluate here...",
+                lines=6,
+                label=""
+            )
             # 🌟 Contextual Safety Oracle
             gr.HTML("""
         # Bind rating button with loading indicator
         rate_btn.click(
+            show_loading,
+            inputs=None,
+            outputs=loading_spinner
+        ).then(
+            rate_user_input,
+            inputs=[user_input],
+            outputs=[contextual_results, llama_results, openai_results, perspective_results]
+        ).then(
+            hide_loading,
+            inputs=None,
+            outputs=loading_spinner
+        )
     return app

hatespeech/hate_speech_demo.py CHANGED Viewed

@@ -10,11 +10,33 @@ from dotenv import load_dotenv
 from together import Together
 import openai
 import json
 # Load environment variables (for local development)
 load_dotenv()
-# Process retrieval text to highlight relevant parts - IMPROVED FORMATTING
 def process_retrieval_text(retrieval_text, user_input):
     """
     Process the retrieval text by identifying proper document boundaries
@@ -45,17 +67,22 @@ def process_retrieval_text(retrieval_text, user_input):
                 content_match = re.search(r'Content:\s*(.*)', doc_info, re.DOTALL)
                 content = content_match.group(1).strip() if content_match else "No content available"
-                # Format with clear section headers
                 formatted_html = f"""
                 <div class='doc-section'>
-                    <strong>Evidence Document {i+1}</strong>
-                    <br>
-                    <strong>Document Title:</strong> {doc_name}
-                    <br>
-                    <strong>Section:</strong> Page {page}
-                    <br>
-                    <strong>Content:</strong>
-                    <div class='doc-content'>{content}</div>
                 </div>
                 """
                 chunks.append(formatted_html)
@@ -86,8 +113,12 @@ def process_retrieval_text(retrieval_text, user_input):
             chunks.append(current_chunk)
         # Format each chunk with better section styling
-        chunks = [f"<div class='doc-section'><div class='doc-title'>Evidence Document {i+1}</div><div class='doc-content'>{chunk.strip()}</div></div>"
-                  for i, chunk in enumerate(chunks)]
     # Extract keywords from user input (longer than 3 chars)
     keywords = re.findall(r'\b\w{4,}\b', user_input.lower())
@@ -106,7 +137,85 @@ def process_retrieval_text(retrieval_text, user_input):
         highlighted_chunks.append(highlighted_chunk)
-    return "<div class='knowledge-sections'>" + "".join(highlighted_chunks) + "</div>"
 # API Keys - hardcoded for convenience
 # Replace these with your actual API keys
@@ -388,6 +497,55 @@ h1, h2, h3, h4, h5, h6, p, span, div, button, input, textarea, label {
     box-shadow: 0 5px 15px rgba(0,0,0,0.2);
     transform: translateY(-2px);
 }
 """
 # Contextual API class - UPDATED WITH NEW MODEL ID
@@ -437,6 +595,7 @@ class ContextualAPIUtils:
         return response_content, response_time, retrieval_text, error_message
 # Contextual AI rating - UPDATED PROMPT FORMAT
 def get_contextual_rating(contextual_api, user_input):
     rating_prompt = f"Content to rate: {user_input}"
     response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
@@ -450,7 +609,38 @@ def get_contextual_rating(contextual_api, user_input):
     elif "caution" in response_text.lower() or "warning" in response_text.lower():
         safety_level = "warning"
-    return response_text, retrieval_text, safety_level
 # LlamaGuard rating
 LLAMA_HARM_CATEGORIES = {
@@ -516,7 +706,6 @@ def get_openai_moderation(openai_client, user_input):
         return f"Safety Status: Error\nError: {str(e)}", "unsafe"
 # Perspective API
 def get_perspective_rating(api_key, user_input):
     url = 'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze'
     params = {'key': api_key}
@@ -552,7 +741,6 @@ def get_perspective_rating(api_key, user_input):
     except Exception as e:
         return f"Safety Status: Error\nError: {str(e)}", "unsafe"
 # Updated to only require one input
 def rate_user_input(user_input):
     # Initialize APIs with hardcoded keys
@@ -566,7 +754,6 @@ def rate_user_input(user_input):
     openai_rating, openai_safety = get_openai_moderation(openai_client, user_input)
     perspective_rating, perspective_safety = get_perspective_rating(PERSPECTIVE_API_KEY, user_input)
     # Format responses carefully to avoid random line breaks
     llama_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', llama_rating)
     # Don't add line breaks to contextual rating
@@ -651,257 +838,15 @@ def create_gradio_app():
         border_color_primary="#E0E0E0"
     )
-    # Add CSS for the policy popup and custom button color
-    custom_css = CUSTOM_CSS + """
-    /* Policy preview popup styles */
-    .policy-popup {
-        display: none;
-        position: fixed;
-        top: 0;
-        left: 0;
-        width: 100%;
-        height: 100%;
-        background-color: rgba(0,0,0,0.7);
-        z-index: 1000;
-        justify-content: center;
-        align-items: center;
-    }
-    .policy-popup-content {
-        background-color: white;
-        width: 80%;
-        height: 80%;
-        border-radius: 8px;
-        padding: 20px;
-        position: relative;
-        box-shadow: 0 5px 20px rgba(0,0,0,0.3);
-        display: flex;
-        flex-direction: column;
-    }
-    .policy-popup-header {
-        display: flex;
-        justify-content: space-between;
-        align-items: center;
-        margin-bottom: 15px;
-        border-bottom: 1px solid #eee;
-        padding-bottom: 10px;
-    }
-    .policy-popup-title {
-        font-weight: bold;
-        font-size: 18px;
-    }
-    .policy-popup-close {
-        background-color: #222222;
-        color: white;
-        border: none;
-        border-radius: 4px;
-        padding: 5px 10px;
-        cursor: pointer;
-    }
-    .policy-popup-close:hover {
-        background-color: #000000;
-    }
-    .policy-iframe-container {
-        flex: 1;
-        overflow: hidden;
-    }
-    .policy-iframe {
-        width: 100%;
-        height: 100%;
-        border: 1px solid #eee;
-    }
-    /* Fallback for when PDF can't be displayed in iframe */
-    .policy-fallback {
-        padding: 20px;
-        text-align: center;
-    }
-    .policy-fallback a {
-        display: inline-block;
-        margin-top: 15px;
-        padding: 10px 15px;
-        background-color: #FCA539;
-        color: #000000;
-        text-decoration: none;
-        border-radius: 4px;
-        font-weight: bold;
-    }
-    """
     with gr.Blocks(title="Hate Speech Rating Oracle", theme=theme, css=custom_css) as app:
         # Add loading spinner
         loading_spinner = gr.HTML('<div id="loading-spinner"></div>')
-        # Create a file component to serve the PDF (hidden from UI)
-        pdf_file = gr.File("Hate Speech Policy.pdf", visible=False, label="Policy PDF")
-        # Add policy popup HTML with reliable PDF loading that works on first click
-        policy_popup_html = """
-        <div id="policy-popup" class="policy-popup">
-            <div class="policy-popup-content">
-                <div class="policy-popup-header">
-                    <div class="policy-popup-title">Hate Speech Policy</div>
-                    <button class="policy-popup-close" onclick="document.getElementById('policy-popup').style.display='none';">Close</button>
-                </div>
-                <div class="policy-iframe-container">
-                    <!-- PDF viewer iframe -->
-                    <iframe class="policy-iframe" id="policy-iframe"></iframe>
-                    <!-- Loading indicator while PDF loads -->
-                    <div id="pdf-loading" style="display: flex; justify-content: center; align-items: center; position: absolute; top: 0; left: 0; right: 0; bottom: 0; background: rgba(255,255,255,0.8);">
-                        <div style="text-align: center;">
-                            <div style="width: 50px; height: 50px; border: 5px solid #f3f3f3; border-top: 5px solid #FCA539; border-radius: 50%; margin: 0 auto 15px auto; animation: spin 1s linear infinite;"></div>
-                            <p>Loading policy document...</p>
-                        </div>
-                    </div>
-                    <style>
-                        @keyframes spin {
-                            0% { transform: rotate(0deg); }
-                            100% { transform: rotate(360deg); }
-                        }
-                    </style>
-                    <!-- Fallback content if iframe fails -->
-                    <div class="policy-fallback" id="policy-fallback" style="display:none;">
-                        <p>The policy document couldn't be displayed in the preview.</p>
-                        <a href="#" id="policy-download-link" target="_blank">Download Policy PDF</a>
-                        <button onclick="retryPdfLoad()" style="display: block; margin-top: 15px; padding: 8px 15px; background: #FCA539; border: none; border-radius: 4px; cursor: pointer;">Try Again</button>
-                    </div>
-                </div>
-            </div>
-        </div>
-        <script>
-        // Store the PDF URL globally once found
-        let pdfPolicyUrl = null;
-        let pdfLoadAttempted = false;
-        // Find the PDF URL as soon as possible
-        document.addEventListener('DOMContentLoaded', function() {
-            findPdfUrl();
-        });
-        // Function to find the PDF URL in various locations
-        function findPdfUrl() {
-            // Try to find the file component
-            const fileComponents = document.querySelectorAll("div.file-component");
-            for (const comp of fileComponents) {
-                const downloadBtn = comp.querySelector("a[download]");
-                if (downloadBtn && downloadBtn.href && downloadBtn.href.includes("file=")) {
-                    pdfPolicyUrl = downloadBtn.href;
-                    console.log("Found PDF URL:", pdfPolicyUrl);
-                    return pdfPolicyUrl;
-                }
-            }
-            // Try to find any link with the PDF name
-            const allLinks = document.querySelectorAll("a");
-            for (const link of allLinks) {
-                if (link.href && (link.href.includes("Hate%20Speech%20Policy.pdf") ||
-                                  link.href.includes("file=") && link.textContent.includes("Download"))) {
-                    pdfPolicyUrl = link.href;
-                    console.log("Found PDF URL from links:", pdfPolicyUrl);
-                    return pdfPolicyUrl;
-                }
-            }
-            // If we didn't find it, try again in a second
-            setTimeout(findPdfUrl, 1000);
-            return null;
-        }
-        // Function to actually load the PDF
-        function loadPolicyPdf(url) {
-            const iframe = document.getElementById("policy-iframe");
-            const loadingIndicator = document.getElementById("pdf-loading");
-            const fallback = document.getElementById("policy-fallback");
-            const downloadLink = document.getElementById("policy-download-link");
-            // Show loading, hide fallback
-            loadingIndicator.style.display = "flex";
-            fallback.style.display = "none";
-            iframe.style.display = "block";
-            // Set the download link
-            downloadLink.href = url;
-            // Set iframe source to PDF
-            iframe.src = url;
-            // Set a timeout to hide loading regardless of outcome
-            setTimeout(function() {
-                loadingIndicator.style.display = "none";
-            }, 3000);
-            // When iframe loads, hide loading indicator
-            iframe.onload = function() {
-                loadingIndicator.style.display = "none";
-                pdfLoadAttempted = true;
-            };
-            // Handle errors
-            iframe.onerror = function() {
-                loadingIndicator.style.display = "none";
-                fallback.style.display = "block";
-                iframe.style.display = "none";
-                pdfLoadAttempted = true;
-            };
-        }
-        // Function to retry loading the PDF
-        function retryPdfLoad() {
-            if (pdfPolicyUrl) {
-                loadPolicyPdf(pdfPolicyUrl);
-            } else {
-                // Try to find the URL first
-                const url = findPdfUrl();
-                if (url) {
-                    loadPolicyPdf(url);
-                }
-            }
-        }
-        // Function to handle opening the policy popup
-        function openPolicyPopup() {
-            // Display the popup immediately
-            document.getElementById('policy-popup').style.display = 'flex';
-            // If we've never tried to load the PDF, or it failed previously, try loading it
-            if (!pdfLoadAttempted) {
-                if (pdfPolicyUrl) {
-                    // We already have the URL, load the PDF
-                    loadPolicyPdf(pdfPolicyUrl);
-                } else {
-                    // Try to find the URL first
-                    const url = findPdfUrl();
-                    if (url) {
-                        loadPolicyPdf(url);
-                    } else {
-                        // Show fallback if we can't find the URL
-                        document.getElementById("pdf-loading").style.display = "none";
-                        document.getElementById("policy-iframe").style.display = "none";
-                        document.getElementById("policy-fallback").style.display = "block";
-                    }
-                }
-            }
-        }
-        // Make functions globally available
-        window.openPolicyPopup = openPolicyPopup;
-        window.retryPdfLoad = retryPdfLoad;
-        </script>
-        """
-        gr.HTML(policy_popup_html)
         gr.Markdown("# Safety Oracle for Rating Hate Speech [BETA]")
         gr.HTML("""
@@ -950,10 +895,10 @@ def create_gradio_app():
                 rate_btn = gr.Button("Rate Content", elem_classes=["gray-button"], scale=1)
             user_input = gr.Textbox(
-    placeholder="Type content to evaluate here...",
-    lines=6,
-    label=""
-)
             # 🌟 Contextual Safety Oracle
             gr.HTML("""
@@ -1025,18 +970,18 @@ def create_gradio_app():
         # Bind rating button with loading indicator
         rate_btn.click(
-    show_loading,
-    inputs=None,
-    outputs=loading_spinner
-).then(
-    rate_user_input,
-    inputs=[user_input],
-    outputs=[contextual_results, llama_results, openai_results, perspective_results]
-).then(
-    hide_loading,
-    inputs=None,
-    outputs=loading_spinner
-)
     return app

 from together import Together
 import openai
 import json
+from pathlib import Path
 # Load environment variables (for local development)
 load_dotenv()
+# Google Drive preview URL that the policy PDF modal loads in its iframe
+PDF_EMBED_URL = "https://drive.google.com/file/d/1WZCQpIWfelbxgBr8dNiW2rqVGaDyf-Gi/preview"
+# Policy PDF viewer modal, rendered through gr.HTML. NOTE(review): confirm the inline <script> runs when injected this way; if not, openPolicyPopup is never defined
+PDF_MODAL_HTML = f"""
+<div id='policy-popup' class='pdf-modal' style='display:none;'>
+  <div class='pdf-modal-content'>
+    <button class='close-btn' onclick="document.getElementById('policy-popup').style.display='none'; document.body.style.overflow='auto';">×</button>
+    <div class='pdf-container'>
+      <iframe class='pdf-embed' src='{PDF_EMBED_URL}' allow="autoplay"></iframe>
+    </div>
+  </div>
+</div>
+<script>
+function openPolicyPopup() {{
+  document.getElementById('policy-popup').style.display = 'flex';
+  document.body.style.overflow = 'hidden';
+}}
+</script>
+"""
+# Process retrieval text into styled HTML evidence sections with keyword highlighting
 def process_retrieval_text(retrieval_text, user_input):
     """
     Process the retrieval text by identifying proper document boundaries
                 content_match = re.search(r'Content:\s*(.*)', doc_info, re.DOTALL)
                 content = content_match.group(1).strip() if content_match else "No content available"
+                # Format with clear section headers and better spacing
                 formatted_html = f"""
                 <div class='doc-section'>
+                    <h3 class="doc-number">Evidence Section {i+1}</h3>
+                    <div class="doc-section-info">
+                        <p><strong>Document Title:</strong> {doc_name}</p>
+                        <div class="subsection-info">
+                            <p><strong>Page Number:</strong> Page {page}</p>
+                        </div>
+                    </div>
+                    <div class="doc-content-container">
+                        <h4>Content:</h4>
+                        <div class='doc-content'>{content}</div>
+                    </div>
                 </div>
                 """
                 chunks.append(formatted_html)
             chunks.append(current_chunk)
         # Format each chunk with better section styling
+        chunks = [f"""
+        <div class='doc-section'>
+            <h3 class="doc-title">Evidence Section {i+1}</h3>
+            <div class='doc-content'>{chunk.strip()}</div>
+        </div>
+        """ for i, chunk in enumerate(chunks)]
     # Extract keywords from user input (longer than 3 chars)
     keywords = re.findall(r'\b\w{4,}\b', user_input.lower())
         highlighted_chunks.append(highlighted_chunk)
+    # Add some additional CSS for the knowledge sections
+    additional_css = """
+    <style>
+    .knowledge-sections {
+        border-radius: 8px;
+        background: #f9f9f9;
+        padding: 15px;
+        font-family: 'All Round Gothic Demi', 'Poppins', sans-serif !important;
+    }
+    .doc-section {
+        margin-bottom: 25px;
+        padding: 15px;
+        background: white;
+        border-radius: 8px;
+        box-shadow: 0 2px 5px rgba(0,0,0,0.05);
+    }
+    .doc-number, .doc-title {
+        margin-top: 0;
+        padding-bottom: 10px;
+        border-bottom: 1px solid #eee;
+        color: #222;
+        font-size: 18px;
+    }
+    .doc-section-info {
+        margin: 10px 0;
+        padding: 8px;
+        background: #f5f5f5;
+        border-radius: 4px;
+    }
+    .doc-section-info p {
+        margin: 5px 0;
+        font-size: 16px;
+    }
+    .subsection-info {
+        margin-left: 15px;
+        padding-left: 10px;
+        border-left: 2px solid #ddd;
+        margin-top: 5px;
+    }
+    .subsection-info p {
+        font-size: 14px;
+        color: #555;
+    }
+    .doc-content-container {
+        margin-top: 15px;
+    }
+    .doc-content-container h4 {
+        margin-bottom: 8px;
+        font-size: 16px;
+    }
+    .doc-content {
+        padding: 12px;
+        background: #f9f9f9;
+        border-left: 3px solid #FCA539;
+        line-height: 1.6;
+        border-radius: 4px;
+        white-space: pre-line;
+    }
+    .highlight-match {
+        background-color: #FCA539;
+        color: black;
+        font-weight: bold;
+        padding: 0 2px;
+        border-radius: 2px;
+    }
+    </style>
+    """
+    return additional_css + "<div class='knowledge-sections'>" + "".join(highlighted_chunks) + "</div>"
 # API Keys - hardcoded for convenience
 # Replace these with your actual API keys
     box-shadow: 0 5px 15px rgba(0,0,0,0.2);
     transform: translateY(-2px);
 }
+/* PDF Modal Styling */
+.pdf-modal {
+    display: none;
+    position: fixed;
+    top: 0;
+    left: 0;
+    width: 100%;
+    height: 100%;
+    background-color: rgba(0,0,0,0.7);
+    z-index: 1000;
+    justify-content: center;
+    align-items: center;
+}
+.pdf-modal-content {
+    background-color: white;
+    width: 80%;
+    height: 80%;
+    border-radius: 8px;
+    padding: 20px;
+    position: relative;
+    box-shadow: 0 5px 20px rgba(0,0,0,0.3);
+}
+.close-btn {
+    position: absolute;
+    right: 15px;
+    top: 15px;
+    font-size: 24px;
+    cursor: pointer;
+    background: #222;
+    color: white;
+    border: none;
+    border-radius: 4px;
+    padding: 5px 15px;
+}
+.pdf-container {
+    width: 100%;
+    height: calc(100% - 40px);
+    margin-top: 40px;
+}
+.pdf-embed {
+    width: 100%;
+    height: 100%;
+    border: 1px solid #eee;
+}
 """
 # Contextual API class - UPDATED WITH NEW MODEL ID
         return response_content, response_time, retrieval_text, error_message
 # Contextual AI rating - UPDATED PROMPT FORMAT
+# Rate the input with the Contextual API and format the parsed response fields as HTML
 def get_contextual_rating(contextual_api, user_input):
     rating_prompt = f"Content to rate: {user_input}"
     response_text, response_time, retrieval_text, error = contextual_api.chat(rating_prompt)
     elif "caution" in response_text.lower() or "warning" in response_text.lower():
         safety_level = "warning"
+    # Parse specific parts of the response
+    try:
+        # Look for policy rating
+        rating_match = re.search(r'Policy Rating: (.*?)(?:Policy|$)', response_text, re.IGNORECASE)
+        policy_rating = rating_match.group(1).strip() if rating_match else ""
+        # Look for policy category
+        category_match = re.search(r'Policy Category: (.*?)(?:Explanation|$)', response_text, re.IGNORECASE)
+        policy_category = category_match.group(1).strip() if category_match else ""
+        # Look for explanation
+        explanation_match = re.search(r'Explanation: (.*)', response_text, re.IGNORECASE)
+        explanation = explanation_match.group(1).strip() if explanation_match else response_text
+        # Format with HTML
+        formatted_text = ""
+        if policy_rating:
+            formatted_text += f"<strong>Policy Rating:</strong> {policy_rating}<br><br>"
+        if policy_category:
+            formatted_text += f"<strong>Policy Category:</strong> {policy_category}<br><br>"
+        if explanation:
+            formatted_text += f"<strong>Explanation:</strong> {explanation}"
+        # If we couldn't extract anything, use the original
+        if not formatted_text:
+            formatted_text = response_text
+    except Exception as e:
+        print(f"Error formatting rating: {e}")
+        formatted_text = response_text
+    return formatted_text, retrieval_text, safety_level
 # LlamaGuard rating
 LLAMA_HARM_CATEGORIES = {
         return f"Safety Status: Error\nError: {str(e)}", "unsafe"
 # Perspective API
 def get_perspective_rating(api_key, user_input):
     url = 'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze'
     params = {'key': api_key}
     except Exception as e:
         return f"Safety Status: Error\nError: {str(e)}", "unsafe"
 # Updated to only require one input
 def rate_user_input(user_input):
     # Initialize APIs with hardcoded keys
     openai_rating, openai_safety = get_openai_moderation(openai_client, user_input)
     perspective_rating, perspective_safety = get_perspective_rating(PERSPECTIVE_API_KEY, user_input)
     # Format responses carefully to avoid random line breaks
     llama_rating = re.sub(r'\.(?=\s+[A-Z])', '.\n', llama_rating)
     # Don't add line breaks to contextual rating
         border_color_primary="#E0E0E0"
     )
+    # Use the custom CSS with PDF modal styling
+    custom_css = CUSTOM_CSS
     with gr.Blocks(title="Hate Speech Rating Oracle", theme=theme, css=custom_css) as app:
         # Add loading spinner
         loading_spinner = gr.HTML('<div id="loading-spinner"></div>')
+        # Add the PDF modal HTML directly (defined at the top of the file)
+        gr.HTML(PDF_MODAL_HTML)
         gr.Markdown("# Safety Oracle for Rating Hate Speech [BETA]")
         gr.HTML("""
                 rate_btn = gr.Button("Rate Content", elem_classes=["gray-button"], scale=1)
             user_input = gr.Textbox(
+                placeholder="Type content to evaluate here...",
+                lines=6,
+                label=""
+            )
             # 🌟 Contextual Safety Oracle
             gr.HTML("""
         # Bind rating button with loading indicator
         rate_btn.click(
+            show_loading,
+            inputs=None,
+            outputs=loading_spinner
+        ).then(
+            rate_user_input,
+            inputs=[user_input],
+            outputs=[contextual_results, llama_results, openai_results, perspective_results]
+        ).then(
+            hide_loading,
+            inputs=None,
+            outputs=loading_spinner
+        )
     return app

test/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

test/Hate Speech Policy.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0722f7379e6ebfb13fdf4595fbda155ead833f2258246f996b454e76f5b8ce39
+size 487967

test/README.md ADDED Viewed

	@@ -0,0 +1,6 @@

+---
+title: test
+app_file: test.py
+sdk: gradio
+sdk_version: 5.23.3
+---

test/test.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import gradio as gr
+def create_policy_popup_app(pdf_id="1WZCQpIWfelbxgBr8dNiW2rqVGaDyf-Gi"):
+    """Build a Gradio app whose button opens a Google Drive PDF in a modal.
+
+    Args:
+        pdf_id: Google Drive file ID of the PDF to preview. The default is
+            the ID this demo originally hard-coded.
+
+    Returns:
+        The constructed ``gr.Blocks`` app; the caller is responsible for
+        calling ``launch()`` on it.
+    """
+    # Drive serves an embeddable viewer page at /preview for a given file ID.
+    embed_url = f"https://drive.google.com/file/d/{pdf_id}/preview"
+    # Styles for the modal overlay; it starts hidden (display: none) and is
+    # revealed by the button's click handler below.
+    custom_css = """
+    .pdf-modal {
+        display: none;
+        position: fixed;
+        z-index: 1000;
+        left: 0;
+        top: 0;
+        width: 100%;
+        height: 100%;
+        background-color: rgba(0,0,0,0.7);
+        overflow: auto;
+    }
+    .pdf-modal-content {
+        background-color: white;
+        margin: 2% auto;
+        padding: 20px;
+        width: 90%;
+        max-width: 900px;
+        height: 90%;
+        box-shadow: 0 5px 15px rgba(0,0,0,0.3);
+        border-radius: 8px;
+        position: relative;
+    }
+    .close-btn {
+        position: absolute;
+        right: 15px;
+        top: 15px;
+        font-size: 24px;
+        cursor: pointer;
+        background: #222;
+        color: white;
+        border: none;
+        border-radius: 4px;
+        padding: 5px 15px;
+        z-index: 1001;
+    }
+    .pdf-container {
+        width: 100%;
+        height: calc(100% - 40px);
+        margin-top: 40px;
+    }
+    .pdf-embed {
+        width: 100%;
+        height: 100%;
+        border: 1px solid #ddd;
+    }
+    """
+    # The close button hides the modal and restores page scrolling through an
+    # inline onclick attribute, so it needs no separate script.
+    modal_html = f"""
+    <div id="pdfModal" class="pdf-modal">
+        <div class="pdf-modal-content">
+            <button class="close-btn" onclick="document.getElementById('pdfModal').style.display='none';document.body.style.overflow='auto';">×</button>
+            <div class="pdf-container">
+                <iframe class="pdf-embed" src="{embed_url}" type="application/pdf"></iframe>
+            </div>
+        </div>
+    </div>
+    """
+    with gr.Blocks(css=custom_css) as app:
+        gr.Markdown("## View Hate Speech Policy")
+        gr.HTML(modal_html)
+        open_button = gr.Button("📄 Open Policy Document")
+        # Opening the modal is handled entirely by the `js` snippet in the
+        # browser, so no Python callback is registered (fn=None) instead of
+        # a do-nothing lambda.
+        open_button.click(
+            fn=None,
+            inputs=None,
+            outputs=None,
+            js="""
+                () => {
+                    const modal = document.getElementById('pdfModal');
+                    if (modal) {
+                        modal.style.display = 'block';
+                        document.body.style.overflow = 'hidden';
+                    }
+                }
+            """
+        )
+    return app
+if __name__ == "__main__":  # launch the demo only when run as a script, not on import
+    app = create_policy_popup_app()
+    app.launch()