Spaces:

taesiri
/

ClaudeReadsArxiv

Paused

App Files Files Community

taesiri committed on Oct 27, 2023

Commit

efed853

1 Parent(s): 7e8de53

update

Browse files

Files changed (1) hide show

app.py +53 -10

app.py CHANGED Viewed

@@ -1,22 +1,25 @@
 import os
 import re
-from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
 import gradio as gr
 import requests
-import arxiv
 from arxiv_latex_extractor import get_paper_content
-import requests
 from coreservice import app
-from fastapi.staticfiles import StaticFiles
 LEADING_PROMPT = "Read the following paper:"
 def replace_texttt(text):
     return re.sub(r"\\texttt\{(.*?)\}", r"*\1*", text)
@@ -102,24 +105,66 @@ class ContextualQA:
         self.client = None
 def load_context(paper_id):
     global LEADING_PROMPT
-    # First, try to get the paper from Hugging Face
     latex_source = get_paper_from_huggingface(paper_id)
-    # If not found, use arxiv_latex_extractor
     if not latex_source:
         try:
             latex_source = get_paper_content(paper_id)
         except Exception as e:
             return None, [(f"Error loading paper with id {paper_id}: {e}",)]
     client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
     qa_model = ContextualQA(client, model="claude-2.0")
     context = f"{LEADING_PROMPT}\n{latex_source}"
     qa_model.load_text(context)
     title, abstract = get_paper_info(paper_id)
     title = replace_texttt(title)
     abstract = replace_texttt(abstract)
@@ -229,7 +274,5 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     btn_clear.click(clear_context, outputs=[chatbot])
-# demo.launch()
 app.mount("/js", StaticFiles(directory="js"), name="js")
 gr.mount_gradio_app(app, demo, path="/")

 import os
 import re
+import tempfile
+import os
+import arxiv
 import gradio as gr
 import requests
+from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic
 from arxiv_latex_extractor import get_paper_content
+from fastapi.staticfiles import StaticFiles
+from huggingface_hub import HfApi
+hf_api = HfApi()
 from coreservice import app
+hf_api = HfApi()
 LEADING_PROMPT = "Read the following paper:"
 def replace_texttt(text):
     return re.sub(r"\\texttt\{(.*?)\}", r"*\1*", text)
         self.client = None
+def clean_paper_id(raw_id):
+    # Normalize user input into a bare paper ID string.
+    #
+    # raw_id: string typed by the user; either an ID or text containing an
+    #         "arxiv.org/abs/<id>" URL.
+    # Returns the captured ID when such a URL is found; otherwise the
+    # stripped input with a single trailing "." removed.
+    #
+    # NOTE(review): only "/abs/" URLs are recognized; any other input
+    # (e.g. an "arxiv.org/pdf/..." link) falls through to the else branch
+    # and is returned essentially unchanged.
+    # NOTE(review): the capture class is [\w\.], so it stops at "-" or "/"
+    # and will keep a trailing "." that directly follows the ID in a URL,
+    # since the trailing-dot cleanup runs only in the non-URL branch.
+    # Remove any leading/trailing whitespace
+    cleaned_id = raw_id.strip()
+    # Extract paper ID from ArXiv URL if present (search, not full match, so
+    # scheme/host prefixes and trailing text around the URL are tolerated)
+    match = re.search(r"arxiv\.org\/abs\/([\w\.]+)", cleaned_id)
+    if match:
+        # Group 1 is the run of word characters and dots after "/abs/"
+        cleaned_id = match.group(1)
+    else:
+        # Remove trailing dot if present (e.g. an ID pasted from the end of
+        # a sentence)
+        cleaned_id = re.sub(r"\.$", "", cleaned_id)
+    return cleaned_id
 def load_context(paper_id):
     global LEADING_PROMPT
+    # Clean the paper_id to remove spaces or extract ID from URL
+    paper_id = clean_paper_id(paper_id)
+    # Check if the paper is already on Hugging Face
     latex_source = get_paper_from_huggingface(paper_id)
+    paper_downloaded = False
+    # If not found on Hugging Face, use arxiv_latex_extractor
     if not latex_source:
         try:
             latex_source = get_paper_content(paper_id)
+            paper_downloaded = True
         except Exception as e:
             return None, [(f"Error loading paper with id {paper_id}: {e}",)]
+    if paper_downloaded:
+        # Save the LaTeX content to a temporary file
+        with tempfile.NamedTemporaryFile(
+            mode="w+", suffix=".tex", delete=False
+        ) as tmp_file:
+            tmp_file.write(latex_source)
+            temp_file_path = tmp_file.name
+        # Upload the paper to Hugging Face
+        try:
+            if os.path.getsize(temp_file_path) > 1:
+                hf_api.upload_file(
+                    path_or_fileobj=temp_file_path,
+                    path_in_repo=f"papers/{paper_id}.tex",
+                    repo_id="taesiri/arxiv_db",
+                    repo_type="dataset",
+                )
+        except Exception as e:
+            print(f"Error uploading paper with id {paper_id}: {e}")
+    # Initialize the Anthropic client and QA model
     client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
     qa_model = ContextualQA(client, model="claude-2.0")
     context = f"{LEADING_PROMPT}\n{latex_source}"
     qa_model.load_text(context)
+    # Get the paper's title and abstract
     title, abstract = get_paper_info(paper_id)
     title = replace_texttt(title)
     abstract = replace_texttt(abstract)
     btn_clear.click(clear_context, outputs=[chatbot])
 app.mount("/js", StaticFiles(directory="js"), name="js")
 gr.mount_gradio_app(app, demo, path="/")