Spaces:

raannakasturi
/

ScientryPDFDataAPI

Sleeping

App Files Files Community

raannakasturi committed on Feb 15

Commit

48a89a4

verified ·

1 Parent(s): 1542721

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -80

app.py CHANGED Viewed

@@ -1,81 +1,81 @@
-import json
-import os
-import pdf2doi
-import gradio as gr
-import requests
-import html
-def download_pdf(url):
-    file_path = f"{url.split('/')[-1]}.pdf"
-    response = requests.get(url)
-    with open(file_path, 'wb') as file:
-        file.write(response.content)
-    return file_path
-def get_doi(pdf_path):
-    pdf2doi.config.set('verbose', False)
-    results = pdf2doi.pdf2doi(pdf_path)
-    validation_info = json.loads(results['validation_info'])
-    doi = validation_info.get('DOI', None)
-    title = validation_info.get('title', None)
-    url = validation_info.get('URL', None)
-    return doi, title, url
-def get_paper_data(doi):
-    api_url = f"https://api.citeas.org/product/{doi}"
-    response = requests.get(api_url)
-    return response.json()
-def main(pdf_url):
-    pdf_path = download_pdf(pdf_url)
-    doi, title, url = get_doi(pdf_path)
-    if doi is None:
-        return json.dumps({"error": "DOI not found"}, indent=4)
-    paper_data = get_paper_data(doi)
-    if not paper_data:
-        return json.dumps({"error": "Paper data not found"}, indent=4)
-    citation_text = None
-    for citation in paper_data.get('citations', []):
-        if citation.get('style_shortname') == 'apa':
-            citation_text = citation.get('citation')
-            break
-    title = title or paper_data.get('name')
-    url = url or f"https://doi.org/{doi}"
-    if citation_text:
-        citation_text = citation_text.encode('utf-8').decode('utf-8')
-        citation_text = html.unescape(
-            citation_text.replace("<i>", "").replace("</i>", "").replace("\u2026", "...").replace("\n", " ")
-        )
-    else:
-        citation_text = "Citation not found"
-    data = {
-        "doi": doi,
-        "title": title if title else "Title not found",
-        "citation_text": citation_text,
-        "url": url
-    }
-    os.remove(pdf_path)
-    return json.dumps(data, ensure_ascii=False, indent=4)
-theme = gr.themes.Soft(
-    primary_hue="purple",
-    secondary_hue="cyan",
-    neutral_hue="slate",
-    font=[
-        gr.themes.GoogleFont("Syne"),
-        gr.themes.GoogleFont("Poppins"),
-        gr.themes.GoogleFont("Poppins"),
-        gr.themes.GoogleFont("Poppins")
-    ],
-)
-with gr.Blocks(theme=theme) as app:
-    with gr.Row():
-        pdf_path = gr.Textbox(lines=1, label="PDF URL", placeholder="Enter the URL of the PDF")
-        doi_data = gr.Textbox(lines=7, label="DOI Data", placeholder="DOI data will be displayed here", show_copy_button=True)
-    get_data = gr.Button(value="Get DOI Data", variant='primary')
-    get_data.click(main, inputs=[pdf_path], outputs=[doi_data], api_name="getDOIData")
 app.queue(default_concurrency_limit=250).launch()

+import json
+import os
+import pdf2doi
+import gradio as gr
+import requests
+import html
+def download_pdf(url):
+    file_path = f"{url.split('/')[-1]}.pdf"
+    response = requests.get(url)
+    with open(file_path, 'wb') as file:
+        file.write(response.content)
+    return file_path
+def get_doi(pdf_path):
+    pdf2doi.config.set('verbose', False)
+    results = pdf2doi.pdf2doi(pdf_path)
+    validation_info = json.loads(str(results['validation_info']))
+    doi = validation_info.get('DOI', None)
+    title = validation_info.get('title', None)
+    url = validation_info.get('URL', None)
+    return doi, title, url
+def get_paper_data(doi):
+    api_url = f"https://api.citeas.org/product/{doi}"
+    response = requests.get(api_url)
+    return response.json()
+def main(pdf_url):
+    pdf_path = download_pdf(pdf_url)
+    doi, title, url = get_doi(pdf_path)
+    if doi is None:
+        return json.dumps({"error": "DOI not found"}, indent=4)
+    paper_data = get_paper_data(doi)
+    if not paper_data:
+        return json.dumps({"error": "Paper data not found"}, indent=4)
+    citation_text = None
+    for citation in paper_data.get('citations', []):
+        if citation.get('style_shortname') == 'apa':
+            citation_text = citation.get('citation')
+            break
+    title = title or paper_data.get('name')
+    url = url or f"https://doi.org/{doi}"
+    if citation_text:
+        citation_text = citation_text.encode('utf-8').decode('utf-8')
+        citation_text = html.unescape(
+            citation_text.replace("<i>", "").replace("</i>", "").replace("\u2026", "...").replace("\n", " ")
+        )
+    else:
+        citation_text = "Citation not found"
+    data = {
+        "doi": doi,
+        "title": title if title else "Title not found",
+        "citation_text": citation_text,
+        "url": url
+    }
+    os.remove(pdf_path)
+    return json.dumps(data, ensure_ascii=False, indent=4)
+theme = gr.themes.Soft(
+    primary_hue="purple",
+    secondary_hue="cyan",
+    neutral_hue="slate",
+    font=[
+        gr.themes.GoogleFont("Syne"),
+        gr.themes.GoogleFont("Poppins"),
+        gr.themes.GoogleFont("Poppins"),
+        gr.themes.GoogleFont("Poppins")
+    ],
+)
+with gr.Blocks(theme=theme) as app:
+    with gr.Row():
+        pdf_path = gr.Textbox(lines=1, label="PDF URL", placeholder="Enter the URL of the PDF")
+        doi_data = gr.Textbox(lines=7, label="DOI Data", placeholder="DOI data will be displayed here", show_copy_button=True)
+    get_data = gr.Button(value="Get DOI Data", variant='primary')
+    get_data.click(main, inputs=[pdf_path], outputs=[doi_data], api_name="getDOIData")
 app.queue(default_concurrency_limit=250).launch()