raannakasturi committed on
Commit
48a89a4
·
verified ·
1 Parent(s): 1542721

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -80
app.py CHANGED
@@ -1,81 +1,81 @@
1
- import json
2
- import os
3
- import pdf2doi
4
- import gradio as gr
5
- import requests
6
- import html
7
-
8
- def download_pdf(url):
9
- file_path = f"{url.split('/')[-1]}.pdf"
10
- response = requests.get(url)
11
- with open(file_path, 'wb') as file:
12
- file.write(response.content)
13
- return file_path
14
-
15
- def get_doi(pdf_path):
16
- pdf2doi.config.set('verbose', False)
17
- results = pdf2doi.pdf2doi(pdf_path)
18
- validation_info = json.loads(results['validation_info'])
19
- doi = validation_info.get('DOI', None)
20
- title = validation_info.get('title', None)
21
- url = validation_info.get('URL', None)
22
- return doi, title, url
23
-
24
- def get_paper_data(doi):
25
- api_url = f"https://api.citeas.org/product/{doi}"
26
- response = requests.get(api_url)
27
- return response.json()
28
-
29
- def main(pdf_url):
30
- pdf_path = download_pdf(pdf_url)
31
- doi, title, url = get_doi(pdf_path)
32
- if doi is None:
33
- return json.dumps({"error": "DOI not found"}, indent=4)
34
- paper_data = get_paper_data(doi)
35
- if not paper_data:
36
- return json.dumps({"error": "Paper data not found"}, indent=4)
37
- citation_text = None
38
- for citation in paper_data.get('citations', []):
39
- if citation.get('style_shortname') == 'apa':
40
- citation_text = citation.get('citation')
41
- break
42
- title = title or paper_data.get('name')
43
- url = url or f"https://doi.org/{doi}"
44
- if citation_text:
45
- citation_text = citation_text.encode('utf-8').decode('utf-8')
46
- citation_text = html.unescape(
47
- citation_text.replace("<i>", "").replace("</i>", "").replace("\u2026", "...").replace("\n", " ")
48
- )
49
- else:
50
- citation_text = "Citation not found"
51
- data = {
52
- "doi": doi,
53
- "title": title if title else "Title not found",
54
- "citation_text": citation_text,
55
- "url": url
56
- }
57
- os.remove(pdf_path)
58
- return json.dumps(data, ensure_ascii=False, indent=4)
59
-
60
-
61
- theme = gr.themes.Soft(
62
- primary_hue="purple",
63
- secondary_hue="cyan",
64
- neutral_hue="slate",
65
- font=[
66
- gr.themes.GoogleFont("Syne"),
67
- gr.themes.GoogleFont("Poppins"),
68
- gr.themes.GoogleFont("Poppins"),
69
- gr.themes.GoogleFont("Poppins")
70
- ],
71
- )
72
-
73
- with gr.Blocks(theme=theme) as app:
74
- with gr.Row():
75
- pdf_path = gr.Textbox(lines=1, label="PDF URL", placeholder="Enter the URL of the PDF")
76
- doi_data = gr.Textbox(lines=7, label="DOI Data", placeholder="DOI data will be displayed here", show_copy_button=True)
77
- get_data = gr.Button(value="Get DOI Data", variant='primary')
78
-
79
- get_data.click(main, inputs=[pdf_path], outputs=[doi_data], api_name="getDOIData")
80
-
81
  app.queue(default_concurrency_limit=250).launch()
 
1
+ import json
2
+ import os
3
+ import pdf2doi
4
+ import gradio as gr
5
+ import requests
6
+ import html
7
+
8
+ def download_pdf(url):
9
+ file_path = f"{url.split('/')[-1]}.pdf"
10
+ response = requests.get(url)
11
+ with open(file_path, 'wb') as file:
12
+ file.write(response.content)
13
+ return file_path
14
+
15
+ def get_doi(pdf_path):
16
+ pdf2doi.config.set('verbose', False)
17
+ results = pdf2doi.pdf2doi(pdf_path)
18
+ validation_info = json.loads(str(results['validation_info']))
19
+ doi = validation_info.get('DOI', None)
20
+ title = validation_info.get('title', None)
21
+ url = validation_info.get('URL', None)
22
+ return doi, title, url
23
+
24
+ def get_paper_data(doi):
25
+ api_url = f"https://api.citeas.org/product/{doi}"
26
+ response = requests.get(api_url)
27
+ return response.json()
28
+
29
+ def main(pdf_url):
30
+ pdf_path = download_pdf(pdf_url)
31
+ doi, title, url = get_doi(pdf_path)
32
+ if doi is None:
33
+ return json.dumps({"error": "DOI not found"}, indent=4)
34
+ paper_data = get_paper_data(doi)
35
+ if not paper_data:
36
+ return json.dumps({"error": "Paper data not found"}, indent=4)
37
+ citation_text = None
38
+ for citation in paper_data.get('citations', []):
39
+ if citation.get('style_shortname') == 'apa':
40
+ citation_text = citation.get('citation')
41
+ break
42
+ title = title or paper_data.get('name')
43
+ url = url or f"https://doi.org/{doi}"
44
+ if citation_text:
45
+ citation_text = citation_text.encode('utf-8').decode('utf-8')
46
+ citation_text = html.unescape(
47
+ citation_text.replace("<i>", "").replace("</i>", "").replace("\u2026", "...").replace("\n", " ")
48
+ )
49
+ else:
50
+ citation_text = "Citation not found"
51
+ data = {
52
+ "doi": doi,
53
+ "title": title if title else "Title not found",
54
+ "citation_text": citation_text,
55
+ "url": url
56
+ }
57
+ os.remove(pdf_path)
58
+ return json.dumps(data, ensure_ascii=False, indent=4)
59
+
60
+
61
+ theme = gr.themes.Soft(
62
+ primary_hue="purple",
63
+ secondary_hue="cyan",
64
+ neutral_hue="slate",
65
+ font=[
66
+ gr.themes.GoogleFont("Syne"),
67
+ gr.themes.GoogleFont("Poppins"),
68
+ gr.themes.GoogleFont("Poppins"),
69
+ gr.themes.GoogleFont("Poppins")
70
+ ],
71
+ )
72
+
73
+ with gr.Blocks(theme=theme) as app:
74
+ with gr.Row():
75
+ pdf_path = gr.Textbox(lines=1, label="PDF URL", placeholder="Enter the URL of the PDF")
76
+ doi_data = gr.Textbox(lines=7, label="DOI Data", placeholder="DOI data will be displayed here", show_copy_button=True)
77
+ get_data = gr.Button(value="Get DOI Data", variant='primary')
78
+
79
+ get_data.click(main, inputs=[pdf_path], outputs=[doi_data], api_name="getDOIData")
80
+
81
  app.queue(default_concurrency_limit=250).launch()