"""Gradio app: download a PDF from a URL, extract its DOI, and fetch a citation."""

import html
import json
import os
import tempfile
from urllib.parse import urlsplit

import gradio as gr
import pdf2doi
import requests

# Upper bound, in seconds, for every outbound HTTP request so that a stalled
# remote server cannot block a worker indefinitely.
REQUEST_TIMEOUT = 30


def download_pdf(url):
    """Download the document at *url* and return the path of the saved file.

    The file is stored under its URL-derived name inside a freshly created
    temporary directory, so concurrent requests for the same URL never share
    (or delete) each other's file.

    Args:
        url: Address of the PDF to fetch.

    Returns:
        Path of the downloaded file. The caller is responsible for removing it
        (see ``_remove_download``).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    # Keep the last path segment of the URL as the file name.
    # NOTE(review): pdf2doi appears to use the file name as one of its
    # identification hints -- confirm before switching to random names.
    name = os.path.basename(urlsplit(url).path) or "download"
    if not name.lower().endswith(".pdf"):
        name += ".pdf"

    file_path = os.path.join(tempfile.mkdtemp(prefix="pdf2doi_"), name)
    with open(file_path, 'wb') as file:
        file.write(response.content)
    return file_path


def _remove_download(pdf_path):
    """Best-effort removal of a file written by ``download_pdf`` and its directory."""
    try:
        os.remove(pdf_path)
    except OSError as e:
        print(f"Could not remove {pdf_path}: {e}")
        return
    try:
        # os.rmdir only succeeds on an empty directory, so this can never
        # delete anything other than the now-empty per-download directory.
        os.rmdir(os.path.dirname(pdf_path))
    except OSError:
        pass


def get_doi(pdf_path):
    """Extract DOI metadata from the PDF at *pdf_path* using pdf2doi.

    Args:
        pdf_path: Path of a local PDF file.

    Returns:
        A ``(doi, title, url)`` tuple read from pdf2doi's ``validation_info``.
        Individual entries are ``None`` when missing; all three are ``None``
        when no usable validation info is available.
    """
    pdf2doi.config.set('verbose', False)
    results = pdf2doi.pdf2doi(pdf_path)

    # Guard against a non-dict result before calling .get() on it.
    # NOTE(review): presumably pdf2doi returns None for unreadable files -- confirm.
    if not isinstance(results, dict):
        print("pdf2doi did not return a result")
        return None, None, None

    validation_info = results.get('validation_info', {})

    # The validation info may arrive as a JSON string; convert it to a dict.
    if isinstance(validation_info, str):
        try:
            validation_info = json.loads(validation_info)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON: {e}")
            return None, None, None

    if not isinstance(validation_info, dict):
        print("Validation info is not a dictionary")
        return None, None, None

    doi = validation_info.get('DOI', None)
    title = validation_info.get('title', None)
    url = validation_info.get('URL', None)
    return doi, title, url


def get_paper_data(doi):
    """Fetch the CiteAs product record for *doi*.

    Args:
        doi: DOI string to look up.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails or the
        response body is not valid JSON.
    """
    api_url = f"https://api.citeas.org/product/{doi}"
    try:
        response = requests.get(api_url, timeout=REQUEST_TIMEOUT)
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on older requests versions.
        print(f"Error fetching paper data: {e}")
        return None


def _clean_citation(citation_text):
    """Normalise a citation string: ellipsis to '...', newlines to spaces, HTML entities decoded."""
    # NOTE(review): the original chain also contained two
    # `.replace("", "")` calls, which are no-ops. They look like tag-stripping
    # calls whose literal arguments were lost; restore them here if the
    # intended tags are known.
    return html.unescape(
        citation_text.replace("\u2026", "...").replace("\n", " ")
    )


def main(pdf_url):
    """Resolve the PDF at *pdf_url* to its DOI, title, APA citation, and URL.

    Args:
        pdf_url: Address of the PDF entered by the user.

    Returns:
        A JSON string. On success it holds ``doi``, ``title``,
        ``citation_text`` and ``url``; on failure it holds a single ``error``
        message.
    """
    try:
        pdf_path = download_pdf(pdf_url)
    except requests.RequestException as e:
        return json.dumps({"error": f"Failed to download PDF: {e}"}, indent=4)

    # The file is only needed for DOI extraction; remove it on every path,
    # including when extraction raises.
    try:
        doi, title, url = get_doi(pdf_path)
    finally:
        _remove_download(pdf_path)

    if doi is None:
        return json.dumps({"error": "DOI not found"}, indent=4)

    paper_data = get_paper_data(doi)
    if not paper_data or not isinstance(paper_data, dict):
        return json.dumps({"error": "Paper data not found"}, indent=4)

    # Take the first APA-style citation, if any ("or []" tolerates a null list).
    citation_text = None
    for citation in paper_data.get('citations') or []:
        if citation.get('style_shortname') == 'apa':
            citation_text = citation.get('citation')
            break

    title = title or paper_data.get('name')
    url = url or f"https://doi.org/{doi}"

    if citation_text:
        citation_text = _clean_citation(citation_text)
    else:
        citation_text = "Citation not found"

    data = {
        "doi": doi,
        "title": title if title else "Title not found",
        "citation_text": citation_text,
        "url": url
    }
    return json.dumps(data, ensure_ascii=False, indent=4)


theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont("Syne"),
        gr.themes.GoogleFont("Poppins"),
        gr.themes.GoogleFont("Poppins"),
        gr.themes.GoogleFont("Poppins")
    ],
)

# NOTE(review): the original indentation of this layout was lost; the nesting
# below (both textboxes in one row, button underneath) is a best guess.
with gr.Blocks(theme=theme) as app:
    with gr.Row():
        pdf_path = gr.Textbox(lines=1, label="PDF URL", placeholder="Enter the URL of the PDF")
        doi_data = gr.Textbox(lines=7, label="DOI Data", placeholder="DOI data will be displayed here", show_copy_button=True)
    get_data = gr.Button(value="Get DOI Data", variant='primary')
    get_data.click(main, inputs=[pdf_path], outputs=[doi_data], api_name="getDOIData")

app.queue(default_concurrency_limit=250).launch()