# Spaces:
# Runtime error
# Runtime error
"""
Quantum Physics Problem Generator
Shlomo Kashani

Description:
------------
This module is part of the QuantumLLMInstruct system, designed to generate and solve quantum physics problems
using advanced Large Language Models (LLMs). It utilizes a multi-stage pipeline for problem generation,
solution generation, and database management.

Core Functionalities:
---------------------
1. **Problem Generation**:
   - Generates quantum physics problems in LaTeX format using LLMs.
   - Supports domain-specific problem generation across multiple quantum fields.
2. **Solution Generation**:
   - Provides step-by-step LaTeX solutions for the generated problems using a second LLM.
3. **Data Management**:
   - Stores generated problems and solutions in DuckDB and Parquet files.
   - Enables exporting data in Parquet format for scalability and compatibility.
4. **Gradio Interface**:
   - A user-friendly interface to interact with the system, including problem generation,
     solution generation, and database exploration.
5. **Hugging Face Integration**:
   - Supports visualization and interaction with the dataset on the Hugging Face platform.

Main Components:
----------------
- **initialize_duckdb() / initialize_parquet()**: Initializes the database schema.
- **generate_multiple_problems()**: Generates multiple problems for the selected quantum domains.
- **generate_solutions()**: Solves unsolved problems in the database.
- **export_parquet()**: Exports the database to a Parquet file for external use.

Dependencies:
-------------
- Python 3.7+
- Transformers: `transformers`
- DuckDB: `duckdb`
- Gradio: `gradio`
- Pandas: `pandas`
"""
import gradio as gr | |
from Q_llm_prompts import * | |
from Q_quantum_utils import * | |
# Runs at import time, before the UI below is built. The module docstring
# describes this call as initializing the database schema.
initialize_duckdb()
# Markdown blurb that the app renders through gr.Markdown(description).
# NOTE(review): the "π" glyphs inside the string look like emoji that were
# mis-decoded somewhere upstream — confirm against the original file before
# editing the text; it is left untouched here because it is runtime output.
description = """
This demo showcases **[QuantumLLMInstruct](https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/)**
<img src="https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/resolve/main/qlmmi-detailed-flowchart.jpg" alt="The Pipeline" width="70%" align="center" />
## π Pipeline:
1. **π Problem Generation:** The Qwen model generates a user instruction.
2. **π¬ Solution Generation:** The Qwen model generates a response to this instruction.
"""
# Gradio app
#
# Main application block defining the QuantumLLMInstruct pipeline. It provides
# tabs for viewing datasets, generating problems, generating solutions, and
# exporting the database.
#
# Explanatory notes in this block are `#` comments rather than bare string
# literals: a string statement inside a `with` body is not a docstring, only an
# expression that is evaluated and thrown away.
with gr.Blocks() as app:
    gr.Markdown("# QuantumLLMInstruct: A 500k LLM Instruction-Tuning Dataset with Problem-Solution Pairs for Quantum Computing.")

    # Tab: embeds the Hugging Face dataset viewer for QuantumLLMInstruct.
    with gr.Tab("View HF DB"):
        gr.Markdown("### Generated Dataset")
        gr.HTML("""<iframe
src="https://huggingface.co/datasets/BoltzmannEntropy/QuantumLLMInstruct/embed/viewer"
frameborder="0"
width="100%"
height="560px"
></iframe>""")

    # Tab: stage 1 — pick a model, choose domains and a problem count, then
    # generate problems.
    with gr.Tab("LLM stage 1 model: Generate Problems"):
        model_selector = gr.Dropdown(
            choices=model_options,
            value=model_options[0],
            label="Select Qwen Model",
        )
        reload_button = gr.Button("Reload Model")
        reload_status = gr.Textbox(label="Model Status", interactive=False)
        # NOTE(review): the leading "π" in the button labels looks like a
        # mis-decoded emoji — confirm the intended glyph; kept as-is here.
        generate_button = gr.Button("π Generate Instructions For the Pair")
        result_output = gr.Textbox(label="Generated Problems", interactive=False)
        num_pairs = gr.Radio(choices=[1, 5, 50, 200, 2000, 20000, 200000], value=5, label="Number of Problems")
        domain_selector = gr.CheckboxGroup(
            choices=list(quantum_problem_domains.keys()),
            value=list(quantum_problem_domains.keys()),
            label="Select Domain Types",
        )

        reload_button.click(
            reload_model,
            inputs=[model_selector],
            outputs=[reload_status],
        )

        def generate_and_display(num_pairs: int, selected_domains: list) -> str:
            """Generate quantum problems for the selected domains.

            Args:
                num_pairs: Number of problems to generate.
                selected_domains: Domains ticked in the "Select Domain Types"
                    checkbox group.

            Returns:
                A status message shown in the "Generated Problems" textbox.
            """
            # With no domain ticked there is nothing to generate problems for,
            # so say so instead of reporting success.
            if not selected_domains:
                return "No domains selected. Please select at least one domain."
            generate_multiple_problems(num_pairs, selected_domains)
            return "Problems generated successfully."

        generate_button.click(
            generate_and_display,
            inputs=[num_pairs, domain_selector],
            outputs=[result_output],
        )

    # Tab: stage 2 — generate solutions with the selected solution model.
    with gr.Tab("LLM Stage 2: Generate Solutions"):
        generate_solutions_button = gr.Button("π Generate Responses for the Pair")
        solutions_status = gr.Textbox(label="Solution Generation Status", interactive=False)
        solutions_model_selector = gr.Dropdown(
            choices=solutions_model_options,
            # The preferred default is the fifth entry. Clamp the index so a
            # shorter options list falls back to its last entry instead of
            # raising IndexError while the module is being imported.
            value=solutions_model_options[min(4, len(solutions_model_options) - 1)],
            label="Select Solution Model",
        )

        generate_solutions_button.click(
            generate_solutions,
            inputs=[solutions_model_selector],
            outputs=[solutions_status],
        )

    # Tab: HTML views of the data stored in the DuckDB database.
    with gr.Tab("View instruction-pairs DB"):
        summary_output = gr.HTML()
        view_button = gr.Button("View Data")
        db_output_display = gr.HTML()

        # The same button has two click handlers: this one fills the summary
        # panel, the one registered further down fills the detailed table.
        view_button.click(load_summary_from_duckdb, inputs=None, outputs=summary_output)

        def view_db_data() -> str:
            """Render the problems stored in DuckDB as an HTML table.

            Returns:
                The table as an HTML string, or a short HTML message when the
                loaded frame is empty.
            """
            df = load_problems_from_duckdb()
            if df.empty:
                return "<p>No data found in the DuckDB database.</p>"
            # NOTE(review): escape=False emits cell text as raw HTML. If the
            # stored text can contain "<" or ">", it will be parsed as markup —
            # confirm this is intended before switching to escape=True.
            return df.to_html(index=False, escape=False)

        view_button.click(
            view_db_data,
            inputs=None,
            outputs=[db_output_display],
        )

    # Tab: export the DuckDB database to a Parquet file.
    with gr.Tab("Export Parquet"):
        gr.Markdown("### Export DuckDB Data to Parquet Format")
        db_file_input = gr.Textbox(label="Database File Path", value="quantum_problems.duckdb")
        export_button = gr.Button("Export Data")
        export_status = gr.Textbox(label="Export Status", interactive=False)

        export_button.click(
            export_parquet,
            inputs=[db_file_input],
            outputs=[export_status],
        )

    # Project description, rendered inside the Blocks layout after the tabs.
    gr.Markdown(description)

app.launch()