# Hugging Face Space scrape artifact — original header: daniel-dona / "Update app.py" / commit 8418b3c (verified)
import os
import json
import gradio
import numpy
import pandas
import pyparseit
import rdflib
import SPARQLWrapper
# Default SPARQL endpoint for executing generated queries; overridable via env var.
SPARQL_ENDPOINT = os.getenv("SPARQL_ENDPOINT", "http://192.168.10.174:8890/sparql")


def _read_text(path):
    """Read a UTF-8 text file, closing the handle deterministically."""
    with open(path, encoding="utf-8") as fh:
        return fh.read()


# Prompt templates for the three LLM tasks (validation, plain generation,
# generation augmented with semantic-search results) plus the system prompt.
prompt_valid = _read_text("./templates/prompt_valid.txt")
prompt_sparql = _read_text("./templates/prompt_sparql.txt")
prompt_sparql_semantic = _read_text("./templates/prompt_sparql_semantic.txt")
system = _read_text("./templates/system1.txt")

# Example queries for the UI, in the [[query], ...] shape gradio.Examples expects.
examples_data = [[e["query"]] for e in json.loads(_read_text("./examples.json"))]

# Selectable models: display name -> Hugging Face repo id.
model_options = [
    {"name": "qwen3-0.6b-no-finetuned", "repo": "unsloth/qwen3-0.6b-bnb-4bit"},
    {"name": "sft-model-0.6b-v0.2", "repo": "daniel-dona/sparql-model-era-lora-128-qwen3-0.6b"},
    {"name": "sft-model-1.7b-v0.2", "repo": "daniel-dona/sparql-model-era-lora-128-qwen3-1.7b"},
    {"name": "sft-model-4b-v0.1", "repo": "daniel-dona/sparql-model-era-lora-128-qwen3-4b"},
    {"name": "grpo1-model-0.6b", "repo": "daniel-dona/sparql-model-era-lora-128-qwen3-0.6b-grpo1"},
    #{"name": "grpo2-model-0.6b", "repo": "daniel-dona/sparql-model-era-lora-128-qwen3-0.6b-grpo2"}
]
from src.SemanticSearch import SemanticSearch
from src.Inference import model_completion
# Semantic entity extractor backed by a prebuilt vector database shipped with
# the app; the KG-loading/DB-building steps are left commented out because the
# DB is already built offline.
extractor = SemanticSearch()
#extractor.load_ne_from_kg(SPARQL_ENDPOINT)
#extractor.build_vector_db()
extractor.load_vector_db()
def sparql_json_to_df(sparql_json):
    """Convert a SPARQL JSON results document into a pandas DataFrame.

    Columns follow the order of ``head.vars``; variables unbound in a given
    binding become missing values. Returns an empty DataFrame when the
    document has no ``results.bindings`` section.
    """
    if 'results' not in sparql_json or 'bindings' not in sparql_json['results']:
        return pandas.DataFrame()
    # Preserve the endpoint's declared column order.
    cols = list(sparql_json['head']['vars'])
    bindings = sparql_json['results']['bindings']
    if not bindings:
        return pandas.DataFrame(columns=cols)
    data_rows = [
        [row.get(col, {}).get('value') for col in cols]
        for row in bindings
    ]
    df = pandas.DataFrame(data_rows, columns=cols)
    # Normalise unbound (None) cells to NaN, then infer the best dtypes.
    df = df.fillna(value=numpy.nan).convert_dtypes()
    return df
def execute_sparql(sparql, sparql_endpoint, timeout=60):
    """Run *sparql* against *sparql_endpoint* and return a visible gradio.DataFrame.

    Raises gradio.Error (shown as a toast in the UI) when the query fails for
    any reason; the root cause is printed to the logs and chained on the error.
    """
    agent = SPARQLWrapper.SPARQLWrapper(endpoint=sparql_endpoint)
    try:
        agent.setTimeout(timeout)
        # Server-side timeout hint in milliseconds, kept just under the client timeout.
        agent.addExtraURITag("timeout", str((timeout - 1) * 1000))
        agent.setQuery(sparql)
        agent.setReturnFormat(SPARQLWrapper.JSON)
        results = agent.queryAndConvert()
        df = sparql_json_to_df(results)
        return gradio.DataFrame(df, visible=True)
    except Exception as e:
        # Keep the root cause in the logs instead of discarding it silently.
        print("SPARQL execution failed:", e)
        raise gradio.Error("Unable to fetch results from the Triplestore", duration=5) from e
def check_valid_nlq(message, model_name, model_temperature, model_thinking):
    """Ask the model whether *message* is answerable over the ERA KG.

    Returns the parsed ``valid`` flag from the last markdown code block of the
    generation, or False when no parseable block comes back.
    """
    gradio.Info("Validating user request...", duration=5)
    print("NLQ:", message)
    prompt = prompt_valid.replace("%nlq", message)
    print("Prompt:", prompt)
    chat = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]
    generation = model_completion(chat, model_name, model_temperature, model_thinking)
    print("Generated:", generation)
    valid = False
    blocks = pyparseit.parse_markdown_string(generation)
    if blocks:
        try:
            # Last block wins: the model may emit several code fences.
            valid = json.loads(blocks[-1].content)["valid"]
        except Exception as e:
            print(e)
    return valid
def semantic_search(message):
    """Look up KG entities relevant to *message* via embedding search with reranking."""
    gradio.Info("Performing semantic search...", duration=5)
    matches = extractor.extract(message, n_results=5, rerank=True)
    print(matches)
    return matches
def generate_sparql(message, model_name, model_temperature, semantic_results, model_thinking):
    """Generate a SPARQL query for *message* and return it as a visible gradio.Code.

    When *semantic_results* is provided, the semantic-augmented prompt template
    is used and the results are embedded in the prompt as JSON.
    """
    gradio.Info("Generating SPARQL query...", duration=5)
    print("NLQ:", message)
    if semantic_results is None:
        prompt = prompt_sparql.replace("%nlq", message)
    else:
        prompt = prompt_sparql_semantic.replace("%nlq", message).replace("%semantic", json.dumps(semantic_results))
    print("Prompt:", prompt)
    chat = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]
    generation = model_completion(chat, model_name, model_temperature, model_thinking)
    print("Generated:", generation)
    # Placeholder shown when the generation contains no code block.
    sparql = "```sparql\n[code]```"
    blocks = pyparseit.parse_markdown_string(generation)
    if blocks:
        try:
            # Last fenced block holds the query.
            sparql = blocks[-1].content
        except Exception as e:
            print(e)
    return gradio.Code(sparql, visible=True)
def process_query(message, model_name, model_temperature, model_semantic, model_thinking, nlq_validate):
    """Full pipeline: optional NLQ prevalidation, optional semantic entity
    lookup, then SPARQL generation.

    Raises gradio.Error when prevalidation rejects the request.
    """
    # Guard clause: reject requests the model deems unanswerable over the ERA KG.
    if nlq_validate and not check_valid_nlq(message, model_name, model_temperature, model_thinking):
        raise gradio.Error("Your request cannot be answered by generating a SPARQL query, try a different one that applies to the ERA Knowledge Graph.", duration=5)
    semantic_results = semantic_search(message) if model_semantic else None
    return generate_sparql(message, model_name, model_temperature, semantic_results, model_thinking)
def clear_query():
    """Hide both the generated-SPARQL code block and the results table."""
    hidden_code = gradio.Code(visible=False)
    hidden_results = gradio.DataFrame(visible=False)
    return hidden_code, hidden_results
# UI definition and event wiring.
with gradio.Blocks() as demo:

    # Options (created with render=False, rendered later inside the sidebar).
    model_selector = gradio.Dropdown([(item["name"], item["repo"]) for item in model_options], value="daniel-dona/sparql-model-era-lora-128-qwen3-4b", render=False, interactive=True, label="Model", info="Base model provided as reference, SFT model is trained on generated datasets, GRPO model is reinforced on top of SFT.")
    model_temperature = gradio.Slider(0, 1, render=False, step=0.1, value=0, label="Temperature", info="Adjust model variability; with a value of 0, the model uses greedy decoding.")
    sparql_endpoint = gradio.Textbox(value=SPARQL_ENDPOINT, render=False, interactive=True, label="SPARQL endpoint", info="SPARQL endpoint to send the generated queries to fetch results.")
    model_semantic = gradio.Checkbox(value=False, render=False, interactive=True, label="Enable semantic entity lookup", info="Use embeddings and reranking model to retrieve relevant objects.")
    model_thinking = gradio.Checkbox(value=False, render=False, interactive=True, label="Enable thinking", info="Use thinking mode in the Jinja chat template, mostly for GRPO experiments.")
    nlq_validate = gradio.Checkbox(value=True, render=False, interactive=True, label="Enable query prevalidation", info="Use the model first to determine if the user query is applicable to the ERA KG")

    # Main blocks: the query input plus initially-hidden SPARQL/result outputs.
    query = gradio.Textbox(render=False, label="Query", placeholder="Write a query or select an example above", submit_btn="Send query", show_copy_button=True)
    sparql_block = gradio.Code(render=False, visible=False, label="Generated SPARQL", interactive=False, language="sql")
    sparql_results = gradio.DataFrame(render=False, visible=False, label="Data result", value=None, headers=None, interactive=False, wrap=True, show_row_numbers=True, show_copy_button=True)

    # Layout: examples accordion, query box, SPARQL code, results table, sidebar.
    with gradio.Row():
        with gradio.Accordion(label="Examples", open=False):
            gradio.Examples(label="Query examples", examples=examples_data, example_labels=[e[0] for e in examples_data], cache_examples=False, inputs=[query], examples_per_page=50)

    with gradio.Row():
        query.render()

    with gradio.Row():
        sparql_block.render()

    with gradio.Row():
        sparql_results.render()

    with gradio.Sidebar(label="Inference options", open=True, width=480):
        model_selector.render()
        model_temperature.render()
        sparql_endpoint.render()
        model_semantic.render()
        model_thinking.render()
        nlq_validate.render()

    # Submitting the query generates SPARQL; any change to the SPARQL block
    # (including the generated result) triggers execution against the endpoint.
    query.submit(process_query, inputs=[query, model_selector, model_temperature, model_semantic, model_thinking, nlq_validate], outputs=[sparql_block])
    #query.change(clear_query, inputs=[], outputs=[sparql_block, sparql_results])
    sparql_block.change(execute_sparql, inputs=[sparql_block, sparql_endpoint], outputs=[sparql_results])

if __name__ == "__main__":
    demo.launch()