Data_Analyst_Assistant / fine-tuning /nl_sql_finetune_dataset.jsonl
DakshChaudhary's picture
Fine-tuned model + RAG + completed NL-to-SQL Agent
a343d94
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"How many regions are there in the database?"},{"role":"assistant","content":"SELECT COUNT(*) FROM regions;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List all region names."},{"role":"assistant","content":"SELECT region_name FROM regions;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Retrieve region_ids and names for regions starting with 'West'."},{"role":"assistant","content":"SELECT region_id, region_name FROM regions WHERE region_name LIKE 'West%';"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Show me region names in alphabetical order."},{"role":"assistant","content":"SELECT region_name FROM regions ORDER BY region_name ASC;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Count regions whose names contain 'East'."},{"role":"assistant","content":"SELECT COUNT(*) FROM regions WHERE region_name LIKE '%East%';"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Get the maximum region_id."},{"role":"assistant","content":"SELECT MAX(region_id) FROM regions;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Select all regions with region_id > 3."},{"role":"assistant","content":"SELECT * FROM regions WHERE region_id > 3;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Retrieve the second highest region_id."},{"role":"assistant","content":"SELECT region_id FROM regions ORDER BY region_id DESC LIMIT 1 OFFSET 1;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List the first three regions by ID."},{"role":"assistant","content":"SELECT * FROM regions ORDER BY region_id ASC LIMIT 3;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List region names in reverse alphabetical order."},{"role":"assistant","content":"SELECT region_name FROM regions ORDER BY region_name DESC;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Which regions have even IDs?"},{"role":"assistant","content":"SELECT * FROM regions WHERE region_id % 2 = 0;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"How many products are available?"},{"role":"assistant","content":"SELECT COUNT(*) FROM products;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List all product names and categories."},{"role":"assistant","content":"SELECT product_name, category FROM products;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Find products priced above 100."},{"role":"assistant","content":"SELECT * FROM products WHERE price > 100;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Average product price per category."},{"role":"assistant","content":"SELECT category, AVG(price) FROM products GROUP BY category;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Retrieve the most expensive product."},{"role":"assistant","content":"SELECT * FROM products ORDER BY price DESC LIMIT 1;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Count how many 'Electronics' products exist."},{"role":"assistant","content":"SELECT COUNT(*) FROM products WHERE category = 'Electronics';"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Show products priced between 50 and 150."},{"role":"assistant","content":"SELECT * FROM products WHERE price BETWEEN 50 AND 150;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List categories and product counts."},{"role":"assistant","content":"SELECT category, COUNT(*) FROM products GROUP BY category;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Get the total sum of all product prices."},{"role":"assistant","content":"SELECT SUM(price) FROM products;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Retrieve the cheapest product name."},{"role":"assistant","content":"SELECT product_name FROM products ORDER BY price ASC LIMIT 1;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Which products end with 'Pro'?"},{"role":"assistant","content":"SELECT product_name FROM products WHERE product_name LIKE '%Pro';"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"How many customers are registered?"},{"role":"assistant","content":"SELECT COUNT(*) FROM customers;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List all customer names and emails."},{"role":"assistant","content":"SELECT customer_name, email FROM customers;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Find customers in region 2."},{"role":"assistant","content":"SELECT * FROM customers WHERE region_id = 2;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Show customers with an email address."},{"role":"assistant","content":"SELECT customer_name FROM customers WHERE email IS NOT NULL;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Count customers per region."},{"role":"assistant","content":"SELECT region_id, COUNT(*) FROM customers GROUP BY region_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List customers whose name starts with 'A'."},{"role":"assistant","content":"SELECT * FROM customers WHERE customer_name LIKE 'A%';"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Retrieve emails sorted alphabetically."},{"role":"assistant","content":"SELECT email FROM customers WHERE email IS NOT NULL ORDER BY email ASC;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Get the most recent customer by ID."},{"role":"assistant","content":"SELECT * FROM customers ORDER BY customer_id DESC LIMIT 1;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Which customers have no region assigned?"},{"role":"assistant","content":"SELECT * FROM customers WHERE region_id IS NULL;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"How many sales have occurred?"},{"role":"assistant","content":"SELECT COUNT(*) FROM sales;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List sale_id, sale_date, and amount for all sales."},{"role":"assistant","content":"SELECT sale_id, sale_date, amount FROM sales;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Find sales between '2025-01-01' and '2025-06-01'."},{"role":"assistant","content":"SELECT * FROM sales WHERE sale_date BETWEEN '2025-01-01' AND '2025-06-01';"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Sum total sales amount by region."},{"role":"assistant","content":"SELECT region_id, SUM(amount) FROM sales GROUP BY region_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Average quantity sold per product."},{"role":"assistant","content":"SELECT product_id, AVG(quantity) FROM sales GROUP BY product_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Retrieve the highest single sale amount."},{"role":"assistant","content":"SELECT MAX(amount) FROM sales;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Count sales for customer 5."},{"role":"assistant","content":"SELECT COUNT(*) FROM sales WHERE customer_id = 5;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Show sales of product 3 ordered by date descending."},{"role":"assistant","content":"SELECT * FROM sales WHERE product_id = 3 ORDER BY sale_date DESC;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Get the total amount for all sales."},{"role":"assistant","content":"SELECT SUM(amount) FROM sales;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"What was the quantity sold on the earliest sale?"},{"role":"assistant","content":"SELECT quantity FROM sales ORDER BY sale_date ASC LIMIT 1;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"For each customer, show total sales amount."},{"role":"assistant","content":"SELECT customer_id, SUM(amount) FROM sales GROUP BY customer_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List top 5 customers by total amount spent."},{"role":"assistant","content":"SELECT customer_id, SUM(amount) as total_spent FROM sales GROUP BY customer_id ORDER BY total_spent DESC LIMIT 5;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Join customers and sales to get names and sale amounts."},{"role":"assistant","content":"SELECT c.customer_name, s.amount FROM customers c JOIN sales s ON c.customer_id = s.customer_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Find the most popular product by sales count."},{"role":"assistant","content":"SELECT product_id, COUNT(*) as cnt FROM sales GROUP BY product_id ORDER BY cnt DESC LIMIT 1;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Show average sale amount per region for May 2025."},{"role":"assistant","content":"SELECT region_id, AVG(amount) FROM sales WHERE sale_date LIKE '2025-05-%' GROUP BY region_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List the sale_date and customer_name for every sale."},{"role":"assistant","content":"SELECT s.sale_date, c.customer_name FROM sales s JOIN customers c ON s.customer_id = c.customer_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Show customer_name, product_name, quantity, and amount for each sale."},{"role":"assistant","content":"SELECT c.customer_name, p.product_name, s.quantity, s.amount FROM sales s JOIN customers c ON s.customer_id = c.customer_id JOIN products p ON s.product_id = p.product_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Calculate total sales amount per product category."},{"role":"assistant","content":"SELECT p.category, SUM(s.amount) AS total_revenue FROM sales s JOIN products p ON s.product_id = p.product_id GROUP BY p.category;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Get the top 3 product categories by total revenue."},{"role":"assistant","content":"SELECT p.category, SUM(s.amount) AS revenue FROM sales s JOIN products p ON s.product_id = p.product_id GROUP BY p.category ORDER BY revenue DESC LIMIT 3;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List customer_name and region_name for all sales over 100."},{"role":"assistant","content":"SELECT c.customer_name, r.region_name FROM sales s JOIN customers c ON s.customer_id = c.customer_id JOIN regions r ON s.region_id = r.region_id WHERE s.amount > 100;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Find all products that have never been sold."},{"role":"assistant","content":"SELECT p.product_name FROM products p LEFT JOIN sales s ON p.product_id = s.product_id WHERE s.sale_id IS NULL;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List regions that had no sales in June 2025."},{"role":"assistant","content":"SELECT r.region_name FROM regions r LEFT JOIN sales s ON r.region_id = s.region_id AND s.sale_date LIKE '2025-06-%' WHERE s.sale_id IS NULL;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Show a running total of amount over sale_date."},{"role":"assistant","content":"SELECT sale_date, amount, SUM(amount) OVER (ORDER BY sale_date) AS running_total FROM sales;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Rank customers by total spent."},{"role":"assistant","content":"SELECT customer_id, SUM(amount) AS total_spent, RANK() OVER (ORDER BY SUM(amount) DESC) AS rank FROM sales GROUP BY customer_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Calculate the average sale amount per calendar month."},{"role":"assistant","content":"SELECT SUBSTR(sale_date,1,7) AS month, AVG(amount) FROM sales GROUP BY month;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Compute each region’s percentage of total sales amount."},{"role":"assistant","content":"SELECT region_id, SUM(amount)*100.0/(SELECT SUM(amount) FROM sales) AS pct_of_total FROM sales GROUP BY region_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"For each product, show its price and the average sale amount."},{"role":"assistant","content":"SELECT p.product_name, p.price, (SELECT AVG(amount) FROM sales WHERE product_id = p.product_id) AS avg_sale FROM products p;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List customers who spent more than the average customer."},{"role":"assistant","content":"SELECT DISTINCT c.customer_name FROM customers c WHERE (SELECT SUM(amount) FROM sales WHERE customer_id = c.customer_id) > (SELECT AVG(total) FROM (SELECT SUM(amount) AS total FROM sales GROUP BY customer_id));"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Use a CTE to show monthly total sales."},{"role":"assistant","content":"WITH monthly_sales AS (SELECT SUBSTR(sale_date,1,7) AS month, SUM(amount) AS total FROM sales GROUP BY month) SELECT * FROM monthly_sales;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Use a CTE to find the top 5 products by revenue."},{"role":"assistant","content":"WITH prod_rev AS (SELECT product_id, SUM(amount) AS revenue FROM sales GROUP BY product_id) SELECT p.product_name, pr.revenue FROM prod_rev pr JOIN products p ON pr.product_id = p.product_id ORDER BY pr.revenue DESC LIMIT 5;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Find each customer’s first sale date."},{"role":"assistant","content":"SELECT customer_id, MIN(sale_date) AS first_sale FROM sales GROUP BY customer_id;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Identify repeat customers (more than one sale)."},{"role":"assistant","content":"SELECT customer_id FROM sales GROUP BY customer_id HAVING COUNT(*) > 1;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Find customers who purchased both product 1 and product 2."},{"role":"assistant","content":"SELECT DISTINCT s1.customer_id FROM sales s1 JOIN sales s2 ON s1.customer_id = s2.customer_id WHERE s1.product_id = 1 AND s2.product_id = 2;"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"List products sold in every region."},{"role":"assistant","content":"SELECT product_id FROM sales GROUP BY product_id HAVING COUNT(DISTINCT region_id) = (SELECT COUNT(*) FROM regions);"}]}
{"messages":[{"role":"system","content":"You are a helpful assistant that translates natural language queries into executable SQL queries. Only respond with the SQL query."},{"role":"user","content":"Determine which region has the highest average sale amount."},{"role":"assistant","content":"SELECT region_id FROM sales GROUP BY region_id ORDER BY AVG(amount) DESC LIMIT 1;"}]}