# -*- coding: utf-8 -*-
"""saivv_protoype"""
# Import necessary libraries
import cv2 # For image processing with OpenCV
import pytesseract # For Optical Character Recognition (OCR) on receipts
import gradio as gr # For creating the Gradio interface
import speech_recognition as sr # For voice recognition
# Model setup (using transformers)
import torch
import transformers # Added import for transformers
from transformers import AutoTokenizer
from langchain_community.llms import HuggingFacePipeline # Updated import for HuggingFacePipeline
# Initialize device and model config
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # Fall back to CPU when CUDA is unavailable
print(f"Using device: {device}")
# Load GPT-2 model instead of zephyr-7b-beta
model_id = 'gpt2' # Use GPT-2, a smaller and CPU-friendly model
# Generation length is configured on the pipeline below, not in the model config
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    trust_remote_code=True,
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    device_map='auto',  # Resolves to CPU on a CUDA-less machine; requires the accelerate package
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
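# GPT-2 ships without a dedicated padding token; reusing EOS is the standard
# fix and silences the pipeline's pad_token_id warning during generation.
tokenizer.pad_token = tokenizer.eos_token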
# Create the query pipeline for GPT-2 model
query_pipeline = transformers.pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float32,  # float32 for CPU compatibility
    max_new_tokens=500,  # Keep well under GPT-2's 1,024-token context window
)
llm = HuggingFacePipeline(pipeline=query_pipeline)
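# Optional sanity check -- commented out so the script doesn't run an extra
# generation on startup; uncomment to confirm the wrapper answers plain text:
# print(llm.invoke("In one sentence, what is budgeting?"))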
# User profile setup
user_profile = """
User Profile:
Age: 40, Gender: Non-Binary, Marital Status: Divorced, Income Level: Medium ($2733),
Education: PhD, Occupation: Manager, Residential Status: Mortgaged, Dependents: 1,
Debt: $27664, Debt-to-Income Ratio: 10.12
Spending:
Groceries: $496.0, Supplies: $454.42, Food: $341.69, Electronics: $351.92,
Home Shopping: $235.68, Others: $253.45
"""
question = "Based on this data, can I buy a Lamborghini?"
prompt = f"{user_profile}\n\nQuestion: {question}"
# Get response from the LLM (invoke() replaces the deprecated __call__ interface)
response = llm.invoke(prompt)
# Display result
from IPython.display import display, Markdown
def colorize_text(text):
    """Wrap known section labels (e.g. 'Answer:') in colored Markdown for notebook display."""
    for word, color in zip(["Reasoning", "Question", "Answer", "Total time"], ["blue", "red", "green", "magenta"]):
        text = text.replace(f"{word}:", f"\n\n**<font color='{color}'>{word}:</font>**")
    return text
full_response = f"**Question:** {question}\n\n**Answer:** {response}"
display(Markdown(colorize_text(full_response)))
# Receipt scanning with Tesseract OCR (requires a local Tesseract install)
def scan_receipt(image):
    """Extract text from a receipt image captured through Gradio (RGB numpy array)."""
    try:
        img_orig = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        options = "--psm 6"  # Page segmentation mode 6: assume a single uniform block of text
        text = pytesseract.image_to_string(img_orig, config=options)
        return text.strip()
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Voice expense recording via the SpeechRecognition library
def record_expense(audio_path):
    """Transcribe a recorded expense with Google's free speech API (needs internet)."""
    if audio_path is None:
        return "No audio recorded."
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        transcription = recognizer.recognize_google(audio)
        return transcription
    except sr.UnknownValueError:
        return "Audio not clear, please try again."
    except sr.RequestError:
        return "Could not request results; check internet connection."
# Recommendation chatbot function
def recommendation_chatbot(user_input):
    """Answer a free-form financial question against the stored user profile."""
    prompt = f"{user_profile}\n\nQuestion: {user_input}"
    response = llm.invoke(prompt)
    return str(response)  # Gradio expects a string
# Wrapper function to handle receipt scanning and voice recording
def process_inputs(image, audio):
    """Run OCR and voice transcription on whichever inputs were provided."""
    receipt_data = scan_receipt(image) if image is not None else "No receipt image provided."
    expense_data = record_expense(audio)
    return receipt_data, expense_data
# Gradio Interface setup using Blocks
with gr.Blocks() as iface:
    # Centered title and description
    gr.Markdown("<h1 style='text-align: center; font-size: 2.5em; color: #2B2D42;'>SAIVV</h1>")
    gr.Markdown("<p style='text-align: center; font-size: 1.2em; color: #8D99AE;'>An AI-powered fintech solution for tracking expenses and managing finances.</p>")
    # Profile and Spending cards
    with gr.Row():
        gr.Markdown("""
        <div style="padding: 20px; border: 1px solid #2B2D42; border-radius: 10px; background-color: #EDF2F4;">
            <h2 style="color: #2B2D42; margin-bottom: 10px;">User Profile: Mohamed</h2>
            <ul style="font-size: 1.1em; color: #2B2D42; list-style-type: none; padding: 0;">
                <li><strong>Age:</strong> 40</li>
                <li><strong>Gender:</strong> Male</li>
                <li><strong>Marital Status:</strong> Married</li>
                <li><strong>Income Level:</strong> Medium ($2733)</li>
                <li><strong>Education:</strong> PhD</li>
                <li><strong>Occupation:</strong> Manager</li>
                <li><strong>Residential Status:</strong> Mortgaged</li>
                <li><strong>Dependents:</strong> 1</li>
                <li><strong>Debt:</strong> $27,664</li>
                <li><strong>Debt-to-Income Ratio:</strong> 10.12%</li>
            </ul>
        </div>
        """, elem_id="user-profile")
        gr.Markdown("""
        <div style="padding: 20px; border: 1px solid #2B2D42; border-radius: 10px; background-color: #EDF2F4;">
            <h2 style="color: #2B2D42; margin-bottom: 10px;">Spending</h2>
            <ul style="font-size: 1.1em; color: #2B2D42; list-style-type: none; padding: 0;">
                <li><strong>Groceries:</strong> $496.00</li>
                <li><strong>Supplies:</strong> $454.42</li>
                <li><strong>Food:</strong> $341.69</li>
                <li><strong>Electronics:</strong> $351.92</li>
                <li><strong>Home Shopping:</strong> $235.68</li>
                <li><strong>Others:</strong> $253.45</li>
            </ul>
        </div>
        """, elem_id="spending-info")
    # Input components for receipt scanning and expense recording
    with gr.Row():
        receipt_input = gr.Image(type="numpy", label="Capture Receipt")
        audio_input = gr.Audio(type="filepath", label="Record Expense by Voice")
    # Outputs for receipt and expense recording
    receipt_output = gr.Textbox(label="Receipt Data")
    expense_output = gr.Textbox(label="Recorded Expense Data")
    # Button to process inputs
    submit_btn = gr.Button("Submit")
    submit_btn.click(process_inputs, inputs=[receipt_input, audio_input], outputs=[receipt_output, expense_output])
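    # Sketch: expose the recommendation_chatbot defined above in the UI.
    # This block is an illustrative addition (component names are assumptions);
    # the original script defines the chatbot function but never wires it in.
    with gr.Row():
        chat_input = gr.Textbox(label="Ask a financial question")
        chat_output = gr.Textbox(label="Recommendation")
    chat_btn = gr.Button("Ask")
    chat_btn.click(recommendation_chatbot, inputs=chat_input, outputs=chat_output)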
# Launch the interface
iface.launch(debug=True)