# Offline SOAP-note generation with onnxruntime-genai, streaming tokens to stdout.
import onnxruntime_genai as og
# 'soap5_onnx' is a local model directory (exported ONNX model + genai config).
# NOTE(review): path is resolved relative to the CWD — confirm where this runs from.
model = og.Model('soap5_onnx')
tokenizer = og.Tokenizer(model)
# Incremental detokenizer: decodes one token at a time for live streaming output.
tokenizer_stream = tokenizer.create_stream()
# Decoding configuration shared by both generation attempts
# (kept as an exact match to the original settings).
search_options = dict(
    max_length=4096,   # hard cap on prompt + generated tokens
    temperature=0.1,   # near-greedy sampling for conservative clinical text
    top_p=0.9,
    do_sample=True,
    batch_size=1,
)
soap_note_prompt = """You are an expert medical professor assisting in the creation of medically accurate SOAP summaries.
Please ensure the response follows the structured format: S:, O:, A:, P: without using markdown or special formatting.
Create a Medical SOAP note summary from the dialogue, following these guidelines:\n
S (Subjective): Summarize the patient's reported symptoms, including chief complaint and relevant history.
Rely on the patient's statements as the primary source and ensure standardized terminology.\n
O (Objective): Highlight critical findings such as vital signs, lab results, and imaging, emphasizing important details like the side of the body affected and specific dosages.
Include normal ranges where relevant.\n
A (Assessment): Offer a concise assessment combining subjective and objective data. State the primary diagnosis and any differential diagnoses, noting potential complications and the prognostic outlook.\n
P (Plan): Outline the management plan, covering medication, diet, consultations, and education. Ensure to mention necessary referrals to other specialties and address compliance challenges.\n
Considerations: Compile the report based solely on the transcript provided. Use concise medical jargon and abbreviations for effective doctor communication.\n
Please format the summary in a clean, simple list format without using markdown or bullet points. Use 'S:', 'O:', 'A:', 'P:' directly followed by the text. Avoid any styling or special characters.
TRANSCRIPT: \n"""
text = input("Input: ")
if not text:
print("Error, input cannot be empty")
exit()
# Method 1: wrap the instructions + transcript in the Llama-3 chat template and
# pre-seed the assistant turn with "S: " to force SOAP-style output.
prompt = (
    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n"
    + soap_note_prompt
    + text
    + "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\nS: "
)
input_tokens = tokenizer.encode(prompt)
print(f"Tokens in prompt: {len(input_tokens)}")
# Build the generator with the shared decoding settings and feed it the prompt.
params = og.GeneratorParams(model)
params.set_search_options(**search_options)
generator = og.Generator(model, params)
generator.append_tokens(input_tokens)
print("\nGenerating SOAP note...")
print("S: ", end='', flush=True) # We already have "S: " in the prompt
# Generate the rest of the SOAP note
generated_text = ""
token_count = 0
try:
while not generator.is_done() and token_count < 2000: # Limit to 2000 tokens for safety
generator.generate_next_token()
new_token = generator.get_next_tokens()[0]
decoded = tokenizer_stream.decode(new_token)
# Skip if we're still in the input echo phase
if token_count < 50 and (text[:20] in generated_text + decoded):
token_count += 1
continue
print(decoded, end='', flush=True)
generated_text += decoded
token_count += 1
# Stop if we see end markers
if any(marker in decoded for marker in ["<|eot_id|>", "<|end_of_text|>", "</s>"]):
break
except KeyboardInterrupt:
print("\nInterrupted")
print()
# Fallback (Method 2): if the chat-template attempt produced almost nothing, or
# merely echoed the transcript back, retry with a plain non-chat prompt layout.
method1_failed = len(generated_text.strip()) < 50 or text[:50] in generated_text
if method1_failed:
    print("\n\nMethod 1 didn't work well. Trying alternative method...")
    del generator  # release the first generator before building a replacement
    # Simpler prompt: instructions + transcript + an explicit "S: " seed.
    simple_prompt = f"{soap_note_prompt}{text}\n\nSOAP Note:\nS: "
    input_tokens = tokenizer.encode(simple_prompt)
    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    generator = og.Generator(model, params)
    generator.append_tokens(input_tokens)
    print("\nGenerating with simplified format...")
    print("S: ", end='', flush=True)
    generated_text = ""
    token_count = 0
    try:
        # Stream until the model finishes, a stop marker appears, or the
        # 2000-token safety cap is reached.
        while token_count < 2000 and not generator.is_done():
            generator.generate_next_token()
            piece = tokenizer_stream.decode(generator.get_next_tokens()[0])
            print(piece, end='', flush=True)
            generated_text += piece
            token_count += 1
            if "<|eot_id|>" in piece or "<|end_of_text|>" in piece or "</s>" in piece:
                break
    except KeyboardInterrupt:
        print("\nInterrupted")
    print()
del generator
print("\n--- Generation Complete ---")
'''
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
๐
Ask for provider support
Model tree for Johnyquest7/Genai_onnx
Base model
meta-llama/Llama-3.2-1B-Instruct