import sys

import onnxruntime_genai as og

model = og.Model('soap5_onnx')
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()
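# The stream decoder emits text incrementally, buffering token pieces until
# they form complete characters (important for multi-byte UTF-8 output).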

# Search options - matching the original model's generation settings
search_options = {
    'max_length': 4096,
    'temperature': 0.1,
    'top_p': 0.9,
    'do_sample': True,
    'batch_size': 1
}
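# With do_sample=True and a nonzero temperature the output varies run to run.
# For reproducible notes while testing, one option (an assumption, not part of
# the original settings) is greedy decoding:
#   search_options['do_sample'] = False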

soap_note_prompt = """You are an expert medical professor assisting in the creation of medically accurate SOAP summaries. 
Please ensure the response follows the structured format: S:, O:, A:, P: without using markdown or special formatting. 
Create a Medical SOAP note summary from the dialogue, following these guidelines:\n    
S (Subjective): Summarize the patient's reported symptoms, including chief complaint and relevant history.
Rely on the patient's statements as the primary source and ensure standardized terminology.\n    
O (Objective): Highlight critical findings such as vital signs, lab results, and imaging, emphasizing important details like the side of the body affected and specific dosages. 
Include normal ranges where relevant.\n    
A (Assessment): Offer a concise assessment combining subjective and objective data. State the primary diagnosis and any differential diagnoses, noting potential complications and the prognostic outlook.\n    
P (Plan): Outline the management plan, covering medication, diet, consultations, and education. Ensure to mention necessary referrals to other specialties and address compliance challenges.\n    
Considerations: Compile the report based solely on the transcript provided. Use concise medical jargon and abbreviations for effective doctor communication.\n    
Please format the summary in a clean, simple list format without using markdown or bullet points. Use 'S:', 'O:', 'A:', 'P:' directly followed by the text. Avoid any styling or special characters.
TRANSCRIPT: \n"""

text = input("Input: ")
if not text:
    sys.exit("Error: input cannot be empty")

# Method 1: Force generation by adding a SOAP starter after the prompt
full_prompt = soap_note_prompt + text

# Llama 3 chat template; the trailing "S: " seeds the assistant turn so the
# model begins the SOAP note immediately instead of echoing the prompt.
chat_template = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\nS: "

prompt = chat_template.format(prompt=full_prompt)

input_tokens = tokenizer.encode(prompt)
print(f"Tokens in prompt: {len(input_tokens)}")

params = og.GeneratorParams(model)
params.set_search_options(**search_options)
generator = og.Generator(model, params)
generator.append_tokens(input_tokens)
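# append_tokens() runs the prompt through the model in one prefill pass;
# each generate_next_token() call below then decodes a single new token.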

print("\nGenerating SOAP note...")
print("S: ", end='', flush=True)  # We already have "S: " in the prompt

# Generate the rest of the SOAP note
generated_text = ""
token_count = 0

try:
    while not generator.is_done() and token_count < 2000:  # Limit to 2000 tokens for safety
        generator.generate_next_token()
        new_token = generator.get_next_tokens()[0]
        decoded = tokenizer_stream.decode(new_token)
        
        # Heuristic: if the first tokens merely echo the input transcript,
        # suppress them from the printed output (but keep accumulating so the
        # substring check and the Method-2 fallback below can see the echo).
        if token_count < 50 and text[:20] in generated_text + decoded:
            generated_text += decoded
            token_count += 1
            continue
            
        print(decoded, end='', flush=True)
        generated_text += decoded
        token_count += 1
        
        # Stop if we see end markers
        if any(marker in decoded for marker in ["<|eot_id|>", "<|end_of_text|>", "</s>"]):
            break
            
except KeyboardInterrupt:
    print("\nInterrupted")

print()

# If that didn't work, try Method 2: Different prompt structure
if len(generated_text.strip()) < 50 or text[:50] in generated_text:
    print("\n\nMethod 1 didn't work well. Trying alternative method...")
    
    del generator  # Clean up
    
    # Try a simpler approach - maybe the model expects a different format
    simple_prompt = f"{soap_note_prompt}{text}\n\nSOAP Note:\nS: "
    
    input_tokens = tokenizer.encode(simple_prompt)
    # Fresh stream: the one above may hold partial decode state from Method 1.
    tokenizer_stream = tokenizer.create_stream()
    
    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    generator = og.Generator(model, params)
    generator.append_tokens(input_tokens)
    
    print("\nGenerating with simplified format...")
    print("S: ", end='', flush=True)
    
    generated_text = ""
    token_count = 0
    
    try:
        while not generator.is_done() and token_count < 2000:
            generator.generate_next_token()
            new_token = generator.get_next_tokens()[0]
            decoded = tokenizer_stream.decode(new_token)
            
            print(decoded, end='', flush=True)
            generated_text += decoded
            token_count += 1
            
            if any(marker in decoded for marker in ["<|eot_id|>", "<|end_of_text|>", "</s>"]):
                break
                
    except KeyboardInterrupt:
        print("\nInterrupted")
    
    print()
    del generator

print("\n--- Generation Complete ---")