---
base_model: mistralai/Mistral-7B-Instruct-v0.3
library_name: transformers
model_name: Doctor_AI_LoRA-Mistral-7B-Instructritvik77
tags:
- generated_from_trainer
- trl
- medical
- Doctor
- PEFT
- AIMEDICAL
- DOCTORai
license: apache-2.0
datasets:
- FreedomIntelligence/medical-o1-reasoning-SFT
pipeline_tag: text-generation
---

# Model Card for Doctor_AI_LoRA-Mistral-7B-Instructritvik77

This model is a LoRA fine-tune of [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) on the [FreedomIntelligence/medical-o1-reasoning-SFT](https://huggingface.co/datasets/FreedomIntelligence/medical-o1-reasoning-SFT) dataset. It has been trained using [TRL](https://github.com/huggingface/trl).

## Quick start

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Quantization config for 4-bit loading (memory optimization)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # NF4 preserves precision better for LoRA weights
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,         # Double quantization reduces VRAM overhead
)

# Load the tokenizer from the fine-tuned checkpoint (ensures token consistency)
peft_model_id = "ritvik77/Doctor_AI_LoRA-Mistral-7B-Instructritvik77"
tokenizer = AutoTokenizer.from_pretrained(peft_model_id, trust_remote_code=True)

# Make sure a pad token is assigned
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load the base model with 4-bit quantization for memory efficiency
model_name = "mistralai/Mistral-7B-Instruct-v0.3"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",                # Maps layers across available GPUs
    quantization_config=bnb_config,   # Apply the 4-bit quantization config
    torch_dtype=torch.bfloat16,
)

# Resize token embeddings BEFORE loading the LoRA adapter (prevents a size mismatch)
model.resize_token_embeddings(len(tokenizer))

# Load the PEFT adapter (LoRA weights) on top of the base model
model = PeftModel.from_pretrained(model, peft_model_id)

# Optional: unfreeze the LoRA layers. This is only needed if you plan to
# continue training; for pure inference you can skip it.
for name, param in model.named_parameters():
    if "lora" in name:
        param.requires_grad = True

# Confirm the LoRA layers are active
if hasattr(model, "print_trainable_parameters"):
    model.print_trainable_parameters()
else:
    print("Warning: LoRA adapter may not have loaded correctly.")

# Put the model in evaluation mode for inference
model.eval()

# Sample inference helper
def generate_response(prompt, max_new_tokens=300, temperature=0.7):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Sample prompt for medical diagnosis
prompt = "Patient reports chest pain and shortness of breath. What might be the diagnosis?"
response = generate_response(prompt)
print("\nDiagnosis:", response)

print("PEFT model loaded successfully with resized embeddings!")
```
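
Since the base model is instruction-tuned, prompts formatted with the tokenizer's chat template will generally behave better than raw strings. A minimal sketch, assuming the adapter inherits the Mistral chat template from the base tokenizer:

```python
# Build a chat-formatted prompt (template assumed to come from the base tokenizer)
messages = [
    {"role": "user", "content": "Patient reports chest pain and shortness of breath. What might be the diagnosis?"},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)

with torch.no_grad():
    output_ids = model.generate(input_ids, max_new_tokens=300, do_sample=True, temperature=0.7)

# Decode only the newly generated tokens, not the echoed prompt
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```

If you want to serve the model without PEFT at inference time, `model.merge_and_unload()` can fold the LoRA weights into the base model; merging is simplest when the base model is loaded in full precision rather than 4-bit.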
## Training procedure

This model was trained with supervised fine-tuning (SFT) on the [FreedomIntelligence/medical-o1-reasoning-SFT](https://huggingface.co/datasets/FreedomIntelligence/medical-o1-reasoning-SFT) dataset.
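
For reference, here is a minimal sketch of what an SFT run with TRL and PEFT on this dataset could look like. The LoRA hyperparameters, dataset config name, and prompt formatting below are illustrative assumptions, not the exact recipe used for this model:

```python
from datasets import load_dataset
from peft import LoraConfig
from trl import SFTConfig, SFTTrainer

# Config name "en" is an assumption; the dataset also ships other language configs
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split="train")

def format_example(example):
    # The dataset exposes Question / Complex_CoT / Response columns; how they were
    # concatenated for training is not documented, so this layout is illustrative
    return {
        "text": f"### Question:\n{example['Question']}\n\n"
                f"### Answer:\n{example['Complex_CoT']}\n{example['Response']}"
    }

dataset = dataset.map(format_example)

# Illustrative LoRA settings; the actual rank/alpha are not recorded in this card
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)

trainer = SFTTrainer(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    train_dataset=dataset,
    peft_config=peft_config,
    args=SFTConfig(output_dir="Doctor_AI_LoRA-Mistral-7B-Instruct"),
)
trainer.train()
```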
### Framework versions

- TRL: 0.15.2
- Transformers: 4.48.3
- PyTorch: 2.5.1+cu124
- Datasets: 3.3.2
- Tokenizers: 0.21.0
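
To check that a local environment roughly matches these versions, you can print the installed ones (a small sketch; the import names are the standard package names):

```python
import datasets
import tokenizers
import torch
import transformers
import trl

# Compare installed versions against the ones listed above
for module in (trl, transformers, torch, datasets, tokenizers):
    print(f"{module.__name__}: {module.__version__}")
```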
## Citations

Cite TRL as:

```bibtex
@misc{vonwerra2022trl,
    title        = {{TRL: Transformer Reinforcement Learning}},
    author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
    year         = 2020,
    journal      = {GitHub repository},
    publisher    = {GitHub},
    howpublished = {\url{https://github.com/huggingface/trl}}
}
```