Super slow initialization
Anyone noticing the initialization is super slow?
I just have something like this:
# florence
model_id = "microsoft/Florence-2-large"
self.florence = (
    AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
    )
    .eval()
    .to(self.device, self.torch_dtype)
)
self.florence_processor = AutoProcessor.from_pretrained(
    model_id, trust_remote_code=True
)
I've set my logging to debug level and I'm getting this kind of output:
DEBUG:hydra.core.utils:Setting JobRuntime:name=UNKNOWN_NAME
DEBUG:hydra.core.utils:Setting JobRuntime:name=app
DEBUG:matplotlib:matplotlib data path: /home/vd/.venv/lib/python3.10/site-packages/matplotlib/mpl-data
DEBUG:matplotlib:CONFIGDIR=/home/vd/.config/matplotlib
DEBUG:matplotlib:interactive is False
DEBUG:matplotlib:platform is linux
DEBUG:matplotlib:CACHEDIR=/home/vd/.cache/matplotlib
DEBUG:matplotlib.font_manager:Using fontManager instance from /home/vd/.cache/matplotlib/fontlist-v390.json
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /microsoft/Florence-2-large/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /microsoft/Florence-2-large/resolve/main/configuration_florence2.py HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /microsoft/Florence-2-large/resolve/main/modeling_florence2.py HTTP/1.1" 200 0
FutureWarning: Importing from timm.models.layers is deprecated, please import via timm.layers
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /microsoft/Florence-2-large/resolve/main/model.safetensors HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large HTTP/1.1" 200 4318
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large/commits/main HTTP/1.1" 200 7458
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large/discussions?p=0 HTTP/1.1" 200 29657
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large/discussions?p=1 HTTP/1.1" 200 29389
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large/discussions?p=2 HTTP/1.1" 200 594
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): safetensors-convert.hf.space:443
DEBUG:urllib3.connectionpool:https://safetensors-convert.hf.space:443 "POST /call/run HTTP/1.1" 200 47
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): safetensors-convert.hf.space:443
DEBUG:urllib3.connectionpool:https://safetensors-convert.hf.space:443 "GET /call/run/6331028fd1c94743b7d7c1c524c85931 HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large/commits/main HTTP/1.1" 200 7458
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large/discussions?p=0 HTTP/1.1" 200 29657
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large/discussions?p=1 HTTP/1.1" 200 29389
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/microsoft/Florence-2-large/discussions?p=2 HTTP/1.1" 200 594
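Reading these logs, the slow part seems to be the hub round-trips: the HEAD for model.safetensors returns 404, after which the loader polls safetensors-convert.hf.space (the Hub's safetensors conversion service) and pages through the repo's discussions on every startup. A minimal check, assuming the weights are already in the local Hugging Face cache, is to force an offline load and see whether the delay disappears:

# Minimal check (assumes microsoft/Florence-2-large is already cached
# locally): local_files_only=True skips all hub round-trips, including
# the safetensors-conversion lookup seen in the logs above.
from transformers import AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Florence-2-large"
florence = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    local_files_only=True,  # fail fast instead of hitting huggingface.co
).eval()
florence_processor = AutoProcessor.from_pretrained(
    model_id, trust_remote_code=True, local_files_only=True
)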
Have you solved this problem?
+1 here
I fixed it by directly initializing the model and loading the checkpoint, instead of relying on transformers.
@Xsong123 Could you please share the code for how you initialized it directly? I tried a few options, but they fail with various Florence runtime errors.
I faced a similar issue, but for microsoft/Florence-2-base with transformers==4.51.3. I resolved it by loading the model from the pretrained checkpoint (the file pytorch_model.bin exists) and saving it with save_pretrained. The new model directory contains model.safetensors instead of pytorch_model.bin. After that, model loading is very fast.
Code to reproduce:
pip install transformers==4.51.3 timy
import json
import os

import timy
from transformers import AutoModelForCausalLM, AutoProcessor


def load_model(model_id: str):
    florence = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
    ).eval()
    florence_processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    return florence, florence_processor


def save_model_and_processor(
    florence: AutoModelForCausalLM, florence_processor: AutoProcessor, model_path: str
):
    florence.save_pretrained(model_path)
    florence_processor.save_pretrained(model_path)
    # fix: after save_pretrained, the model_type of vision_config is lost,
    # so restore it in config.json by hand
    with open(os.path.join(model_path, "config.json"), "r") as f:
        config = json.load(f)
    config["vision_config"]["model_type"] = "davit"
    with open(os.path.join(model_path, "config.json"), "w") as f:
        json.dump(config, f)


if __name__ == "__main__":
    with timy.Timer("Prepare standalone model"):
        model, processor = load_model("microsoft/Florence-2-base")
    new_model_path = "new_model"
    with timy.Timer("Save model and processor"):
        save_model_and_processor(model, processor, new_model_path)
    with timy.Timer("Load new model and processor"):
        model, processor = load_model(new_model_path)
Logs:
Prepare standalone model 83.104714 seconds
Save model and processor 3.589017 seconds
Load new model and processor 1.996852 seconds
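In these numbers, the first load (from pytorch_model.bin, including the hub lookups) dominates; once the directory contains model.safetensors, loading drops to about 2 seconds. The converted directory is then a drop-in replacement for the hub id, e.g. with the helper above:

# "new_model" is the directory written by save_model_and_processor above
model, processor = load_model("new_model")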
@tonny123321 Sorry for the late reply; it's been a while since I worked on this, and I can't quite remember where I saved my code. If I recall correctly, I manually instantiated the model based on the official microsoft/Florence-2-large model files and then loaded the weights directly with load_state_dict. You could try that approach. Alternatively, you might want to follow @MateuszW's suggestion above, which could be a more straightforward way.
@tonny123321 I found my code today, here it is below. Hope it helps!
import torch
from accelerate import Accelerator
from transformers import AutoProcessor

from Florence_2_large.configuration_florence2 import Florence2Config
from Florence_2_large.modeling_florence2 import Florence2ForConditionalGeneration

FLORENCE2_MODEL_ID = "Florence_2_large"
config_path = "./Florence_2_large/config.json"
pretrained_model_path = "./Florence_2_large/pytorch_model.bin"

# Instantiate the model from the local config, then load the checkpoint
# weights directly instead of going through from_pretrained
florence2_config = Florence2Config.from_pretrained(config_path)
florence2_model_raw = Florence2ForConditionalGeneration(florence2_config)
florence2_model_raw.load_state_dict(torch.load(pretrained_model_path, map_location="cpu"))
florence2_model_raw = florence2_model_raw.to(torch.float16)  # convert to float16 before preparing

florence2_processor = AutoProcessor.from_pretrained(FLORENCE2_MODEL_ID, trust_remote_code=True)

# Prepare the model - handles device placement and DDP wrapping if needed
accelerator = Accelerator()
florence2_model = accelerator.prepare(florence2_model_raw)
accelerator.print("Florence-2 model prepared.")
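As a quick sanity check that the directly instantiated model behaves like the from_pretrained one, the inference pattern from the official Florence-2 model card should work against it. A minimal sketch, assuming a CUDA device (the model was cast to float16) and a placeholder image path:

from PIL import Image

# Placeholder inputs: any RGB image and a Florence-2 task token
image = Image.open("example.jpg").convert("RGB")
prompt = "<CAPTION>"

# unwrap_model is a no-op unless accelerator.prepare wrapped the model in DDP
model = accelerator.unwrap_model(florence2_model)
inputs = florence2_processor(text=prompt, images=image, return_tensors="pt").to(
    accelerator.device, torch.float16
)
generated_ids = model.generate(
    input_ids=inputs["input_ids"],
    pixel_values=inputs["pixel_values"],
    max_new_tokens=256,
    num_beams=3,
)
generated_text = florence2_processor.batch_decode(
    generated_ids, skip_special_tokens=False
)[0]
parsed = florence2_processor.post_process_generation(
    generated_text, task=prompt, image_size=(image.width, image.height)
)
print(parsed)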