Fas1 committed on
Commit
d76ca84
·
verified ·
1 Parent(s): b042832
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import gradio as gr
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
5
 
6
  # Токен, если модель приватная
7
  hf_token = os.getenv("HF_TOKEN", None)
@@ -14,22 +15,23 @@ tokenizer = AutoTokenizer.from_pretrained(
14
  trust_remote_code=True,
15
  use_fast=True,
16
  )
17
- # Ensure offload directory exists before loading
18
- os.makedirs("offload", exist_ok=True)
19
 
20
  model = AutoModelForCausalLM.from_pretrained(
21
  model_path,
22
  token=hf_token,
23
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 
 
 
 
 
 
 
24
  device_map="auto",
25
  offload_dir="offload",
26
  offload_state_dict=True,
27
- low_cpu_mem_usage=True,
28
- trust_remote_code=True,
29
  )
30
 
31
-
32
-
33
  # Создаём пайплайн
34
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
35
 
 
2
  import gradio as gr
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
+ from accelerate import dispatch_model
6
 
7
  # Токен, если модель приватная
8
  hf_token = os.getenv("HF_TOKEN", None)
 
15
  trust_remote_code=True,
16
  use_fast=True,
17
  )
 
 
18
 
19
  model = AutoModelForCausalLM.from_pretrained(
20
  model_path,
21
  token=hf_token,
22
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
23
+ trust_remote_code=True,
24
+ )
25
+
26
+ # Dispatch model to devices with offloading
27
+ os.makedirs("offload", exist_ok=True)
28
+ model = dispatch_model(
29
+ model,
30
  device_map="auto",
31
  offload_dir="offload",
32
  offload_state_dict=True,
 
 
33
  )
34
 
 
 
35
  # Создаём пайплайн
36
  pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
37