Bahaedev committed on
Commit 92f93f9 · verified · 1 Parent(s): cb69e12

Update app.py

Files changed (1)
  app.py +18 -8
app.py CHANGED
@@ -2,6 +2,7 @@ import os
  import threading
  import torch
  from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from importlib.metadata import PackageNotFoundError
  import gradio as gr
  from fastapi import FastAPI
  from pydantic import BaseModel
@@ -22,14 +23,23 @@ MODEL_ID = "tiiuae/Falcon3-3B-Instruct"

  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

- # Load model in 4-bit for faster CPU/GPU inference (requires bitsandbytes)
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_ID,
-     load_in_4bit=True,
-     device_map="auto",
-     torch_dtype=torch.float16,
-     trust_remote_code=True
- )
+ # Attempt 4-bit quantization; fallback if bitsandbytes is not installed
+ try:
+     model = AutoModelForCausalLM.from_pretrained(
+         MODEL_ID,
+         load_in_4bit=True,
+         device_map="auto",
+         torch_dtype=torch.float16,
+         trust_remote_code=True
+     )
+ except PackageNotFoundError:
+     print("bitsandbytes not found; loading full model without quantization.")
+     model = AutoModelForCausalLM.from_pretrained(
+         MODEL_ID,
+         device_map="auto",
+         torch_dtype=torch.float16,
+         trust_remote_code=True
+     )

  # Create optimized text-generation pipeline
  pipe = pipeline(
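For reference, a minimal sketch of another way to express the same fallback: probe for bitsandbytes up front with importlib.util.find_spec and pass an explicit BitsAndBytesConfig, so the load path does not depend on which exception from_pretrained raises when the package is missing. The helper name load_model_with_fallback is illustrative and not part of this commit.

# Sketch only: check for bitsandbytes before requesting 4-bit quantization,
# instead of catching an exception from from_pretrained.
# load_model_with_fallback is a hypothetical helper, not from the commit above.
import importlib.util

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

MODEL_ID = "tiiuae/Falcon3-3B-Instruct"

def load_model_with_fallback(model_id: str):
    if importlib.util.find_spec("bitsandbytes") is not None:
        # bitsandbytes is importable: load the weights quantized to 4-bit.
        quant_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
        )
        return AutoModelForCausalLM.from_pretrained(
            model_id,
            quantization_config=quant_config,
            device_map="auto",
            trust_remote_code=True,
        )
    # Fallback: unquantized fp16 weights, mirroring the except branch above.
    print("bitsandbytes not found; loading full model without quantization.")
    return AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True,
    )

model = load_model_with_fallback(MODEL_ID)

One reason to prefer the explicit check is that it does not rely on the exact exception type raised when bitsandbytes is absent (transformers generally surfaces this as an ImportError).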