MojoHz committed on
Commit
0e6ecd3
·
verified ·
1 Parent(s): b4a405e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -17,8 +17,8 @@ from langchain_community.llms import HuggingFacePipeline # Updated import for H
17
  device = 'cpu' # Use CPU since CUDA is unavailable
18
  print(device)
19
 
20
- # Load model without quantization or GPU optimizations
21
- model_id = 'HuggingFaceH4/zephyr-7b-beta'
22
  model_config = transformers.AutoConfig.from_pretrained(
23
  model_id,
24
  trust_remote_code=True,
@@ -32,11 +32,12 @@ model = transformers.AutoModelForCausalLM.from_pretrained(
32
  )
33
  tokenizer = AutoTokenizer.from_pretrained(model_id)
34
 
 
35
  query_pipeline = transformers.pipeline(
36
  'text-generation',
37
  model=model,
38
  tokenizer=tokenizer,
39
- torch_dtype=torch.float16,
40
  max_length=6000,
41
  max_new_tokens=500,
42
  device_map="auto"
 
17
  device = 'cpu' # Use CPU since CUDA is unavailable
18
  print(device)
19
 
20
+ # Load GPT-2 model instead of zephyr-7b-beta
21
+ model_id = 'gpt2' # Use GPT-2, a smaller and CPU-friendly model
22
  model_config = transformers.AutoConfig.from_pretrained(
23
  model_id,
24
  trust_remote_code=True,
 
32
  )
33
  tokenizer = AutoTokenizer.from_pretrained(model_id)
34
 
35
+ # Create the query pipeline for GPT-2 model
36
  query_pipeline = transformers.pipeline(
37
  'text-generation',
38
  model=model,
39
  tokenizer=tokenizer,
40
+ torch_dtype=torch.float32, # Use float32 for CPU compatibility
41
  max_length=6000,
42
  max_new_tokens=500,
43
  device_map="auto"