rivapereira123 committed on
Commit 3f67405 · verified · 1 Parent(s): dbbaa64

Update finetune_flan_t5.py

Files changed (1)
  1. finetune_flan_t5.py +14 -19
finetune_flan_t5.py CHANGED
@@ -3,19 +3,12 @@ from transformers import (
     AutoTokenizer,
     AutoModelForSeq2SeqLM,
     TrainingArguments,
-    DataCollatorForSeq2Seq
+    DataCollatorForSeq2Seq,
+    FlaxAutoModelForSeq2SeqLM  # Added for explicit model loading
 )
 from trl import SFTTrainer
 import torch
 
-# First check and update packages if needed
-def check_versions():
-    import subprocess
-    import sys
-    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "transformers", "accelerate", "trl"])
-
-check_versions()
-
 # 1. Load and prepare dataset
 dataset = load_dataset("json", data_files="data/med_q_n_a_converted.jsonl", split="train")
 
@@ -24,12 +17,19 @@ dataset = dataset.map(lambda x: {
     "text": f"### Instruction:\n{x['input']}\n\n### Response:\n{x['output']}"
 })
 
-# 2. Load model and tokenizer
+# 2. Load model and tokenizer - METHOD 1: Explicit FLAN-T5 loading
 model_name = "google/flan-t5-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
-# 3. Training arguments - modified to avoid deprecated parameters
+# METHOD 1: Load model directly without AutoModel
+from transformers import T5ForConditionalGeneration
+model = T5ForConditionalGeneration.from_pretrained(model_name)
+
+# METHOD 2: Or install Japanese support (if needed)
+# pip install transformers[ja]
+# Then use AutoModel as before
+
+# 3. Training arguments
 training_args = TrainingArguments(
     output_dir="./flan-t5-medical-finetuned",
     per_device_train_batch_size=4,
@@ -41,12 +41,10 @@ training_args = TrainingArguments(
     evaluation_strategy="no",
     fp16=torch.cuda.is_available(),
     report_to="none",
-    # Add these to avoid version conflicts
-    use_cpu=not torch.cuda.is_available(),
     remove_unused_columns=False
 )
 
-# 4. Initialize trainer with updated configuration
+# 4. Initialize trainer
 trainer = SFTTrainer(
     model=model,
     tokenizer=tokenizer,
@@ -57,10 +55,7 @@ trainer = SFTTrainer(
         tokenizer,
         model=model,
         padding=True
-    ),
-    # Remove deprecated parameters
-    max_seq_length=None,
-    formatting_func=None
+    )
 )
 
 # 5. Start training
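
For a quick sanity check after training, the following is a minimal sketch (not part of the commit) of loading the checkpoint written to ./flan-t5-medical-finetuned and querying it with the same instruction format used in the dataset mapping. It assumes the trained model and tokenizer have been saved to that directory (e.g. via trainer.save_model()); the sample question is purely illustrative.

from transformers import AutoTokenizer, T5ForConditionalGeneration

# Path matches training_args.output_dir in the diff above; assumes the
# fine-tuned model and tokenizer were saved there after training.
checkpoint = "./flan-t5-medical-finetuned"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = T5ForConditionalGeneration.from_pretrained(checkpoint)

# Hypothetical medical question, formatted like the "text" field built in the script.
prompt = "### Instruction:\nWhat are common symptoms of iron-deficiency anemia?\n\n### Response:\n"

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))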