Spaces:

rivapereira123
/

firstaid

Sleeping

App Files Community

rivapereira123 committed on Jul 16

Commit

3f67405

verified ·

1 Parent(s): dbbaa64

Update finetune_flan_t5.py

Browse files

Files changed (1) hide show

finetune_flan_t5.py +14 -19

finetune_flan_t5.py CHANGED Viewed

@@ -3,19 +3,12 @@ from transformers import (
     AutoTokenizer,
     AutoModelForSeq2SeqLM,
     TrainingArguments,
-    DataCollatorForSeq2Seq
 )
 from trl import SFTTrainer
 import torch
-# First check and update packages if needed
-def check_versions():
-    import subprocess
-    import sys
-    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "transformers", "accelerate", "trl"])
-check_versions()
 # 1. Load and prepare dataset
 dataset = load_dataset("json", data_files="data/med_q_n_a_converted.jsonl", split="train")
@@ -24,12 +17,19 @@ dataset = dataset.map(lambda x: {
     "text": f"### Instruction:\n{x['input']}\n\n### Response:\n{x['output']}"
 })
-# 2. Load model and tokenizer
 model_name = "google/flan-t5-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-# 3. Training arguments - modified to avoid deprecated parameters
 training_args = TrainingArguments(
     output_dir="./flan-t5-medical-finetuned",
     per_device_train_batch_size=4,
@@ -41,12 +41,10 @@ training_args = TrainingArguments(
     evaluation_strategy="no",
     fp16=torch.cuda.is_available(),
     report_to="none",
-    # Add these to avoid version conflicts
-    use_cpu=not torch.cuda.is_available(),
     remove_unused_columns=False
 )
-# 4. Initialize trainer with updated configuration
 trainer = SFTTrainer(
     model=model,
     tokenizer=tokenizer,
@@ -57,10 +55,7 @@ trainer = SFTTrainer(
         tokenizer,
         model=model,
         padding=True
-    ),
-    # Remove deprecated parameters
-    max_seq_length=None,
-    formatting_func=None
 )
 # 5. Start training

     AutoTokenizer,
     AutoModelForSeq2SeqLM,
     TrainingArguments,
+    DataCollatorForSeq2Seq,
+    FlaxAutoModelForSeq2SeqLM  # Added for explicit model loading
 )
 from trl import SFTTrainer
 import torch
 # 1. Load and prepare dataset
 dataset = load_dataset("json", data_files="data/med_q_n_a_converted.jsonl", split="train")
     "text": f"### Instruction:\n{x['input']}\n\n### Response:\n{x['output']}"
 })
+# 2. Load model and tokenizer - METHOD 1: Explicit FLAN-T5 loading
 model_name = "google/flan-t5-base"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+# METHOD 1: Load model directly without AutoModel
+from transformers import T5ForConditionalGeneration
+model = T5ForConditionalGeneration.from_pretrained(model_name)
+# METHOD 2 (alternative, if needed): install the optional Japanese extras with
+#   pip install transformers[ja]
+# and then load the model with AutoModelForSeq2SeqLM as before.
+# 3. Training arguments
 training_args = TrainingArguments(
     output_dir="./flan-t5-medical-finetuned",
     per_device_train_batch_size=4,
     evaluation_strategy="no",
     fp16=torch.cuda.is_available(),
     report_to="none",
     remove_unused_columns=False
 )
+# 4. Initialize trainer
 trainer = SFTTrainer(
     model=model,
     tokenizer=tokenizer,
         tokenizer,
         model=model,
         padding=True
+    )
 )
 # 5. Start training