from huggingface_hub import upload_folder, create_repo
from transformers import AutoTokenizer, AutoConfig
import os
import shutil
import tempfile

# --- Configuration ---
HUGGING_FACE_USERNAME = "voxmenthe"  # Your Hugging Face username
MODEL_NAME_ON_HF = "modernbert-imdb-sentiment"  # The name of the model on Hugging Face
REPO_ID = f"{HUGGING_FACE_USERNAME}/{MODEL_NAME_ON_HF}"

# Original base model from which the tokenizer and initial config were derived
ORIGINAL_BASE_MODEL_NAME = "answerdotai/ModernBERT-base"

# Local path to your fine-tuned model checkpoint
LOCAL_MODEL_CHECKPOINT_DIR = "checkpoints"
FINE_TUNED_MODEL_FILENAME = "mean_epoch5_0.9575acc_0.9575f1.pt"  # Your best checkpoint

# If your fine-tuned model is just a .pt file, you also need a config.json for
# ModernBertForSentiment; for simplicity, we re-save the config from the original
# base model below.

# Files from your project to include (e.g., custom model code, inference script).
# These live at the repository root.
PROJECT_FILES_TO_UPLOAD = [
    "config.yaml",
    "inference.py",
    "models.py",
    "train_utils.py",
    "classifiers.py",
    "README.md",
]
def upload_model_and_tokenizer():
    # Create the repository on Hugging Face Hub (if it doesn't exist)
    print(f"Creating repository {REPO_ID} on Hugging Face Hub...")
    create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)

    # Gather all files for upload in a temporary directory
    with tempfile.TemporaryDirectory() as tmp_upload_dir:
        print(f"Created temporary directory for upload: {tmp_upload_dir}")

        # 1. Save tokenizer files from ORIGINAL_BASE_MODEL_NAME
        print(f"Saving tokenizer from {ORIGINAL_BASE_MODEL_NAME} to {tmp_upload_dir}...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(ORIGINAL_BASE_MODEL_NAME)
            tokenizer.save_pretrained(tmp_upload_dir)
            print("Tokenizer files saved.")
        except Exception as e:
            print(f"Error saving tokenizer from {ORIGINAL_BASE_MODEL_NAME}: {e}")
            print("Please ensure this model name is correct and accessible.")
            return
        # 2. Save the base model's config.json (architecture) from ORIGINAL_BASE_MODEL_NAME.
        # This is crucial for AutoModelForSequenceClassification.from_pretrained(REPO_ID) to work.
        print(f"Saving model config.json from {ORIGINAL_BASE_MODEL_NAME} to {tmp_upload_dir}...")
        try:
            config = AutoConfig.from_pretrained(ORIGINAL_BASE_MODEL_NAME)
            # If your fine-tuned ModernBertForSentiment makes architectural changes that
            # are NOT captured by loading the state_dict (e.g., a non-standard num_labels),
            # update `config` here before saving. For now, we assume the base config is
            # sufficient or that your model's state_dict handles it.
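            # A minimal sketch of such an update (assumption: binary sentiment labels;
            # the label names are illustrative, not taken from the training code):
            #
            #     config.num_labels = 2
            #     config.id2label = {0: "negative", 1: "positive"}
            #     config.label2id = {"negative": 0, "positive": 1}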
            config.save_pretrained(tmp_upload_dir)
            print("Model config.json saved.")
        except Exception as e:
            print(f"Error saving config.json from {ORIGINAL_BASE_MODEL_NAME}: {e}")
            return
        # 3. Copy the fine-tuned model checkpoint into the temporary directory.
        # The weights must be named 'pytorch_model.bin' or 'model.safetensors' for HF
        # to auto-load them; otherwise the repo's config.json has to point at the
        # custom filename. For simplicity, we copy to the standard name, pytorch_model.bin.
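        # Note (assumption): copying the .pt file directly only works if it holds a raw
        # state_dict. If your training loop saved a full checkpoint dict (optimizer state,
        # epoch counters, ...), extract the weights first -- a sketch, assuming they live
        # under a hypothetical "model_state_dict" key:
        #
        #     import torch
        #     ckpt = torch.load(local_checkpoint_path, map_location="cpu")
        #     torch.save(ckpt["model_state_dict"], hf_model_path)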
        local_checkpoint_path = os.path.join(LOCAL_MODEL_CHECKPOINT_DIR, FINE_TUNED_MODEL_FILENAME)
        if os.path.exists(local_checkpoint_path):
            hf_model_path = os.path.join(tmp_upload_dir, "pytorch_model.bin")
            shutil.copyfile(local_checkpoint_path, hf_model_path)
            print(f"Copied fine-tuned model {FINE_TUNED_MODEL_FILENAME} to {hf_model_path}.")
        else:
            print(f"Error: Fine-tuned model checkpoint {local_checkpoint_path} not found.")
            return
        # 4. Copy other project files (all at the repository root)
        for project_file in PROJECT_FILES_TO_UPLOAD:
            if os.path.exists(project_file):
                shutil.copy(project_file, os.path.join(tmp_upload_dir, os.path.basename(project_file)))
                print(f"Copied project file {project_file} to {tmp_upload_dir}.")
            else:
                print(f"Warning: Project file {project_file} not found.")
        # 5. Upload the contents of the temporary directory
        print(f"Uploading all files from {tmp_upload_dir} to {REPO_ID}...")
        try:
            upload_folder(
                folder_path=tmp_upload_dir,
                repo_id=REPO_ID,
                repo_type="model",
                commit_message=f"Upload fine-tuned model, tokenizer, and supporting files for {MODEL_NAME_ON_HF}",
            )
            print("All files uploaded successfully!")
        except Exception as e:
            print(f"Error uploading folder to Hugging Face Hub: {e}")
if __name__ == "__main__":
    # Make sure you are logged in to the Hugging Face CLI first:
    # run `huggingface-cli login` (or `huggingface-cli login --token YOUR_HF_WRITE_TOKEN`).
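    # Alternatively, log in programmatically (assumption: a write token is exported
    # as the hypothetical environment variable HF_TOKEN):
    #
    #     from huggingface_hub import login
    #     login(token=os.environ["HF_TOKEN"])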
| print("Starting upload process...") | |
| print(f"Target Hugging Face Repo ID: {REPO_ID}") | |
| print("Ensure you have run 'huggingface-cli login' with a write token.") | |
| upload_model_and_tokenizer() |
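
# After a successful upload, a quick smoke test is to pull everything back down.
# A minimal sketch (assumption: the uploaded state_dict's keys line up with the
# stock sequence-classification architecture, since the weights come from the
# custom ModernBertForSentiment class):
#
#     from transformers import AutoTokenizer, AutoModelForSequenceClassification
#     tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
#     model = AutoModelForSequenceClassification.from_pretrained(REPO_ID)
#     inputs = tokenizer("A wonderful film!", return_tensors="pt")
#     print(model(**inputs).logits)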