Model Card for meinvirgos/aina-translator-es-ast-quantized-cuda
Spanish to Asturian translator.
A 4-bit quantized version of projecte-aina/aina-translator-es-ast.
Model Details
Model Description
This is the model card of a 🤗 transformers model that has been pushed to the Hub. This model card has been automatically generated.
- Developed by: [email protected]
- Funded by [optional]:
- Shared by [optional]:
- Model type: M2M100
- Language(s) (NLP): Spanish, Asturian
- License: cc-by-nc-4.0
- Finetuned from model [optional]: projecte-aina/aina-translator-es-ast
Model Sources [optional]
- Repository: projecte-aina/aina-translator-es-ast
- Paper [optional]:
- Demo [optional]:
Uses
Translation from Spanish to Asturian.
Direct Use
The model is intended for use when memory is constrained: the 4-bit weights take roughly a quarter of the memory of the original full-precision model, as illustrated in the sketch below.
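As a rough illustration (a sketch, not part of the original card; it needs a CUDA device with bitsandbytes installed, and the exact numbers depend on your library versions), you can compare the in-memory footprint of this quantized checkpoint against the original model:
# Sketch: compare memory footprints of the quantized and original checkpoints
from transformers import AutoModelForSeq2SeqLM
quantized = AutoModelForSeq2SeqLM.from_pretrained("meinvirgos/aina-translator-es-ast-quantized-cuda")
original = AutoModelForSeq2SeqLM.from_pretrained("projecte-aina/aina-translator-es-ast")
print("Quantized (MB):", quantized.get_memory_footprint() / 1e6)
print("Original (MB):", original.get_memory_footprint() / 1e6)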
Out-of-Scope Use
Bias, Risks, and Limitations
Recommendations
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
How to Get Started with the Model
Use the code below to get started with the model.
from transformers import pipeline

pipe = pipeline("translation", model="meinvirgos/aina-translator-es-ast-quantized-cuda", src_lang="spa_Latn", tgt_lang="ast_Latn")
print(pipe("Tu padre es guapo")[0].get("translation_text"))
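The pipeline also accepts a list of sentences and returns one result per input (the inputs below are only illustrative examples):
# Translate several sentences in one call
for out in pipe(["Buenos días", "¿Cómo estás?"]):
    print(out["translation_text"])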
Another example:
# Initialization
!pip install bitsandbytes
# This works only in a Colab notebook
from google.colab import userdata
miToken = userdata.get('HF_TOKEN')
from huggingface_hub import login
login(token=miToken)
# Load model
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
model_name = "meinvirgos/aina-translator-es-ast-quantized-cuda"
tokenizer_name = "meinvirgos/aina-translator-es-ast-quantized-cuda"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, token=True, src_lang="spa_Latn")
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Function to test the model
import torch
import tensorflow as tf

def traducir(modelo, texto):
    # Tokenize with TensorFlow tensors, then hand them to PyTorch via DLPack
    in_tokens = tokenizer.encode(texto, return_tensors="tf")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    in_tokens_pt = torch.from_dlpack(tf.experimental.dlpack.to_dlpack(in_tokens)).to(device)
    # Force the Asturian language token at the start of generation
    translated_tokens = modelo.generate(in_tokens_pt, forced_bos_token_id=tokenizer.convert_tokens_to_ids("ast_Latn"))
    result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    print(result)

traducir(model, "Tu padre es guapo")
# Should be: El to pá ye guapu
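The TensorFlow-to-PyTorch DLPack round trip above mirrors the original notebook. As a simpler alternative (a sketch, not the author's original code; traducir_pt is a hypothetical helper that reuses the tokenizer and model loaded above), you can tokenize directly to PyTorch tensors:
def traducir_pt(modelo, texto):
    # Tokenize straight to PyTorch tensors and move them to the model's device
    inputs = tokenizer(texto, return_tensors="pt").to(modelo.device)
    translated = modelo.generate(**inputs, forced_bos_token_id=tokenizer.convert_tokens_to_ids("ast_Latn"))
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

print(traducir_pt(model, "Tu padre es guapo"))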
How the model was obtained
Initialization
!pip install bitsandbytes
import torch
import tensorflow as tf
# This example is from a Colab notebook
from google.colab import userdata
miToken = userdata.get('HF_TOKEN')
from huggingface_hub import login
login(token=miToken)
Model loading
from transformers import M2M100ForConditionalGeneration, NllbTokenizer
model_name = "projecte-aina/aina-translator-es-ast"
tokenizer_name = "facebook/nllb-200-distilled-600M"
tokenizer = NllbTokenizer.from_pretrained(tokenizer_name, token=True, src_lang="spa_Latn")
# model = M2M100ForConditionalGeneration.from_pretrained(model_name)
Function to test the model
def traducir(modelo, texto):
    # texto = "Tener un perro es bueno"
    print(texto)
    in_tokens = tokenizer.encode(texto, return_tensors="tf")
    for token in in_tokens[0]:
        print(f"{token}: \"{tokenizer.decode([token])}\"")
    # Convert the TensorFlow tensor to a PyTorch tensor and move it to the GPU
    device = torch.device('cuda')
    print(f"Using device: {device}")  # Optional: print the chosen device
    in_tokens_pt = torch.from_dlpack(tf.experimental.dlpack.to_dlpack(in_tokens)).to(device)
    print("-------------")
    print(in_tokens_pt)
    print("-------------")
    translated_tokens = modelo.generate(in_tokens_pt, forced_bos_token_id=tokenizer.convert_tokens_to_ids("ast_Latn"))
    for token in translated_tokens[0]:
        print(f"{token}: \"{tokenizer.decode([token])}\"")
    result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    print(result)
import os

def print_size_of_model(model):
    # Save the state dict to a temporary file and report its size on disk
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p") / 1e6)
    os.remove('temp.p')

# traducir(model, "eres guapo")
Quantization and save
Quantize with BitsAndBytes to 4 bits, which yields a model roughly a quarter of the original size.
from transformers import BitsAndBytesConfig, AutoModelForSeq2SeqLM

# Load the model with 4-bit NF4 quantization
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)
device = torch.device('cuda')
model_nf4 = AutoModelForSeq2SeqLM.from_pretrained(model_name,
                                                  device_map=device,
                                                  quantization_config=nf4_config)
print_size_of_model(model_nf4)
traducir(model_nf4, "eres guapo")

# Save the quantized model locally and reload it to check that it still works
model_nf4.save_pretrained("model_nf4")
model_new = AutoModelForSeq2SeqLM.from_pretrained("model_nf4")
traducir(model_new, "eres guapo")

# Upload to the Hugging Face Hub
# model_nf4.push_to_hub("meinvirgos/aina-translator-es-ast-quantized-cuda")
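As an optional check (a sketch, not part of the original notebook), you can verify that the reloaded checkpoint is still quantized:
# Sketch: the reloaded model should keep its 4-bit weights, so its in-memory
# footprint should be roughly a quarter of the full-precision model's
print("Quantized footprint (MB):", model_new.get_memory_footprint() / 1e6)
print("Loaded in 4-bit:", getattr(model_new, "is_loaded_in_4bit", False))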
Hardware
A CUDA-capable GPU is required.
Software
transformers, bitsandbytes
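A minimal environment for the examples above (the package list is inferred from the code in this card; accelerate is assumed to be needed for quantized loading, and tensorflow only for the DLPack examples):
!pip install transformers bitsandbytes accelerate torch tensorflow huggingface_hub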
Model Card Authors [optional]