metadata
base_model: unsloth/Qwen3-4B-Base
tags:
  - text-generation-inference
  - transformers
  - unsloth
  - qwen3
  - trl
license: apache-2.0
language:
  - en
Uploaded model
- Developed by: Sengil
- License: apache-2.0
- Fine-tuned from model: unsloth/Qwen3-4B-Base
!pip install transformers accelerate peft
usage
# Print whether PyTorch reports a CUDA device as available.
import torch
print(torch.cuda.is_available())
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the fine-tuned summarizer and its tokenizer.
model_name = "Sengil/qwen3-4b-turkish-summarizer"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",         # lets accelerate decide weight placement
    torch_dtype=torch.float16  # half precision to reduce memory use
)
model.eval()

# Chat messages: a system instruction plus the text to summarize.
messages = [
    {"role": "system", "content": "Sen bir özetleyicisin. Sana verilen metni özetle."},
    {"role": "user", "content": "text. . ."},  # placeholder: put the text to summarize here
]

# Render the messages into one prompt string with the tokenizer's chat template.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize without moving tensors; they are sent to the model's device in generate().
inputs = tokenizer(prompt, return_tensors="pt")
prompt_length = inputs["input_ids"].shape[1]

# Inference: no gradients are needed.
with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"].to(model.device),
        attention_mask=inputs["attention_mask"].to(model.device),
        max_new_tokens=128,
        # Greedy decoding. `temperature` is only used when sampling, so it is
        # not passed here (passing it alongside do_sample=False triggers a warning).
        do_sample=False,
    )

# For a causal LM, generate() returns the prompt tokens followed by the new
# tokens. Drop the prompt so only the generated summary is decoded and printed.
generated_tokens = outputs[0][prompt_length:]
summary = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print("\n📝 Özet:", summary)
