import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re  # for sentence splitting
# 1. Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 2. Load the Korean GPT-2 model and tokenizer (the special-token arguments
#    follow the skt/kogpt2-base-v2 model card, so eos/pad ids are defined)
tokenizer = AutoTokenizer.from_pretrained(
    "skt/kogpt2-base-v2",
    bos_token="</s>", eos_token="</s>", unk_token="<unk>",
    pad_token="<pad>", mask_token="<mask>")
model = AutoModelForCausalLM.from_pretrained("skt/kogpt2-base-v2").to(device)
model.eval()  # inference only; no gradients needed
# 3. Korean story generation function (returns only the first 4 sentences)
def generate_korean_story(prompt, max_length=300, num_sentences=4):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
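    # Sampling setup below: temperature=0.9 mildly sharpens the distribution,
    # top_k/top_p bound the candidate pool, and repetition_penalty together
    # with no_repeat_ngram_size curbs degenerate repetition loops.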
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_length=max_length,
            min_length=100,
            do_sample=True,
            temperature=0.9,
            top_k=50,
            top_p=0.95,
            repetition_penalty=1.2,
            no_repeat_ngram_size=3,
            pad_token_id=tokenizer.pad_token_id,  # silences the missing-pad warning
            eos_token_id=tokenizer.eos_token_id)
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Split into sentences (regex lookbehind on sentence-final . ? !)
    sentences = re.split(r'(?<=[.?!])\s+', full_text.strip())
    # Keep only the first num_sentences sentences and rejoin them
    story = " ".join(sentences[:num_sentences])
    return story
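
# Optional: seed every RNG for reproducible sampling. A minimal sketch; the
# helper name is ours, not part of the original script.
def seed_everything(seed: int = 42):
    import random
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)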
# 4. Run
if __name__ == "__main__":
    user_prompt = input("Enter an opening sentence for the story (in Korean): ")
    result = generate_korean_story(user_prompt, max_length=500, num_sentences=4)
    print("\nGenerated Korean story (4 sentences):\n")
    print(result)
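
# Non-interactive usage sketch (the Korean prompt is illustrative only;
# it means "That night, a light came on in the old lighthouse."):
#   print(generate_korean_story("그날 밤, 낡은 등대에 불이 켜졌다."))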