import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import re  # for sentence splitting
# 1. Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# 2. Load the Korean GPT-2 model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("skt/kogpt2-base-v2")
model = AutoModelForCausalLM.from_pretrained("skt/kogpt2-base-v2").to(device)
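
# (Hedged addition, not in the original file.) Depending on which special
# tokens the skt/kogpt2-base-v2 tokenizer files define, the pad token may be
# unset, which makes generate() emit a pad_token_id warning. Reusing the EOS
# token is a common, safe default; model.eval() disables dropout at inference.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.eval()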
# 3. Korean story generation function (returns only the first 4 sentences)
def generate_korean_story(prompt, max_length=300, num_sentences=4):
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    outputs = model.generate(
        input_ids,
        max_length=max_length,
        min_length=100,
        do_sample=True,  # sample instead of greedy decoding
        temperature=0.9,
        top_k=50,
        top_p=0.95,
        repetition_penalty=1.2,
        no_repeat_ngram_size=3,
        eos_token_id=tokenizer.eos_token_id,
    )
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Split into sentences (regex on period / question mark / exclamation mark)
    sentences = re.split(r'(?<=[.?!])\s+', full_text.strip())
    # Keep only the first num_sentences sentences and rejoin them
    story = " ".join(sentences[:num_sentences])
    return story
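
# Note (assumption, not in the original): the punctuation regex above also
# handles Korean, which ends sentences with the same . ? ! marks, but it will
# miss sentences lacking terminal punctuation. A sturdier optional swap is the
# third-party `kss` package (pip install kss):
#
#   import kss
#   sentences = kss.split_sentences(full_text.strip())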
# 4. Run
if __name__ == "__main__":
    user_prompt = input("Enter the opening sentence of your story (in Korean): ")
    result = generate_korean_story(user_prompt, max_length=500, num_sentences=4)
    print("\nGenerated Korean story (4 sentences):\n")
    print(result)
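
# Example session (illustrative only; output varies because do_sample=True):
#   $ python app.py
#   Enter the opening sentence of your story (in Korean): <Korean sentence>
#
#   Generated Korean story (4 sentences):
#   <four sampled sentences continuing the prompt>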