Spaces:

ttajihoon
/

GSGPT

Sleeping

App Files Files Community

GSGPT / app.py

ttajihoon

Update app.py

202a7ab verified 7 months ago

raw

history blame contribute delete

1.97 kB

	import io

	import pandas as pd
	import streamlit as st
	from docx import Document
	from transformers import pipeline

	# Load the KoGPT2 text-generation model.
	# Streamlit re-executes this script from top to bottom on every widget
	# interaction, so the pipeline is built behind st.cache_resource to avoid
	# re-creating it on each rerun.
	@st.cache_resource
	def load_model():
	    """Return the shared KoGPT2 text-generation pipeline."""
	    return pipeline("text-generation", model="skt/kogpt2-base-v2")


	model = load_model()

	# File upload UI: page title followed by one uploader per accepted format.
	st.title("파일 업로드 및 처리")
	uploaded_word_file = st.file_uploader(
	    label="Word 파일을 업로드하세요 (.docx)",
	    type="docx",
	)
	uploaded_excel_file = st.file_uploader(
	    label="Excel 파일을 업로드하세요 (.xlsx)",
	    type="xlsx",
	)

	# Word file handling: display the document text and, when the button is
	# pressed, pass that text to the KoGPT2 pipeline as a prompt.
	if uploaded_word_file is not None:
	    doc = Document(uploaded_word_file)

	    # Extract the text of every paragraph, one paragraph per line.
	    word_text = "\n".join(para.text for para in doc.paragraphs)
	    st.write("업로드된 Word 파일의 텍스트:")
	    st.write(word_text)

	    # Text processing (KoGPT2 model).
	    if st.button("Word 파일 텍스트 처리"):
	        # max_new_tokens limits only the generated continuation. The earlier
	        # max_length=100 also counted the prompt tokens, so a document longer
	        # than that budget left no room for any generated text.
	        # NOTE(review): very long documents may still exceed the model's
	        # context window - confirm whether the input needs truncating.
	        processed_text = model(word_text, max_new_tokens=100)[0]['generated_text']
	        st.write("처리된 텍스트:")
	        st.write(processed_text)

	# Excel file handling: display the sheet, run KoGPT2 over 'Column_name'
	# when that column exists, and offer the processed sheet as a download.
	def _generate_for_cell(value):
	    """Return KoGPT2 output for one cell; empty (NaN) cells are left as-is.

	    Without the NaN guard, str(value) would turn an empty cell into the
	    literal prompt "nan".
	    """
	    if pd.isna(value):
	        return value
	    # max_new_tokens limits only the generated continuation; max_length
	    # would also count the tokens of the cell text itself.
	    return model(str(value), max_new_tokens=100)[0]['generated_text']


	if uploaded_excel_file is not None:
	    df = pd.read_excel(uploaded_excel_file)
	    st.write("업로드된 Excel 파일:")
	    st.write(df)

	    # Example: text processing for the 'Column_name' column.
	    if 'Column_name' in df.columns:
	        df['Processed_Column'] = df['Column_name'].apply(_generate_for_cell)
	        st.write("처리된 Excel 데이터:")
	        st.write(df)

	        # NOTE(review): the download is offered only after processing ran;
	        # the original nesting of this section could not be confirmed.
	        # Serialize the workbook into memory instead of a fixed path in the
	        # server's working directory: that path is shared by every session,
	        # and reading it back via a bare open() left the handle unclosed.
	        output_file = "processed_file.xlsx"
	        excel_buffer = io.BytesIO()
	        df.to_excel(excel_buffer, index=False)

	        st.download_button(
	            label="처리된 Excel 파일 다운로드",
	            data=excel_buffer.getvalue(),
	            file_name=output_file,
	            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	        )