sdfsdh committed on
Commit
e0a73dc
·
verified ·
1 Parent(s): b36b1be

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +233 -0
src/streamlit_app.py CHANGED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
+ import zipfile
5
+ import os
6
+ import tempfile
7
+ import json
8
+ from huggingface_hub import hf_hub_download
9
+
10
# Environment variables and cache-directory setup (omitted; same as the original code)

# ... (the environment-variable / cache-directory creation section is identical to the original code)

# Hugging Face Hub coordinates of the packaged model:
# repository that hosts the deployment archive, and the archive's filename.
HF_REPO_ID = "sdfsdh/koalpaca-cpu-model"
HF_FILENAME = "koalpaca_cpu_deployment.zip"
16
+
17
def download_model_from_hf_hub():
    """Fetch the packaged model archive from the Hugging Face Hub.

    Downloads ``HF_FILENAME`` from the ``HF_REPO_ID`` model repository into
    a /tmp cache and reports progress in the Streamlit UI.

    Returns:
        The local filesystem path of the downloaded ZIP, or ``None`` if the
        download failed (the error is shown in the UI instead of raised).
    """
    try:
        st.info("πŸ“₯ Hugging Face Model Repositoryμ—μ„œ λ‹€μš΄λ‘œλ“œ 쀑...")
        archive_path = hf_hub_download(
            repo_id=HF_REPO_ID,
            filename=HF_FILENAME,
            repo_type="model",
            cache_dir="/tmp/hf_hub_cache",
        )
        size_gb = os.path.getsize(archive_path) / 1024**3
        st.success(f"βœ… λ‹€μš΄λ‘œλ“œ μ™„λ£Œ: {size_gb:.2f}GB")
        st.info(f"πŸ“ λ‹€μš΄λ‘œλ“œ 경둜: {archive_path}")
        return archive_path
    except Exception as exc:
        st.error(f"❌ Hugging Face Hub λ‹€μš΄λ‘œλ“œ μ‹€νŒ¨: {exc}")
        return None
32
+
33
def verify_zip_file(zip_path):
    """Validate that *zip_path* is a readable, uncorrupted ZIP archive.

    Args:
        zip_path: Path to the ZIP file to check.

    Returns:
        True when the archive opens and every member passes its CRC check,
        False otherwise (the failure reason is shown in the Streamlit UI).
    """
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # BUG FIX: testzip() does not raise on corruption — it returns the
            # name of the first bad member (or None if all are OK). The original
            # code discarded the return value, so corrupt archives still passed.
            bad_member = zip_ref.testzip()
            if bad_member is not None:
                st.error(f"❌ ZIP 파일 검증 μ‹€νŒ¨: {bad_member}")
                return False
            file_list = zip_ref.namelist()
            st.info(f"πŸ“¦ ZIP 파일 검증 성곡: {len(file_list)}개 파일")
            return True
    except zipfile.BadZipFile:
        st.error("❌ μœ νš¨ν•˜μ§€ μ•Šμ€ ZIP 파일")
        return False
    except Exception as e:
        st.error(f"❌ ZIP 파일 검증 μ‹€νŒ¨: {e}")
        return False
46
+
47
def extract_zip_streaming(zip_path, extract_path):
    """Extract *zip_path* under *extract_path* in 1 MB chunks to minimize RAM usage.

    Args:
        zip_path: Path to the ZIP archive to extract.
        extract_path: Directory the archive contents are written into.

    Raises:
        ValueError: If an archive member's path would escape *extract_path*.
    """
    st.info("πŸ“¦ 슀트리밍 λ°©μ‹μœΌλ‘œ μ••μΆ• ν•΄μ œ 쀑...")
    extract_root = os.path.realpath(extract_path)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for member in zip_ref.infolist():
            extracted_path = os.path.join(extract_path, member.filename)
            # SECURITY FIX ("zip slip"): member.filename comes from the archive
            # and may contain "../" components; refuse any member whose
            # resolved path would land outside the extraction root.
            if os.path.commonpath(
                [extract_root, os.path.realpath(extracted_path)]
            ) != extract_root:
                raise ValueError(f"unsafe path in archive: {member.filename}")
            # Directories only need to be created, not copied.
            if member.is_dir():
                os.makedirs(extracted_path, exist_ok=True)
                continue
            # Make sure the parent directory exists before writing the file.
            os.makedirs(os.path.dirname(extracted_path), exist_ok=True)
            # Stream-copy the member so only one 1 MB chunk is in memory at a time.
            with zip_ref.open(member) as src, open(extracted_path, "wb") as dst:
                while True:
                    chunk = src.read(1024 * 1024)  # 1MB buffer
                    if not chunk:
                        break
                    dst.write(chunk)
    st.success("βœ… μ••μΆ• ν•΄μ œ μ™„λ£Œ!")
67
+
68
@st.cache_resource
def download_and_load_model():
    """Download, extract, and load the KoAlpaca model for serving.

    Cached by Streamlit (``st.cache_resource``) so the heavy download/load
    work runs once per process. Pipeline: download the ZIP from the HF Hub,
    verify it, stream-extract it under /tmp, locate the model directory,
    then load either the quantized CPU checkpoint (if present) or the
    standard fp16 model from the ``beomi/KoAlpaca-Polyglot-5.8B`` repo.

    Returns:
        (pipe, tokenizer) on success; (None, None) on any failure
        (the error plus traceback is shown in the Streamlit UI).
    """
    try:
        temp_dir = "/tmp/koalpaca_model"
        os.makedirs(temp_dir, exist_ok=True)
        extract_path = os.path.join(temp_dir, "koalpaca_cpu_deployment")

        zip_path = download_model_from_hf_hub()
        if zip_path is None:
            raise Exception("Hugging Face Hub λ‹€μš΄λ‘œλ“œ μ‹€νŒ¨")

        if not verify_zip_file(zip_path):
            raise Exception("ZIP 파일 검증 μ‹€νŒ¨")

        # Streaming extraction instead of extractall() to keep RAM usage low.
        extract_zip_streaming(zip_path, extract_path)

        # Locate the directory that actually contains the model files:
        # first directory holding the quantized checkpoint or a tokenizer file.
        model_path = None
        for root, dirs, files in os.walk(extract_path):
            if "cpu_quantized_model.pt" in files or "tokenizer.json" in files:
                model_path = root
                break
        if model_path is None:
            # Fallback: first subdirectory of the extraction root, else the root itself.
            subdirs = [d for d in os.listdir(extract_path) if os.path.isdir(os.path.join(extract_path, d))]
            if subdirs:
                model_path = os.path.join(extract_path, subdirs[0])
            else:
                model_path = extract_path

        st.info(f"πŸ“ λͺ¨λΈ 경둜: {model_path}")
        st.info(f"πŸ“‹ 파일 λͺ©λ‘: {os.listdir(model_path)}")

        st.info("πŸ“ ν† ν¬λ‚˜μ΄μ € λ‘œλ”© 쀑...")
        # use_fast=False: loads the slow (Python) tokenizer from the extracted files.
        tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            trust_remote_code=True,
            use_fast=False,
            cache_dir="/tmp/transformers_cache"
        )

        quantized_model_path = os.path.join(model_path, "cpu_quantized_model.pt")
        if os.path.exists(quantized_model_path):
            # Quantized path: load the base architecture in fp32 on CPU, then
            # overwrite its weights with the quantized checkpoint's state dict.
            st.info("⚑ μ–‘μžν™”λœ CPU λͺ¨λΈ λ‘œλ“œ 쀑...")
            model = AutoModelForCausalLM.from_pretrained(
                "beomi/KoAlpaca-Polyglot-5.8B",
                torch_dtype=torch.float32,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                cache_dir="/tmp/transformers_cache"
            )
            # NOTE(review): torch.load on a downloaded checkpoint unpickles
            # arbitrary objects — consider weights_only=True; confirm the
            # checkpoint format supports it.
            checkpoint = torch.load(quantized_model_path, map_location="cpu")
            if 'model_state_dict' in checkpoint:
                model.load_state_dict(checkpoint['model_state_dict'])
                st.success("βœ… μ–‘μžν™”λœ λͺ¨λΈ λ‘œλ“œ μ™„λ£Œ!")
            else:
                raise KeyError("model_state_dict not found")
        else:
            # Standard path: fp16 model with automatic device placement.
            st.info("🧠 ν‘œμ€€ λͺ¨λΈ λ‘œλ“œ 쀑...")
            model = AutoModelForCausalLM.from_pretrained(
                "beomi/KoAlpaca-Polyglot-5.8B",
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                cache_dir="/tmp/transformers_cache"
            )

        model.eval()
        st.info("πŸ”§ Pipeline 생성 쀑...")
        pipe = pipeline(
            'text-generation',
            model=model,
            tokenizer=tokenizer,
            device_map="auto"
        )
        return pipe, tokenizer
    except Exception as e:
        # Any failure above funnels here; surface the message and full traceback
        # in the UI and signal failure to the caller with (None, None).
        st.error(f"❌ λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {str(e)}")
        import traceback
        st.error(f"상세 였λ₯˜: {traceback.format_exc()}")
        return None, None
151
+
152
def generate_response(pipe, prompt, max_new_tokens=200, temperature=0.7, top_p=0.9):
    """Generate a model answer for *prompt* via the text-generation pipeline.

    The prompt is wrapped in the KoAlpaca instruction template
    ("### 질문: ... ### λ‹΅λ³€:") before generation.

    Args:
        pipe: A transformers text-generation pipeline (or any compatible callable).
        prompt: The user's question text.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature. Previously hard-coded to 0.7 even
            though the UI exposes a temperature slider; now a parameter with
            the same default, so existing callers are unaffected.
        top_p: Nucleus-sampling cutoff (default unchanged at 0.9).

    Returns:
        The stripped generated text, or an error message string on failure.
    """
    try:
        formatted_prompt = f"### 질문: {prompt}\n\n### λ‹΅λ³€:"
        result = pipe(
            formatted_prompt,
            do_sample=True,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            return_full_text=False,
            # NOTE(review): eos_token_id=2 is hard-coded; presumably the
            # tokenizer's EOS id — confirm against tokenizer.eos_token_id.
            eos_token_id=2,
        )
        return result[0]['generated_text'].strip()
    except Exception as e:
        return f"응닡 생성 쀑 였λ₯˜: {str(e)}"
167
+
168
def main():
    """Streamlit entry point: load the model once, then serve a simple Q&A UI.

    Renders status/diagnostic info, initializes the cached model pipeline,
    and exposes a prompt box with token-count and temperature sliders plus a
    JSON view of the last response and system info.
    """
    st.title("πŸ€– KoAlpaca CPU μ„œλ²„")
    st.markdown("Hugging Face Model Repositoryμ—μ„œ λ‘œλ“œλœ KoAlpaca λͺ¨λΈ μ„œλΉ™")
    st.info("πŸ”§ μΊμ‹œ 디렉토리 κΆŒν•œ 문제 해결됨")
    # Surface the cache-related environment for debugging permission issues.
    st.info(f"πŸ“ TRANSFORMERS_CACHE: {os.environ.get('TRANSFORMERS_CACHE')}")
    st.info(f"πŸ“ HF_HOME: {os.environ.get('HF_HOME')}")
    st.info(f"πŸ“ Model Repository: {HF_REPO_ID}")
    st.info(f"πŸ“„ 파일λͺ…: {HF_FILENAME}")

    # Heavy step: download + load is cached via st.cache_resource, so it only
    # actually runs on the first invocation per process.
    with st.spinner("λͺ¨λΈ μ΄ˆκΈ°ν™” 쀑... (μΊμ‹œ κΆŒν•œ 문제 해결됨, 5-10λΆ„ μ†Œμš”)"):
        pipe, tokenizer = download_and_load_model()
        if pipe is None:
            st.error("❌ λͺ¨λΈ λ‘œλ“œμ— μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€.")
            st.info("πŸ’‘ ν•΄κ²° 방법:")
            st.markdown(f"""
            1. μΊμ‹œ 디렉토리 κΆŒν•œ λ¬Έμ œλŠ” ν•΄κ²°λ˜μ—ˆμŠ΅λ‹ˆλ‹€
            2. Model Repository μ ‘κ·Ό 확인: https://huggingface.co/{HF_REPO_ID}
            3. λ„€νŠΈμ›Œν¬ μ—°κ²° μƒνƒœ 확인
            """)
            return

    st.success("βœ… KoAlpaca λͺ¨λΈ μ„œλΉ™ μ€€λΉ„ μ™„λ£Œ!")
    st.header("πŸ”Œ API μΈν„°νŽ˜μ΄μŠ€")
    prompt = st.text_area(
        "질문 μž…λ ₯:",
        height=100,
        placeholder="예: μ‚Όμ„±μ „μžμ˜ μž¬λ¬΄μƒνƒœλŠ” μ–΄λ–»μŠ΅λ‹ˆκΉŒ?"
    )
    col1, col2 = st.columns(2)
    with col1:
        max_tokens = st.slider("μ΅œλŒ€ 토큰 수", 50, 500, 200)
    with col2:
        temperature = st.slider("Temperature", 0.1, 1.0, 0.7)
    if st.button("πŸš€ 응닡 생성", type="primary"):
        if prompt.strip():
            with st.spinner("응닡 생성 쀑..."):
                # NOTE(review): the temperature slider value is echoed in the
                # JSON below but is NOT passed to generate_response — the
                # generation call always uses that function's built-in default.
                response = generate_response(pipe, prompt, max_tokens)
                st.markdown("### πŸ“ 응닡:")
                st.write(response)
                with st.expander("πŸ“Š JSON 응닡"):
                    api_response = {
                        "prompt": prompt,
                        "response": response,
                        "model": "KoAlpaca-Polyglot-5.8B",
                        "max_tokens": max_tokens,
                        "temperature": temperature,
                        "source": f"HF Model Repository: {HF_REPO_ID}"
                    }
                    st.json(api_response)
        else:
            st.warning("μ§ˆλ¬Έμ„ μž…λ ₯ν•΄μ£Όμ„Έμš”.")
    with st.expander("πŸ”§ μ‹œμŠ€ν…œ 정보"):
        system_info = {
            "ν”Œλž«νΌ": "Hugging Face Spaces",
            "λͺ¨λΈ": "KoAlpaca-Polyglot-5.8B",
            "μ΅œμ ν™”": "CPU μ–‘μžν™”",
            "λ©”λͺ¨λ¦¬": "16GB RAM",
            "μ†ŒμŠ€": f"HF Model Repository: {HF_REPO_ID}",
            "μΊμ‹œ 디렉토리": "/tmp (κΆŒν•œ 문제 해결됨)",
            "TRANSFORMERS_CACHE": os.environ.get('TRANSFORMERS_CACHE'),
            "HF_HOME": os.environ.get('HF_HOME')
        }
        st.json(system_info)
231
+
232
# Script entry point: run the Streamlit app.
if __name__ == "__main__":
    main()