Commit 7134b06 · hy committed · Parent(s): 7f4feea

Fix server code and sync with remote

Files changed:
- .gitignore +3 -1
- aggro_model.py +188 -0
- database.py +5 -1
- main.py +24 -19
- mismatch_model.py +127 -0
- requirements.txt +0 -0
.gitignore CHANGED

```diff
@@ -3,4 +3,6 @@ __pycache__/
 *.pyc
 .venv/
 venv/
-info.md
+info.md
+*.pth
+*.pkl
```
aggro_model.py ADDED (+188 lines)
```python
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel
import pickle
import re
import os
import numpy as np

# =============================================================================
# 1. Model class definitions
# =============================================================================

# (1) Re-declare the rule-based scorer class (pickle needs it to restore the saved instance)
class RuleBasedScorer:
    def __init__(self):
        # These attributes are overwritten when the instance is restored from the pickle file.
        self.patterns = {}
        self.pattern_names = {}
        self.symbol_patterns = {
            'repeated': re.compile(r'([!?…~])\1+'),
            'ellipsis': re.compile(r'\.\.\.|…')
        }

    def get_score(self, title):
        words = re.findall(r'[가-힣A-Za-z0-9]+', str(title))
        scores = {}

        # Symbol score
        rep = len(self.symbol_patterns['repeated'].findall(title))
        ell = len(self.symbol_patterns['ellipsis'].findall(title))
        symbol_score = (rep * 30) + (ell * 10)

        for p in [11, 12, 13, 14]:
            word_score = 0
            # Word-matching score
            for word in words:
                if word in self.patterns[p]:
                    # Apply the frequency weight
                    word_score += np.log1p(self.patterns[p][word]) * 2

            if p == 11:
                total = symbol_score
            elif p == 12:
                total = word_score + (symbol_score * 0.5)
            else:
                total = word_score

            scores[p] = total

        # Derive the final score
        max_pattern = max(scores, key=scores.get)
        max_score = min(100, scores[max_pattern])

        return {
            'score': max_score,
            'pattern': max_pattern,
            'pattern_name': self.pattern_names.get(max_pattern, 'unknown')
        }

# (2) Re-declare the KoBERT classifier
class FishingClassifier(nn.Module):
    def __init__(self, bert, num_classes=2):
        super().__init__()
        self.bert = bert
        self.dropout = nn.Dropout(0.3)
        self.fc = nn.Linear(768, num_classes)

    def forward(self, input_ids, mask):
        _, pooled = self.bert(input_ids=input_ids, attention_mask=mask, return_dict=False)
        return self.fc(self.dropout(pooled))

# =============================================================================
# 2. Model loading
# =============================================================================
print("[AggroModel] Starting system load...")

aggro_model = None
tokenizer = None
rule_scorer = None
device = torch.device("cpu")  # the server runs on CPU

# --- A. Load the rule-based model (.pkl) ---
try:
    pkl_path = "rule_based_scorer.pkl"
    if os.path.exists(pkl_path):
        with open(pkl_path, "rb") as f:
            rule_scorer = pickle.load(f)
        print("✅ [Aggro] Rule-based model (PKL) loaded!")
    else:
        print(f"⚠️ [Aggro] Rule file missing: {pkl_path}")
except Exception as e:
    print(f"🚨 [Aggro] Failed to load the rule model: {e}")

# --- B. Load the KoBERT model (.pth) ---
try:
    # 1. Load the tokenizer & base model
    # (if this errors, try switching to 'monologg/kobert')
    MODEL_NAME = 'skt/kobert-base-v1'
    tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
    bert_base = BertModel.from_pretrained(MODEL_NAME)

    # 2. Build the bare model
    aggro_model = FishingClassifier(bert_base).to(device)

    # 3. Load the weight file
    pth_path = "bert_fishing_model_best.pth"

    if os.path.exists(pth_path):
        state_dict = torch.load(pth_path, map_location=device)
        aggro_model.load_state_dict(state_dict)
        aggro_model.eval()
        print("✅ [Aggro] KoBERT model (PTH) loaded!")
    else:
        print(f"⚠️ [Aggro] KoBERT file missing: {pth_path}")
        aggro_model = None

except Exception as e:
    print(f"🚨 [Aggro] Failed to load KoBERT: {e}")
    aggro_model = None


# =============================================================================
# 3. Main function (called from main.py)
# =============================================================================
def get_aggro_score(title: str) -> dict:
    """
    Takes a title and returns a clickbait score together with its rationale.
    """

    # 1. Rule-based score
    rule_score = 0.0
    rule_pattern = "not available"

    if rule_scorer:
        try:
            res = rule_scorer.get_score(title)
            rule_score = res['score']  # 0-100
            rule_pattern = res['pattern_name']
        except Exception as e:
            print(f"[Aggro] Rule scoring error: {e}")

    # 2. KoBERT score
    bert_score = 0.0
    if aggro_model and tokenizer:
        try:
            inputs = tokenizer(
                title,
                return_tensors='pt',
                padding="max_length",
                truncation=True,
                max_length=64
            )
            input_ids = inputs['input_ids'].to(device)
            mask = inputs['attention_mask'].to(device)

            with torch.no_grad():
                outputs = aggro_model(input_ids, mask)
                probs = torch.softmax(outputs, dim=1)
                bert_score = probs[0][1].item() * 100  # 0-100
        except Exception as e:
            print(f"[Aggro] KoBERT prediction error: {e}")
            bert_score = 50.0

    # 3. Final blend (rules 40% + BERT 60%)
    if rule_scorer and aggro_model:
        final_score = (rule_score * 0.4) + (bert_score * 0.6)
    elif aggro_model:
        final_score = bert_score
    elif rule_scorer:
        final_score = rule_score
    else:
        final_score = 0.0

    # 4. Result text
    normalized_score = min(final_score / 100.0, 1.0)  # map to 0-1

    reason = "The title reads as plain."
    recommendation = "Looks fine."

    if final_score >= 70:
        reason = f"Judged clickbait by the AI prediction ({bert_score:.0f} pts) and the rule pattern ({rule_pattern})."
        recommendation = "Please rewrite the title around objective facts."
    elif final_score >= 40:
        reason = f"Some clickbait signals ({rule_pattern}) were detected."
        recommendation = "Consider toning the wording down a little."

    return {
        "score": round(normalized_score, 2),
        "reason": reason,
        "recommendation": recommendation
    }
```
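Since `get_aggro_score` degrades gracefully when either artifact is missing, the module is easy to smoke-test on its own. A minimal sketch with made-up titles; it assumes `rule_based_scorer.pkl` and `bert_fishing_model_best.pth` sit in the working directory:

```python
# Standalone check for aggro_model.py; the sample titles are invented.
from aggro_model import get_aggro_score

for title in ["충격!! 이것만 알면 인생이 바뀐다...", "3분기 GDP 0.6% 성장"]:
    result = get_aggro_score(title)
    print(f"{title} -> score={result['score']}, reason={result['reason']}")
```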
database.py CHANGED

```diff
@@ -8,7 +8,11 @@ SQLALCHEMY_DATABASE_URL = os.environ.get("SQLALCHEMY_DATABASE_URL")
 
 # 2. Create the DB engine
 engine = create_engine(
-    SQLALCHEMY_DATABASE_URL
+    SQLALCHEMY_DATABASE_URL,
+    pool_pre_ping=True,   # <--- (1) ping the connection before using it
+    pool_recycle=300,     # <--- (2) recycle connections every 5 min (300 s) to avoid stale ones
+    pool_size=5,          # <--- (3) number of connections kept open concurrently
+    max_overflow=10       # <--- (4) extra connections allowed under sudden load
 )
 
 # 3. Create the session (Session) used to talk to the DB
```
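main.py consumes this engine through a `Session` dependency (`Depends(get_db)`); the body of that dependency is not part of this diff, so the following is only a sketch of the usual `sessionmaker`/`get_db` pairing these pool options feed into:

```python
# Assumed shape of the session plumbing around `engine`; not shown in this diff.
from sqlalchemy.orm import sessionmaker

SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

def get_db():
    db = SessionLocal()
    try:
        yield db      # FastAPI injects this session via Depends(get_db)
    finally:
        db.close()    # always hand the connection back to the pool
```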
main.py CHANGED

```diff
@@ -6,7 +6,8 @@ from database import engine, SessionLocal
 from sqlalchemy.orm import Session  # required import
 from models import Article, AnalysisResult
 from crossref_model import get_crossref_score_and_reason
-
+from mismatch_model import calculate_mismatch_score  # <-- pull in the required function
+from aggro_model import get_aggro_score
 
 models.Base.metadata.create_all(bind=engine)
 from database import SessionLocal  # the SessionLocal defined in database.py
@@ -51,21 +52,25 @@ app = FastAPI()
 
 @app.post("/api/v1/analyze", response_model=AnalysisResponse)
 def analyze_article(request: ArticleRequest, db: Session = Depends(get_db)):
-    """Article analysis API
-        recommendation="Please revise the sensational wording in the title.",
+    """Article analysis API"""
+
+    # 1. AggroScore
+    aggro_result = get_aggro_score(request.article_title)
+
+    real_aggro = ScoreBreakdown(
+        score=aggro_result["score"],
+        reason=aggro_result["reason"],
+        recommendation=aggro_result["recommendation"],
         found_urls=None
     )
 
-    # 2. mismatch
+    # 2. mismatch
+    mismatch_result = calculate_mismatch_score(request.article_title, request.article_body)
+
+    real_mismatch = ScoreBreakdown(
+        score=mismatch_result["score"],
+        reason=mismatch_result["reason"],
+        recommendation=mismatch_result["recommendation"],
         found_urls=None
     )
@@ -97,8 +102,8 @@ def analyze_article(request: ArticleRequest, db: Session = Depends(get_db)):
     w_mismatch = 0.4
     w_crossref = 0.2
 
-    final_score = (
-        (
+    final_score = (real_aggro.score * w_aggro) + \
+                  (real_mismatch.score * w_mismatch) + \
                   (final_crossref.score * w_crossref)
 
     final_level = "Safe"
@@ -124,8 +129,8 @@ def analyze_article(request: ArticleRequest, db: Session = Depends(get_db)):
     # 3. Save the analysis result to the 'analysis_results' table
     new_result = AnalysisResult(
         article_id=new_article.article_id,  # foreign-key link
-        aggro_score=
-        mismatch_score=
+        aggro_score=real_aggro.score,
+        mismatch_score=real_mismatch.score,
         crossref_score=final_crossref.score,
         final_risk=final_score,
     )
@@ -140,12 +145,12 @@ def analyze_article(request: ArticleRequest, db: Session = Depends(get_db)):
         final_risk_score=round(final_score, 2),  # rounded to 2 decimal places
         final_risk_level=final_level,
         breakdown={
-            "aggro_score":
-            "mismatch_score":
+            "aggro_score": real_aggro,
+            "mismatch_score": real_mismatch,
             "crossref_score": final_crossref
         }
     )
 
 @app.get("/")
 def read_root():
-    return {"message": "AI article analysis server
+    return {"message": "AI article analysis server"}
```
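With both model modules wired in, the endpoint can be exercised end to end once the server is up. A hedged sketch: the request fields and response keys come from this diff, while the host, port, and sample article are assumptions:

```python
# Hypothetical client call against a locally running instance (e.g. uvicorn main:app).
import requests

payload = {
    "article_title": "충격!! 주가 하루 만에 반토막",  # invented example
    "article_body": "지수는 화요일 0.4% 하락했다가 장 마감 전 대부분 회복했다...",
}
resp = requests.post("http://localhost:8000/api/v1/analyze", json=payload)
data = resp.json()
print(data["final_risk_score"], data["final_risk_level"])
```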
mismatch_model.py ADDED (+127 lines)
```python
import torch
import torch.nn.functional as F
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from sentence_transformers import SentenceTransformer, util

# Device setup (prefer GPU, fall back to CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"✅ Running on: {device}")

# =============================================================================
# 2. Model loading (this can take a little while)
# =============================================================================
print("\n⏳ [1/3] Loading the KoBART summarization model...")
kobart_summarizer = pipeline(
    "summarization",
    model="gogamza/kobart-summarization",
    device=0 if torch.cuda.is_available() else -1
)

print("⏳ [2/3] Loading the SBERT similarity model...")
sbert_model = SentenceTransformer('jhgan/ko-sroberta-multitask')

print("⏳ [3/3] Loading the NLI (contradiction detection) model...")
nli_model_name = "Huffon/klue-roberta-base-nli"
nli_tokenizer = AutoTokenizer.from_pretrained(nli_model_name)
nli_model = AutoModelForSequenceClassification.from_pretrained(nli_model_name).to(device)

print("🎉 All models loaded!\n")

# =============================================================================
# 3. Worker functions
# =============================================================================

def summarize_kobart_strict(text):
    """Summarize the article body with KoBART."""
    # Skip summarization when the body is too short (avoids pipeline errors)
    if len(text) < 50:
        return text

    try:
        result = kobart_summarizer(
            text,
            min_length=15,
            max_length=128,
            num_beams=4,
            no_repeat_ngram_size=3,
            early_stopping=True
        )[0]['summary_text']
        return result.strip()
    except Exception:
        return text[:100]  # on failure, fall back to the opening of the body

def get_cosine_similarity(title, summary):
    """Compute the SBERT cosine similarity between the title and the summary."""
    emb1 = sbert_model.encode(title, convert_to_tensor=True)
    emb2 = sbert_model.encode(summary, convert_to_tensor=True)
    return util.cos_sim(emb1, emb2).item()

def get_mismatch_score(summary, title):
    """Compute the NLI contradiction probability between the summary (premise) and the title (hypothesis)."""
    inputs = nli_tokenizer(
        summary, title,
        return_tensors='pt',
        truncation=True,
        max_length=512
    ).to(device)

    # Guard against a RoBERTa error (drop token_type_ids)
    if "token_type_ids" in inputs:
        del inputs["token_type_ids"]

    with torch.no_grad():
        outputs = nli_model(**inputs)
        probs = F.softmax(outputs.logits, dim=-1)[0]

    # Label order for Huffon/klue-roberta-base-nli: [Entailment, Neutral, Contradiction]
    # Return the contradiction probability (index 2)
    return round(probs[2].item(), 4)

# =============================================================================
# 4. Final main function (Main Logic)
# =============================================================================

def calculate_mismatch_score(article_title, article_body):
    """
    Applies the best weights found by grid search:
    - w1 (SBERT, semantic distance): 0.8
    - w2 (NLI, logical contradiction): 0.2
    - Threshold: a score of 0.45 or above means 'risky'
    """
    #if not (kobart_summarizer and sbert_model and nli_model):
    #    return {"score": 0.0, "reason": "Model loading failed", "recommendation": "Server check required"}

    # 1. Summarize the body
    summary = summarize_kobart_strict(article_body)

    # 2. SBERT semantic distance (1 - similarity)
    sbert_sim = get_cosine_similarity(article_title, summary)
    semantic_distance = 1 - sbert_sim

    # 3. NLI contradiction probability
    nli_contradiction = get_mismatch_score(summary, article_title)

    # 4. Final score
    w1, w2 = 0.8, 0.2
    final_score = (w1 * semantic_distance) + (w2 * nli_contradiction)
    reason = (
        f"[Debug mode]\n"
        f"1. Summary: {summary}\n"
        f"2. SBERT distance: {semantic_distance:.4f}\n"
        f"3. NLI contradiction: {nli_contradiction:.4f}"
    )

    #reason = f"Reflects the semantic distance between title and body ({semantic_distance:.4f}) and the contradiction probability ({nli_contradiction:.4f})."

    # 5. Verdict (threshold 0.45)
    if final_score >= 0.45:
        recommendation = "The title is likely to distort or contradict the body."
    else:
        recommendation = "The title matches the body."

    # Payload handed back to main.py
    return {
        "score": round(final_score, 4),
        "reason": reason,
        "recommendation": recommendation
    }
```
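`calculate_mismatch_score` can likewise be run standalone; a minimal sketch with an invented Korean title/body pair (all three models are fetched from the Hub on first import):

```python
# Standalone check for mismatch_model.py; the article below is made up.
from mismatch_model import calculate_mismatch_score

title = "주식시장 하루 만에 붕괴"
body = ("지수는 화요일 0.4% 하락했으나 장 마감 전 낙폭 대부분을 회복했고, "
        "전문가들은 이를 통상적인 변동성이라고 평가했다. ") * 3

result = calculate_mismatch_score(title, body)
print(result["score"], result["recommendation"])
```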
requirements.txt CHANGED

Binary files a/requirements.txt and b/requirements.txt differ