Initial model upload
Browse files- README.md +51 -0
- accuracey.png +0 -0
- clickbait_model.pkl +3 -0
- requirements.txt +3 -0
- train_clickbait.py +64 -0
README.md
CHANGED
|
@@ -1,3 +1,54 @@
|
|
| 1 |
---
|
| 2 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
license: mit
|
| 3 |
+
language: en
|
| 4 |
+
datasets:
|
| 5 |
+
- amananandrai/clickbait-dataset
|
| 6 |
+
metrics:
|
| 7 |
+
- accuracy
|
| 8 |
+
tags:
|
| 9 |
+
- sklearn
|
| 10 |
+
- text-classification
|
| 11 |
+
- clickbait
|
| 12 |
+
widget:
|
| 13 |
+
- text: "You Won't Believe What Happens Next!"
|
| 14 |
+
example_title: "Clickbait Example"
|
| 15 |
+
- text: "Scientists Discover New Planet in Solar System"
|
| 16 |
+
example_title: "Non-Clickbait Example"
|
| 17 |
---
|
| 18 |
+
|
| 19 |
+
# Clickbait Detection Model (Logistic Regression)
|
| 20 |
+
|
| 21 |
+
هذا نموذج تعلم آلة (Scikit-learn Pipeline) تم تدريبه لتصنيف عناوين الأخبار (Headlines) إلى "Clickbait" (عنوان مثير) أو "Not Clickbait" (عنوان عادي).
|
| 22 |
+
|
| 23 |
+
## 🚀 كيف تستخدم النموذج
|
| 24 |
+
|
| 25 |
+
تم حفظ النموذج كـ `Pipeline` كامل من `sklearn`، وهو يتضمن `TfidfVectorizer` و `LogisticRegression`. هذا يعني أنه يتعامل مع النص مباشرة.
|
| 26 |
+
|
| 27 |
+
```python
|
| 28 |
+
import joblib
|
| 29 |
+
|
| 30 |
+
# قم بتحميل النموذج من Hugging Face Hub
|
| 31 |
+
# (تأكد من تثبيت huggingface_hub: pip install huggingface_hub)
|
| 32 |
+
from huggingface_hub import hf_hub_download
|
| 33 |
+
|
| 34 |
+
model_path = hf_hub_download(repo_id="Ma120/clickbait-detector", filename="clickbait_model.pkl")
|
| 35 |
+
model = joblib.load(model_path)
|
| 36 |
+
|
| 37 |
+
# اختبر النموذج
|
| 38 |
+
headlines = [
|
| 39 |
+
"You Won't Believe What Happens Next!",
|
| 40 |
+
"Local Library Announces Summer Reading Program",
|
| 41 |
+
"10 Signs You're a Genius (Number 7 Will Shock You)",
|
| 42 |
+
"Government Passes New Budget Bill"
|
| 43 |
+
]
|
| 44 |
+
|
| 45 |
+
predictions = model.predict(headlines)
|
| 46 |
+
|
| 47 |
+
# 1 = Clickbait, 0 = Not Clickbait
|
| 48 |
+
for headline, pred in zip(headlines, predictions):
|
| 49 |
+
label = "Clickbait" if pred == 1 else "Not Clickbait"
|
| 50 |
+
print(f"[{label}] {headline}")
|
| 51 |
+
|
| 52 |
+
# يمكنك أيضاً الحصول على الاحتمالات
|
| 53 |
+
# probabilities = model.predict_proba(headlines)
|
| 54 |
+
# print(probabilities)
|
accuracey.png
ADDED
|
clickbait_model.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a2096bff7ac569925320f09b0808ffec384ea47cec1e36f46d1fc366f0183bd
|
| 3 |
+
size 222500
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
scikit-learn
|
| 2 |
+
pandas
|
| 3 |
+
joblib
|
train_clickbait.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from sklearn.model_selection import train_test_split
|
| 3 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 4 |
+
from sklearn.linear_model import LogisticRegression
|
| 5 |
+
from sklearn.pipeline import Pipeline
|
| 6 |
+
import joblib
|
| 7 |
+
import os # تم إضافة هذه المكتبة لفحص الملفات
|
| 8 |
+
|
| 9 |
+
# Name of the dataset file the script expects to find in the current directory.
DATA_FILE = "clickbait_data.csv"
|
| 11 |
+
|
| 12 |
+
def train_model(data_file=DATA_FILE, model_file="clickbait_model.pkl"):
    """Train the clickbait classifier and save it to disk.

    Loads headlines from *data_file* (a CSV expected to have ``headline``
    and ``clickbait`` columns), fits a TF-IDF + logistic-regression
    pipeline, reports hold-out accuracy, and dumps the fitted pipeline
    to *model_file* with joblib.

    Args:
        data_file: Path to the training CSV (default: ``DATA_FILE``).
        model_file: Path where the fitted pipeline is written.

    Raises:
        SystemExit: with a non-zero status if the dataset is missing
            or cannot be read.
    """
    print("Starting model training...")

    # 1. Load the data — fail fast with a helpful message if the file
    #    is absent.  NOTE: exit status is 1 here; the original called
    #    bare exit(), which reported success (status 0) on failure.
    if not os.path.exists(data_file):
        print(f"Error: '{data_file}' not found in the current directory.")
        print("Please make sure the dataset is present before running the training.")
        print("You can download it from Kaggle: https://www.kaggle.com/datasets/amananandrai/clickbait-dataset")
        raise SystemExit(1)

    try:
        df = pd.read_csv(data_file)
    except Exception as e:
        print(f"Error reading {data_file}: {e}")
        raise SystemExit(1) from e

    print(f"Dataset loaded: {len(df)} headlines.")

    # 2. Features (raw headline text) and labels (0/1 clickbait flag).
    X = df['headline']
    y = df['clickbait']

    # 3. Hold out 20% for evaluation; fixed seed for reproducibility.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # 4. Build the pipeline: TF-IDF vectorization + logistic regression.
    #    Bundling both steps means the saved model accepts raw text directly.
    model_pipeline = Pipeline([
        ('vectorizer', TfidfVectorizer(max_features=5000)),  # keep the top 5000 terms
        ('classifier', LogisticRegression(max_iter=1000))
    ])

    # 5. Fit on the training split.
    print("Training the model... (This may take a minute)")
    model_pipeline.fit(X_train, y_train)

    # 6. Evaluate on the hold-out split.
    accuracy = model_pipeline.score(X_test, y_test)
    print(f"Training complete. Model accuracy: {accuracy * 100:.2f}%")

    # 7. Persist the entire fitted pipeline.
    joblib.dump(model_pipeline, model_file)
    print(f"Model saved successfully as '{model_file}'")
|
| 60 |
+
|
| 61 |
+
# --- Entry point ---
# Run training only when this file is executed directly (not on import).
if __name__ == "__main__":
    train_model()
|