File size: 6,380 Bytes
954b14a da7403f 954b14a f6db94e 954b14a baba8a2 4ae98ee da7403f 567e525 954b14a baba8a2 954b14a da7403f 954b14a bc24340 da7403f 4ae98ee da7403f 954b14a 4ae98ee da7403f 4ae98ee da7403f 4ae98ee da7403f 954b14a da7403f 4ae98ee da7403f 954b14a 4ae98ee 954b14a da7403f 4ae98ee da7403f 4ae98ee da7403f 954b14a da7403f 4ae98ee bc24340 4ae98ee bc24340 4ae98ee bc24340 4ae98ee 954b14a 567e525 954b14a 567e525 954b14a 567e525 954b14a 567e525 954b14a bc24340 da7403f 954b14a 567e525 954b14a da7403f 4ae98ee 954b14a f6db94e 567e525 954b14a 4ae98ee 954b14a 4ae98ee 567e525 f6db94e 954b14a f6db94e 954b14a f6db94e 4ae98ee 567e525 da7403f 954b14a 4ae98ee 567e525 4ae98ee 954b14a da7403f 954b14a 4ae98ee da7403f 954b14a da7403f 954b14a da7403f 954b14a 4ae98ee f6db94e 954b14a 567e525 954b14a 4ae98ee 954b14a 567e525 954b14a 567e525 4ae98ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
import streamlit as st
import pandas as pd
import json
import matplotlib.pyplot as plt
from fpdf import FPDF
import tempfile
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# ---------------------------------------------------------------------
# Page config must be the first Streamlit call executed in the script.
st.set_page_config(page_title="Smart Log Copilot", layout="wide")
# ---------------------------------------------------------------------
# Small instruct model; chosen so it can run on a single consumer GPU or CPU.
MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
@st.cache_resource
def load_llm():
    """Load and cache the tokenizer + causal LM (one load per session).

    ``st.cache_resource`` keeps the heavy model in memory across
    Streamlit reruns instead of reloading it on every interaction.
    """
    tok = AutoTokenizer.from_pretrained(MODEL_NAME)
    # fp16 halves GPU memory; on CPU let HF pick a safe dtype.
    dtype = torch.float16 if torch.cuda.is_available() else "auto"
    lm = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",
        torch_dtype=dtype,
    )
    return tok, lm

tokenizer, model = load_llm()
def llm(prompt, max_new_tokens=150):
    """Greedy-decode a completion and return only the newly generated text.

    The decoded output includes the prompt, so it is stripped before
    returning — callers get just the model's continuation.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    generated = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        do_sample=False,  # deterministic decoding: intent JSON must be stable
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    return decoded.replace(prompt, "").strip()
# Prompt that forces the model to reply with a single JSON intent object.
# (The original text contained a mojibake "β" where an em dash belongs.)
INTENT_SYSTEM_PROMPT = """
Convert the user question into JSON for log analysis — no explanation, no text before or after.
VALID actions:
- "run_log_query"
- "scan_anomalies"
- "user_risk_report"
- "global_risk_report"
FORMAT:
{
"action": "",
"parameters": {
"users": "any" or ["username"],
"time_range": "",
"focus": "",
"extra": ""
}
}
Return JSON only.
"""
# Prompt used to turn raw anomaly findings into a manager-facing narrative.
SUMMARY_SYSTEM_PROMPT = """
Write a SOC-style incident summary for a security manager.
Include risk details + root cause + recommended actions.
"""
# Static stand-in image shown until real anomaly screenshots are wired in.
PLACEHOLDER_IMG = "https://dummyimage.com/600x300/ff0000/ffffff&text=Anomaly+Screenshot"
# --------------------- SAFE INTENT EXTRACTION -----------------------
def extract_intent(question):
    """Ask the LLM to translate *question* into an intent dict.

    Returns the parsed JSON object, or None when the model reply contains
    no braces or the extracted span is not valid JSON.
    """
    raw = llm(INTENT_SYSTEM_PROMPT + "\nUSER: " + question + "\nReturn JSON now:")
    start, end = raw.find("{"), raw.rfind("}")
    if start == -1 or end == -1:
        return None
    try:
        return json.loads(raw[start:end + 1])
    except json.JSONDecodeError:
        # Bare `except:` here would also hide KeyboardInterrupt/SystemExit;
        # only malformed model output should be treated as "no intent".
        return None
# --------------------- ANOMALY ANALYTICS ----------------------------
def detect_anomalies(df):
    """Scan a log DataFrame for suspicious access patterns.

    Expects lower-cased columns: ``timestamp`` and ``status`` (required),
    ``system`` and ``country`` (optional — checks are skipped if absent).
    Returns a list of ``{"type": ..., "details": ...}`` dicts.
    The caller's frame is never modified.
    """
    anomalies = []
    # Work on a copy: the caller often passes a filtered view, and writing
    # a parsed timestamp column into it would mutate (or warn on) the
    # original upload.
    df = df.copy()
    df["timestamp"] = pd.to_datetime(df["timestamp"], errors="coerce")

    # Repeated failed logins (3 or more anywhere in the window).
    fails = df[df["status"].str.contains("fail", case=False, na=False)]
    if len(fails) >= 3:
        anomalies.append({"type": "login_failures",
                          "details": f"{len(fails)} failed logins"})

    # Activity between 23:00 and 06:00.
    off = df[(df["timestamp"].dt.hour >= 23) | (df["timestamp"].dt.hour < 6)]
    if len(off) > 0:
        anomalies.append({"type": "off_hours",
                          "details": f"{len(off)} off-hours logins"})

    # Fan-out: 5+ distinct systems touched in one calendar day.
    # Guarded like 'country' below — the CSV may not carry this column.
    if "system" in df.columns:
        sys_count = df.groupby(df["timestamp"].dt.date).system.nunique()
        if any(sys_count >= 5):
            anomalies.append({"type": "many_systems",
                              "details": "5+ systems accessed in a day"})

    # "Impossible travel": logins from 2+ countries on the same day.
    if "country" in df.columns:
        loc = df.groupby(df["timestamp"].dt.date).country.nunique()
        if any(loc >= 2):
            anomalies.append({"type": "impossible_travel",
                              "details": "multiple countries in one day"})
    return anomalies
def risk_score(anoms):
    """Map an anomaly list to a ``(traffic-light icon, label)`` pair.

    0 anomalies → Low, 1–2 → Medium, 3+ → High.
    NOTE(review): the original icons were mojibake ("π’" etc.); restored
    to the intended traffic-light emoji — confirm against the UI.
    """
    if not anoms:
        return "🟢", "Low"
    if len(anoms) <= 2:
        return "🟡", "Medium"
    return "🔴", "High"
# -------------------------- PDF -------------------------------------
def build_pdf(risk_icon, risk_label, summary, anomalies):
    """Render the incident report as a one-page PDF; return its file path.

    Parameters mirror the UI state: the icon/label pair from risk_score(),
    the LLM summary text, and the anomaly dicts from detect_anomalies().
    The temp file is created with delete=False; the caller reads it back
    and is responsible for its lifetime.
    """

    def _latin1(text):
        # Core FPDF fonts only support latin-1; the emoji risk icon or any
        # non-latin-1 character in the LLM summary would otherwise raise
        # UnicodeEncodeError at pdf.output().
        return str(text).encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 8, _latin1("Security Report - Smart Log Copilot"))
    pdf.multi_cell(0, 8, _latin1(f"Risk Level: {risk_icon} {risk_label}"))
    pdf.ln(4)
    pdf.multi_cell(0, 6, _latin1(summary))
    pdf.ln(4)
    pdf.multi_cell(0, 6, "Detected anomalies:")
    if anomalies:
        for a in anomalies:
            pdf.multi_cell(0, 6, _latin1(f"- {a['type']}: {a['details']}"))
    else:
        pdf.multi_cell(0, 6, "None")
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    tmp.close()  # release the open handle so pdf.output can rewrite the path (Windows)
    pdf.output(tmp.name)
    return tmp.name
# -------------------------- UI -------------------------------------
# NOTE(review): the title emoji was mojibake in the original ("π");
# restored to a plausible icon — confirm the intended glyph.
st.title("🔐 Smart Log Copilot (CSV-powered LLM)")

uploaded = st.file_uploader("Upload CSV log file", type=["csv"])
df = pd.read_csv(uploaded) if uploaded else None
if df is not None:
    # Normalize headers so later lookups ("user", "system", "country")
    # are case-insensitive and whitespace-tolerant.
    df.columns = [c.lower().strip() for c in df.columns]
    st.success(f"CSV loaded ({len(df)} rows)")
    st.dataframe(df.head(20))

st.markdown("---")
question = st.text_input("Ask a question about the logs:")

# Chat transcript must survive Streamlit's rerun-per-interaction model.
if "history" not in st.session_state:
    st.session_state.history = []
col1, col2 = st.columns([3, 2])

with col1:
    if question and df is not None:
        with st.spinner("🧠 Analyzing logs…"):
            intent = extract_intent(question)
            params = intent.get("parameters", {}) if intent else {}
            # Intent may name specific users or "any" (default when the
            # LLM produced no parsable intent).
            users = params.get("users", "any")
            if users == "any":
                filtered = df
            else:
                wanted = ([u.lower() for u in users]
                          if isinstance(users, list) else [users.lower()])
                filtered = df[df["user"].str.lower().isin(wanted)]
            anomalies = detect_anomalies(filtered)
            icon, label = risk_score(anomalies)
            summary_prompt = (
                SUMMARY_SYSTEM_PROMPT +
                f"\nQUESTION: {question}\nMATCHED ROWS: {len(filtered)}\n"
                f"ANOMALIES: {json.dumps(anomalies)}\n"
            )
            summary = llm(summary_prompt)
        reply = f"{icon} **Risk Level: {label}**\n\n{summary}"
        # Append only once per new question: Streamlit reruns the whole
        # script on every widget interaction, and an unguarded append
        # duplicates the exchange in the transcript each time.
        if st.session_state.get("last_question") != question:
            st.session_state.last_question = question
            st.session_state.history.append(("user", question))
            st.session_state.history.append(("assistant", reply))
    for role, text in st.session_state.history:
        st.chat_message(role).write(text)

with col2:
    # Guard matches col1's, so anomalies/icon/label/summary are defined
    # whenever this branch runs.
    if df is not None and question:
        if anomalies:
            st.image(PLACEHOLDER_IMG, caption="Anomaly Screenshot")
        if "system" in df.columns:
            fig, ax = plt.subplots(figsize=(4, 2))
            df["system"].value_counts().plot(kind="bar", ax=ax)
            st.pyplot(fig)
        # Offer the PDF directly: nesting st.download_button inside
        # st.button needs two clicks and loses the file on the rerun
        # between them.
        pdf_file = build_pdf(icon, label, summary, anomalies)
        with open(pdf_file, "rb") as f:
            st.download_button(
                "📄 Download PDF Report",
                f,
                file_name="security_report.pdf",
                mime="application/pdf",
            )
|