Vishakha committed on
Commit
f7422b5
·
0 Parent(s):

Clean push from scratch

Browse files
Files changed (6) hide show
  1. README.md +3 -0
  2. app.py +146 -0
  3. cyber_docs/gdpr.txt +17 -0
  4. cyber_docs/iso27001.txt +9 -0
  5. cyber_docs/nist.txt +11 -0
  6. requirements.txt +5 -0
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # AI-Powered Cybersecurity GRC Chatbot
2
+
3
+ This Streamlit-based chatbot answers queries related to GDPR, ISO 27001, and NIST using LangChain and local vector search.
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
# Page configuration has to be the first Streamlit call in the script.
# The icon is a real shield emoji; the previous literal was a mis-decoded
# (mojibake) byte sequence, which is not a valid emoji for page_icon.
st.set_page_config(page_title="GRC Chatbot", page_icon="🛡️", layout="centered")

# Dark theme overrides plus the .chat-box style used by the chat history.
st.markdown("""
<style>
.main {
    background-color: #0F1117;
    color: white;
    font-family: 'Segoe UI', sans-serif;
}
.block-container {
    padding-top: 2rem;
    padding-bottom: 2rem;
}
.stTextInput>div>div>input {
    background-color: #1c1e26;
    color: white;
}
.stTextInput label, .stTextArea label {
    color: #ffffff;
}
.chat-box {
    background-color: #1c1e26;
    padding: 15px;
    margin: 10px 0;
    border-radius: 8px;
    border: 1px solid #303030;
}
</style>
""", unsafe_allow_html=True)

# 🧠 Initialize chat history
# A list of (question, answer) tuples kept in the per-session state so it
# survives Streamlit's script reruns.
if "history" not in st.session_state:
    st.session_state.history = []
35
+
36
+ from langchain_community.document_loaders import TextLoader
37
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
38
+ from langchain_community.vectorstores import FAISS
39
+ from langchain_community.embeddings import HuggingFaceEmbeddings
40
+ import streamlit as st
41
+ from transformers import pipeline
42
# Streamlit re-executes this script on every interaction, so the expensive
# objects below are created through st.cache_resource: they are built once
# and reused on later reruns instead of being reloaded each time.

# Built-in GRC documents shipped in the cyber_docs folder.
BASE_DOC_PATHS = (
    "cyber_docs/nist.txt",
    "cyber_docs/iso27001.txt",
    "cyber_docs/gdpr.txt",
)


@st.cache_resource
def _load_summarizer():
    """Create the summarization pipeline (facebook/bart-large-cnn)."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


@st.cache_resource
def _load_embeddings():
    """Create the local HuggingFace embedding model (no OpenAI key needed)."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


@st.cache_resource
def _build_base_index():
    """Load, split and index the built-in documents.

    Returns:
        A ``(documents, chunks, vectorstore)`` tuple. The lists are shared by
        the cache, so callers must copy them before mutating.
    """
    base_docs = []
    for path in BASE_DOC_PATHS:
        # The files contain non-ASCII punctuation; read them as UTF-8 rather
        # than relying on the platform's default encoding.
        base_docs.extend(TextLoader(path, encoding="utf-8").load())
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(base_docs)
    return base_docs, chunks, FAISS.from_documents(chunks, _load_embeddings())


# Load summarizer pipeline
summarizer = _load_summarizer()

# Use HuggingFace Local Embeddings (NO OpenAI key needed)
embeddings = _load_embeddings()

# Splitter reused later when an uploaded document is added
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Documents, chunks and FAISS vector store for the built-in files
_base_docs, _base_chunks, vectorstore = _build_base_index()

# Fresh per-run copies: later code extends `docs` in place, which must not
# leak into the cached lists shared across reruns.
docs = list(_base_docs)
split_docs = list(_base_chunks)
60
+
61
+ # Simple LLM-like function (Fetch top similar docs)
62
def simple_llm(query):
    """Answer a GRC question by summarizing the most relevant document chunks.

    The five chunks most similar to ``query`` are fetched from the
    module-level ``vectorstore``. When the query names one or more frameworks
    (GDPR, NIST, ISO 27001), only chunks whose ``source`` metadata mentions
    one of them are kept; if that leaves nothing, every retrieved chunk is
    used. The kept text is condensed with the module-level ``summarizer``.

    Args:
        query: The user's question as plain text.

    Returns:
        The summary text, or a short notice when nothing could be retrieved.
    """
    import re  # function-scope import: the file header does not import re

    no_result = "No relevant GRC content was found for this question."

    # Search top matching chunks (across all documents)
    matched_docs = vectorstore.similarity_search(query, k=5)
    if not matched_docs:
        return no_result

    # Collect every framework the query mentions (not just the first one).
    lowered = query.lower()
    keywords = [name for name in ("gdpr", "nist") if name in lowered]
    # Whole-word match so words such as "comparison" or "isolation" do not
    # count as a mention of ISO; "27001" covers spellings like "ISO27001".
    if re.search(r"\biso\b", lowered) or "27001" in lowered:
        keywords.append("iso")

    filtered_docs = [
        doc for doc in matched_docs
        if any(
            keyword in str(doc.metadata.get("source", "")).lower()
            for keyword in keywords
        )
    ]

    # Fallback: no framework named, or none of the hits came from it.
    if not filtered_docs:
        filtered_docs = matched_docs

    combined_text = " ".join(doc.page_content for doc in filtered_docs).strip()
    if not combined_text:
        return no_result

    # truncation=True keeps over-long input within the model's token limit
    # instead of failing inside the pipeline.
    summary = summarizer(
        combined_text,
        max_length=200,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
86
+
87
+
88
# Streamlit UI
# Script-level import: html.escape is needed below and the file header does
# not import it.
import html

st.title("🛡️ AI Chatbot for GRC")
uploaded_file = st.file_uploader("📁 Upload a new GRC .txt file", type="txt")
if uploaded_file is not None:
    # The file name comes from the browser and is untrusted: keep only its
    # last path component so it cannot point outside the cyber_docs folder.
    safe_name = os.path.basename(uploaded_file.name.replace("\\", "/"))
    if safe_name in ("", ".", ".."):
        st.error("The uploaded file has an invalid name.")
    else:
        os.makedirs("cyber_docs", exist_ok=True)
        file_path = os.path.join("cyber_docs", safe_name)

        # Save uploaded file to cyber_docs folder
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        st.success(f"✅ {uploaded_file.name} uploaded successfully!")

        # Load the uploaded file
        loader = TextLoader(file_path)
        new_docs = loader.load()
        docs.extend(new_docs)

        # Re-split all documents (old + new)
        split_docs = text_splitter.split_documents(docs)

        # Rebuild the vectorstore with updated docs
        vectorstore = FAISS.from_documents(split_docs, embeddings)

query = st.text_input("Ask your GRC question:").strip()

if st.button("🧹 Clear Chat"):
    st.session_state.history = []
    # Newer Streamlit releases expose st.rerun; older ones only have
    # st.experimental_rerun. Use whichever is available.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()

st.markdown("""
**💡 Example Queries:**
- What are the functions of NIST?
- Explain GDPR principles.
- What is ISO 27001 risk assessment?
""")

if query:
    try:
        result = simple_llm(query)

        # The script reruns on every interaction while the text box still
        # holds the same question; avoid stacking identical entries.
        entry = (query, result)
        if not st.session_state.history or st.session_state.history[-1] != entry:
            st.session_state.history.append(entry)

        st.subheader("🔍 Answer:")
        st.write(result)

        if st.session_state.history:
            st.markdown("---")
            st.subheader("💬 Chat History")

            # Newest exchange first.
            for q, a in reversed(st.session_state.history):
                # Question and answer are rendered as raw HTML, so escape
                # them to stop user or document text from injecting markup.
                st.markdown(
                    "<div class='chat-box'>"
                    f"<p><b>🧑 You:</b> {html.escape(q)}</p>"
                    f"<p><b>🤖 Bot:</b> {html.escape(a)}</p>"
                    "</div>",
                    unsafe_allow_html=True,
                )

    except Exception as e:
        # Top-level UI boundary: show the failure instead of crashing the app.
        st.error(f"An error occurred: {e}")
else:
    st.info("Please enter a query related to GRC (NIST, ISO 27001, GDPR).")
cyber_docs/gdpr.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The General Data Protection Regulation (GDPR) is a data privacy law applicable in the European Union.
2
+
3
+ Key Principles of GDPR:
4
+ 1. **Lawfulness, Fairness & Transparency** – Data should be processed legally, fairly, and in a transparent manner.
5
+ 2. **Purpose Limitation** – Data should only be collected for clear, specified, and legitimate purposes.
6
+ 3. **Data Minimization** – Only the minimum necessary data should be collected.
7
+ 4. **Accuracy** – Data must be accurate and kept up to date.
8
+ 5. **Storage Limitation** – Data should only be stored for as long as necessary.
9
+ 6. **Integrity & Confidentiality** – Data must be protected against unauthorized or unlawful processing and accidental loss.
10
+ 7. **Accountability** – The data controller is responsible for demonstrating GDPR compliance.
11
+
12
+ GDPR also includes data subject rights like:
13
+ - Right to Access
14
+ - Right to Rectification
15
+ - Right to Erasure ("Right to be forgotten")
16
+ - Right to Data Portability
17
+
cyber_docs/iso27001.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ISO/IEC 27001 is an international standard for Information Security Management Systems (ISMS).
2
+
3
+ Key Features of ISO 27001:
4
+ 1. **Risk-Based Approach** – Identifies, assesses, and treats information security risks systematically.
5
+ 2. **Security Controls** – Includes 114 controls across 14 domains like access control, cryptography, and supplier relationships.
6
+ 3. **Continual Improvement** – Promotes PDCA (Plan-Do-Check-Act) cycle to improve ISMS.
7
+ 4. **Compliance** – Helps meet legal, contractual, and regulatory requirements.
8
+
9
+ ISO 27001 certification demonstrates that an organization is committed to securing information assets. It is suitable for businesses of all sizes and industries.
cyber_docs/nist.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ The NIST Cybersecurity Framework (CSF) is developed by the National Institute of Standards and Technology (USA).
2
+
3
+ Functions of NIST CSF:
4
+ 1. **Identify** – Understand the organization's cybersecurity risks, assets, and policies.
5
+ 2. **Protect** – Implement safeguards like access controls and awareness training.
6
+ 3. **Detect** – Enable timely discovery of cybersecurity events.
7
+ 4. **Respond** – Take action regarding detected events.
8
+ 5. **Recover** – Restore operations and services after an incident.
9
+
10
+ NIST provides a common language and methodology for managing cybersecurity risks. It is widely adopted by governments and organizations to enhance their cybersecurity posture.
11
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ langchain
4
+ faiss-cpu
5
+ sentence-transformers