# Spaces: Sleeping
| import streamlit as st | |
| import bs4 | |
| from langchain_community.document_loaders import WebBaseLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain_cohere import ChatCohere | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.prompts import PromptTemplate | |
| from dotenv import load_dotenv | |
# Load environment variables from the local ".env" file before any client is
# constructed (presumably this is where the LLM API key lives — confirm).
load_dotenv(dotenv_path='.env')

# Page heading shown at the top of the Streamlit app.
st.header("MKOM UGM RAG App")
@st.cache_resource
def get_rag_chain():
    """Build the retrieval-augmented generation (RAG) chain used by the app.

    The function downloads a fixed list of UGM admission pages, splits them
    into overlapping chunks, embeds the chunks into an in-memory FAISS index,
    and wires a retriever, a prompt template, and a Cohere chat model into a
    single runnable pipeline.

    The result is memoized with ``st.cache_resource``: Streamlit re-executes
    the script on every widget interaction, and without the cache each
    submitted question would re-download every page, reload the embedding
    model, and rebuild the index. The cached chain object is shared between
    reruns (and, per Streamlit's documentation, between sessions).

    Returns:
        A runnable chain. Calling ``invoke(question)`` with the question
        string retrieves context for it, fills the prompt, queries the LLM,
        and returns the answer parsed to a string by ``StrOutputParser``.
    """
    # Only keep post title, headers, and content from the full HTML.
    bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))
    loader = WebBaseLoader(
        web_paths=(
            "https://um.ugm.ac.id/ragam-seleksi-pascasarjana/",
            "https://um.ugm.ac.id/persyaratan-pendaftaran-magister/",
            "https://um.ugm.ac.id/persyaratan-pendaftaran-program-spesialis/",
            "https://um.ugm.ac.id/persyaratan-pendaftaran-subspesialis/",
            "https://um.ugm.ac.id/persyaratan-pendaftaran-doktor/",
            "https://um.ugm.ac.id/prosedur-pendaftaran-magister/",
            "https://um.ugm.ac.id/prosedur-pendaftaran-program-spesialis/",
            "https://um.ugm.ac.id/prosedur-pendaftaran-program-subspesialis/",
            "https://um.ugm.ac.id/prosedur-pendaftaran-doktor-2/",
            "https://um.ugm.ac.id/program-studi-program-magister-2/",
            "https://um.ugm.ac.id/program-studi-dan-daya-tampung-program-spesialis/",
            "https://um.ugm.ac.id/program-studi-program-doktor/",
            "https://um.ugm.ac.id/jadwal-seleksi-magister-dan-doktor/",
            "https://um.ugm.ac.id/jadwal-kegiatan-seleksi-program-spesialis/",
            "https://mkom.ugm.ac.id/alur-pendaftaran-magister/",
            "https://mkom.ugm.ac.id/informasi-pendaftaran-program-pra-s2-ilmu-komputer/",
            "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s2-magister/",
            "https://mkom.ugm.ac.id/program-dual-degree-double-degree-magister-ilmu-komputer/",
            "https://mkom.ugm.ac.id/informasi-pendaftaran-program-s3-doktor/"
        ),
        bs_kwargs={"parse_only": bs4_strainer},
    )
    docs = loader.load()

    # Split the pages into overlapping chunks so each retrieved piece is
    # small enough for the prompt while keeping some surrounding context.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200, add_start_index=True
    )
    all_splits = text_splitter.split_documents(docs)

    # Embed every chunk and index it in FAISS. The embedding model name
    # suggests an Indonesian sentence encoder — NOTE(review): confirm it
    # matches the language of the scraped pages.
    vectorstore = FAISS.from_documents(
        all_splits,
        HuggingFaceEmbeddings(model_name='firqaaa/indo-sentence-bert-base'),
    )
    # Similarity search returning the 6 closest chunks per question.
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
    llm = ChatCohere(model="command-r")

    def format_docs(docs):
        """Join the retrieved documents' text into one context string."""
        return "\n\n".join(doc.page_content for doc in docs)

    # Prompt text is sent to the model verbatim; the continuation lines stay
    # at column 0 so no indentation leaks into the string.
    template = """Gunakan konteks berikut untuk menjawab pertanyaan pada bagian akhir.
Jika kamu tidak tahu jawabannya, katakan saja bahwa kamu tidak tahu, jangan mencoba untuk mengarang jawaban.
Selalu katakan "Terima kasih sudah bertanya!" pada setiap akhir jawaban.
{context}
Pertanyaan: {question}
Jawaban:"""
    custom_rag_prompt = PromptTemplate.from_template(template)

    # The incoming question is fanned out twice: once through the retriever
    # (formatted into "context") and once unchanged (as "question").
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | custom_rag_prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain
# Obtain the question-answering chain.
rag_chain = get_rag_chain()

# Free-text input where the user types a question.
question = st.text_input(
    label="Tanya ujian masuk Pascasarjana Universitas Gadjah Mada",
)

# Skip the chain call while the input is empty (falsy); otherwise run the
# chain on the question and render its answer on the page.
if question:
    response = rag_chain.invoke(question)
    st.write(response)