import re
import pickle

import streamlit as st

# Load the trained classifier and TF-IDF vectorizer produced during training.
with open('clf.pkl', 'rb') as f:
    clf = pickle.load(f)
with open('tfidf.pkl', 'rb') as f:
    tfidf = pickle.load(f)
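# clf.pkl and tfidf.pkl come from a separate training step that this app does
# not include. The helper below is an illustrative sketch of that step and is
# never called here; the CSV path, the 'Category'/'Resume' column names, and
# the choice of KNeighborsClassifier are assumptions, not confirmed by this file.
def _train_models_sketch(csv_path='UpdatedResumeDataSet.csv'):
    import pandas as pd
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.preprocessing import LabelEncoder

    df = pd.read_csv(csv_path)
    # LabelEncoder assigns numeric IDs in sorted (alphabetical) label order.
    labels = LabelEncoder().fit_transform(df['Category'])
    vectorizer = TfidfVectorizer(stop_words='english')
    features = vectorizer.fit_transform(df['Resume'])
    model = KNeighborsClassifier().fit(features, labels)
    with open('clf.pkl', 'wb') as f:
        pickle.dump(model, f)
    with open('tfidf.pkl', 'wb') as f:
        pickle.dump(vectorizer, f)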
def clean_resume(resume_text):
    """
    Clean resume text by removing unwanted character sequences, e.g.:
      1. URLs
      2. hashtags
      3. mentions
      4. special characters
      5. punctuation

    Parameters:
        resume_text (str): The input resume text to be cleaned.

    Returns:
        clean_text (str): The cleaned resume text.
    """
    clean_text = re.sub(r'http\S+\s*', ' ', resume_text)  # URLs
    clean_text = re.sub(r'RT|cc', ' ', clean_text)        # retweet/cc markers
    clean_text = re.sub(r'#\S+', '', clean_text)          # hashtags
    clean_text = re.sub(r'@\S+', ' ', clean_text)         # mentions
    clean_text = re.sub('[%s]' % re.escape("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"""), ' ', clean_text)  # punctuation
    clean_text = re.sub(r'[^\x00-\x7f]', ' ', clean_text)  # non-ASCII characters
    clean_text = re.sub(r'\s+', ' ', clean_text)            # collapse whitespace
    return clean_text
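# Example (illustrative):
#   clean_resume("See my work at https://github.com/me #Python @recruiter")
#   -> 'See my work at '   (URL, hashtag, and mention removed, whitespace collapsed)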
def remove_stopwords(text, language='english'):
    """
    Remove stopwords from the given text.

    Parameters:
        text (str): The input text from which to remove stopwords.
        language (str): Language of the stopwords. Default is 'english';
            only English is currently supported, since the list below
            is hardcoded.

    Returns:
        filtered_text (str): The text without stopwords.
    """
    # Hardcoded English stopword list, so no external corpus download is needed.
    stop_words = {
        "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your",
        "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she",
        "her", "hers", "herself", "it", "its", "itself", "they", "them", "their",
        "theirs", "themselves", "what", "which", "who", "whom", "this", "that",
        "these", "those", "am", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an",
        "the", "and", "but", "if", "or", "because", "as", "until", "while", "of",
        "at", "by", "for", "with", "about", "against", "between", "into", "through",
        "during", "before", "after", "above", "below", "to", "from", "up", "down",
        "in", "out", "on", "off", "over", "under", "again", "further", "then",
        "once", "here", "there", "when", "where", "why", "how", "all", "any",
        "both", "each", "few", "more", "most", "other", "some", "such", "no",
        "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s",
        "t", "can", "will", "just", "don", "should", "now",
    }
    words = text.split()
    filtered_words = [word for word in words if word.lower() not in stop_words]
    filtered_text = ' '.join(filtered_words)
    return filtered_text
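# Example (illustrative):
#   remove_stopwords("I am a Python developer with experience in the cloud")
#   -> 'Python developer experience cloud'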
# Web app
def main():
    st.title("Resume Screening App")
    uploaded_file = st.file_uploader('Upload Resume', type=['txt', 'pdf'])

    if uploaded_file is not None:
        # Note: this reads the raw bytes as text, which works for .txt uploads;
        # binary PDFs need a real text extractor (see extract_pdf_text below).
        resume_bytes = uploaded_file.read()
        try:
            resume_text = resume_bytes.decode('utf-8')
        except UnicodeDecodeError:
            # If UTF-8 decoding fails, fall back to 'latin-1'.
            resume_text = resume_bytes.decode('latin-1')

        cleaned_resume = clean_resume(resume_text)
        cleaned_resume = remove_stopwords(cleaned_resume)
        input_features = tfidf.transform([cleaned_resume])
        prediction_id = clf.predict(input_features)[0]
        st.write(prediction_id)  # raw numeric label, useful for debugging
        # Map the numeric category ID back to a human-readable name. The IDs
        # match the alphabetical order a scikit-learn LabelEncoder would
        # presumably have assigned to these labels during training.
        category_mapping = {
            0: "Advocate",
            1: "Arts",
            2: "Automation Testing",
            3: "Blockchain",
            4: "Business Analyst",
            5: "Civil Engineer",
            6: "Data Science",
            7: "Database",
            8: "DevOps Engineer",
            9: "DotNet Developer",
            10: "ETL Developer",
            11: "Electrical Engineering",
            12: "HR",
            13: "Hadoop",
            14: "Health and fitness",
            15: "Java Developer",
            16: "Mechanical Engineer",
            17: "Network Security Engineer",
            18: "Operations Manager",
            19: "PMO",
            20: "Python Developer",
            21: "SAP Developer",
            22: "Sales",
            23: "Testing",
            24: "Web Designing",
        }
        category_name = category_mapping.get(prediction_id, "Unknown")
        st.write("Predicted Category:", category_name)
# Entry point
if __name__ == "__main__":
    main()
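# Run locally with:  streamlit run app.py
# ('app.py' is assumed; substitute this file's actual name. clf.pkl and
# tfidf.pkl must sit in the working directory.)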