Spaces:

NursNurs
/

detect_mental_disorder

d9d1579 about 1 year ago

3.91 kB

	# Import from 3rd party libraries
	import streamlit as st
	import streamlit.components.v1 as components
	# import streamlit_analytics
	import pandas as pd
	import numpy as np
	import re
	from sklearn.metrics.pairwise import cosine_similarity
	import string
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer
	nltk.download("stopwords")
	nltk.download('wordnet')
	from sentence_transformers import SentenceTransformer
	import plotly.express as px
	import pandas as pd
	from sklearn.decomposition import PCA

	st.set_page_config(page_title="Mental disorder by description", page_icon="🤖")

	def convert_string_to_numpy_array(s):
	    """Parse the floating-point numbers found in a string into a NumPy array.

	    The string is expected to be the textual form of a vector, e.g.
	    "[ 0.25 -1.5 ]" or "[1.5e-05, 3.0e-01]". Besides plain decimals, the
	    parser understands scientific notation ("1.5e-05", "1e-05") and
	    trailing-dot values ("0.", "1."). Reading only the "digits.digits"
	    part would truncate "1.5e-05" to 1.5 and drop "0." altogether, which
	    silently corrupts values or shifts the vector's dimensions.

	    Args:
	        s: Text containing the numbers.

	    Returns:
	        A 1-D float64 array with the numbers in order of appearance;
	        empty when the text contains none.
	    """
	    # First alternative: digits, a dot, optional fraction, optional exponent.
	    # Second alternative: digits followed by a mandatory exponent ("1e-05").
	    float_pattern = r'[-+]?(?:\d+\.\d*(?:[eE][-+]?\d+)?|\d+[eE][-+]?\d+)'
	    values = [float(token) for token in re.findall(float_pattern, s)]
	    return np.array(values, dtype=np.float64)

	# Build the sentence encoder and the lemmatizer (wrapped in st.cache_resource)
	@st.cache_resource
	def get_models():
	    """Create the SentenceTransformer encoder and a WordNet lemmatizer.

	    Writes a status message to the page before and after the encoder is
	    constructed.

	    Returns:
	        Tuple ``(model, lemmatizer)``.
	    """
	    st.write('Loading the model...')
	    encoder = SentenceTransformer("stsb-bert-large")
	    st.write("The app is loaded and ready to use!")
	    return encoder, WordNetLemmatizer()

	# Sentence encoder and lemmatizer shared by the rest of the script
	# (get_models is wrapped in st.cache_resource).
	model, lemmatizer = get_models()
	# English stop words from the NLTK corpus downloaded above, as a set.
	# NOTE(review): neither `lemmatizer` nor `stop_words` is referenced in the
	# visible part of this file - confirm whether they are still needed.
	stop_words = set(stopwords.words('english'))

	# Read the disorder table and turn its stored embeddings into a matrix
	@st.cache_data
	def load_data():
	    """Load 'icd_embedded.csv' and decode its 'Embeddings' column.

	    Each 'Embeddings' cell is parsed with convert_string_to_numpy_array and
	    stored in a new 'numpy_array' column; those per-row arrays are then
	    collected into a single NumPy array.

	    Returns:
	        Tuple ``(df_icd, icd_embeddings)``: the DataFrame (including the
	        added 'numpy_array' column) and the array built from that column.
	    """
	    table = pd.read_csv('icd_embedded.csv')
	    table['numpy_array'] = table['Embeddings'].apply(convert_string_to_numpy_array)
	    vectors = table["numpy_array"].tolist()
	    return table, np.array(vectors)

	# Disorder table and its embedding array, used by the similarity search below
	# (load_data is wrapped in st.cache_data).
	df_icd, icd_embeddings = load_data()

	# Collect the disease names from the loaded table
	@st.cache_data
	def create_disease_list():
	    """Return the values of df_icd's 'Disease' column as a list, in row order."""
	    return list(df_icd["Disease"])

	# Disease labels in the row order of df_icd, which icd_embeddings also follows.
	disease_names = create_disease_list()

	# Create the 'descriptions' list in the session state if it is not there yet.
	if 'descriptions' not in st.session_state:
	    st.session_state['descriptions'] = []

	def similarity_top(descr_emb, disorder_embs, top_k=5, names=None):
	    """Rank disorders by cosine similarity to a description embedding.

	    Args:
	        descr_emb: Embedding of the description; it is reshaped to a single
	            row, so any array whose elements form one vector works.
	        disorder_embs: 2-D array with one disorder embedding per row.
	        top_k: Maximum number of results to return. Defaults to 5, the
	            value that used to be hard-coded.
	        names: Labels aligned row-by-row with ``disorder_embs``. Defaults to
	            the module-level ``disease_names``. If the lengths differ, the
	            extra items of the longer sequence are ignored.

	    Returns:
	        A list of ``(disease_name, similarity_score)`` tuples sorted from
	        most to least similar, containing at most ``top_k`` entries. Ties
	        keep their original row order.

	    Raises:
	        ValueError: If ``top_k`` is negative.
	    """
	    if top_k < 0:
	        raise ValueError("top_k must be non-negative")
	    if names is None:
	        names = disease_names

	    # cosine_similarity works on 2-D inputs, so present the query as one row.
	    query = descr_emb.reshape(1, -1)
	    # The result has one row per disorder and a single column; take that
	    # column so each score is a scalar rather than a 1-element array.
	    scores = cosine_similarity(disorder_embs, query)[:, 0]

	    ranked = sorted(zip(names, scores), key=lambda pair: pair[1], reverse=True)
	    return ranked[:top_k]


	# with text_spinner_placeholder:
	# with st.spinner("Please wait while your Tweet is being generated..."):
	# mood_prompt = f"{mood} " if mood else ""
	# if style:
	# twitter = twe.Tweets(account=style)
	# tweets = twitter.fetch_tweets()
	# tweets_prompt = "\n\n".join(tweets)
	# prompt = (
	# f"Write a {mood_prompt}Tweet about {topic} in less than 120 characters "
	# f"and in the style of the following Tweets:\n\n{tweets_prompt}\n\n"

	# Configure Streamlit page and state
	st.title("Detect the disorder")
	st.markdown(
	    "This mini-app predicts a mental disorder based on your description."
	)

	# Stored as `description` rather than `input` so the builtin input() is not
	# shadowed; the label no longer carries a stray closing parenthesis.
	description = st.text_input(label="Your description", placeholder="Insert a description of a character")
	# Skip empty or whitespace-only text: there is nothing meaningful to embed.
	if description and description.strip():
	    # Embed the description and display the matches returned by similarity_top.
	    input_embed = model.encode(description)
	    sim_score = similarity_top(input_embed, icd_embeddings)
	    st.write(sim_score)

	# NOTE(review): the commented-out inputs below mention Tweets and Twitter
	# handles and look unrelated to this app - presumably template leftovers.
	# mood = st.text_input(
	# label="Mood (e.g. inspirational, funny, serious) (optional)",
	# placeholder="inspirational",
	# )
	# style = st.text_input(
	# label="Twitter account handle to style-copy recent Tweets (optional, limited by Twitter's API)",
	# placeholder="elonmusk",
	# )

	# Empty placeholder element; nothing in the visible code writes to it.
	text_spinner_placeholder = st.empty()