Spaces:

Francesco-A
/

SemanticSearch_QA-v2.1

Sleeping

App Files Files Community

SemanticSearch_QA-v2.1 / app.py

Francesco-A

Update app.py

571c594 7 months ago

raw

history blame

9.42 kB

	# AUTOGENERATED! DO NOT EDIT! File to edit: ../drive/MyDrive/Codici/Python/Apps/Gradio_App/SemanticSearch_QA-v2.1.ipynb.

	# %% auto 0
	# Public names exported by this module (this file is generated from a notebook).
	__all__ = [
	    'model_name',
	    'qa_model',
	    'contexts',
	    'question',
	    'df_results',
	    'question_1',
	    'question_2',
	    'question_3',
	    'question_4',
	    'question_5',
	    'question_6',
	    'question_7',
	    'question_8',
	    'question_9',
	    'question_10',
	    'title',
	    'description',
	    'data',
	    'context_df',
	    'question_input',
	    'contexts_input',
	    'n_answers_input',
	    'full_context_input',
	    'confidence_threshold_input',
	    'intf',
	    'get_answers',
	]

	# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/SemanticSearch_QA-v2.1.ipynb 3
	import pandas as pd
	import gradio as gr
	import transformers
	from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

	# Checkpoint name used for question answering.
	# Alternative checkpoint kept for reference: 'Francesco-A/bert-finetuned-squad-v1'
	model_name = "deepset/roberta-base-squad2"

	# The same checkpoint name is used for both the model and its tokenizer.
	qa_model = pipeline(
	    task='question-answering',
	    model=model_name,
	    tokenizer=model_name,
	)

	# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/SemanticSearch_QA-v2.1.ipynb 6
	def get_answers(question, contexts, n_answers=1, full_context=True, confidence_threshold = 0.5):
	    """Run the QA model on every context and return the best-scoring answers.

	    Args:
	        question: The question text. A missing or blank question yields an
	            empty result instead of being sent to the model.
	        contexts: Either an iterable of context strings or a DataFrame with
	            a 'Context' column. Entries that are not non-blank strings
	            (e.g. empty rows added in the editable table) are skipped.
	        n_answers: Maximum number of rows to return; coerced to ``int``
	            before being handed to ``DataFrame.head``.
	        full_context: When true, add a 'Full Context' column holding the
	            context each answer was extracted from.
	        confidence_threshold: Minimum score (after rounding to 3 decimals)
	            an answer needs in order to be kept.

	    Returns:
	        A DataFrame with columns 'context_idx', 'Answer', 'Score' (plus
	        'Full Context' when requested), sorted by descending score and
	        re-indexed from 0. 'context_idx' is the position of the context in
	        the original input, including skipped entries.
	    """
	    columns = ['context_idx', 'Answer', 'Score']
	    if full_context:
	        columns.append('Full Context')

	    if isinstance(contexts, pd.DataFrame):
	        # If it's a DataFrame, get the values from the 'Context' column as a list
	        contexts = contexts['Context'].to_list()
	    elif contexts is None:
	        contexts = []

	    results = []
	    if isinstance(question, str) and question.strip():
	        for i, context in enumerate(contexts):
	            # Skip blank / NaN / non-text entries rather than passing them to the model.
	            if not isinstance(context, str) or not context.strip():
	                continue

	            res = qa_model(question=question, context=context)

	            results_dict = {
	                'context_idx': i,
	                'Answer': res['answer'],
	                'Score': round(res['score'], 3),
	            }
	            if full_context:
	                results_dict['Full Context'] = context

	            results.append(results_dict)

	    if not results:
	        # Without this guard an empty result list produces a frame with no
	        # 'Score' column and the filter below raises KeyError.
	        return pd.DataFrame(columns=columns)

	    df = pd.DataFrame(results, columns=columns)
	    df = df[df['Score'] >= confidence_threshold]
	    df = df.sort_values(by='Score', ascending=False).head(int(n_answers))
	    return df.reset_index(drop=True)  # Reset index after sorting

	# Example usage: rank three short passages against a single question.
	# Note that `contexts` is reassigned to a larger list further down the file.
	contexts = [
	    'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.',
	    'Model conversion enables interoperability between different NLP libraries.',
	    'Converting models allows for leveraging the strengths of various tools.',
	]
	question = "Why is model conversion important?"

	df_results = get_answers(
	    question,
	    contexts,
	    n_answers=2,
	    full_context=False,
	    confidence_threshold=0.25,
	)
	df_results  # bare expression left over from the notebook; no effect in a script

	# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/SemanticSearch_QA-v2.1.ipynb 7
	# Sample questions. The interface examples use question_1, question_2 and
	# question_4; the remaining ones are only defined and exported.
	question_1 = "What are the main features of the new XPhone 20?"
	question_2 = "What are some benefits of regular exercise?"
	question_3 = "What is the color of a rose?"
	question_4 = "What's photosynthesis?"
	question_5 = "At what temperature does water boil?"
	question_6 = "Where can I find potassium?"
	question_7 = "How does the internet function?"
	question_8 = "What are the ingredients for making a classic margarita?"
	question_9 = "How does cellular respiration work?"
	question_10 = "Is money important?"

	# Example contexts: one passage per list element. Each element becomes one
	# row of the 'Context' table shown in the UI, so every string literal must be
	# followed by a comma; adjacent literals without one are silently joined
	# into a single passage.
	contexts = [
	    "The XPhone 20 is expected to come with an improved camera system, featuring advanced image stabilization and enhanced low-light capabilities.",
	    "Regular exercise has been shown to reduce the risk of chronic diseases such as heart disease, diabetes, and certain types of cancer.",
	    "Roses come in various colors, including red, pink, yellow, white, and even blue (though blue roses are rare and often created through genetic modification).",
	    "Photosynthesis occurs in the chloroplasts of plant cells, where chlorophyll captures sunlight and converts it into chemical energy.",
	    "Water boils at different temperatures depending on factors like altitude and atmospheric pressure. At sea level, it boils at 100 degrees Celsius or 212 degrees Fahrenheit.",
	    "Potassium is an essential mineral that can be found in various foods such as bananas, potatoes, spinach, and oranges.",
	    "The internet functions through a complex system of data transmission protocols, routers, and servers that allow for the exchange of information globally.",
	    "A classic margarita typically consists of tequila, lime juice, triple sec (or orange liqueur), and is often served with a salted rim.",
	    "Cellular respiration takes place in the mitochondria of cells, where glucose and oxygen are converted into ATP (adenosine triphosphate) and carbon dioxide.",
	    "Money is a medium of exchange that facilitates transactions of goods and services. Its importance lies in its role in economic systems and the ability to represent value.",
	    "The XPhone 20 may feature an OLED display for vibrant colors and deep blacks, providing a high-quality visual experience.",
	    "Exercise releases endorphins, which are chemicals in the brain that help improve mood and reduce feelings of stress and anxiety.",
	    "Different species of roses can have variations in color, including shades of red, pink, yellow, and white.",
	    "During photosynthesis, plants also release oxygen as a byproduct, which is essential for the survival of many organisms on Earth.",
	    "Water boils at a lower temperature at higher altitudes due to the reduced atmospheric pressure. For example, in the mountains, it may boil below 100 degrees Celsius.",
	    "Potassium is crucial for proper muscle function, nerve function, and maintaining fluid balance in the body.",
	    "The internet relies on a system of interconnected networks, including wired and wireless connections, to transmit data across the globe.",
	    "In addition to the core ingredients, a classic margarita can also be garnished with a wedge of lime for added flavor.",
	    "Cellular respiration involves several stages, including glycolysis, the Krebs cycle, and the electron transport chain, to extract energy from glucose.",
	    # The comma after the next entry was missing, which merged it with the
	    # XPhone passage that follows.
	    "Money serves as a unit of account, allowing for standardized pricing and valuation of goods and services in economies worldwide.",
	    "The XPhone 20 is rumored to feature a smaller notch, providing more screen real estate for users. This allows for an immersive viewing experience.",
	    "Photosynthesis is the process by which plants convert carbon dioxide, water, and sunlight into glucose and oxygen.",
	    "Bananas are a great source of potassium.",
	    "The theory of relativity was developed by Albert Einstein and revolutionized our understanding of space and time.",
	    "The Eiffel Tower is located in Paris, France.",
	    "Reports suggest that the XPhone 20 will have significant improvements in battery life compared to its predecessor. Users can expect a longer-lasting device.",
	    "A penny saved is a penny earned.",
	    "Water boils at 100 degrees Celsius.",
	    "The Great Wall of China is one of the most impressive architectural feats in history.",
	    "The capital of Japan is Tokyo.",
	    "One of the anticipated features of the XPhone 20 is a faster and more powerful A16 chip. This will result in smoother and more efficient performance.",
	    "Roses are red, violets are blue.",
	    "Regular exercise can help improve cardiovascular health and strengthen muscles.",
	    "A classic margarita is made with tequila, lime juice, and orange liqueur.",
	    "Cellular respiration is the process by which cells convert glucose and oxygen into energy, carbon dioxide, and water.",
	    "The internet is a global network of interconnected computers and servers that allows the sharing of information and resources.",
	    "Mount Everest is the highest mountain in the world, located in the Himalayas.",
	    "The sun rises in the east and sets in the west.",
	    "The Mona Lisa is a famous portrait painting by Leonardo da Vinci.",
	    "The Declaration of Independence was adopted by the Continental Congress on July 4, 1776.",
	]

	# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/SemanticSearch_QA-v2.1.ipynb 10
	# Title and description text handed to the Gradio interface.
	title = "SemanticSearch_QA-v2"
	description = """
	QA model: [deepset/roberta-base-squad2](https://huggingface.co/deepset/roberta-base-squad2)
	"""

	# One-column table of contexts; used as the default value of the contexts
	# input and in the interface examples.
	data = dict(Context=contexts)
	context_df = pd.DataFrame(data)

	# Input components, one per get_answers parameter.
	question_input = gr.Textbox(
	    label="Question",
	    placeholder="Enter your question here",
	)
	contexts_input = gr.Dataframe(
	    value=context_df,
	    headers=["Context"],
	    interactive=True,
	    type="pandas",
	    label="Contexts",
	)
	n_answers_input = gr.Slider(
	    minimum=1,
	    maximum=10,
	    step=1,
	    value=1,
	    label="Number of Answers",
	)
	full_context_input = gr.Checkbox(
	    label="Include Full Context",
	    value=True,
	)
	confidence_threshold_input = gr.Slider(
	    minimum=0.0,
	    maximum=1.0,
	    step=0.1,
	    value=0.5,
	    label="Confidence Threshold",
	)

	# Wire get_answers to the input components; results are shown as a table.
	# Each example row is [question, contexts, n_answers, full_context, confidence_threshold].
	intf = gr.Interface(
	    fn=get_answers,
	    inputs=[
	        question_input,
	        contexts_input,
	        n_answers_input,
	        full_context_input,
	        confidence_threshold_input,
	    ],
	    outputs=gr.components.Dataframe(type="pandas", row_count=5),
	    examples=[
	        [question_1, context_df, 3, False, 0.1],
	        [question_2, context_df, 5, True, 0.1],
	        [question_4, context_df, 10, False, 0.1],
	    ],
	    title=title,
	    description=description,
	)

	# Start the app. The `share=True` option is left disabled.
	intf.launch(inline=True)