40point12

Running

40point12 / app.py

Émile

Adding example, simpler model for anonymization

88d4118 6 days ago

6.52 kB

	from haystack.components.generators import OpenAIGenerator
	from haystack.utils import Secret
	from haystack.components.builders.prompt_builder import PromptBuilder
	from haystack.components.routers import ConditionalRouter
	from haystack import Pipeline
	from haystack.components.embedders import SentenceTransformersTextEmbedder
	from haystack_integrations.document_stores.chroma import ChromaDocumentStore
	from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
	import rsa
	from cryptography.fernet import Fernet
	from presidio_analyzer import AnalyzerEngine
	from presidio_anonymizer import AnonymizerEngine

	import gradio as gr

	# Name of the embedding checkpoint handed to the text embedder.
	embedding_model = "Alibaba-NLP/gte-multilingual-base"

	# Chroma document store; persist_path points at the "vstore_4012" location.
	document_store = ChromaDocumentStore(persist_path="vstore_4012")


	##################################
	####### Answering pipeline #######
	##################################
	# User-facing refusal text emitted when a question is classified as off-topic.
	no_answer_message = (
	    "I'm not allowed to answer this question. Please ask something related to "
	    "API access in accordance with the DSA’s transparency and data-sharing provisions. "
	    "Is there anything else I can do for you? "
	)

	# Jinja prompt for the relevance classifier. It expects two variables:
	#   - query: the user's current message
	#   - user_history: a list of message dicts with a "content" key; only the
	#     last two entries are rendered.
	# The model is asked to answer with YES or NO.
	relevance_prompt_template = """
	Classify whether this user is asking for something related to social media APIs,
	the Digital Services Act (DSA), or any topic related to online platforms’ compliance
	with legal and data-sharing frameworks.

	Relevant topics include:
	- Social media API access
	- Data transparency
	- Compliance with DSA provisions
	- Online platform regulations

	Here is their message:
	{{query}}

	Here are the two previous messages. ONLY refer to these if the above message refers to previous ones.

	{% for message in user_history[-2:] %}
	* {{message["content"]}}

	{% endfor %}

	Instructions:
	- Respond with “YES” if the query pertains to any of the relevant topics listed above and not mixed with off-topic content.
	- Respond with “NO” if the query is off-topic and does not relate to the topics listed above.

	Examples:
	- Query: "How does the DSA affect API usage?"
	- Response: "YES"

	- Query: "How to make a pancake with APIs?"
	- Response: "NO"

	"""

	# Routing table for the ConditionalRouter placed after the relevance classifier.
	# The two conditions are complementary so that every classifier reply selects
	# a route: anything that does not contain "YES" (in any letter case) is
	# treated as off-topic instead of leaving the router without a match.
	routes = [
	    {
	        # On-topic: pass the original query on under the "query" output.
	        "condition": "{{ 'YES' in (replies[0] | upper) }}",
	        "output": "{{query}}",
	        "output_name": "query",
	        "output_type": str,
	    },
	    {
	        # Off-topic or unrecognized reply: emit the fixed refusal message.
	        "condition": "{{ 'YES' not in (replies[0] | upper) }}",
	        "output": no_answer_message,
	        "output_name": "no_answer",
	        "output_type": str,
	    },
	]

	# Jinja prompt for the answering step. It expects three variables:
	#   - conv_history: text rendered under "Conversation history"
	#   - query: the user's current request
	#   - documents: an iterable of objects whose `.content` is listed under
	#     "Relevant Information"
	query_prompt_template = """
	Conversation history:
	{{conv_history}}

	Here is what the user has requested:
	{{query}}

	Instructions:
	- Craft a concise, short informative answer to the user's request using the information provided below.
	- Synthesize the key points into a seamless response that appears as your own expert knowledge.
	- Avoid direct quotes or explicit references to the documents.
	- You are directly answering the user's query.

	Relevant Information:
	{% for document in documents %}
	- {{ document.content }}
	{% endfor %}

	"""

	def setup_generator(model_name, api_key_env_var="OPENAI_API_KEY", max_tokens=8192):
	    """Build an OpenAIGenerator for ``model_name``.

	    The API key is wrapped with ``Secret.from_env_var(api_key_env_var)`` and
	    ``max_tokens`` is forwarded to the generator through ``generation_kwargs``.
	    """
	    key_from_env = Secret.from_env_var(api_key_env_var)
	    generation_options = {"max_tokens": max_tokens}
	    return OpenAIGenerator(
	        api_key=key_from_env,
	        model=model_name,
	        generation_kwargs=generation_options,
	    )


	# Two generators on the same model: `llm` is capped at 30 tokens and is wired
	# behind the relevance prompt, `llm2` keeps setup_generator's default cap and
	# is wired behind the answering prompt.
	llm = setup_generator(model_name="gpt-4o-mini", max_tokens=30)
	llm2 = setup_generator(model_name="gpt-4o-mini")

	# Query embedder and the Chroma retriever reading from the document store.
	embedder = SentenceTransformersTextEmbedder(
	    model=embedding_model,
	    progress_bar=False,
	    trust_remote_code=True,
	)
	retriever = ChromaEmbeddingRetriever(document_store)

	# One prompt builder per template, plus the router driven by `routes`.
	prompt_builder = PromptBuilder(template=relevance_prompt_template)
	prompt_builder2 = PromptBuilder(template=query_prompt_template)
	router = ConditionalRouter(routes=routes)


	# Assemble the answering pipeline:
	#   prompt_builder -> llm -> router -(query)-> embedder -> retriever
	#   -> prompt_builder2 -> llm2
	answer_query = Pipeline()

	# Register the components under the names used by the connections below.
	for _component_name, _component in (
	    ("prompt_builder", prompt_builder),
	    ("llm", llm),
	    ("router", router),
	    ("embedder", embedder),
	    ("retriever", retriever),
	    ("prompt_builder2", prompt_builder2),
	    ("llm2", llm2),
	):
	    answer_query.add_component(_component_name, _component)

	# Wire the components together in execution order.
	for _sender, _receiver in (
	    ("prompt_builder", "llm"),
	    ("llm", "router"),
	    ("router.query", "embedder"),
	    ("embedder", "retriever"),
	    ("retriever", "prompt_builder2"),
	    ("prompt_builder2", "llm2"),
	):
	    answer_query.connect(_sender, _receiver)

	answer_query.warm_up()


	##########################
	####### Logging ##########
	##########################


	# Presidio engines created once at import time and shared by
	# hide_sensitive_info(): the analyzer produces the findings that the
	# anonymizer then applies to the text.
	analyzer = AnalyzerEngine()
	anonymizer = AnonymizerEngine()

	def hide_sensitive_info(text):
	    """Return ``text`` after anonymizing what the analyzer flags in it.

	    The text is analyzed as English ("en"); the analyzer's findings are then
	    passed to the anonymizer and the resulting text is returned.
	    """
	    findings = analyzer.analyze(text=text, language="en")
	    return anonymizer.anonymize(text=text, analyzer_results=findings).text

	def log_QA(question, answer):
	    """Print one question/answer exchange after passing it through hide_sensitive_info."""
	    # Both sides of the exchange are anonymized together as a single text.
	    scrubbed_exchange = hide_sensitive_info(f"User: {question}\nAssistant: {answer}")
	    print(scrubbed_exchange, end="\n\n")

	##########################
	####### Gradio app #######
	##########################

	def chat(message, history):
	    """
	    Chat function for Gradio. Uses the pipeline to produce the next answer.

	    Args:
	        message: The user's latest message.
	        history: Earlier turns as a list of dicts with "role" and "content" keys.

	    Returns:
	        The first reply of the answering generator when the pipeline produced
	        one, the router's refusal message when the question was routed to
	        "no_answer", and a generic apology otherwise.
	    """
	    # Only the last two turns are passed on as conversation context.
	    conv_history = "\n\n".join(
	        f'{turn["role"]}: {turn["content"]}' for turn in history[-2:]
	    )
	    user_history = [turn for turn in history if turn["role"] == "user"]

	    results = answer_query.run(
	        {
	            "user_history": user_history,
	            "query": message,
	            "conv_history": conv_history,
	            "top_k": 3,
	        }
	    )

	    # Guard against a missing or empty reply list before indexing into it.
	    generated_replies = results.get("llm2", {}).get("replies")
	    if generated_replies:
	        answer = generated_replies[0]
	    elif "no_answer" in results.get("router", {}):
	        answer = results["router"]["no_answer"]
	    else:
	        answer = "Sorry, a mistake occurred"

	    log_QA(message, answer)
	    return answer

	# Sample questions passed to the chat interface as `examples`.
	examples = [
	    "What is Article 40.12 of the Digital Services Act, and how does it help researchers?",
	    "How can I start the process of requesting platform data for research?",
	    "How do I submit a data access request for Meta’s API under the DSA?",
	    "What are the authentication and setup steps for Youtube’s API?",
	    "What specific types of data can I access through Snapchat’s API?",
	]

	if __name__ == "__main__":
	    # Build the chat UI around `chat` and start it when run as a script.
	    interface = gr.ChatInterface(
	        fn=chat,
	        type="messages",
	        title="40.12 Chatbot",
	        description=(
	            "Ask me anything about social media APIs, "
	            "the Digital Services Act (DSA), or online platform regulations."
	        ),
	        examples=examples,
	    )
	    interface.launch()