TinyLlama-Cinder-Agent-Rag / tinyllama_agent_cinder_txtai-rag.py

Upload tinyllama_agent_cinder_txtai-rag.py

c6cdb52 verified 6 months ago

5.8 kB


	import requests
	import os
	from transformers import AutoTokenizer, AutoModelForCausalLM
	import torch
	import torch.nn as nn
	from torchsummary import summary
	from accelerate import dispatch_model, infer_auto_device_map
	from txtai import Embeddings
	from txtai.pipeline import LLM
	#pip3 install git+https://github.com/neuml/txtai#egg=txtai[pipeline-llm]


	# Wikipedia Embeddings Database
	embeddings = Embeddings()
	embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia")

	#os.environ['OMP_NUM_THREADS'] = '6'

	#
	#DuckDuckGo
	#
	def query_duckduckgo(query):
	"""Query DuckDuckGo API for a given search term and return the results."""
	url = "https://api.duckduckgo.com/"
	params = {
	'q': query,
	'format': 'json',
	'pretty': '1',
	'no_html': '1'
	}

	try:
	response = requests.get(url, params=params)
	response.raise_for_status() # Raises an HTTPError for bad responses
	return response.json()
	except requests.RequestException as e:
	print(f"An error occurred: {e}")
	return None

	def handle_query(user_input):
	"""Process user input and display the answer from DuckDuckGo."""
	result = query_duckduckgo(user_input)
	if result and 'AbstractText' in result and result['AbstractText']:
	print(result['AbstractText'])
	else:
	print("DuckDuck Go failed. Going to Wiki.")
	result ="\n".join([x["text"] for x in embeddings.search(user_input)])
	print("Restults from Wiki: \n",result)




	# Load model and tokenizer
	model_path = "Josephgflowers/TinyLlama-Cinder-Agent-Rag"#
	# Define the device (CPU or GPU)
	#device = torch.device("cuda")
	device = torch.device("cpu")
	model = AutoModelForCausalLM.from_pretrained(model_path,ignore_mismatched_sizes=True).to(device)

	print(model)
	total_params = sum(p.numel() for p in model.parameters())
	print("Total number of parameters: ", total_params)

	sequence_length = 2048 # or whatever your specific sequence length is
	#embedding_size = 2048 # as per your model's definition

	tokenizer = AutoTokenizer.from_pretrained(model_path)
	stop_token =2 #3556 </ #2 #128247
	#'</s>' 2



	def chat_with_model(prompt_text, stop_token, model, tokenizer):
	# Encode the prompt text
	encoded_prompt = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt").to(device)

	# Generate response
	output_sequences = model.generate(
	input_ids=encoded_prompt,
	#max_length=len(encoded_prompt[0]) + 256,
	max_new_tokens=256,
	temperature=0.1,
	repetition_penalty=1.2,
	top_k=20,
	top_p=0.9,
	do_sample=True,
	num_return_sequences=1,
	eos_token_id=stop_token
	)

	# Decode the generated sequence
	generated_sequence = output_sequences[0].tolist()
	text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True)
	response_text = text[len(prompt_text):].strip() # Extract only the response text
	#response_text = response_text.replace("<s>","").replace("</s>","")
	return response_text

	# Initialize conversation history

	conversation_history = ''#'<s>\n<\|system\|>\nYou are a helpful assistant.</s>\n'#'<s>\n<\|system\|>\nYou are a

	# Get user's preference for input mode and character name
	input_mode = 'text' ##input("Enter 'text' for text input or 'speech' for speech input: ").lower()
	character_name = '<\|user\|>' # input("Enter your character name (USER, JONAH, JOSEPH, KIMBERLY, etc.): ")


	#
	#handle_query(user_input)
	# Chat loop
	num_chat = 1
	while num_chat <= 20:
	question = input(f"{character_name}: ")
	user_input = question # Get text input from user
	#context = "\n".join([x["text"] for x in embeddings.search(question)])
	context= handle_query(user_input)
	#print('History: '+ conversation_history)
	prompt_text = f"""
	<s>
	<\|system\|>
	You will be given documentation as context to answer a users question. You are an expert at summarization. Pay close attention to the key concepts. Use only information from the Context in your answer.
	</s>
	<\|data\|>
	Context:
	{context}
	-Use only the above context to answer the question.
	</s>
	<\|user\|>
	Here is information on "{question}". Extract only the above information into topic, category, keywords, and summary formatted in JSON. Think through the most critical information to provide then respond with the JSON object of topic, category, keywords, and summary.
	</s>
	<\|assistant\|>

	"""
	#topic, category, keywords, and summary formatted in JSON. Think through the most critical information to provide then respond with the JSON object of topic, category, keywords, and summary
	#Here is information on "{question}". Extract only the above information into topic, category, keywords, and summary formatted in JSON. Think through the most critical information to provide then respond with the JSON object of topic, category, keywords, and summary

	#Use only the documentation provided to answer this question: {question}


	response_text = chat_with_model(prompt_text, stop_token, model, tokenizer)
	response_text = response_text.replace('<s>','')
	#print('Response: '+ context)

	# Extract assistant's response from the response_text
	response_text = response_text.split('</s>\n', 1)[0] # Extract the first message from the assistant

	print(f"\n______________________________________________\n\nAssistant: {response_text}")

	# Update conversation history
	conversation_history += f"{prompt_text}{response_text}</s>\n"
	if len(conversation_history) > 2048:
	conversation_history = conversation_history[1024:]
	else:
	conversation_history = conversation_history

	num_chat += 1