enricorampazzo committed
Commit 062179e
1 Parent(s): 00b67ca

added support for npu

app.py CHANGED
@@ -1,8 +1,10 @@
 from pathlib import Path
 
 from prompts.prompts_manager import PromptsManager
+from repository.intel_npu import IntelNpuRepository
 from repository.ollama import OllamaRepository
-from llm.llm import ModelRoles, Model
+from llm.llm import Model
+from repository.repository import ModelRoles
 from form.form import work_categories, build_form_data_from_answers, write_pdf_form
 
 
@@ -21,21 +23,27 @@ if __name__ == '__main__':
     user_prompt = input(f"Please describe what you need to do. To get the best results "
                         f"try to answer all the following questions:\n{'\n'.join(prompts_manager.questions)}\n\n>")
 
-    ollama_repository = OllamaRepository(Model("llama3.1",
-                                                ModelRoles("system", "user", "assistant")),
-                                         prompts_manager.system_prompt,
-                                         )
-    ollama_repository.send_prompt(f"Ingest the following information: {user_prompt}")
-    answers = {}
-    for idx, q in enumerate(prompts_manager.verification_prompt):
-        answer = ollama_repository.send_prompt(
-            f"Answer the following question, if the answer is not present just answer null. Keep the answer brief: {q}")
-        answers[idx] = None if 'null' in answer["content"].lower() else answer['content']
+    # repository = OllamaRepository(Model("llama3.1",
+    #                                     ModelRoles("system", "user", "assistant")),
+    #                              prompts_manager.system_prompt,
+    #                              )
+    repository = IntelNpuRepository("meta-llama/Meta-Llama-3-8B-Instruct")
+    repository.init()
+    repository.send_prompt(f"Ingest the following information: {user_prompt}")
+    answers = {x: None for x in range(0, 11)}
+    answer = repository.send_prompt(f"Answer the following questions, if the answer is not present just answer null. Put the answers between curly braces, separate each answer with a comma, keep the answer brief and maintain the order in which the questions are asked. Do not add any preamble: {"\n".join(prompts_manager.verification_prompt)}")
+    for idx, a in enumerate(answer['content'].split(",")):
+        answers[idx] = None if 'null' in a else a
+
+    # for idx, q in enumerate(prompts_manager.verification_prompt):
+    #     answer = repository.send_prompt(
+    #         f"Answer the following questions, if the answer is not present just answer null. Keep the answer brief and separate each answer with a comma and maintain the order in which the questions are asked: {q}")
+    #     answers[idx] = None if 'null' in answer["content"].lower() else answer['content']
     missing_answers = check_for_missing_answers(answers)
     while missing_answers:
         ask_again(missing_answers, prompts_manager.questions, answers)
         missing_answers = check_for_missing_answers(answers)
-    answer = ollama_repository.send_prompt(
+    answer = repository.send_prompt(
         f"The work to do is {answers[1]}. Given the following categories {work_categories.values()} which ones are the most relevant? Only return one categories, separated by a semicolon")
     categories = []
     for category in answer["content"].split(";"):
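
A note on the batched verification step introduced above: the prompt asks the model to wrap its answers in curly braces, but app.py splits the raw reply on commas, so braces and stray whitespace end up in the stored answers. A minimal, hypothetical helper along these lines (the parse_batched_answers name is not part of the commit) shows one way to tidy that parsing; it only assumes the reply format the prompt asks for:

    def parse_batched_answers(content: str, expected: int) -> dict[int, str | None]:
        # Drop the surrounding curly braces the prompt requests, if the model added them.
        cleaned = content.strip().strip("{}")
        parts = [p.strip() for p in cleaned.split(",")]
        answers: dict[int, str | None] = {i: None for i in range(expected)}
        for idx, part in enumerate(parts[:expected]):
            # Keep the commit's convention: a literal 'null' means no answer was found.
            answers[idx] = None if "null" in part.lower() else part
        return answers

With such a helper, the parsing loop above would reduce to answers = parse_batched_answers(answer["content"], len(prompts_manager.verification_prompt)).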
llm/llm.py CHANGED
@@ -1,8 +1,4 @@
-class ModelRoles:
-    def __init__(self, system_role: str, user_role: str, ai_role: str):
-        self.system_role: str = system_role
-        self.user_role: str = user_role
-        self.ai_role: str = ai_role
+from repository.repository import ModelRoles
 
 
 class Model:
repository/intel_npu.py ADDED
@@ -0,0 +1,47 @@
+from intel_npu_acceleration_library import NPUModelForCausalLM, int4, int8
+from intel_npu_acceleration_library.compiler import CompilerConfig
+from transformers import AutoTokenizer
+
+from repository.repository import Repository, ModelRoles
+
+
+class IntelNpuRepository(Repository):
+    def __init__(self, model_name: str):
+        self.model_name = model_name
+        self.message_history: list[dict[str, str]] = []
+        self.roles = ModelRoles("system", "user", "assistant")
+        self.model = None
+        self.tokenizer = None
+        self.terminators = None
+
+    def get_model_roles(self) -> ModelRoles:
+        return self.roles
+
+    def get_model_name(self) -> str:
+        return self.model_name
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        return self.message_history
+
+    def set_message_for_role(self, message: str, role: str):
+        self.get_message_history().append({"role": role, "content": message})
+
+    def init(self):
+        compiler_conf = CompilerConfig(dtype=int4)
+        self.model = NPUModelForCausalLM.from_pretrained(self.get_model_name(), use_cache=True, config=compiler_conf, export=True, temperature=0).eval()
+        self.tokenizer = AutoTokenizer.from_pretrained(self.get_model_name())
+        self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
+        self.get_message_history().append({"role": self.get_model_roles().user_role, "content": prompt})
+        input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True, return_tensors="pt")
+                     .to(self.model.device))
+        outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000)
+        generated_token_array = outputs[0][len(input_ids[0]):]
+        generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
+        answer = {"role": self.get_model_roles().ai_role, "content": generated_tokens}
+        if add_to_history:
+            self.message_history.append(answer)
+        else:
+            self.message_history.pop()
+        return answer
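
For orientation, the new NPU-backed repository is driven exactly like the Ollama one in app.py; a minimal usage sketch, assuming an Intel NPU plus the intel-npu-acceleration-library and transformers packages are installed and the Meta-Llama-3-8B-Instruct weights are accessible (the example prompt text is illustrative only):

    from repository.intel_npu import IntelNpuRepository

    repository = IntelNpuRepository("meta-llama/Meta-Llama-3-8B-Instruct")
    repository.init()  # compiles the model to int4 for the NPU and loads the tokenizer
    reply = repository.send_prompt("Ingest the following information: the kitchen sink is leaking")
    print(reply["content"])

Note that init() must be called before send_prompt(), since the constructor leaves model and tokenizer set to None.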
repository/ollama.py CHANGED
@@ -1,16 +1,17 @@
 import ollama
 from ollama import Options
 
-from llm.llm import ModelRoles, Model
+from llm.llm import Model
+from repository.repository import Repository, ModelRoles
 
 
-class OllamaRepository:
-    def __init__(self, model:Model, system_msg):
+class OllamaRepository(Repository):
+    def __init__(self, model: Model, system_msg):
         self.model: Model = model
         self.system_msg: str = system_msg
         self.message_history: list[dict[str, str]] = [{"role": self.model.roles.system_role, "content": system_msg}]
 
-    def send_prompt(self, prompt:str, add_to_history:bool = True) -> dict[str, str]:
+    def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
         options: Options = Options(temperature=0)
         self.message_history.append({"role": self.model.roles.user_role, "content": prompt})
         response = ollama.chat(self.model.name, self.message_history, options=options)
@@ -20,3 +21,9 @@ class OllamaRepository:
         else:
             self.message_history.pop()
         return answer
+
+    def get_model_name(self) -> str:
+        return self.model.name
+
+    def get_model_roles(self) -> ModelRoles:
+        return self.model.roles
repository/repository.py ADDED
@@ -0,0 +1,29 @@
+import abc
+
+
+class ModelRoles:
+    def __init__(self, system_role: str, user_role: str, ai_role: str):
+        self.system_role: str = system_role
+        self.user_role: str = user_role
+        self.ai_role: str = ai_role
+
+
+class Repository(abc.ABC):
+
+    def get_model_name(self) -> str:
+        pass
+
+    def get_model_roles(self) -> ModelRoles:
+        pass
+
+    def get_message_history(self) -> list[dict[str, str]]:
+        pass
+
+    def send_prompt(self, prompt: str, add_to_history: bool) -> dict[str, str]:
+        pass
+
+    def set_message_for_role(self, message: str, role: ModelRoles):
+        pass
+
+    def init(self):
+        pass
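
The new Repository base class above is what lets app.py switch backends with a one-line change. A rough sketch of how a further backend would slot in, where the EchoRepository name and its echoing behaviour are purely illustrative and not part of this commit:

    from repository.repository import Repository, ModelRoles


    class EchoRepository(Repository):
        # Toy backend that parrots the prompt back; handy for wiring tests without a model.
        def __init__(self):
            self.roles = ModelRoles("system", "user", "assistant")
            self.history: list[dict[str, str]] = []

        def get_model_name(self) -> str:
            return "echo"

        def get_model_roles(self) -> ModelRoles:
            return self.roles

        def get_message_history(self) -> list[dict[str, str]]:
            return self.history

        def init(self):
            pass  # nothing to load

        def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
            answer = {"role": self.roles.ai_role, "content": prompt}
            if add_to_history:
                self.history.append({"role": self.roles.user_role, "content": prompt})
                self.history.append(answer)
            return answer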
requirements.txt CHANGED
@@ -1,2 +1,3 @@
 PyPDFForm
-ollama
+ollama
+intel-npu-acceleration-library