Commit e7cf363 (parent: 00a8791), committed by enricorampazzo

removed NPU-related code to make HF happy

deps/intel_npu_acceleration_library-1.3.0-cp310-cp310-win_amd64.whl DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:af65c7048377c8000a48dd00174cbe47f5c9264fca511dc0d1669148df22214d
- size 39727569

repository/intel_npu.py CHANGED
@@ -1,58 +1,58 @@
- import json
- from pathlib import Path
-
- from intel_npu_acceleration_library import NPUModelForCausalLM, int4
- from intel_npu_acceleration_library.compiler import CompilerConfig
- from transformers import AutoTokenizer
-
- from repository.repository_abc import Repository, Model
-
-
- class IntelNpuRepository(Repository):
-     def __init__(self, model_info: Model, system_msg: str = None, log_to_file: Path = None):
-         self.model_info: Model = model_info
-         self.message_history: list[dict[str, str]] = []
-         self.set_message_for_role(self.model_info.roles.system_role, system_msg)
-         self.model = None
-         self.tokenizer = None
-         self.terminators = None
-         self.log_to_file = log_to_file
-
-     def get_model_info(self) -> Model:
-         return self.model_info
-
-     def get_message_history(self) -> list[dict[str, str]]:
-         return self.message_history
-
-     def init(self):
-         compiler_conf = CompilerConfig(dtype=int4)
-         self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True, config=compiler_conf,
-                                                          export=True, temperature=0).eval()
-         self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
-         self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
-
-     def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
-         pass
-         print("prompt to be sent: " + prompt)
-         user_prompt = {"role": self.model_info.roles.user_role, "content": prompt}
-         if self.log_to_file:
-             with open(self.log_to_file, "a+") as log_file:
-                 log_file.write(json.dumps(user_prompt, indent=2))
-                 log_file.write("\n")
-         self.get_message_history().append(user_prompt)
-         input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True,
-                                                         return_tensors="pt")
-                      .to(self.model.device))
-         outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000, cache_position=None)
-         generated_token_array = outputs[0][len(input_ids[0]):]
-         generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
-         answer = {"role": self.get_model_info().roles.ai_role, "content": generated_tokens}
-         if self.log_to_file:
-             with open(self.log_to_file, "a+") as log_file:
-                 log_file.write(json.dumps(answer, indent=2))
-                 log_file.write("\n")
-         if add_to_history:
-             self.message_history.append(answer)
-         else:
-             self.message_history.pop()
-         return answer
+ # import json
+ # from pathlib import Path
+ #
+ # from intel_npu_acceleration_library import NPUModelForCausalLM, int4
+ # from intel_npu_acceleration_library.compiler import CompilerConfig
+ # from transformers import AutoTokenizer
+ #
+ # from repository.repository_abc import Repository, Model
+ #
+ #
+ # class IntelNpuRepository(Repository):
+ #     def __init__(self, model_info: Model, system_msg: str = None, log_to_file: Path = None):
+ #         self.model_info: Model = model_info
+ #         self.message_history: list[dict[str, str]] = []
+ #         self.set_message_for_role(self.model_info.roles.system_role, system_msg)
+ #         self.model = None
+ #         self.tokenizer = None
+ #         self.terminators = None
+ #         self.log_to_file = log_to_file
+ #
+ #     def get_model_info(self) -> Model:
+ #         return self.model_info
+ #
+ #     def get_message_history(self) -> list[dict[str, str]]:
+ #         return self.message_history
+ #
+ #     def init(self):
+ #         compiler_conf = CompilerConfig(dtype=int4)
+ #         self.model = NPUModelForCausalLM.from_pretrained(self.model_info.name, use_cache=True, config=compiler_conf,
+ #                                                          export=True, temperature=0).eval()
+ #         self.tokenizer = AutoTokenizer.from_pretrained(self.model_info.name)
+ #         self.terminators = [self.tokenizer.eos_token_id, self.tokenizer.convert_tokens_to_ids("<|eot_id|>")]
+ #
+ #     def send_prompt(self, prompt: str, add_to_history: bool = True) -> dict[str, str]:
+ #         pass
+ #         print("prompt to be sent: " + prompt)
+ #         user_prompt = {"role": self.model_info.roles.user_role, "content": prompt}
+ #         if self.log_to_file:
+ #             with open(self.log_to_file, "a+") as log_file:
+ #                 log_file.write(json.dumps(user_prompt, indent=2))
+ #                 log_file.write("\n")
+ #         self.get_message_history().append(user_prompt)
+ #         input_ids = (self.tokenizer.apply_chat_template(self.get_message_history(), add_generation_prompt=True,
+ #                                                         return_tensors="pt")
+ #                      .to(self.model.device))
+ #         outputs = self.model.generate(input_ids, eos_token_id=self.terminators, do_sample=True, max_new_tokens=2000, cache_position=None)
+ #         generated_token_array = outputs[0][len(input_ids[0]):]
+ #         generated_tokens = "".join(self.tokenizer.batch_decode(generated_token_array, skip_special_tokens=True))
+ #         answer = {"role": self.get_model_info().roles.ai_role, "content": generated_tokens}
+ #         if self.log_to_file:
+ #             with open(self.log_to_file, "a+") as log_file:
+ #                 log_file.write(json.dumps(answer, indent=2))
+ #                 log_file.write("\n")
+ #         if add_to_history:
+ #             self.message_history.append(answer)
+ #         else:
+ #             self.message_history.pop()
+ #         return answer
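
The new version of the file is entirely commented out, so importing repository.intel_npu now yields an empty module. An alternative that reaches the same goal without deleting behaviour is a guarded import; this is only a sketch of that idea, not what the commit does, and NPU_AVAILABLE is a hypothetical flag name:

try:
    # These imports only succeed where the Intel NPU wheel is actually installed.
    from intel_npu_acceleration_library import NPUModelForCausalLM, int4
    from intel_npu_acceleration_library.compiler import CompilerConfig
    NPU_AVAILABLE = True
except ImportError:
    # On Hugging Face Spaces, where the Windows-only wheel cannot be installed,
    # the module still imports cleanly and the rest of the app keeps working.
    NPU_AVAILABLE = False

The factory below could then construct IntelNpuRepository only when the flag is true, keeping the backend usable on local Windows machines.
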
repository/repository.py CHANGED
@@ -1,6 +1,6 @@
  from pathlib import Path

- from repository.intel_npu import IntelNpuRepository
+ # from repository.intel_npu import IntelNpuRepository
  from repository.ollama import OllamaRepository
  from repository.ondemand import OndemandRepository
  from repository.repository_abc import Model
@@ -13,8 +13,8 @@ def get_repository(implementation: str, model: Model, system_msg: str = None, lo
          raise ValueError(f"Unknown implementation {implementation}. Known implementations: {known_implementations}")
      if "ollama" == implementation:
          return OllamaRepository(model, system_msg)
-     if "intel_npu" == implementation:
-         return IntelNpuRepository(model, system_msg, log_to_file)
+     # if "intel_npu" == implementation:
+     #     return IntelNpuRepository(model, system_msg, log_to_file)
      if "ondemand" == implementation:
          return OndemandRepository(model, system_msg, log_to_file)
      if "testing" == implementation:
requirements.txt CHANGED
@@ -2,4 +2,4 @@ PyPDFForm
2
  ollama
3
  transformers
4
  streamlit
5
- deps/intel_npu_acceleration_library-1.3.0-cp310-cp310-win_amd64.whl
 
2
  ollama
3
  transformers
4
  streamlit
5
+ # deps/intel_npu_acceleration_library-1.3.0-cp310-cp310-win_amd64.whl