rheremans committed
Commit 56ded97
1 Parent(s): b494b42

Update app.py

Files changed (1): app.py +36 -5
app.py CHANGED
@@ -1,19 +1,50 @@
+ from transformers import pipeline, Conversation, AutoTokenizer, AutoModelForCausalLM
+ from langchain.llms import HuggingFacePipeline
+ from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceHub
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+
from langchain.chains import LLMChain
- from langchain.prompts import PromptTemplate
- from langchain_community.llms import LlamaCpp
+
+ #1: "meta-llama/Llama-2-13b-chat-hf",
+ #2: "BramVanroy/Llama-2-13b-chat-dutch"
+ my_config = {'model_name': "meta-llama/Llama-2-13b-chat-hf",  # "./Bram", "BramVanroy/Llama-2-13b-chat-dutch"
+              'do_sample': True, 'temperature': 0.1,
+              'repetition_penalty': 1.1, 'max_new_tokens': 500, }
+
+ print(f"Selected model: {my_config['model_name']}")
+ print(f"Parameters are: {my_config}")

question = "Who won the FIFA World Cup in the year 1994? "
template = """Question: {question}
Answer: Let's think step by step."""

prompt = PromptTemplate.from_template(template)
+ def generate_with_llama_chat(my_config):
+     print('tokenizer')
+     tokenizer = AutoTokenizer.from_pretrained(my_config['model_name'])
+     print('causal')
+     model = AutoModelForCausalLM.from_pretrained(my_config['model_name'])
+     print('Pipeline')
+     chatbot = pipeline("text-generation", model=my_config['model_name'],
+                        tokenizer=tokenizer,
+                        do_sample=my_config['do_sample'],
+                        temperature=my_config['temperature'],
+                        repetition_penalty=my_config['repetition_penalty'],
+                        # max_length=my_config['max_length'],
+                        max_new_tokens=my_config['max_new_tokens'],
+                        model_kwargs={"device_map": "auto", "load_in_8bit": True})
+     return chatbot
+
+ llama_chat = generate_with_llama_chat(my_config)
+
+ # Set up callback manager to print output word by word
+ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+ llm = HuggingFacePipeline(pipeline=llama_chat, callback_manager=callback_manager)
+

- llm = HuggingFaceHub(
-     repo_id="BramVanroy/Llama-2-13b-chat-dutch", model_kwargs={"temperature": 0.5, "max_length": 64}
- )
llm_chain = LLMChain(prompt=prompt, llm=llm)

print(llm_chain.invoke(question))
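
The net effect of the change: instead of calling the hosted HuggingFaceHub endpoint, app.py now loads the checkpoint locally through a transformers text-generation pipeline and hands it to LangChain via HuggingFacePipeline. Below is a minimal sketch of that flow, assuming the same langchain/transformers versions as the diff; the small sshleifer/tiny-gpt2 checkpoint is only a stand-in so the sketch can be smoke-tested without downloading 13B weights, while the app itself keeps using my_config['model_name'].

# Minimal sketch of the new app.py flow (stand-in model, not the one the app ships with).
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Stand-in checkpoint chosen only so the sketch runs quickly; swap in my_config['model_name'].
pipe = pipeline("text-generation", model="sshleifer/tiny-gpt2", max_new_tokens=50)
llm = HuggingFacePipeline(pipeline=pipe)

prompt = PromptTemplate.from_template("Question: {question}\nAnswer: Let's think step by step.")
chain = LLMChain(prompt=prompt, llm=llm)
print(chain.invoke("Who won the FIFA World Cup in the year 1994? "))

Swapping HuggingFaceHub for HuggingFacePipeline moves inference from the hosted API onto local hardware, which is why the commit also passes device_map="auto" and load_in_8bit=True to keep the 13B model within GPU memory.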