yassin566 committed on
Commit
d794735
·
verified ·
1 Parent(s): 4e61ba8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -0
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Minimal AirLLM inference script: load a model, tokenize one prompt,
# generate a short continuation, then print the decoded result.
from airllm import AutoModel

# Upper bound passed to the tokenizer as max_length (with truncation on).
MAX_LENGTH = 128

model = AutoModel.from_pretrained("v2ray/Llama-3-70B")
tokenizer = model.tokenizer

input_text = ['What is the capital of United States?']

# Tokenizer settings gathered in one place so the call itself stays short.
tokenizer_options = dict(
    return_tensors="pt",
    return_attention_mask=False,
    truncation=True,
    max_length=MAX_LENGTH,
    padding=False,
)
input_tokens = tokenizer(input_text, **tokenizer_options)

# .cuda() is called unconditionally, so a CUDA device is presumably
# required to run this script.
prompt_ids = input_tokens['input_ids'].cuda()

generation_output = model.generate(
    prompt_ids,
    max_new_tokens=20,
    use_cache=True,
    return_dict_in_generate=True,
)

# Only the first returned sequence is decoded and printed.
output = tokenizer.decode(generation_output.sequences[0])
print(output)