vasilee committed on
Commit 81d492d · 1 Parent(s): 349e2a3

Update main.py

Files changed (1)
  1. main.py +3 -3
main.py CHANGED
@@ -21,9 +21,9 @@ embeddingModel = AutoModel.from_pretrained('./multilingual-e5-base')
 
 # chatGpt replacement
 inferenceTokenizer = AutoTokenizer.from_pretrained(
-    "./ct2fast-flan-alpaca-xl")
+    "./ct2fast-flan-alpaca-xxl")
 inferenceTranslator = Translator(
-    "./ct2fast-flan-alpaca-xl", compute_type="int8", device="cpu")
+    "./ct2fast-flan-alpaca-xxl", compute_type="int8", device="cpu")
 
 
 class EmbeddingRequest(BaseModel):
@@ -79,7 +79,7 @@ async def inference(request: InferenceRequest):
         inferenceTokenizer.encode(input_text))
 
     results = inferenceTranslator.translate_batch(
-        [input_tokens], max_input_length=0, max_decoding_length=max_length, num_hypotheses=1, repetition_penalty=1.3, sampling_topk=30, sampling_temperature=1.1, use_vmap=True)
+        [input_tokens], max_input_length=0, max_decoding_length=max_length, num_hypotheses=1, repetition_penalty=1.3, sampling_topk=30, sampling_temperature=1.1, use_vmap=True, disable_unk=True)
 
     output_tokens = results[0].hypotheses[0]
     output_text = inferenceTokenizer.decode(
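
For context, a minimal self-contained sketch of how the updated translate_batch call might be exercised end to end. It mirrors the parameters shown in this commit; the run_inference wrapper, the convert_ids_to_tokens / convert_tokens_to_ids steps, and the max_length default are assumptions filled in where main.py is truncated above, not code from this commit.

# Minimal sketch (assumptions noted above): load the converted model and run
# one generation request the way the diff suggests main.py does.
from ctranslate2 import Translator
from transformers import AutoTokenizer

inferenceTokenizer = AutoTokenizer.from_pretrained("./ct2fast-flan-alpaca-xxl")
inferenceTranslator = Translator(
    "./ct2fast-flan-alpaca-xxl", compute_type="int8", device="cpu")


def run_inference(input_text: str, max_length: int = 256) -> str:
    # CTranslate2 works on token strings, so the ids from the HF tokenizer
    # are mapped back to their token strings before translation.
    input_tokens = inferenceTokenizer.convert_ids_to_tokens(
        inferenceTokenizer.encode(input_text))

    results = inferenceTranslator.translate_batch(
        [input_tokens],
        max_input_length=0,              # 0 = do not truncate the prompt
        max_decoding_length=max_length,
        num_hypotheses=1,
        repetition_penalty=1.3,
        sampling_topk=30,
        sampling_temperature=1.1,
        use_vmap=True,
        disable_unk=True)                # added in this commit

    output_tokens = results[0].hypotheses[0]
    # Map the generated token strings back to ids and decode to text.
    return inferenceTokenizer.decode(
        inferenceTokenizer.convert_tokens_to_ids(output_tokens),
        skip_special_tokens=True)

The new disable_unk=True flag tells CTranslate2 not to generate the unknown token, so <unk> pieces cannot leak into the decoded reply.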