Update main.py
--- a/main.py
+++ b/main.py
@@ -21,9 +21,9 @@ embeddingModel = AutoModel.from_pretrained('./multilingual-e5-base')
 
 # chatGpt replacement
 inferenceTokenizer = AutoTokenizer.from_pretrained(
-    "./ct2fast-flan-alpaca-
+    "./ct2fast-flan-alpaca-xxl")
 inferenceTranslator = Translator(
-    "./ct2fast-flan-alpaca-
+    "./ct2fast-flan-alpaca-xxl", compute_type="int8", device="cpu")
 
 
 class EmbeddingRequest(BaseModel):
@@ -79,7 +79,7 @@ async def inference(request: InferenceRequest):
         inferenceTokenizer.encode(input_text))
 
     results = inferenceTranslator.translate_batch(
-        [input_tokens], max_input_length=0, max_decoding_length=max_length, num_hypotheses=1, repetition_penalty=1.3, sampling_topk=30, sampling_temperature=1.1, use_vmap=True)
+        [input_tokens], max_input_length=0, max_decoding_length=max_length, num_hypotheses=1, repetition_penalty=1.3, sampling_topk=30, sampling_temperature=1.1, use_vmap=True, disable_unk=True)
 
     output_tokens = results[0].hypotheses[0]
     output_text = inferenceTokenizer.decode(
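For context, a minimal runnable sketch of the inference path this commit touches. It assumes ./ct2fast-flan-alpaca-xxl holds a CTranslate2 conversion of flan-alpaca-xxl; the input_text and max_length values, and the convert_ids_to_tokens call preceding the quoted encode line, are illustrative assumptions, since the full main.py is not shown in the diff.

from ctranslate2 import Translator
from transformers import AutoTokenizer

# Same setup as the new side of the diff: the tokenizer loads from the
# converted model directory, and int8 compute keeps the xxl model usable on CPU.
inferenceTokenizer = AutoTokenizer.from_pretrained("./ct2fast-flan-alpaca-xxl")
inferenceTranslator = Translator(
    "./ct2fast-flan-alpaca-xxl", compute_type="int8", device="cpu")

input_text = "Summarize: CTranslate2 runs quantized Transformer models."  # assumed
max_length = 256  # assumed; in main.py this presumably comes from the request

# CTranslate2 consumes token strings rather than ids, hence the round trip.
input_tokens = inferenceTokenizer.convert_ids_to_tokens(
    inferenceTokenizer.encode(input_text))

results = inferenceTranslator.translate_batch(
    [input_tokens],
    max_input_length=0,             # 0 disables input truncation
    max_decoding_length=max_length,
    num_hypotheses=1,
    repetition_penalty=1.3,
    sampling_topk=30,
    sampling_temperature=1.1,
    use_vmap=True,                  # uses the vocabulary map saved with the model, if present
    disable_unk=True)               # the flag this commit adds: never emit <unk>

output_tokens = results[0].hypotheses[0]
output_text = inferenceTokenizer.decode(
    inferenceTokenizer.convert_tokens_to_ids(output_tokens))
print(output_text)

disable_unk=True masks the unknown token during decoding, which matters here because top-k sampling at temperature 1.1 can otherwise occasionally emit <unk> into the generated text.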