|
import json
|
|
from tensorflow.keras.preprocessing.text import tokenizer_from_json
|
|
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
|
from tensorflow.keras.models import load_model
|
|
import numpy as np
|
|
|
|
class TranslationModel:
    """Inference wrapper around a trained Keras encoder-decoder translation
    model (English -> French) and its fitted tokenizers."""

    def __init__(self, model_path, en_tokenizer, fr_tokenizer, max_len):
        """Load the saved model and store the tokenizers / sequence length.

        Args:
            model_path: path accepted by ``tensorflow.keras.models.load_model``.
            en_tokenizer: fitted Keras tokenizer for English (encoder side).
            fr_tokenizer: fitted Keras tokenizer for French (decoder side).
            max_len: padded sequence length the model was trained with.
        """
        self.model = load_model(model_path)
        self.english_tokenizer = en_tokenizer
        self.french_tokenizer = fr_tokenizer
        self.max_seq_length = max_len

    def logits_index(self, text):
        """Return the argmax vocabulary index of the model's output logits at
        the final decoder time step, conditioning the decoder only on the
        '<start>' token.

        Args:
            text: English source sentence (plain string).

        Returns:
            Integer (numpy scalar) index into the French vocabulary.

        Raises:
            ValueError: if ``max_seq_length`` is not positive (the original
                code raised NameError on an unbound local in that case).
        """
        if self.max_seq_length <= 0:
            raise ValueError("max_seq_length must be positive")

        input_sequence = self.english_tokenizer.texts_to_sequences([text])
        padded_input_sequence = pad_sequences(
            input_sequence, maxlen=self.max_seq_length, padding='post')

        # The decoder input below is loop-invariant: the previous version ran
        # the model max_seq_length times with identical inputs and kept only
        # the last iteration's argmax. Run the model once instead; the
        # returned value is unchanged.
        # NOTE(review): the old loop looks like an unfinished greedy decode
        # that was meant to append each predicted token to decoded_text —
        # confirm intent before extending this into a full decode loop.
        decoded_text = '<start>'
        target_sequence = self.french_tokenizer.texts_to_sequences([decoded_text])
        padded_target_sequence = pad_sequences(
            target_sequence, maxlen=self.max_seq_length, padding='post')[:, :-1]

        prediction = self.model([padded_input_sequence, padded_target_sequence])

        # Same index the old loop returned on its final pass (i = max_len - 1).
        idx = np.argmax(prediction[0, self.max_seq_length - 1, :])
        return idx