#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 5 10:01:39 2023
@author: peter
"""
import transformers
import qarac.models.layers.GlobalAttentionPoolingHead
class QaracEncoderModel(transformers.PreTrainedModel):

    def __init__(self, path):
        """
        Creates the encoder model

        Parameters
        ----------
        path : str
            Path or name of the pretrained RoBERTa checkpoint to load
            as the base model

        Returns
        -------
        None.

        """
        config = transformers.PretrainedConfig.from_pretrained(path)
        super(QaracEncoderModel, self).__init__(config)
        self.encoder = transformers.RobertaModel.from_pretrained(path)
        self.head = qarac.models.layers.GlobalAttentionPoolingHead.GlobalAttentionPoolingHead(config)
    def forward(self, input_ids, attention_mask=None):
        """
        Vectorizes a tokenized text

        Parameters
        ----------
        input_ids : torch.Tensor or transformers.BatchEncoding
            Tokenized text to encode. May be a BatchEncoding holding both
            input_ids and attention_mask.
        attention_mask : torch.Tensor, optional
            Mask distinguishing real tokens from padding. The default is None.

        Returns
        -------
        torch.Tensor
            Vector representing the document

        """
        # Unpack a BatchEncoding (or plain dict) produced by a tokenizer.
        # Checking the type first avoids applying 'in' to a raw tensor.
        if (attention_mask is None
                and isinstance(input_ids, (dict, transformers.BatchEncoding))
                and 'attention_mask' in input_ids):
            (input_ids, attention_mask) = (input_ids['input_ids'],
                                           input_ids['attention_mask'])
        # Debug output: confirm both tensors are on the expected device
        print('input_ids', input_ids.device)
        print('attention_mask', attention_mask.device)
        return self.head(self.encoder(input_ids,
                                      attention_mask).last_hidden_state,
                         attention_mask)
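

# A minimal usage sketch, not part of the original module's logic: it shows
# how the encoder can be called either with separate tensors or with a
# tokenizer's BatchEncoding, which forward() unpacks itself. 'roberta-base'
# is an assumed checkpoint name for illustration; the checkpoint actually
# used to train QARAC may differ.
if __name__ == '__main__':
    import torch

    encoder = QaracEncoderModel('roberta-base')
    tokenizer = transformers.RobertaTokenizer.from_pretrained('roberta-base')
    batch = tokenizer(['An example sentence to vectorize.'],
                      return_tensors='pt',
                      padding=True)
    with torch.no_grad():
        # Equivalent call: encoder(batch), which unpacks input_ids and
        # attention_mask from the BatchEncoding internally
        vectors = encoder(batch['input_ids'], batch['attention_mask'])
    print(vectors.shape)  # expected: (batch_size, hidden_size)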