stance-pl / preprocessing.py
Dawid Motyka
app and model
834d42f
raw
history blame
1.05 kB
import logging
# Module-level logger obtained under the fixed name 'stancedatasets'
# (not derived from __name__).
logger = logging.getLogger('stancedatasets')
# Polish prompt wording keyed by prompt_type. Each entry is
# (text placed before the target, text placed between the target and the masks).
# The variants differ only in the leading "Więc" and the colon after "jest".
_PL_PROMPT_TEMPLATES = {
    1: ('Moja postawa w kierunku ', ' jest: '),
    2: ('Moja postawa w kierunku ', ' jest '),
    3: ('Więc moja postawa w kierunku ', ' jest: '),
    4: ('Więc moja postawa w kierunku ', ' jest '),
}


def format_masked_lm_prompt(stance_df, tokenizer, prompt_type, lang='en', masked_lm_tokens=1):
    """Build (text, prompt) pairs for masked-LM stance prompting.

    Args:
        stance_df: Table indexed by column name; the 'text' and 'target'
            columns are read. The 'target' column must support element-wise
            ``+`` with ``str`` (presumably a pandas DataFrame -- confirm
            against callers).
        tokenizer: Object exposing a ``mask_token`` string attribute.
        prompt_type: Selects the prompt wording; 1-4 are defined for 'pl'.
        lang: Prompt language. Only 'pl' has templates in this function, so
            the default 'en' raises ``ValueError``.
        masked_lm_tokens: How many times the mask token is repeated
            (concatenated without a separator) in the prompt.

    Returns:
        A tuple ``(texts, prompts)`` of two lists: the 'text' column values,
        and one prompt string per 'target' value, ending in the mask tokens
        followed by '.'.

    Raises:
        ValueError: If no template exists for the ``lang`` / ``prompt_type``
            combination.
    """
    # Built before validation so a bad tokenizer fails the same way as before.
    masks_str = "".join([tokenizer.mask_token] * masked_lm_tokens)

    template = None
    if lang == 'pl':
        try:
            template = _PL_PROMPT_TEMPLATES.get(prompt_type)
        except TypeError:
            # Unhashable prompt_type: treat it as unknown instead of leaking
            # a TypeError from the lookup.
            template = None

    if template is None:
        raise ValueError(f'unknown prompt_type: {prompt_type} for language {lang}')

    prefix, connector = template
    texts = list(stance_df['text'])
    prompts = list(prefix + stance_df['target'] + connector + masks_str + '.')
    return texts, prompts