inoid committed on
Commit
78455b7
·
1 Parent(s): f9a5344

Add apply_chat_template process

Browse files
Files changed (1) hide show
  1. spanish_medica_llm.py +3 -2
spanish_medica_llm.py CHANGED
@@ -698,13 +698,14 @@ def run_training_process():
698
  configAndRunTraining(base_model,train_dataset, eval_dataset, tokenizer)
699
 
700
  def run_finnetuning_process():
701
-
702
  #Loggin to Huggin Face
703
  login(token = os.environ.get('HG_FACE_TOKEN'))
704
  os.environ['WANDB_DISABLED'] = 'true'
705
  tokenizer = loadSpanishTokenizer()
706
  medicalSpanishDataset = applyChatInstructFormat( loadSpanishDatasetFinnetuning())
707
- print ( tokenizer.apply_chat_template(medicalSpanishDataset[5], tokenize=False))
 
708
  print('----------------------------------------------------------')
709
  medicalSpanishDataset = tokenizer.apply_chat_template(medicalSpanishDataset, tokenize=False)
710
  medicalSpanishDataset = medicalSpanishDataset.train_test_split(0.2, seed=203984)
 
698
  configAndRunTraining(base_model,train_dataset, eval_dataset, tokenizer)
699
 
700
  def run_finnetuning_process():
701
+
702
  #Loggin to Huggin Face
703
  login(token = os.environ.get('HG_FACE_TOKEN'))
704
  os.environ['WANDB_DISABLED'] = 'true'
705
  tokenizer = loadSpanishTokenizer()
706
  medicalSpanishDataset = applyChatInstructFormat( loadSpanishDatasetFinnetuning())
707
+ print ( tokenizer.apply_chat_template(medicalSpanishDataset[5]['raw_text'], tokenize=False))
708
+
709
  print('----------------------------------------------------------')
710
  medicalSpanishDataset = tokenizer.apply_chat_template(medicalSpanishDataset, tokenize=False)
711
  medicalSpanishDataset = medicalSpanishDataset.train_test_split(0.2, seed=203984)