inoid committed on
Commit
1b17c2f
1 Parent(s): db73536

Add apply_chat_template process

Browse files
Files changed (1) hide show
  1. spanish_medica_llm.py +2 -1
spanish_medica_llm.py CHANGED
@@ -692,7 +692,7 @@ def run_training_process():
692
 
693
  train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid( medicalSpanishDataset )
694
 
695
- base_model = loadBaseModel(MISTRAL_BASE_MODEL_ID)
696
  base_model = modelLoraConfigBioMistral(base_model)
697
 
698
  configAndRunTraining(base_model,train_dataset, eval_dataset, tokenizer)
@@ -703,6 +703,7 @@ def run_finnetuning_process():
703
  os.environ['WANDB_DISABLED'] = 'true'
704
  tokenizer = loadSpanishTokenizer()
705
  medicalSpanishDataset = applyChatInstructFormat( loadSpanishDatasetFinnetuning())
 
706
  medicalSpanishDataset = tokenizer.apply_chat_template(medicalSpanishDataset, tokenize=False)
707
  medicalSpanishDataset = medicalSpanishDataset.train_test_split(0.2, seed=203984)
708
  train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid( medicalSpanishDataset )
 
692
 
693
  train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid( medicalSpanishDataset )
694
 
695
+ base_model = loadBaseModel(MISTRAL_BASE_MODEL_ID)
696
  base_model = modelLoraConfigBioMistral(base_model)
697
 
698
  configAndRunTraining(base_model,train_dataset, eval_dataset, tokenizer)
 
703
  os.environ['WANDB_DISABLED'] = 'true'
704
  tokenizer = loadSpanishTokenizer()
705
  medicalSpanishDataset = applyChatInstructFormat( loadSpanishDatasetFinnetuning())
706
+ print (medicalSpanishDataset[5])
707
  medicalSpanishDataset = tokenizer.apply_chat_template(medicalSpanishDataset, tokenize=False)
708
  medicalSpanishDataset = medicalSpanishDataset.train_test_split(0.2, seed=203984)
709
  train_dataset, eval_dataset, test_dataset = splitDatasetInTestValid( medicalSpanishDataset )