Spaces:
Runtime error
Runtime error
Add apply_chat_template process
Browse files- spanish_medica_llm.py +3 -2
spanish_medica_llm.py
CHANGED
@@ -698,13 +698,14 @@ def run_training_process():
|
|
698 |
configAndRunTraining(base_model,train_dataset, eval_dataset, tokenizer)
|
699 |
|
700 |
def run_finnetuning_process():
|
701 |
-
|
702 |
#Loggin to Huggin Face
|
703 |
login(token = os.environ.get('HG_FACE_TOKEN'))
|
704 |
os.environ['WANDB_DISABLED'] = 'true'
|
705 |
tokenizer = loadSpanishTokenizer()
|
706 |
medicalSpanishDataset = applyChatInstructFormat( loadSpanishDatasetFinnetuning())
|
707 |
-
print ( tokenizer.apply_chat_template(medicalSpanishDataset[5], tokenize=False))
|
|
|
708 |
print('----------------------------------------------------------')
|
709 |
medicalSpanishDataset = tokenizer.apply_chat_template(medicalSpanishDataset, tokenize=False)
|
710 |
medicalSpanishDataset = medicalSpanishDataset.train_test_split(0.2, seed=203984)
|
|
|
698 |
configAndRunTraining(base_model,train_dataset, eval_dataset, tokenizer)
|
699 |
|
700 |
def run_finnetuning_process():
|
701 |
+
|
702 |
#Loggin to Huggin Face
|
703 |
login(token = os.environ.get('HG_FACE_TOKEN'))
|
704 |
os.environ['WANDB_DISABLED'] = 'true'
|
705 |
tokenizer = loadSpanishTokenizer()
|
706 |
medicalSpanishDataset = applyChatInstructFormat( loadSpanishDatasetFinnetuning())
|
707 |
+
print ( tokenizer.apply_chat_template(medicalSpanishDataset[5]['raw_text'], tokenize=False))
|
708 |
+
|
709 |
print('----------------------------------------------------------')
|
710 |
medicalSpanishDataset = tokenizer.apply_chat_template(medicalSpanishDataset, tokenize=False)
|
711 |
medicalSpanishDataset = medicalSpanishDataset.train_test_split(0.2, seed=203984)
|