# my_text_to_text_dataset.py
from typing import Any, Dict, Iterable, List, Optional

from datasets import Dataset
from text_generation import YourTextGenerationModel
def generate_text_to_text_data(
    prompts: Optional[Iterable[str]] = None,
    model: Optional[Any] = None,
) -> Dict[str, List[Any]]:
    """Build paired prompt/generation columns for a text-to-text dataset.

    Args:
        prompts: Prompts to feed to the model. When omitted, the three
            built-in example prompts are used.
        model: Object exposing ``generate_text(prompt)``. When omitted, a
            new ``YourTextGenerationModel()`` is created.

    Returns:
        A dict with two equally long lists: ``"input_text"`` (the prompts)
        and ``"target_text"`` (the result of ``model.generate_text`` for
        each prompt, in the same order).
    """
    # Your data generation logic here
    if prompts is None:
        prompts = [
            "Tell a story about",
            "Describe a scene with",
            "Explain the concept of",
        ]
    # Materialize once so a one-shot iterable still fills both columns.
    prompt_list = list(prompts)

    if model is None:
        model = YourTextGenerationModel()

    generated_texts = [model.generate_text(prompt) for prompt in prompt_list]
    return {
        "input_text": prompt_list,
        "target_text": generated_texts,
    }
def create_text_to_text_huggingface_dataset() -> "Dataset":
    """Return a ``Dataset`` built from ``generate_text_to_text_data()``.

    The column dict produced by ``generate_text_to_text_data()`` is passed
    unchanged to ``Dataset.from_dict``.
    """
    data = generate_text_to_text_data()
    return Dataset.from_dict(data)
if __name__ == "__main__":
    # Script entry point: build the dataset, then save it under the
    # "my_text_to_text_dataset" path.
    text_to_text_huggingface_dataset = create_text_to_text_huggingface_dataset()
    text_to_text_huggingface_dataset.save_to_disk("my_text_to_text_dataset")