from typing import Any, Dict, List, Optional, Sequence

from datasets import Dataset

from text_generation import YourTextGenerationModel


def generate_text_to_text_data(
    prompts: Optional[Sequence[str]] = None,
    model: Optional[Any] = None,
    vocab_size: int = 10000,
    embedding_dim: int = 128,
    hidden_dim: int = 256,
) -> Dict[str, List[Any]]:
    """Build the column data for a text-to-text dataset.

    Each prompt is passed to ``model.generate_text`` and the prompt/result
    pairs are returned as two parallel lists. Called with no arguments, the
    function uses three built-in prompts and a newly constructed
    ``YourTextGenerationModel(10000, 128, 256)``.

    Args:
        prompts: Prompts to feed to the model. ``None`` selects the built-in
            prompts; an empty sequence yields empty columns. A single bare
            string is treated as one prompt rather than being iterated
            character by character.
        model: Any object exposing ``generate_text(prompt)``. When ``None``,
            a ``YourTextGenerationModel`` is constructed from the three size
            arguments below.
        vocab_size: First constructor argument for the default model.
        embedding_dim: Second constructor argument for the default model.
        hidden_dim: Third constructor argument for the default model.

    Returns:
        A dict with keys ``"input_text"`` (the prompts, as a list) and
        ``"target_text"`` (whatever ``generate_text`` returned for each
        prompt, in the same order).
    """
    if prompts is None:
        prompt_list = [
            "Tell a story about",
            "Describe a scene with",
            "Explain the concept of",
        ]
    elif isinstance(prompts, str):
        # A lone string is almost certainly meant as a single prompt.
        prompt_list = [prompts]
    else:
        # Copy so the returned column never aliases the caller's sequence.
        prompt_list = list(prompts)

    if model is None:
        model = YourTextGenerationModel(vocab_size, embedding_dim, hidden_dim)

    generated_texts = [model.generate_text(prompt) for prompt in prompt_list]

    return {
        "input_text": prompt_list,
        "target_text": generated_texts,
    }


def create_text_to_text_huggingface_dataset(
    data: Optional[Dict[str, List[Any]]] = None,
) -> "Dataset":
    """Wrap text-to-text column data in a Hugging Face ``Dataset``.

    Args:
        data: Mapping of column name to list of values, passed straight to
            ``Dataset.from_dict``. When ``None``, the columns are produced by
            calling ``generate_text_to_text_data()`` with its defaults.

    Returns:
        The ``Dataset`` built by ``Dataset.from_dict``.
    """
    if data is None:
        data = generate_text_to_text_data()
    return Dataset.from_dict(data)


if __name__ == "__main__":
    # Script entry point: build the dataset with the default prompts and
    # model, then persist it to a directory relative to the current working
    # directory.
    text_to_text_huggingface_dataset = create_text_to_text_huggingface_dataset()
    text_to_text_huggingface_dataset.save_to_disk("my_text_to_text_dataset")