Pro-AI-tests / my_text_to_text_dataset.py
ProCreations's picture
Waw
37a709c
raw
history blame
1.08 kB
# my_text_to_text_dataset.py
from datasets import Dataset
from text_generation import YourTextGenerationModel
def generate_text_to_text_data(
    prompts=None,
    vocab_size=10000,
    embedding_dim=128,
    hidden_dim=256,
    model=None,
):
    """Build the prompt/output columns for a text-to-text dataset.

    Each prompt is passed to ``model.generate_text`` and the result is paired
    with the prompt that produced it. Calling the function with no arguments
    uses the three built-in prompts and a freshly constructed
    ``YourTextGenerationModel(10000, 128, 256)``.

    Args:
        prompts: Iterable of prompt strings. ``None`` selects the three
            built-in example prompts. The iterable is copied into a list, so
            one-shot iterables such as generators are accepted.
        vocab_size: First positional argument given to
            ``YourTextGenerationModel`` when ``model`` is not supplied.
        embedding_dim: Second positional argument for the model constructor.
        hidden_dim: Third positional argument for the model constructor.
        model: Optional pre-built object exposing ``generate_text(prompt)``.
            When given, the three size arguments are ignored and no model is
            constructed.

    Returns:
        A dict with two equal-length lists: ``"input_text"`` (the prompts) and
        ``"target_text"`` (whatever ``generate_text`` returned for each
        prompt, in the same order).
    """
    if prompts is None:
        prompts = [
            "Tell a story about",
            "Describe a scene with",
            "Explain the concept of",
        ]
    else:
        # Copy once: the prompts are iterated for generation AND returned as
        # a column, which would exhaust a generator and must not alias the
        # caller's list.
        prompts = list(prompts)

    if model is None:
        # Only build the (potentially expensive) model when the caller did
        # not inject one.
        model = YourTextGenerationModel(vocab_size, embedding_dim, hidden_dim)

    generated_texts = [model.generate_text(prompt) for prompt in prompts]

    return {
        "input_text": prompts,
        "target_text": generated_texts,
    }
def create_text_to_text_huggingface_dataset():
    """Return a ``datasets.Dataset`` built from the generated column dict.

    The columns come from ``generate_text_to_text_data()`` and are handed
    directly to ``Dataset.from_dict``.
    """
    return Dataset.from_dict(generate_text_to_text_data())
if __name__ == "__main__":
    # Script entry point: build the dataset and persist it to the
    # "my_text_to_text_dataset" path.
    create_text_to_text_huggingface_dataset().save_to_disk("my_text_to_text_dataset")