# my_text_to_text_dataset.py

from datasets import Dataset
from text_generation import YourTextGenerationModel

def generate_text_to_text_data(
    prompts=None,
    *,
    model=None,
    vocab_size=10000,
    embedding_dim=128,
    hidden_dim=256,
):
    """Build the prompt/generation columns for a text-to-text dataset.

    Every prompt is passed to the model's ``generate_text`` method and the
    result is paired with that prompt by position.

    Args:
        prompts: Iterable of prompts to feed to the model. When omitted,
            three built-in example prompts are used.
        model: Object exposing ``generate_text(prompt)``. When omitted, a
            ``YourTextGenerationModel`` is constructed from ``vocab_size``,
            ``embedding_dim`` and ``hidden_dim``.
        vocab_size: First positional argument for the default model.
        embedding_dim: Second positional argument for the default model.
        hidden_dim: Third positional argument for the default model.

    Returns:
        A dict with two equal-length lists: ``"input_text"`` (the prompts)
        and ``"target_text"`` (whatever ``generate_text`` returned for each
        prompt, in the same order).
    """
    if prompts is None:
        prompts = [
            "Tell a story about",
            "Describe a scene with",
            "Explain the concept of",
        ]
    else:
        # Materialize once: the prompts are both iterated for generation and
        # returned as a column, so a one-shot iterator must not be consumed
        # before it is stored.
        prompts = list(prompts)

    if model is None:
        # Only touch the project model class when the caller did not supply
        # a model of their own.
        model = YourTextGenerationModel(vocab_size, embedding_dim, hidden_dim)

    generated_texts = [model.generate_text(prompt) for prompt in prompts]

    return {
        "input_text": prompts,
        "target_text": generated_texts,
    }

def create_text_to_text_huggingface_dataset():
    """Return a ``Dataset`` built from the generated text-to-text columns.

    The mapping produced by ``generate_text_to_text_data`` is handed
    directly to ``Dataset.from_dict``.
    """
    return Dataset.from_dict(generate_text_to_text_data())

if __name__ == "__main__":
    # Script entry point: build the dataset and write it out under the
    # given path.
    output_path = "my_text_to_text_dataset"
    create_text_to_text_huggingface_dataset().save_to_disk(output_path)