import tensorflow as tf
# Improved training corpus with more data and variety
corpus = """
The quick brown fox jumps over the lazy dog.
Machine learning is the study of algorithms that can learn from data.
Natural language processing is a subfield of artificial intelligence concerned with the interactions between computers and human language.
Deep learning is a class of machine learning algorithms that use multiple layers of artificial neural networks to learn from data.
"""
# Tokenization: split on whitespace and map each token to an integer ID
# (the Embedding layer below expects integer token IDs, not one-hot vectors)
tokens = corpus.split()
vocab = sorted(set(tokens))
vocab_size = len(vocab)
token_to_id = {token: i for i, token in enumerate(vocab)}
token_ids = tf.constant([token_to_id[t] for t in tokens])
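# A minimal alternative sketch, assuming TF >= 2.6 where StringLookup is a
# stable Keras layer: with the vocabulary passed explicitly and no OOV slot,
# it reproduces the dict-based mapping above.
lookup = tf.keras.layers.StringLookup(vocabulary=vocab, num_oov_indices=0)
assert list(lookup(tf.constant(tokens)).numpy()) == [token_to_id[t] for t in tokens]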
# Model architecture with embedding, LSTM layers, and dense output
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 8),                 # Integer IDs -> 8-dim dense vectors
    tf.keras.layers.LSTM(16, return_sequences=True),          # First recurrent LSTM layer
    tf.keras.layers.LSTM(16),                                 # Second LSTM layer, keeps the final state only
    tf.keras.layers.Dense(vocab_size, activation="softmax"),  # Probabilities over the vocabulary
])
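# Optional sanity check (a sketch, not required for training): run a dummy
# batch through the model to confirm it maps a batch of ID sequences to one
# probability vector per sequence.
dummy_batch = tf.zeros((2, 5), dtype=tf.int32)  # 2 sequences of 5 token IDs
assert model(dummy_batch).shape == (2, vocab_size)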
# Compile with sparse categorical crossentropy (targets are integer IDs) and the Adam optimizer
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
# Train on next-token prediction: token i is the input, token i + 1 the target.
# Each input is a length-1 sequence of IDs, shape (num_tokens - 1, 1).
inputs = tf.reshape(token_ids[:-1], (-1, 1))
targets = token_ids[1:]
model.fit(inputs, targets, epochs=20)
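# Optional check (a sketch): evaluate on the training pairs themselves. With a
# corpus this small the model should overfit, so accuracy should end up well
# above the 1 / vocab_size chance level.
loss, acc = model.evaluate(inputs, targets, verbose=0)
print(f"train accuracy: {acc:.2f}")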
# Generate text starting with "The"
prompt_ids = [token_to_id["The"]]
for _ in range(10):
    # Predict next-token probabilities from the full prompt so far (a batch of one sequence)
    prediction = model.predict(tf.constant([prompt_ids]), verbose=0)
    predicted_id = int(tf.argmax(prediction, axis=1).numpy()[0])
    prompt_ids.append(predicted_id)  # Feed the prediction back in as context
    print(vocab[predicted_id], end=" ")
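# Alternative decoding (a sketch, not part of the original script): sampling
# from the predicted distribution with a temperature tends to give less
# repetitive output than greedy argmax. To use it, the argmax line above would
# become: predicted_id = sample_next(prediction[0], temperature=0.8)
import numpy as np
def sample_next(probs, temperature=0.8):
    logits = np.log(np.asarray(probs, dtype=np.float64) + 1e-9) / temperature
    p = np.exp(logits - logits.max())  # Softmax over the rescaled log-probabilities
    p /= p.sum()
    return int(np.random.choice(len(p), p=p))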