Files changed (1)
  1. tg.py +26 -32
tg.py CHANGED
@@ -1,36 +1,30 @@
- import tensorflow as tf
-
- # Improved training corpus with more data and variety
- corpus = """
- The quick brown fox jumps over the lazy dog.
- Machine learning is the study of algorithms that can learn from data.
- Natural language processing is a subfield of artificial intelligence concerned with the interactions between computers and human language.
- Deep learning is a class of machine learning algorithms that use multiple layers of artificial neural networks to learn from data.
- """
-
- # Tokenization and vectorization (one-hot encoding for simplicity)
- tokens = corpus.split()
- vocab_size = len(set(tokens))
- token_vectors = tf.one_hot(tf.strings.lookup(tokens, tf.constant([0] * vocab_size)), vocab_size)
-
- # Model architecture with embedding, LSTM layers, and dense output
- model = tf.keras.Sequential([
-     tf.keras.layers.Embedding(vocab_size, 8),  # Embedding layer
-     tf.keras.layers.LSTM(16, return_sequences=True),  # Recurrent LSTM layer
-     tf.keras.layers.LSTM(16),
-     tf.keras.layers.Dense(vocab_size)  # Output layer
- ])
-
- # Compile the model with categorical crossentropy loss and Adam optimizer
- model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
-
- # Train the model for a limited number of epochs (adjust as needed)
- model.fit(token_vectors[:-1], token_vectors[1:], epochs=20)
-
- # Generate text starting with "The"
- prompt_vector = tf.one_hot(tf.constant([tokens.index("The")]), vocab_size)
- for i in range(10):
-     prediction = model.predict(tf.expand_dims(prompt_vector, axis=0))
-     predicted_index = tf.argmax(prediction, axis=1).numpy()[0]
-     prompt_vector = tf.concat([prompt_vector, tf.one_hot([predicted_index], vocab_size)], axis=0)
-     print(tokens[predicted_index], end=" ")
+ import random
+
+ def generate_text(corpus, start_word, max_length=100):
+     """
+     Generates text using a bigram language model.
+
+     Args:
+         corpus: A list of words from the training text.
+         start_word: The word to start the generation.
+         max_length: The maximum number of words to generate.
+
+     Returns:
+         A string of generated text.
+     """
+     text = start_word
+     prev_word = start_word
+     for _ in range(max_length):
+         # Get all words that immediately follow the previous word in the corpus
+         next_word_candidates = [corpus[i + 1] for i, word in enumerate(corpus[:-1]) if word == prev_word]
+         # Stop if the previous word has no recorded successor
+         if not next_word_candidates:
+             break
+         # Randomly choose the next word; repeated successors weight the choice by frequency
+         next_word = random.choice(next_word_candidates)
+         text += " " + next_word
+         prev_word = next_word
+     return text
+
+ # Example usage
+ corpus = ["hello", "world", "how", "are", "you", "today", "feeling", "great", "is", "a", "beautiful", "day"]
+ start_word = "hello"
+ generated_text = generate_text(corpus, start_word)
+ print(generated_text)
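
Note: the corpus argument is a list of words, so the same function can also be driven by tokenizing running text with a plain split(). A minimal sketch, assuming the generate_text function defined in tg.py above; the sample sentence and the start word "the" are made up for illustration only:

    # Hypothetical usage: build the corpus by splitting a raw string into words
    sample_text = "the cat sat on the mat and the cat slept"
    corpus = sample_text.split()                      # ["the", "cat", "sat", ...]
    print(generate_text(corpus, "the", max_length=8))

Because "the" appears several times in the sample sentence, its successors ("cat", "mat") occur with different frequencies, which is what makes the random choice behave like a bigram model rather than a fixed chain.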