Files changed (1)
  1. tg.py +26 -32
tg.py CHANGED
@@ -1,36 +1,30 @@
- import tensorflow as tf
-
- # Improved training corpus with more data and variety
- corpus = """
- The quick brown fox jumps over the lazy dog.
- Machine learning is the study of algorithms that can learn from data.
- Natural language processing is a subfield of artificial intelligence concerned with the interactions between computers and human language.
- Deep learning is a class of machine learning algorithms that use multiple layers of artificial neural networks to learn from data.
- """
-
- # Tokenization and vectorization (one-hot encoding for simplicity)
- tokens = corpus.split()
- vocab_size = len(set(tokens))
- token_vectors = tf.one_hot(tf.strings.lookup(tokens, tf.constant([0] * vocab_size)), vocab_size)
-
- # Model architecture with embedding, LSTM layers, and dense output
- model = tf.keras.Sequential([
-     tf.keras.layers.Embedding(vocab_size, 8),  # Embedding layer
-     tf.keras.layers.LSTM(16, return_sequences=True),  # Recurrent LSTM layer
-     tf.keras.layers.LSTM(16),
-     tf.keras.layers.Dense(vocab_size)  # Output layer
- ])
-
- # Compile the model with categorical crossentropy loss and Adam optimizer
- model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
-
- # Train the model for a limited number of epochs (adjust as needed)
- model.fit(token_vectors[:-1], token_vectors[1:], epochs=20)
-
- # Generate text starting with "The"
- prompt_vector = tf.one_hot(tf.constant([tokens.index("The")]), vocab_size)
- for i in range(10):
-     prediction = model.predict(tf.expand_dims(prompt_vector, axis=0))
-     predicted_index = tf.argmax(prediction, axis=1).numpy()[0]
-     prompt_vector = tf.concat([prompt_vector, tf.one_hot([predicted_index], vocab_size)], axis=0)
-     print(tokens[predicted_index], end=" ")
+ import random
+
+ def generate_text(corpus, start_word, max_length=100):
+     """
+     Generates text using a bigram language model.
+
+     Args:
+         corpus: A list of words from the training text.
+         start_word: The word to start the generation.
+         max_length: The maximum number of words to generate.
+
+     Returns:
+         A string of generated text.
+     """
+     text = start_word
+     prev_word = start_word
+     for _ in range(max_length):
+         # Get all words that immediately follow the previous word in the corpus
+         next_word_candidates = [corpus[i + 1] for i, word in enumerate(corpus[:-1]) if word == prev_word]
+         # Stop if the previous word has no recorded successor
+         if not next_word_candidates:
+             break
+         # Randomly choose the next word; repeated successors weight the choice by frequency
+         next_word = random.choice(next_word_candidates)
+         text += " " + next_word
+         prev_word = next_word
+     return text
+
+ # Example usage
+ corpus = ["hello", "world", "how", "are", "you", "today", "feeling", "great", "is", "a", "beautiful", "day"]
+ start_word = "hello"
+ generated_text = generate_text(corpus, start_word)
+ print(generated_text)
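
Note: the corpus argument is a list of words, so the same function can also be driven by tokenizing running text with a plain split(). A minimal sketch, assuming the generate_text function defined in tg.py above; the sample sentence and the start word "the" are made up for illustration only:

    # Hypothetical usage: build the corpus by splitting a raw string into words
    sample_text = "the cat sat on the mat and the cat slept"
    corpus = sample_text.split()                      # ["the", "cat", "sat", ...]
    print(generate_text(corpus, "the", max_length=8))

Because "the" appears several times in the sample sentence, its successors ("cat", "mat") occur with different frequencies, which is what makes the random choice behave like a bigram model rather than a fixed chain.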