ProCreations
committed on
Rename tg.ggml to tg.py
tg.ggml
DELETED
@@ -1,69 +0,0 @@
-// Model definition
-model MyModel {
-  // Input features
-  input text: string;
-
-  // Tokenization layer
-  tokenizer {
-    type: SentencePiece;
-    vocab_size: 256; // Adjust based on dataset
-  }
-
-  // Embedding layer
-  embeddings {
-    dim: 128;
-  }
-
-  // Encoder layers
-  encoder {
-    type: lstm;
-    units: 128;
-    num_layers: 2;
-    dropout: 0.2; // Add dropout for regularization
-  }
-
-  // Decoder layers
-  decoder {
-    type: gru;
-    units: 64;
-    num_layers: 2;
-    dropout: 0.1; // Add dropout for regularization
-  }
-
-  // Attention mechanism
-  attention {
-    type: scaled_dot_product;
-  }
-
-  // Output layer
-  output {
-    type: dense;
-    units: vocab_size;
-  }
-
-  // Loss function
-  loss {
-    type: softmax_cross_entropy;
-  }
-
-  // Optimizer
-  optimizer {
-    type: adam;
-    learning_rate: 0.001;
-  }
-}
-
-// Training parameters (in train.json)
-{
-  "data_path": "path/to/your/training_data.txt",
-  "batch_size": 32,
-  "epochs": 10,
-  "use_scheduled_sampling": true, // Enable scheduled sampling
-  "clip_gradients": 5.0, // Add gradient clipping to prevent exploding gradients
-  "use_batch_norm": true // Enable batch normalization
-}
-
-// Inference parameters
-inference {
-  // ... (Define input and output behavior)
-}
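Note: the deleted tg.ggml is not a real GGML artifact; it reads as a declarative sketch of a sequence-to-sequence text model (SentencePiece tokenizer with a 256-token vocabulary, 128-dimensional embeddings, a 2-layer LSTM encoder, a 2-layer GRU decoder, scaled dot-product attention, a dense output over the vocabulary, softmax cross-entropy, and Adam at learning rate 0.001). For orientation only, below is a minimal Keras sketch of roughly that architecture. The layer sizes come from the config above; the input pipeline, the learned-scale Attention layer standing in for scaled dot-product attention, and the decoder-to-encoder projection are assumptions not present in the original file.

import tensorflow as tf

# Sizes taken from the deleted config; everything else here is an assumption.
VOCAB_SIZE = 256   # tokenizer vocab_size
EMBED_DIM = 128    # embeddings.dim
ENC_UNITS = 128    # encoder.units
DEC_UNITS = 64     # decoder.units

# Encoder: token ids -> embedding -> 2 stacked LSTM layers with dropout 0.2.
enc_in = tf.keras.Input(shape=(None,), dtype="int32")
x = tf.keras.layers.Embedding(VOCAB_SIZE, EMBED_DIM)(enc_in)
x = tf.keras.layers.LSTM(ENC_UNITS, return_sequences=True, dropout=0.2)(x)
enc_out = tf.keras.layers.LSTM(ENC_UNITS, return_sequences=True, dropout=0.2)(x)

# Decoder: token ids -> embedding -> 2 stacked GRU layers with dropout 0.1.
dec_in = tf.keras.Input(shape=(None,), dtype="int32")
y = tf.keras.layers.Embedding(VOCAB_SIZE, EMBED_DIM)(dec_in)
y = tf.keras.layers.GRU(DEC_UNITS, return_sequences=True, dropout=0.1)(y)
dec_out = tf.keras.layers.GRU(DEC_UNITS, return_sequences=True, dropout=0.1)(y)

# Dot-product attention from decoder states over encoder states; the decoder is
# projected to the encoder width so the dot products line up (an assumption, since
# the config does not say how the 64- and 128-unit sides are reconciled).
query = tf.keras.layers.Dense(ENC_UNITS)(dec_out)
context = tf.keras.layers.Attention(use_scale=True)([query, enc_out])
merged = tf.keras.layers.Concatenate()([dec_out, context])

# Dense output over the vocabulary, cross-entropy on logits, Adam at 0.001.
logits = tf.keras.layers.Dense(VOCAB_SIZE)(merged)
model = tf.keras.Model([enc_in, dec_in], logits)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
model.summary()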
tg.py
ADDED
@@ -0,0 +1,36 @@
+import tensorflow as tf
+
+# Improved training corpus with more data and variety
+corpus = """
+The quick brown fox jumps over the lazy dog.
+Machine learning is the study of algorithms that can learn from data.
+Natural language processing is a subfield of artificial intelligence concerned with the interactions between computers and human language.
+Deep learning is a class of machine learning algorithms that use multiple layers of artificial neural networks to learn from data.
+"""
+
+# Tokenization and vectorization (one-hot encoding for simplicity)
+tokens = corpus.split()
+vocab_size = len(set(tokens))
+token_vectors = tf.one_hot(tf.strings.lookup(tokens, tf.constant([0] * vocab_size)), vocab_size)
+
+# Model architecture with embedding, LSTM layers, and dense output
+model = tf.keras.Sequential([
+    tf.keras.layers.Embedding(vocab_size, 8),  # Embedding layer
+    tf.keras.layers.LSTM(16, return_sequences=True),  # Recurrent LSTM layer
+    tf.keras.layers.LSTM(16),
+    tf.keras.layers.Dense(vocab_size)  # Output layer
+])
+
+# Compile the model with categorical crossentropy loss and Adam optimizer
+model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
+
+# Train the model for a limited number of epochs (adjust as needed)
+model.fit(token_vectors[:-1], token_vectors[1:], epochs=20)
+
+# Generate text starting with "The"
+prompt_vector = tf.one_hot(tf.constant([tokens.index("The")]), vocab_size)
+for i in range(10):
+    prediction = model.predict(tf.expand_dims(prompt_vector, axis=0))
+    predicted_index = tf.argmax(prediction, axis=1).numpy()[0]
+    prompt_vector = tf.concat([prompt_vector, tf.one_hot([predicted_index], vocab_size)], axis=0)
+    print(tokens[predicted_index], end=" ")
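Note: as committed, the script will not run: tf.strings.lookup is not a public TensorFlow function with that signature, the Embedding layer expects integer token ids rather than one-hot vectors, and categorical_crossentropy needs one-hot targets while the generation loop feeds the model a prompt of ever-growing shape. A minimal runnable sketch of what the script appears to intend, word-level next-token prediction over the same toy corpus, might look like the following; the integer-id vocabulary, the sparse loss, and the greedy one-token-at-a-time generation loop are assumptions, not part of the commit.

import tensorflow as tf

# Same toy corpus as the committed script.
corpus = """
The quick brown fox jumps over the lazy dog.
Machine learning is the study of algorithms that can learn from data.
Natural language processing is a subfield of artificial intelligence concerned with the interactions between computers and human language.
Deep learning is a class of machine learning algorithms that use multiple layers of artificial neural networks to learn from data.
"""

# Word-level vocabulary: map each unique whitespace-separated token to an integer id.
tokens = corpus.split()
vocab = sorted(set(tokens))
vocab_size = len(vocab)
word_to_id = {w: i for i, w in enumerate(vocab)}
ids = [word_to_id[w] for w in tokens]

# Next-token prediction: input is token t (as a length-1 sequence), target is token t+1.
x = tf.constant([[i] for i in ids[:-1]])
y = tf.constant(ids[1:])

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 8),   # integer ids -> 8-dim vectors
    tf.keras.layers.LSTM(16),                   # recurrent layer over the length-1 sequence
    tf.keras.layers.Dense(vocab_size),          # logits over the vocabulary
])

# Integer targets, so use the sparse variant of cross-entropy on raw logits.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
model.fit(x, y, epochs=20, verbose=0)

# Greedy generation: start from "The" and repeatedly feed back the argmax token.
current_id = word_to_id["The"]
generated = ["The"]
for _ in range(10):
    logits = model.predict(tf.constant([[current_id]]), verbose=0)
    current_id = int(tf.argmax(logits, axis=-1)[0])
    generated.append(vocab[current_id])
print(" ".join(generated))

Because each training example is a single token, the LSTM sees no real context here; the sketch mirrors the committed script's word-pair setup rather than proposing a stronger model.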