ProCreations
committed on
Update tg.ggml
Browse files
tg.ggml
CHANGED
@@ -11,7 +11,7 @@ model MyModel {
|
|
11 |
|
12 |
// Embedding layer
|
13 |
embeddings {
|
14 |
-
dim:
|
15 |
}
|
16 |
|
17 |
// Encoder layers
|
@@ -19,13 +19,20 @@ model MyModel {
|
|
19 |
type: lstm;
|
20 |
units: 128;
|
21 |
num_layers: 2;
|
|
|
22 |
}
|
23 |
|
24 |
// Decoder layers
|
25 |
decoder {
|
26 |
type: gru;
|
27 |
units: 64;
|
28 |
-
num_layers:
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
}
|
30 |
|
31 |
// Output layer
|
@@ -46,9 +53,14 @@ model MyModel {
|
|
46 |
}
|
47 |
}
|
48 |
|
49 |
-
// Training parameters
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
52 |
}
|
53 |
|
54 |
// Inference parameters
|
|
|
11 |
|
12 |
// Embedding layer
|
13 |
embeddings {
|
14 |
+
dim: 128;
|
15 |
}
|
16 |
|
17 |
// Encoder layers
|
|
|
19 |
type: lstm;
|
20 |
units: 128;
|
21 |
num_layers: 2;
|
22 |
+
dropout: 0.2; // Add dropout for regularization
|
23 |
}
|
24 |
|
25 |
// Decoder layers
|
26 |
decoder {
|
27 |
type: gru;
|
28 |
units: 64;
|
29 |
+
num_layers: 2;
|
30 |
+
dropout: 0.1; // Add dropout for regularization
|
31 |
+
}
|
32 |
+
|
33 |
+
// Attention mechanism
|
34 |
+
attention {
|
35 |
+
type: scaled_dot_product;
|
36 |
}
|
37 |
|
38 |
// Output layer
|
|
|
53 |
}
|
54 |
}
|
55 |
|
56 |
+
// Training parameters (in train.json)
|
57 |
+
{
|
58 |
+
"data_path": "path/to/your/training_data.txt",
|
59 |
+
"batch_size": 32,
|
60 |
+
"epochs": 10,
|
61 |
+
"use_scheduled_sampling": true, // Enable scheduled sampling
|
62 |
+
"clip_gradients": 5.0, // Add gradient clipping to prevent exploding gradients
|
63 |
+
"use_batch_norm": true // Enable batch normalization
|
64 |
}
|
65 |
|
66 |
// Inference parameters
|