jordiclive
committed on
Commit
•
2069da8
1
Parent(s):
1ea9dde
Update README.md
Browse files
README.md
CHANGED
@@ -51,7 +51,7 @@ The model was trained with flash attention and gradient checkpointing and deepsp
|
|
51 |
- Batch size: 128
|
52 |
- Max Length: 2048
|
53 |
- Learning rate: 5e-5
|
54 |
-
- Lora _r_:
|
55 |
- Lora Alpha: 32
|
56 |
|
57 |
## Prompting
|
@@ -80,7 +80,7 @@ from transformers import GenerationConfig
|
|
80 |
|
81 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
82 |
dtype = torch.float16
|
83 |
-
repo_id = "jordiclive/alpaca_gpt4-dolly_15k-vicuna-
|
84 |
base_model = "decapoda-research/llama-30b-hf"
|
85 |
|
86 |
# Model Loading
|
|
|
51 |
- Batch size: 128
|
52 |
- Max Length: 2048
|
53 |
- Learning rate: 5e-5
|
54 |
+
- Lora _r_: 64
|
55 |
- Lora Alpha: 32
|
56 |
|
57 |
## Prompting
|
|
|
80 |
|
81 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
82 |
dtype = torch.float16
|
83 |
+
repo_id = "jordiclive/lora-llama-33B-alpaca_gpt4-dolly_15k-vicuna-r64"
|
84 |
base_model = "decapoda-research/llama-30b-hf"
|
85 |
|
86 |
# Model Loading
|