import torch


def test_lora_layer_replacement(lit_llama):
    from lit_llama.lora import lora, CausalSelfAttention as LoRACausalSelfAttention
    from lit_llama.model import LLaMA, LLaMAConfig

    config = LLaMAConfig()
    config.n_layer = 2
    config.n_head = 4
    config.n_embd = 8
    config.block_size = 8
    config.vocab_size = 8

    # instantiating the model inside the `lora` context manager should swap in the LoRA layers
    with lora(r=8, alpha=8, dropout=0.1):
        model = LLaMA(config)

    # every attention block must now be the LoRA variant of CausalSelfAttention
    assert isinstance(model.transformer.h[0].attn, LoRACausalSelfAttention)
    assert isinstance(model.transformer.h[1].attn, LoRACausalSelfAttention)
def test_lora_merge_unmerge(lit_llama):
    from lit_llama.lora import lora, mark_only_lora_as_trainable
    from lit_llama.model import LLaMA, LLaMAConfig

    config = LLaMAConfig(n_layer=1, n_head=2, n_embd=8, block_size=8, vocab_size=8)
    with lora(r=8, alpha=8, dropout=0.1):
        model = LLaMA(config)

    initial_weight = model.transformer.h[0].attn.c_attn.weight.clone()
    # switching to train mode before any merge has happened must leave the weight untouched
    model.train()
    assert torch.equal(model.transformer.h[0].attn.c_attn.weight, initial_weight)

    # perform an update to the LoRA weights
    mark_only_lora_as_trainable(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
    model(torch.randint(0, 8, size=(2, 4), dtype=torch.int64)).sum().backward()
    optimizer.step()
    optimizer.zero_grad()
    # the pretrained weight remains unchanged (only the LoRA matrices A and B are updated)
    assert torch.equal(model.transformer.h[0].attn.c_attn.weight, initial_weight)

    # 'merge' (triggered by eval) and then 'unmerge' (triggered by train) should neutralize each other
    weight_before = model.transformer.h[0].attn.c_attn.weight.clone()
    model.eval()
    assert not torch.equal(model.transformer.h[0].attn.c_attn.weight, weight_before)
    model.train()
    # note: numerically, `W + (A * B) - (A * B) == W` does not hold exactly
    assert torch.allclose(model.transformer.h[0].attn.c_attn.weight, weight_before)

    # calling eval/train multiple times in a row should not merge/unmerge multiple times
    model.eval()
    assert model.transformer.h[0].attn.c_attn.merged
    weight_after = model.transformer.h[0].attn.c_attn.weight.clone()
    model.eval()
    model.eval()
    assert torch.equal(model.transformer.h[0].attn.c_attn.weight, weight_after)

    model.train()
    assert not model.transformer.h[0].attn.c_attn.merged
    weight_after = model.transformer.h[0].attn.c_attn.weight.clone()
    model.train()
    model.train()
    assert torch.equal(model.transformer.h[0].attn.c_attn.weight, weight_after)