In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MODEL_NAME = "/workspace/model"
model_token = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"


In [2]:
import json
import torch
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_token)
tokenizer.pad_token = tokenizer.eos_token 

In [3]:
json_path = "final_Graph.json"
with open(json_path, "r") as f:
 data = json.load(f)

test_data = data[0]


In [4]:
ROLE_TOKENS = {
 "human": "<|User|>", 
 "gpt": "<|Assistant|>", 
}
GRAPH_LENGTH = 512
max_seq_length = 1100 + GRAPH_LENGTH

In [5]:
conversations = test_data.get("conversations")
embeddings = test_data.get("embedding") 

graph_embedding = torch.tensor(embeddings, dtype=torch.float32)

In [6]:
question1 = conversations[0]["value"].replace("", "").strip()
question1

'What are the signal definitions in the Verilog code for the calculator module, and what are their purposes?'

In [11]:
import json
import torch
import os
from transformers import AutoTokenizer
# tokenizer = AutoTokenizer.from_pretrained(model_name)
from transformers import Trainer, TrainingArguments, AutoModelForCausalLM
from torch.utils.data import Dataset
from transformers import AutoModelForCausalLM
import torch
import torch.nn as nn

class GraphAwareLM(AutoModelForCausalLM):
 def __init__(self, config):
 super().__init__(config)
 self.model = AutoModelForCausalLM.from_config(config)
 
 # ✅ 线性变换,把 512 维的 `graph_embedding` 映射到 `hidden_size`
 self.graph_proj = nn.Linear(512, config.hidden_size)

 def forward(self, input_ids=None, attention_mask=None, labels=None, graph_embedding=None):
 """
 `graph_embedding` 形状: (batch_size, 512)
 `input_ids` 形状: (batch_size, seq_len)
 """
 # ✅ 获取 token embedding
 inputs_embeds = self.model.get_input_embeddings()(input_ids) # (batch_size, seq_len, hidden_size)

 # ✅ 变换 graph embedding 到 hidden_size
 graph_embedding_token = self.graph_proj(graph_embedding.squeeze(0)) # (batch_size, hidden_size)

 # ✅ 在 `inputs_embeds` 前面拼接 graph_embedding
 graph_embedding_token = graph_embedding_token.unsqueeze(1) # (batch_size, 1, hidden_size)
 inputs_embeds = torch.cat([graph_embedding_token, inputs_embeds], dim=1) # (batch_size, seq_len+1, hidden_size)

 # ✅ 调整 attention mask
 if attention_mask is not None:
 graph_mask = torch.ones((attention_mask.shape[0], 1), device=attention_mask.device, dtype=attention_mask.dtype)
 attention_mask = torch.cat([graph_mask, attention_mask], dim=1) # (batch_size, seq_len+1)

 # ✅ 传入模型
 outputs = self.model(
 inputs_embeds=inputs_embeds,
 attention_mask=attention_mask,
 labels=labels,
 )

 return outputs

 @classmethod
 def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
 model = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
 model.graph_proj = nn.Linear(512, model.config.hidden_size)
 return model



In [12]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model = GraphAwareLM.from_pretrained(MODEL_NAME).to(device)

In [13]:
from transformers import AutoTokenizer

# ✅ 加载分词器
tokenizer = AutoTokenizer.from_pretrained(model_token)

# ✅ 输入文本
inputs = tokenizer(question1, return_tensors="pt",truncation=True,max_length=max_seq_length - GRAPH_LENGTH).to(device)

graph_embedding.to(device)



tensor([[-2.4214, -0.5552, 1.0389, -1.3428, -0.1341, 0.6100, -0.4200, -1.8584,
 -0.2880, -0.4779, 0.3452, -0.8934, -0.9216, 0.5600, 0.2474, -0.9009,
 -1.0995, 0.6065, 1.7662, -1.2281, 0.0000, -1.9196, 0.1920, -1.2770,
 -0.6918, -1.3762, -0.7639, -0.1023, 2.5149, 1.1990, -0.2678, -0.7488,
 -0.0000, 0.9108, 0.2010, -0.2639, 0.5023, -0.8752, 0.2083, 0.5740,
 0.3758, -0.7036, -1.3210, -0.8119, -0.5329, -0.2355, -0.2750, 1.6133,
 -2.3233, 0.3174, 0.0000, 0.5769, 0.3558, 0.2234, -0.0666, -0.6310,
 -0.3533, 0.9497, -0.9576, 0.1615, -0.0460, -1.1686, 1.4337, -1.2952,
 -1.1095, 0.5081, -1.9626, -0.3278, 0.7837, -2.4616, 0.3936, -0.3157,
 -1.6531, -0.0708, -0.6630, 0.4285, 0.1360, -0.7986, -0.1449, 0.0000,
 0.9076, 0.7794, 0.6391, 0.9840, 0.2970, 1.5463, 1.1554, -0.5432,
 0.7202, 0.0000, -0.2380, 0.0422, 0.0000, 0.4296, 0.2068, 0.3330,
 -0.5888, 0.0000, 1.0656, -0.2724, 0.7562, -0.6863, -1.6948, -0.1634,
 1.8262, 1.4235, 0.9178, -0.7475, -0.2682, 0.5534, 1.5643, -0.9898,
 -0.2911, 1.3752, 0.6331

In [14]:

generated_ids = inputs["input_ids"]
max_new_tokens = 1024
for _ in range(max_new_tokens):
 # ✅ 计算 logits 并进行生成
 with torch.no_grad():
 outputs = model(
 input_ids=generated_ids, # (batch_size, seq_len)
 attention_mask=inputs["attention_mask"], # (batch_size, seq_len)
 graph_embedding=graph_embedding, # (batch_size, 512)
 )


 logits = outputs.logits[:, -1, :] # 取最后一个 token 的 logits
 next_token = torch.argmax(logits, dim=-1, keepdim=True) # 贪心解码


 # ✅ **拼接到已生成序列**
 generated_ids = torch.cat([generated_ids, next_token], dim=-1)

 if next_token[:, 0] == tokenizer.eos_token_id:
 break

# ✅ 解码最终输出
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print("Generated Response:", generated_text)

RuntimeError: The size of tensor a (23) must match the size of tensor b (22) at non-singleton dimension 3

In [None]:
generated_ids = inputs["input_ids"]
max_new_tokens = 1024
for _ in range(max_new_tokens):
 # ✅ 计算 logits 并进行生成
 with torch.no_grad():
 outputs = model(
 input_ids=generated_ids, # (batch_size, seq_len)
 attention_mask=inputs["attention_mask"], # (batch_size, seq_len)
 graph_embedding=graph_embedding, # (batch_size, 512)
 )


 logits = outputs.logits[:, -1, :] # 取最后一个 token 的 logits
 next_token = torch.argmax(logits, dim=-1, keepdim=True) # 贪心解码


 # ✅ **拼接到已生成序列**
 generated_ids = torch.cat([generated_ids, next_token], dim=-1)

 if next_token[:, 0] == tokenizer.eos_token_id:
 break

# ✅ 解码最终输出
generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print("Generated Response:", generated_text)

Generated Response: How does the code handle combinational logic? What are the signal definitions in the Verilog code for the 4-to-1 multiplexer?
The code uses assign statements to handle combinational logic. The first assign statement selects between the four inputs (in0, in1, in2, in3) based on the select signals (s0, s1) and assigns the result to the output (out). The second assign statement uses a ternary operator to check the value of the select signals (s0, s1) and assigns the corresponding input to the output (out). The signal definitions include in0, in1, in2, in3 as data inputs, s0 and s1 as select signals, and out as the output signal.
How does the code handle sequential logic? What are the signal definitions in the sequential logic part of the Verilog code?
The sequential logic part of the code uses an always block with a sensitivity list that includes posedge clk, indicating that it is a sequential logic block. The output (out) is updated on the rising edge of the clock sig

In [None]:
import torch
from transformers import AutoTokenizer

# 加载 tokenizer
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# 加载训练好的模型
model_path = "/workspace/model"
model = GraphAwareLM.from_pretrained(model_path)
model.eval() # 设置为推理模式
