"""Retrieval-augmented query handling over Chroma vector stores.

The module builds a parameter dictionary with `setupDB`, answers queries with
`process_query` (retrieval, optionally followed by an LLM call through the
Hugging Face inference API) and bundles the supporting helpers.
"""
import functools
import os
import re

from huggingface_hub import InferenceClient
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from transformers import pipeline
from sentence_transformers.cross_encoder import CrossEncoder


def setupDB(domain, hasLLM):
    """Build the parameter dictionary consumed by `process_query`.

    Args:
        domain: Key into the state table returned by `nandState`; selects the
            support database (for example 'en' or 'ch').
        hasLLM: Truthy when `process_query` should send the retrieved context
            to the LLM instead of returning it directly.

    Returns:
        A dict with the keys 'history', 'disnum', 'domain', 'crossmodel',
        'insts_db', 'support_db', 'pdf_dbs' and 'hasLLM'.
    """
    # Two slots: [previous query, previous LLM answer]; filled by process_query.
    history = ["", ""]
    crossmodel = CrossEncoder("cross-encoder/stsb-distilroberta-base")
    support_db = nandGetChroma(domain)
    insts_db = nandGetChroma("insts")

    pdf_dbs = []
    if domain == 'en':
        # The PDF database list is currently empty, so no PDF store is loaded.
        pdfs = []  # "pdf_0em", "pdf_1em", "pdf_2em", "pdf_3em","pdf_4em"]
        for onepdf in pdfs:
            pdf_dbs.append(nandGetChroma(onepdf))

    return {
        'history': history,
        'disnum': 10,
        'domain': domain,
        'crossmodel': crossmodel,
        'insts_db': insts_db,
        'support_db': support_db,
        'pdf_dbs': pdf_dbs,
        'hasLLM': hasLLM,
    }


def remapScore(domain, inscore):
    """Map a retrieval score onto an integer through a quadratic curve.

    With ``x = 1 - inscore`` the result is ``int((a*x*x + b*x) * 100)``, where
    ``(a, b)`` is ``(-0.2, 1.2)`` for the 'ch' domain and ``(-1.2, 2.2)`` for
    every other domain.
    """
    a, b = (-0.2, 1.2) if domain == 'ch' else (-1.2, 2.2)
    xin = 1 - inscore
    y = a * xin * xin + b * xin
    return int(y * 100)


def _source_prefix(source):
    """Return the label shown in front of a result, chosen from its source path."""
    if re.search(r"api\.", source):
        return "API:"
    if re.search(r"man\.", source):
        return "Manual:"
    if re.search(r"\.pdf$", source):
        return "PDF:"
    return "Help:"


def process_query(iniquery, para):
    """Answer one user query using the databases held in `para`.

    Args:
        iniquery: Raw query text.
        para: Parameter dictionary as produced by `setupDB`.

    Returns:
        The LLM output when ``para['hasLLM']`` is truthy, otherwise the
        formatted list of retrieved passages.

    Side effects:
        Prints diagnostics and, on the LLM path, stores the query and the
        answer in ``para['history']``.
    """
    # NOTE(review): the character inside the original pattern was lost to a
    # raw line break in the source; stripping CR/LF is assumed -- confirm.
    query = re.sub(r"[\r\n]", "", iniquery)
    ch2en, query = toEn(query)
    if ch2en:
        print(f"Received from connected users : {query}")
    else:
        print(f"Received from connected users : {query}", end='')

    disnum = para['disnum']
    domain = para['domain']
    history = para['history']
    crossmodel = para['crossmodel']
    insts_db = para['insts_db']
    support_db = para['support_db']
    pdf_dbs = para['pdf_dbs']
    hasLLM = para['hasLLM']

    ret = ""
    # Score how close the query is to a request for writing an ECO script.
    needScriptScores = crossmodel.predict([["write a perl ECO script", query]])
    print(f"THE QUERY SCORE for creating eco script: score={needScriptScores[0]}")
    allapis = []
    threshold = 0.45
    if needScriptScores[0] > threshold:
        print(f"THE QUERY REQUIRES CREATING AN ECO SCRIPT score={needScriptScores[0]} > {threshold}")
        retinsts = insts_db.similarity_search_with_score(query, k=10)
        accu = 0
        for instdoc, instscore in retinsts:
            instname = instdoc.metadata['source']
            # Map the instruction file name onto its API file under src_en and
            # drop the numeric ".<n>" part of the name.
            otherfile = re.sub(r"^insts", "src_en", instname)
            otherfile = re.sub(r"\.\d+", "", otherfile)
            if otherfile not in allapis:
                allapis.append(otherfile)
                modfile = otherfile.replace("\\", "/")
                apisize = os.path.getsize(modfile)
                accu += apisize
                print(f"INST: {instname} SCORE: {instscore} API-size: {apisize} Accu: {accu}")

    # Gather candidates from the support database and every PDF database.
    results = []
    for doc, docscore in support_db.similarity_search_with_score(query, k=8):
        results.append([doc, docscore])
    for onepdfdb in pdf_dbs:
        for doc, docscore in onepdfdb.similarity_search_with_score(query, k=8):
            # PDF hits get a fixed 0.2 added to their score before sorting.
            results.append([doc, docscore + 0.2])
    results.sort(key=lambda x: x[1])

    index = 1
    for doc, docscore in results:
        source = doc.metadata['source']
        path = source  # source.replace("\\", "/")
        if path in allapis:
            print(f"dont use path={path}, it's in instruction list")
            continue
        prefix = _source_prefix(source)
        score = remapScore(domain, docscore)
        retcont = doc.page_content
        if re.search(r"\.pdf$", source):
            page = doc.metadata['page'] + 1
            subpage = doc.metadata['subpage']
            retcont += f"\nPDF{page} {subpage}\n"
        ret += f"Return {index} ({score}) {prefix} {retcont}\n"
        # Stop once the text passes 6000 characters or disnum entries are listed.
        if len(ret) > 6000:
            break
        index += 1
        if index > disnum:
            break

    if not hasLLM:
        return ret

    context = "Context information is below\n---------------------\n"
    if allapis:
        context += scriptExamples()
        for oneapi in allapis:
            modfile = oneapi.replace("\\", "/")
            cont = GetContent(modfile)
            # NOTE(review): the pattern below is empty as written, which makes
            # re.sub insert the label at every position of the text. The
            # intended pattern (possibly an HTML tag) appears to have been
            # lost -- confirm against the original file before relying on it.
            cont = re.sub("", " API Detail:", cont)
            cont = re.sub('<.*?>', '', cont)
            cont = re.sub('Examples:.*', '', cont, flags=re.DOTALL)
            context += cont
    context += ret

    prompt = f"{context}\n"
    prompt += "------------------------------------------\n"
    if allapis:
        prompt += "Given the context information and not prior knowledge, creat a Perl ECO script by following the format and sequence in the script examples provided above.\n"
    else:
        prompt += "Given the context information and not prior knowledge, answer the query.\n"
    prompt += f"Query: {query}\n"

    llmout = llmGenerate(prompt)
    history[0] = query
    history[1] = llmout
    outlen = len(llmout)
    prolen = len(prompt)
    print(f"Prompt len: {prolen} LLMOUT len: {outlen}")
    return llmout


@functools.lru_cache(maxsize=1)
def _get_translator():
    """Create the zh->en translation pipeline once and reuse it afterwards."""
    return pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")


def toEn(intxt):
    """Translate `intxt` to English when it contains CJK ideographs.

    Returns:
        ``(1, translated_text)`` when at least one character in the range
        U+4E00..U+9FFF is present, otherwise ``(0, intxt)`` unchanged.
    """
    pattern = re.compile(r'[\u4e00-\u9fff]+')
    if not pattern.search(intxt):
        return 0, intxt

    translator = _get_translator()
    ini_text = translator(intxt, max_length=500)[0]['translation_text']
    # Rewrite specific phrases in the translation to the wording used here.
    out_text = re.sub("ECO foot", "ECO Script", ini_text)
    out_text = re.sub("web-based", "netlist", out_text)
    out_text = re.sub(r"\bweb\b", "netlist", out_text)
    out_text = re.sub(r"\bwebsheet\b", "netlist", out_text)
    out_text = re.sub(r"\bweblists?\b", "netlist", out_text)
    print(f"AFTER RESULT: {out_text}")
    return 1, out_text


def nandGetChroma(domain):
    """Open the Chroma store registered for `domain` in `nandState`.

    The embedding model is the one named by the domain's 'model' entry.
    Raises KeyError when `domain` is not in the state table.
    """
    models, allState = nandState()
    chdb = allState[domain]['chroma']
    print(f"domain: {domain} has chroma dir {chdb}")
    model_ind = allState[domain]['model']
    model_name = models[model_ind]
    embedding_function = SentenceTransformerEmbeddings(model_name=model_name)
    chroma_db = Chroma(persist_directory=chdb, embedding_function=embedding_function)
    return chroma_db


def nandState():
    """Return ``(models, allState)``: model names and per-domain settings.

    `models` maps a short key to an embedding model name. `allState` maps a
    domain name to its settings; besides 'insts', 'en' and 'ch' it holds the
    generated entries 'pdf_0em' .. 'pdf_11em'.
    """
    models = {'em': "all-MiniLM-L6-v2",
              'en': "all-mpnet-base-v2",
              'ch': "shibing624/text2vec-base-chinese-sentence"}
    # chunk is to cut the big PDF page to smaller, 1000byte chunks, and chinese page into smaller chunks
    allState = {
        'insts': {'cstate': {}, 'pstate': {}, 'dir': 'insts', 'json': 'filestatus.insts.json', 'chroma': 'chroma_db_insts', 'model': 'en', 'chunk': 0},
        'en': {'cstate': {}, 'pstate': {}, 'dir': 'src_en', 'json': 'filestatus.english.json', 'chroma': 'chroma_db_en', 'model': 'en', 'chunk': 0},
        'ch': {'cstate': {}, 'pstate': {}, 'dir': 'src_ch', 'json': 'filestatus.chinese.json', 'chroma': 'chroma_db_ch', 'model': 'ch', 'chunk': 1},
    }
    for ind in range(12):
        name = f"pdf_{ind}em"
        allState[name] = {'cstate': {}, 'pstate': {}, 'dir': f"pdf_sub{ind}", 'json': f"filestatus.{name}.json", 'chroma': f"chroma_db_{name}", 'model': 'em', 'chunk': 1}
    return models, allState


def formatPrompt(message, history):
    """Build a query-rewrite prompt from the previous exchange.

    Returns `message` unchanged when ``history[0]`` is empty; otherwise a
    prompt containing the previous query, the previous answer and `message`.
    """
    if not history[0]:
        return message
    prompt = "Create a new query based on previous query/answer paire and current query:\n"
    prompt += f"Previous query: {history[0]}"
    prompt += f"Previous answer: {history[1]}"
    prompt += f"Current query: {message}"
    prompt += "New query:"
    return prompt


def llmNewQuery(prompt, history):
    """Send the prompt built by `formatPrompt` to the LLM and return its output."""
    newpend = formatPrompt(prompt, history)
    newquery = llmGenerate(newpend)
    return newquery


def llmGenerate(prompt, temperature=0.001, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0):
    """Generate text for `prompt` with mistralai/Mistral-7B-Instruct-v0.2.

    The generation is streamed and the token texts are concatenated into the
    returned string. Sampling is enabled with a fixed seed of 42.
    """
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    llmclient = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
    stream = llmclient.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    return "".join(response.token.text for response in stream)


def thoseRemove():
    """Return the single-item list ``["redundant"]``."""
    those = ["redundant"]
    return those


def GetContent(file):
    """Read and return the whole text content of the file at path `file`."""
    with open(file) as f:
        return f.read()


def scriptExamples():
    """Return the Perl ECO script examples that are prepended to the LLM context."""
    exp = """
#The first ECO scipt example for manual ECO:
use strict;
setup_eco("eco_example");
read_library("tsmc.5nm.lib");
read_design("-imp", "implementation.gv");
set_top("topmod");
change_pin("u_abc/state_reg_0_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_1_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_2_/D", "INVX1", "", "-");
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
#End of the manual ECO script example

#The second ECO script example for automatic ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");
#read_svf("-imp", "implementation.svf.txt");
read_design("-ref", "reference.gv");
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
# Preserve DFT Test Logic
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
fix_design();
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example

#The third ECO script example is for automatic metal only ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");
#read_svf("-imp", "implementation.svf.txt");
read_design("-ref", "reference.gv");# Read in Reference Netlist
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
read_lef("tsmc.lef"); # Read LEF
read_def("topmod.def"); # Read Design Exchange Format file
fix_design();
# Must run before get_spare_cells and map_spare_cells
get_spare_cells("*/*_SPARE*");
map_spare_cells();
report_eco(); # ECO report
check_design();# Check if the ECO causes any issue, like floating
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
write_perl("eco_result.pl");# Write out result in Perl script
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example

#The four ECO script example is the same as the third ECO script, except fix_design
# list_file option to load in the ECO points list file converted from RTL-to-RTL LEC result
fix_design("-list_file", "the_eco_points.txt");

#The 5th ECO script example is the same as the 3rd ECO script, except fix_design
# Enable flatten mode ECO. The default mode is hierarchical. The flatten mode is for small fix but the changes go across
# module boundaries
fix_design("-flatten");

#The 6th ECO script is similar to the third ECO script, but it dumps formality help file after LEC
run_lec(); # Run GOF LEC to generate Formality help files
write_compare_points("compare_points.report");
write_formality_help_files("fm_dir/formality_help"); # formality_help files are generated in fm_dir folder

#The 7th ECO script is similar to the third ECO script, but it uses gate array spare cells
fix_design();
# Must run before get_spare_cells and map_spare_cells
# Enable Gate Array Spare Cells Metal Only ECO Flow, map_spare_cells will map to Gate Array Cells only
get_spare_cells("-gate_array", "G*", "-gate_array_filler", "GFILL*|GDCAP*");
map_spare_cells();

#The 8th ECO script is similar to the third ECO script, but it uses only deleted gates or freed up gates in ECO as spare cells
fix_design();
# Must run before get_spare_cells and map_spare_cells
get_spare_cells("-addfreed");
map_spare_cells();

#The 9th ECO script is manual ECO, find all memory hierarchically and tie the pin TEST_SHIFT of memory to net "TEST_EN"
use strict;
setup_eco("eco_example");
read_library("tsmc.3nm.lib");
read_design("-imp", "from_backend.gv");
set_top("topmod");
# Get all memories hierarchically, instance naming, "U_HMEM*"
my @mems = get_cells("-hier", "U_HMEM*");
foreach my $mem (@mems){
  change_pin("$mem/TEST_SHIFT", "TEST_EN");
}
report_eco(); # ECO report
check_design();
write_verilog("mem_eco.v");
"""
    return exp