Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import re | |
| import subprocess | |
| import tempfile | |
| from transformers import pipeline | |
# Identifier of the text-classification checkpoint handed to `pipeline`.
MODEL_ID = "ejschwartz/oo-method-test-model-bylibrary"

# Built once at import time so every request reuses the same pipeline object.
classifier = pipeline("text-classification", model=MODEL_ID)
def run_model(text):
    """Classify ``text`` and report the score of every label.

    Args:
        text: The string handed to the module-level ``classifier`` pipeline.

    Returns:
        A dict with two keys: ``"label"`` holds the highest-scoring label
        (or ``"unknown"`` when the classifier produced no scores), and
        ``"confidences"`` holds a list of
        ``{"label": ..., "confidence": ...}`` dicts, one per label.
    """
    raw = classifier(text, top_k=None, truncation=True)

    # The classifier result may be a single dict, a flat list of dicts, or a
    # list nested one level deep; reduce all of these to a flat list.
    if isinstance(raw, dict):
        raw = [raw]
    if raw and isinstance(raw[0], list):
        raw = raw[0]

    confidences = [
        {"label": item["label"], "confidence": item["score"]} for item in raw
    ]

    if confidences:
        winner = max(confidences, key=lambda scored: scored["confidence"])
        best_label = winner["label"]
    else:
        best_label = "unknown"

    return {"label": best_label, "confidences": confidences}
def _record_function(func_dis, addr, lines):
    """Store ``lines`` as the body of the function at ``addr``; first entry wins."""
    if addr is None:
        # Nothing to store: no function header has been seen yet.
        return
    if addr in func_dis:
        print("Warning: Ignoring multiple functions at the same address")
        return
    func_dis[addr] = b"\n".join(lines)


def get_all_dis(bname, addrs=None):
    """Disassemble the binary ``bname`` and return its functions by address.

    Runs ``bat-ana`` to write an analysis file into a temporary file, then
    ``bat-dis`` to render that analysis as text, and splits the listing on
    ``;;; function 0x...`` header lines.

    Both tools are invoked with an argument list (no shell), so characters in
    ``bname`` or ``addrs`` are never interpreted as shell syntax.

    Args:
        bname: Path of the binary to analyse.
        addrs: Optional iterable of addresses; each one is passed to
            ``bat-ana`` as a ``--function-at`` argument.

    Returns:
        A dict mapping each function address (``int``) to its disassembly as
        ``bytes``: runs of spaces are collapsed to one space and every line
        containing ``;;`` is dropped. When the same address appears more
        than once, the first body is kept and a warning is printed.

    Raises:
        subprocess.CalledProcessError: If either tool exits with a non-zero
            status.
        FileNotFoundError: If either tool cannot be found on ``PATH``.
    """
    ana_cmd = ["bat-ana"]
    if addrs is not None:
        for addr in addrs:
            ana_cmd += ["--function-at", str(addr)]

    # The context manager guarantees the temporary analysis file is closed
    # and removed even if one of the tools fails.
    with tempfile.NamedTemporaryFile(
        prefix=os.path.basename(bname) + "_", suffix=".bat_ana"
    ) as anafile:
        ananame = anafile.name
        ana_cmd += ["--no-post-analysis", "-o", ananame, bname]
        subprocess.check_output(ana_cmd, stderr=subprocess.DEVNULL)
        output = subprocess.check_output(
            [
                "bat-dis",
                "--no-insn-address",
                "--no-bb-cfg-arrows",
                "--color=off",
                ananame,
            ],
            stderr=subprocess.DEVNULL,
        )

    output = re.sub(b" +", b" ", output)

    func_dis = {}
    last_func = None
    current_output = []
    for line in output.splitlines():
        if line.startswith(b";;; function 0x"):
            # A new header closes the previous function, if there was one.
            _record_function(func_dis, last_func, current_output)
            last_func = int(line.split()[2], 16)
            current_output = []
        if b";;" not in line:
            current_output.append(line)
    # The final function has no following header to close it.
    _record_function(func_dis, last_func, current_output)

    return func_dis
def get_funs(f):
    """Return the address of every function found in ``f``, one per line.

    Args:
        f: An object whose ``name`` attribute is the path of the binary.

    Returns:
        A newline-separated string of ``0x``-prefixed hexadecimal addresses.
    """
    disassembly_by_addr = get_all_dis(f.name)
    hex_addresses = [f"{addr:#x}" for addr in disassembly_by_addr]
    return "\n".join(hex_addresses)
with gr.Blocks() as demo:
    # Holds the {function address: disassembly bytes} mapping produced by
    # get_all_dis for the currently uploaded binary (None when no file).
    all_dis_state = gr.State()

    gr.Markdown(
        """
# Function/Method Detector
First, upload a binary.
This model was only trained on 32-bit MSVC++ binaries. You can provide
other types of binaries, but the result will probably be gibberish.
"""
    )

    file_widget = gr.File(label="Binary file")

    # Everything in this column stays hidden until a binary has been uploaded.
    with gr.Column(visible=False) as col:
        gr.Markdown("""
Great, you selected an executable! Now pick the function you would like to analyze.
""")

        # The choices are replaced with real addresses by file_change_fn.
        fun_dropdown = gr.Dropdown(label="Select a function", choices=["Woohoo!"], interactive=True)

        gr.Markdown("""
Below you can find the selected function's disassembly, and the model's
prediction of whether the function is an object-oriented method or a
regular function.
""")

        with gr.Row(visible=True) as result:
            disassembly = gr.Textbox(label="Disassembly", lines=20)
            with gr.Column():
                clazz = gr.Label()

    # Every file in the "examples" directory next to this script is offered
    # as a ready-made input for the file widget.
    example_widget = gr.Examples(
        examples=[f.path for f in os.scandir(os.path.join(os.path.dirname(__file__), "examples"))],
        inputs=file_widget,
        outputs=[all_dis_state, disassembly, clazz]
    )

    def file_change_fn(file, progress=gr.Progress()):
        """Disassemble the uploaded binary and populate the function dropdown.

        Args:
            file: The value of the file widget, or ``None`` when it is empty.
            progress: Gradio progress tracker, supplied through the default.

        Returns:
            A dict of component updates. With no file, the function column
            is hidden and the stored disassembly is cleared. Otherwise the
            column is shown, the dropdown lists every function address in
            hex (selecting the first, if any), and the disassembly mapping
            is stored in the state.
        """
        if file is None:
            return {col: gr.update(visible=False),
                    all_dis_state: None}

        progress(0, desc="Disassembling executable")
        fun_data = get_all_dis(file.name)

        addrs = ["%#x" % addr for addr in fun_data.keys()]
        default_addr = addrs[0] if addrs else None

        return {col: gr.update(visible=True),
                fun_dropdown: gr.update(choices=addrs, value=default_addr),
                all_dis_state: fun_data
                }

    def function_change_fn(selected_fun, fun_data):
        """Show the selected function's disassembly and the model prediction.

        Args:
            selected_fun: The dropdown value, a hexadecimal address string,
                or ``None`` when nothing is selected.
            fun_data: The stored {address: disassembly bytes} mapping, or
                ``None`` when no binary is loaded.

        Returns:
            A dict of component updates for the disassembly textbox and the
            label widget. Both are cleared when there is no usable selection
            instead of raising.
        """
        cleared = {disassembly: gr.update(value=""),
                   clazz: gr.update(value=None),
                   }

        # A binary without functions sets the dropdown to None, and the
        # state is None while no file is loaded.
        if not selected_fun or not fun_data:
            return cleared

        try:
            addr = int(selected_fun, 16)
        except ValueError:
            # E.g. the placeholder choice, which is not a hex address.
            return cleared

        raw_disassembly = fun_data.get(addr)
        if raw_disassembly is None:
            # The selection does not belong to the stored binary.
            return cleared

        # Undecodable bytes are replaced rather than aborting the request.
        disassembly_str = raw_disassembly.decode("utf-8", errors="replace")

        load_results = run_model(disassembly_str)
        # The label widget receives a {label: confidence} mapping.
        top_k = {e['label']: e['confidence'] for e in load_results['confidences']}

        return {disassembly: gr.update(value=disassembly_str),
                clazz: gr.update(value=top_k),
                }

    file_widget.change(file_change_fn, file_widget, [col, fun_dropdown, all_dis_state])
    fun_dropdown.change(function_change_fn, [fun_dropdown, all_dis_state], [disassembly, clazz])
# Turn on the app's request queue before serving.
demo.queue()

# Listen on every network interface at port 7860, with debug output and
# error reporting enabled.
demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, show_error=True)