ejschwartz committed on
Commit
f154634
·
1 Parent(s): 5fcff16
Files changed (2) hide show
  1. Dockerfile +6 -5
  2. main.py +105 -33
Dockerfile CHANGED
@@ -11,8 +11,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
11
  apt-get -y update && apt-get -y install -y python3-pip python-is-python3 \
12
  git pkg-config libsentencepiece-dev nano sudo unzip
13
 
14
- # RUN --mount=type=cache,target=/root/.npm
15
- RUN --mount=type=cache,target=/root/.cache pip install --no-cache-dir --upgrade -r /code/requirements.txt
16
 
17
  # Install Ghidrathon
18
 
@@ -20,7 +19,7 @@ WORKDIR /tmp/
20
 
21
  RUN wget https://github.com/mandiant/Ghidrathon/releases/download/v4.0.0/Ghidrathon-v4.0.0.zip
22
  RUN unzip Ghidrathon-v4.0.0.zip -d ghidrathon
23
- RUN python -m pip install -r ghidrathon/requirements.txt
24
  RUN python ghidrathon/ghidrathon_configure.py /ghidra
25
  RUN unzip ghidrathon/Ghidrathon-v4.0.0.zip -d /ghidra/Ghidra/Extensions
26
 
@@ -28,10 +27,12 @@ RUN unzip ghidrathon/Ghidrathon-v4.0.0.zip -d /ghidra/Ghidra/Extensions
28
 
29
  WORKDIR /
30
 
31
- RUN git clone -b main https://github.com/edmcman/DIRTY
32
  #ADD ./DIRTY /DIRTY
33
 
34
- RUN --mount=type=cache,target=/root/.cache pip install --no-cache-dir --upgrade -r /DIRTY/requirements.txt
 
 
35
 
36
  # Set up a new user named "user" with user ID 1000
37
  RUN useradd -m -u 1000 user
 
11
  apt-get -y update && apt-get -y install -y python3-pip python-is-python3 \
12
  git pkg-config libsentencepiece-dev nano sudo unzip
13
 
14
+ RUN --mount=type=cache,target=/root/.cache pip install --upgrade -r /code/requirements.txt
 
15
 
16
  # Install Ghidrathon
17
 
 
19
 
20
  RUN wget https://github.com/mandiant/Ghidrathon/releases/download/v4.0.0/Ghidrathon-v4.0.0.zip
21
  RUN unzip Ghidrathon-v4.0.0.zip -d ghidrathon
22
+ RUN --mount=type=cache,target=/root/.cache python -m pip install -r ghidrathon/requirements.txt
23
  RUN python ghidrathon/ghidrathon_configure.py /ghidra
24
  RUN unzip ghidrathon/Ghidrathon-v4.0.0.zip -d /ghidra/Ghidra/Extensions
25
 
 
27
 
28
  WORKDIR /
29
 
30
+ RUN git clone -b main https://github.com/edmcman/DIRTY # 09-20-24
31
  #ADD ./DIRTY /DIRTY
32
 
33
+ RUN --mount=type=cache,target=/root/.cache pip install --upgrade -r /DIRTY/requirements.txt
34
+
35
+ RUN --mount=type=cache,target=/root/.cache (cd /root/.cache; (echo "b1e812b758eccf402271607c40fa491b5486742abf3706be174dc3f4fe87b9dd data1.tar.bz2" | sha256sum -c || wget -O '/root/.cache/data1.tar.bz2' 'https://cmu.box.com/shared/static/nx9fyn8jx0i9p4bftw8f2giqlufnoyj5')) && tar -xvjf /root/.cache/data1.tar.bz2 -C /DIRTY/dirty
36
 
37
  # Set up a new user named "user" with user ID 1000
38
  RUN useradd -m -u 1000 user
main.py CHANGED
@@ -6,78 +6,150 @@ import os
6
  import sys
7
  import json
8
 
 
9
  def get_functions(file):
10
 
11
  with tempfile.TemporaryDirectory() as TEMP_DIR:
12
 
13
- subprocess.run(f"/ghidra/support/analyzeHeadless {TEMP_DIR} Project -import {file} -postscript /home/user/app/scripts/dump_functions.py {TEMP_DIR}/funcs.json", shell=True)
 
 
 
14
 
15
  json_funcs = json.load(open(f"{TEMP_DIR}/funcs.json"))
16
 
17
  return json_funcs
18
 
 
19
  with gr.Blocks() as demo:
20
 
21
- all_dis_state = gr.State()
22
 
23
  intro = gr.Markdown(
24
  """
25
- # DIRTY-Ghidra Testing
26
- First, upload a binary.
 
 
27
  """
28
  )
29
 
30
  file_widget = gr.File(label="Executable file")
31
 
32
  with gr.Column(visible=False) as col:
33
- #output = gr.Textbox("Output")
34
 
35
- gr.Markdown("""
36
- Great, you selected an executable! Now pick the function you would like to analyze.
37
- """)
 
 
 
 
38
 
39
- fun_dropdown = gr.Dropdown(label="Select a function", choices=["Woohoo!"], interactive=True)
 
 
40
 
41
- gr.Markdown("""
42
- Below you can find the selected function's disassembly, and the model's
43
- prediction of whether the function is an object-oriented method or a
44
- regular function.
45
- """)
46
 
47
  with gr.Row(visible=True) as result:
48
- disassembly = gr.Textbox(label="Disassembly", lines=20)
49
- with gr.Column():
50
- clazz = gr.Label()
51
- #interpret_button = gr.Button("Interpret (very slow)")
52
- #interpretation = gr.components.Interpretation(disassembly)
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def file_change_fn(file):
55
 
56
  if file is None:
57
- return {
58
- col: gr.update(visible=False),
59
- all_dis_state: None
60
- }
61
  else:
62
 
63
  try:
64
  progress = gr.Progress()
65
- progress(0, desc="Analyzing binary...")
 
 
 
66
  fun_data = get_functions(file.name)
67
- #print(fun_data)
68
 
69
- addrs = [(f"{name} ({hex(int(addr))})", int(addr)) for addr, name in fun_data.items()]
70
- except:
71
- raise gr.Error("Unable to obtain functions")
 
 
 
72
 
73
  return {
74
- col: gr.Column(visible=True),
75
- fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
76
- all_dis_state: fun_data
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  # Need to put intro as output to get progress to work!
80
- file_widget.change(file_change_fn, file_widget, outputs=[intro, col, fun_dropdown, all_dis_state])
 
 
 
 
 
 
 
 
81
 
82
  # spaces only shows stderr..
83
  os.dup2(sys.stdout.fileno(), sys.stderr.fileno())
 
6
  import sys
7
  import json
8
 
9
+
10
  def get_functions(file):
11
 
12
  with tempfile.TemporaryDirectory() as TEMP_DIR:
13
 
14
+ subprocess.run(
15
+ f"/ghidra/support/analyzeHeadless {TEMP_DIR} Project -import {file} -postscript /home/user/app/scripts/dump_functions.py {TEMP_DIR}/funcs.json",
16
+ shell=True,
17
+ )
18
 
19
  json_funcs = json.load(open(f"{TEMP_DIR}/funcs.json"))
20
 
21
  return json_funcs
22
 
23
+
24
  with gr.Blocks() as demo:
25
 
26
+ state = gr.State()
27
 
28
  intro = gr.Markdown(
29
  """
30
+ # DIRTY-Ghidra Inference Demo
31
+ Welcome! This is a demo of DIRTY-Ghidra, a tool that predicts variable names and types for Ghidra's decompiler.
32
+
33
+ To get started, upload a binary.
34
  """
35
  )
36
 
37
  file_widget = gr.File(label="Executable file")
38
 
39
  with gr.Column(visible=False) as col:
40
+ # output = gr.Textbox("Output")
41
 
42
+ gr.Markdown(
43
+ """
44
+ Great, you selected an executable! Now pick the function you would like
45
+ to analyze. Simple functions (without variables) will probably fail, so
46
+ you may have to try a few before you find one that works.
47
+ """
48
+ )
49
 
50
+ fun_dropdown = gr.Dropdown(
51
+ label="Select a function", choices=["Woohoo!"], interactive=True
52
+ )
53
 
54
+ gr.Markdown(
55
+ """
56
+ Below you can find some information.
57
+ """
58
+ )
59
 
60
  with gr.Row(visible=True) as result:
61
+ disassembly = gr.Textbox(
62
+ label="Disassembly", value="Please wait...", lines=20
63
+ )
64
+ original_decompile = gr.Textbox(
65
+ label="Original Decompilation", value="Please wait...", lines=20
66
+ )
67
+ decompile = gr.Textbox(
68
+ label="Renamed and retyped Decompilation",
69
+ value="Please wait...",
70
+ lines=20,
71
+ )
72
+ model_output = gr.Textbox(
73
+ label="Model Output", value="Please wait...", lines=4
74
+ )
75
+ # with gr.Column():
76
+ # clazz = gr.Label()
77
+ # interpret_button = gr.Button("Interpret (very slow)")
78
+ # interpretation = gr.components.Interpretation(disassembly)
79
 
80
  def file_change_fn(file):
81
 
82
  if file is None:
83
+ return {col: gr.update(visible=False), state: {"file": None}}
 
 
 
84
  else:
85
 
86
  try:
87
  progress = gr.Progress()
88
+ progress(
89
+ 0,
90
+ desc=f"Analyzing binary {os.path.basename(file.name)} with Ghidra...",
91
+ )
92
  fun_data = get_functions(file.name)
93
+ # print(fun_data)
94
 
95
+ addrs = [
96
+ (f"{name} ({hex(int(addr))})", int(addr))
97
+ for addr, name in fun_data.items()
98
+ ]
99
+ except Exception as e:
100
+ raise gr.Error(f"Unable to analyze binary with Ghidra: {e}")
101
 
102
  return {
103
+ col: gr.Column(visible=True),
104
+ fun_dropdown: gr.Dropdown(choices=addrs, value=addrs[0][1]),
105
+ state: {"file": file},
106
+ }
107
+
108
def function_change_fn(selected_fun, state, progress=gr.Progress()):
    """Run DIRTY inference on the selected function and update the result boxes.

    Args:
        selected_fun: Value of ``fun_dropdown``: the function address as an
            int, as set up by ``file_change_fn``.
        state: The session state dict; ``state["file"]`` is the uploaded file
            object whose ``.name`` is the binary's path.
        progress: Gradio progress tracker injected by Gradio.

    Returns:
        A dict of updates for ``disassembly``, ``original_decompile``,
        ``decompile`` and ``model_output``, taken from the same-named keys of
        the JSON written by ``DIRTY_infer.py``.

    Raises:
        gr.Error: If nothing is selected/uploaded, if Ghidra or the script
            could not be run, or if the script reported an ``"exception"``.
    """
    # The dropdown can be empty and the state unset; fail with a readable
    # message instead of a TypeError from hex(None) / None["file"].
    if selected_fun is None or not state or state.get("file") is None:
        raise gr.Error("Please upload a binary and select a function first.")

    with tempfile.TemporaryDirectory() as temp_dir:

        print(selected_fun)

        progress(0, desc=f"Running DIRTY Ghidra on {hex(selected_fun)}...")

        out_path = f"{temp_dir}/funcs.json"

        try:
            # Argument list, no shell: the path comes from a user upload and
            # must not be shell-interpreted.
            subprocess.run(
                [
                    "/ghidra/support/analyzeHeadless",
                    temp_dir,
                    "Project",
                    "-import",
                    str(state["file"].name),
                    "-postscript",
                    "/DIRTY/scripts/DIRTY_infer.py",
                    out_path,
                    str(selected_fun),
                ],
            )

            with open(out_path) as json_file:
                json_info = json.load(json_file)

        except Exception as e:
            raise gr.Error(f"Unable to run DIRTY Ghidra: {e}") from e

    # Checked outside the try block so this error is not caught and
    # re-wrapped as "Unable to run DIRTY Ghidra: DIRTY Ghidra failed: ...".
    if "exception" in json_info:
        raise gr.Error(f"DIRTY Ghidra failed: {json_info['exception']}")

    return {
        disassembly: gr.Textbox(value=json_info["disassembly"]),
        original_decompile: gr.Textbox(value=json_info["original_decompile"]),
        decompile: gr.Textbox(value=json_info["decompile"]),
        model_output: gr.Textbox(value=json_info["model_output"]),
    }
142
 
143
  # Need to put intro as output to get progress to work!
144
+ file_widget.change(
145
+ file_change_fn, file_widget, outputs=[intro, state, col, fun_dropdown]
146
+ )
147
+
148
+ fun_dropdown.change(
149
+ function_change_fn,
150
+ inputs=[fun_dropdown, state],
151
+ outputs=[disassembly, original_decompile, decompile, model_output],
152
+ )
153
 
154
  # spaces only shows stderr..
155
  os.dup2(sys.stdout.fileno(), sys.stderr.fileno())