Sadjad Alikhani committed on
Commit: 48ca955
1 Parent(s): e69b52d

Update input_preprocess.py

Files changed (1)
  1. input_preprocess.py +365 -364
input_preprocess.py CHANGED
@@ -1,365 +1,366 @@
- # -*- coding: utf-8 -*-
- """
- Created on Fri Sep 13 16:13:29 2024
-
- This script generates preprocessed data from wireless communication scenarios,
- including token generation, patch creation, and data sampling for machine learning models.
-
- @author: salikha4
- """
-
- import numpy as np
- import os
- from tqdm import tqdm
- import time
- import pickle
- import DeepMIMOv3
-
- #%% Scenarios List
- def scenarios_list():
-     """Returns an array of available scenarios."""
-     return np.array([
-         'city_18_denver', 'city_15_indianapolis', 'city_19_oklahoma',
-         'city_12_fortworth', 'city_11_santaclara', 'city_7_sandiego'
-     ])
-
- #%% Token Generation
- def tokenizer(selected_scenario_names=None, manual_data=None, gen_raw=True):
-     """
-     Generates tokens by preparing and preprocessing the dataset.
-
-     Args:
-         selected_scenario_names (list): Names of the scenarios to generate data for.
-         manual_data (array): Optional user-supplied channels; bypasses scenario generation.
-         gen_raw (bool): Whether to record masked positions without altering the inputs. Defaults to True.
-
-     Returns:
-         preprocessed_data (list): One [input_ids, masked_tokens, masked_pos] sample per user.
-     """
-
-     if manual_data is not None:
-         patches = patch_maker(np.expand_dims(np.array(manual_data), axis=1))
-     else:
-         # Patch generation or loading
-         deepmimo_data = [DeepMIMO_data_gen(scenario_name) for scenario_name in selected_scenario_names]
-         n_scenarios = len(selected_scenario_names)
-
-         cleaned_deepmimo_data = [deepmimo_data_cleaning(deepmimo_data[scenario_idx]) for scenario_idx in range(n_scenarios)]
-
-         patches = [patch_maker(cleaned_deepmimo_data[scenario_idx]) for scenario_idx in range(n_scenarios)]
-         patches = np.vstack(patches)
-
-     # Define dimensions
-     patch_size = patches.shape[2]
-     n_patches = patches.shape[1]
-     n_masks_half = int(0.15 * n_patches / 2)  # mask 15% of patches, split between the real and imaginary halves
-     # sequence_length = n_patches + 1
-     # element_length = patch_size
-
-     # Special tokens are encoded as constant patch-sized vectors
-     word2id = {'[CLS]': 0.2 * np.ones((patch_size)), '[MASK]': 0.1 * np.ones((patch_size))}
-
-     # Generate preprocessed channels
-     preprocessed_data = []
-     for user_idx in tqdm(range(len(patches)), desc="Processing items"):
-         sample = make_sample(user_idx, patches, word2id, n_patches, n_masks_half, patch_size, gen_raw=gen_raw)
-         preprocessed_data.append(sample)
-
-     return preprocessed_data
-
- #%%
- def deepmimo_data_cleaning(deepmimo_data):
-     # Keep only users with a valid channel; LoS == -1 marks users with no paths
-     idxs = np.where(deepmimo_data['user']['LoS'] != -1)[0]
-     cleaned_deepmimo_data = deepmimo_data['user']['channel'][idxs]
-     # Scale the small channel gains into a training-friendly numeric range
-     return np.array(cleaned_deepmimo_data) * 1e6
-
- #%% Patch Creation
- def patch_maker(original_ch, patch_size=16, norm_factor=1e6):
-     """
-     Creates fixed-size patches from flattened complex channels.
-
-     Args:
-         original_ch (numpy array): Complex channels of shape (n_users, n_rx_ant, n_tx_ant, n_subcarriers).
-         patch_size (int): Number of elements per patch. Defaults to 16.
-         norm_factor (int): Normalization factor for channels (currently unused; scaling happens in deepmimo_data_cleaning).
-
-     Returns:
-         patch (numpy array): Patches of shape (n_users, n_patches, patch_size).
-     """
-     # idxs = np.where(data['user']['LoS'] != -1)[0]
-
-     # # Reshaping and normalizing channels
-     # original_ch = data['user']['channel'][idxs]
-     flat_channels = original_ch.reshape((original_ch.shape[0], -1)).astype(np.csingle)
-     # Real parts first, imaginary parts second
-     flat_channels_complex = np.hstack((flat_channels.real, flat_channels.imag))
-
-     # Create patches
-     n_patches = flat_channels_complex.shape[1] // patch_size
-     patch = np.zeros((len(flat_channels_complex), n_patches, patch_size))
-     for idx in range(n_patches):
-         patch[:, idx, :] = flat_channels_complex[:, idx * patch_size:(idx + 1) * patch_size]
-
-     return patch
-
-
- #%% Data Generation for Scenario Areas
- def DeepMIMO_data_gen(scenario):
-     """
-     Generates or loads data for a given scenario.
-
-     Args:
-         scenario (str): Scenario name.
-
-     Returns:
-         data (dict): Dataset of the first active basestation, uniformly sampled.
-     """
-
-     parameters, row_column_users, n_ant_bs, n_ant_ue, n_subcarriers = get_parameters(scenario)
-
-     deepMIMO_dataset = DeepMIMOv3.generate_data(parameters)
-     uniform_idxs = uniform_sampling(deepMIMO_dataset, [1, 1], len(parameters['user_rows']),
-                                     users_per_row=row_column_users[scenario]['n_per_row'])
-     data = select_by_idx(deepMIMO_dataset, uniform_idxs)[0]
-
-     return data
-
- #%%%
- def get_parameters(scenario):
-
-     n_ant_bs = 32
-     n_ant_ue = 1
-     n_subcarriers = 32
-     scs = 30e3  # subcarrier spacing (Hz)
-
-     row_column_users = {
-         'city_18_denver': {
-             'n_rows': 85,
-             'n_per_row': 82
-         },
-         'city_15_indianapolis': {
-             'n_rows': 80,
-             'n_per_row': 79
-         },
-         'city_19_oklahoma': {
-             'n_rows': 82,
-             'n_per_row': 75
-         },
-         'city_12_fortworth': {
-             'n_rows': 86,
-             'n_per_row': 72
-         },
-         'city_11_santaclara': {
-             'n_rows': 47,
-             'n_per_row': 114
-         },
-         'city_7_sandiego': {
-             'n_rows': 71,
-             'n_per_row': 83
-         }}
-
-     parameters = DeepMIMOv3.default_params()
-     parameters['dataset_folder'] = './scenarios'
-     parameters['scenario'] = scenario
-
-     if scenario == 'O1_3p5':
-         parameters['active_BS'] = np.array([4])
-     elif scenario in ['city_18_denver', 'city_15_indianapolis']:
-         parameters['active_BS'] = np.array([3])
-     else:
-         parameters['active_BS'] = np.array([1])
-
-     if scenario == 'Boston5G_3p5':
-         parameters['user_rows'] = np.arange(row_column_users[scenario]['n_rows'][0],
-                                             row_column_users[scenario]['n_rows'][1])
-     else:
-         parameters['user_rows'] = np.arange(row_column_users[scenario]['n_rows'])
-     parameters['bs_antenna']['shape'] = np.array([n_ant_bs, 1])  # Horizontal, Vertical
-     parameters['bs_antenna']['rotation'] = np.array([0, 0, -135])  # (x, y, z)
-     parameters['ue_antenna']['shape'] = np.array([n_ant_ue, 1])
-     parameters['enable_BS2BS'] = False
-     parameters['OFDM']['subcarriers'] = n_subcarriers
-     parameters['OFDM']['selected_subcarriers'] = np.arange(n_subcarriers)
-
-     parameters['OFDM']['bandwidth'] = scs * n_subcarriers / 1e9  # in GHz
-     parameters['num_paths'] = 20
-
-     return parameters, row_column_users, n_ant_bs, n_ant_ue, n_subcarriers
-
-
- #%% Sample Generation
- def make_sample(user_idx, patch, word2id, n_patches, n_masks, patch_size, gen_raw=False):
-     """
-     Generates a sample for each user, including masking and tokenizing.
-
-     Args:
-         user_idx (int): Index of the user.
-         patch (numpy array): Patches data.
-         word2id (dict): Dictionary for special tokens.
-         n_patches (int): Number of patches.
-         n_masks (int): Number of masked positions per half (real/imaginary).
-         patch_size (int): Size of each patch.
-         gen_raw (bool): Whether to record masked positions without altering the inputs.
-
-     Returns:
-         sample (list): [input_ids, masked_tokens, masked_pos] for the user.
-     """
-
-     tokens = patch[user_idx]
-     input_ids = np.vstack((word2id['[CLS]'], tokens))
-
-     # Mask matching positions in the real and imaginary halves; +1 skips [CLS]
-     real_tokens_size = int(n_patches / 2)
-     masks_pos_real = np.random.choice(range(0, real_tokens_size), size=n_masks, replace=False)
-     masks_pos_imag = masks_pos_real + real_tokens_size
-     masked_pos = np.hstack((masks_pos_real, masks_pos_imag)) + 1
-
-     masked_tokens = []
-     for pos in masked_pos:
-         original_masked_tokens = input_ids[pos].copy()
-         masked_tokens.append(original_masked_tokens)
-         if not gen_raw:
-             rnd_num = np.random.rand()
-             if rnd_num < 0.1:  # 10%: replace with random noise
-                 input_ids[pos] = np.random.rand(patch_size)
-             elif rnd_num < 0.9:  # 80%: replace with [MASK]; remaining 10%: left unchanged
-                 input_ids[pos] = word2id['[MASK]']
-
-     return [input_ids, masked_tokens, masked_pos]
-
-
- #%% Sampling and Data Selection
- def uniform_sampling(dataset, sampling_div, n_rows, users_per_row):
-     """
-     Performs uniform sampling on the dataset.
-
-     Args:
-         dataset (dict): DeepMIMO dataset.
-         sampling_div (list): Step sizes along the [column, row] dimensions.
-         n_rows (int): Number of rows for user selection.
-         users_per_row (int): Number of users per row.
-
-     Returns:
-         uniform_idxs (numpy array): Indices of the selected samples.
-     """
-     cols = np.arange(users_per_row, step=sampling_div[0])
-     rows = np.arange(n_rows, step=sampling_div[1])
-     uniform_idxs = np.array([j + i * users_per_row for i in rows for j in cols])
-
-     return uniform_idxs
-
- def select_by_idx(dataset, idxs):
-     """
-     Selects a subset of the dataset based on the provided indices.
-
-     Args:
-         dataset (dict): Dataset to trim.
-         idxs (numpy array): Indices of users to select.
-
-     Returns:
-         dataset_t (list): Trimmed dataset based on selected indices.
-     """
-     dataset_t = []  # Trimmed dataset
-     for bs_idx in range(len(dataset)):
-         # Keep the basestation location; select only the requested users
-         dataset_t.append({})
-         dataset_t[bs_idx]['location'] = dataset[bs_idx]['location']
-         dataset_t[bs_idx]['user'] = {k: dataset[bs_idx]['user'][k][idxs] for k in dataset[bs_idx]['user']}
-
-     return dataset_t
-
- #%% Save and Load Utilities
- def save_var(var, path):
-     """
-     Saves a variable to a pickle file.
-
-     Args:
-         var (object): Variable to be saved.
-         path (str): Path to save the file.
-
-     Returns:
-         None
-     """
-     path_full = path if path.endswith(('.p', '.pickle')) else (path + '.pickle')
-     with open(path_full, 'wb') as handle:
-         pickle.dump(var, handle)
-
- def load_var(path):
-     """
-     Loads a variable from a pickle file.
-
-     Args:
-         path (str): Path of the file to load.
-
-     Returns:
-         var (object): Loaded variable.
-     """
-     path_full = path if path.endswith(('.p', '.pickle')) else (path + '.pickle')
-     with open(path_full, 'rb') as handle:
-         var = pickle.load(handle)
-
-     return var
-
- #%%
-
- def label_gen(task, data, scenario, n_beams=64):
-
-     idxs = np.where(data['user']['LoS'] != -1)[0]
-
-     if task == 'LoS/NLoS Classification':
-         label = data['user']['LoS'][idxs]
-     elif task == 'Beam Prediction':
-         parameters, row_column_users, n_ant_bs, n_ant_ue, n_subcarriers = get_parameters(scenario)
-         n_users = len(data['user']['channel'])
-         n_subbands = 1
-         fov = 120  # field of view (degrees)
-
-         # Setup Beamformers
-         beam_angles = np.around(np.arange(-fov/2, fov/2+.1, fov/(n_beams-1)), 2)
-
-         F1 = np.array([steering_vec(parameters['bs_antenna']['shape'],
-                                     phi=azi*np.pi/180,
-                                     kd=2*np.pi*parameters['bs_antenna']['spacing']).squeeze()
-                        for azi in beam_angles])
-
-         full_dbm = np.zeros((n_beams, n_subbands, n_users), dtype=float)
-         for ue_idx in tqdm(range(n_users), desc='Computing the channel for each user'):
-             if data['user']['LoS'][ue_idx] == -1:
-                 full_dbm[:, :, ue_idx] = np.nan
-             else:
-                 chs = F1 @ data['user']['channel'][ue_idx]
-                 full_linear = np.abs(np.mean(chs.squeeze().reshape((n_beams, n_subbands, -1)), axis=-1))
-                 full_dbm[:, :, ue_idx] = np.around(20*np.log10(full_linear) + 30, 1)
-
-         # Strongest beam per user; users without a channel stay NaN and are
-         # dropped below when indexing with idxs
-         best_beams = np.argmax(np.mean(full_dbm, axis=1), axis=0)
-         best_beams = best_beams.astype(float)
-         best_beams[np.isnan(full_dbm[0, 0, :])] = np.nan
-         # max_bf_pwr = np.max(np.mean(full_dbm, axis=1), axis=0)
-
-         label = best_beams[idxs]
-
-     return label.astype(int)
-
- def steering_vec(array, phi=0, theta=0, kd=np.pi):
-     # phi = azimuth, theta = elevation (radians)
-     idxs = DeepMIMOv3.ant_indices(array)
-     resp = DeepMIMOv3.array_response(idxs, phi, theta + np.pi/2, kd)
-     return resp / np.linalg.norm(resp)  # unit-norm array response
-
-
- def label_prepend(deepmimo_data, preprocessed_chs, task, scenario_idxs, n_beams=64):
-     # Append a task label to each sample; label order matches the cleaned
-     # (LoS != -1) user order produced by tokenizer
-     labels = []
-     for scenario_idx in scenario_idxs:
-         scenario_name = scenarios_list()[scenario_idx]
-         # data = DeepMIMO_data_gen(scenario_name)
-         data = deepmimo_data[scenario_idx]
-         labels.extend(label_gen(task, data, scenario_name, n_beams=n_beams))
-
-     preprocessed_chs = [preprocessed_chs[i] + [labels[i]] for i in range(len(preprocessed_chs))]
-
+ # -*- coding: utf-8 -*-
+ """
+ Created on Fri Sep 13 16:13:29 2024
+
+ This script generates preprocessed data from wireless communication scenarios,
+ including token generation, patch creation, and data sampling for machine learning models.
+
+ @author: salikha4
+ """
+
+ import numpy as np
+ import os
+ from tqdm import tqdm
+ import time
+ import pickle
+ import torch  # needed for the tensor-based manual_data path below
+ import DeepMIMOv3
+
+ #%% Scenarios List
+ def scenarios_list():
+     """Returns an array of available scenarios."""
+     return np.array([
+         'city_18_denver', 'city_15_indianapolis', 'city_19_oklahoma',
+         'city_12_fortworth', 'city_11_santaclara', 'city_7_sandiego'
+     ])
+
+ #%% Token Generation
+ def tokenizer(selected_scenario_names=None, manual_data=None, gen_raw=True):
+     """
+     Generates tokens by preparing and preprocessing the dataset.
+
+     Args:
+         selected_scenario_names (list): Names of the scenarios to generate data for.
+         manual_data (array): Optional user-supplied channels; bypasses scenario generation.
+         gen_raw (bool): Whether to record masked positions without altering the inputs. Defaults to True.
+
+     Returns:
+         preprocessed_data (list): One [input_ids, masked_tokens, masked_pos] sample per user.
+     """
+
+     if manual_data is not None:
+         # patches = patch_maker(np.expand_dims(np.array(manual_data), axis=1))
+         # patch_maker operates on NumPy arrays, so convert back after adding the antenna axis
+         patches = patch_maker(torch.tensor(manual_data, dtype=torch.complex64).unsqueeze(1).numpy())
+     else:
+         # Patch generation or loading
+         deepmimo_data = [DeepMIMO_data_gen(scenario_name) for scenario_name in selected_scenario_names]
+         n_scenarios = len(selected_scenario_names)
+
+         cleaned_deepmimo_data = [deepmimo_data_cleaning(deepmimo_data[scenario_idx]) for scenario_idx in range(n_scenarios)]
+
+         patches = [patch_maker(cleaned_deepmimo_data[scenario_idx]) for scenario_idx in range(n_scenarios)]
+         patches = np.vstack(patches)
+
+     # Define dimensions
+     patch_size = patches.shape[2]
+     n_patches = patches.shape[1]
+     n_masks_half = int(0.15 * n_patches / 2)  # mask 15% of patches, split between the real and imaginary halves
+     # sequence_length = n_patches + 1
+     # element_length = patch_size
+
+     # Special tokens are encoded as constant patch-sized vectors
+     word2id = {'[CLS]': 0.2 * np.ones((patch_size)), '[MASK]': 0.1 * np.ones((patch_size))}
+
+     # Generate preprocessed channels
+     preprocessed_data = []
+     for user_idx in tqdm(range(len(patches)), desc="Processing items"):
+         sample = make_sample(user_idx, patches, word2id, n_patches, n_masks_half, patch_size, gen_raw=gen_raw)
+         preprocessed_data.append(sample)
+
+     return preprocessed_data
+
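A minimal usage sketch of the tokenizer (assumes the city_18_denver ray-tracing files are present under ./scenarios):

    from input_preprocess import tokenizer
    samples = tokenizer(selected_scenario_names=['city_18_denver'], gen_raw=True)
    input_ids, masked_tokens, masked_pos = samples[0]
    print(input_ids.shape)  # (129, 16) with the default 32x32 channels: [CLS] + 128 patches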
+ #%%
+ def deepmimo_data_cleaning(deepmimo_data):
+     # Keep only users with a valid channel; LoS == -1 marks users with no paths
+     idxs = np.where(deepmimo_data['user']['LoS'] != -1)[0]
+     cleaned_deepmimo_data = deepmimo_data['user']['channel'][idxs]
+     # Scale the small channel gains into a training-friendly numeric range
+     return np.array(cleaned_deepmimo_data) * 1e6
+
+ #%% Patch Creation
+ def patch_maker(original_ch, patch_size=16, norm_factor=1e6):
+     """
+     Creates fixed-size patches from flattened complex channels.
+
+     Args:
+         original_ch (numpy array): Complex channels of shape (n_users, n_rx_ant, n_tx_ant, n_subcarriers).
+         patch_size (int): Number of elements per patch. Defaults to 16.
+         norm_factor (int): Normalization factor for channels (currently unused; scaling happens in deepmimo_data_cleaning).
+
+     Returns:
+         patch (numpy array): Patches of shape (n_users, n_patches, patch_size).
+     """
+     # idxs = np.where(data['user']['LoS'] != -1)[0]
+
+     # # Reshaping and normalizing channels
+     # original_ch = data['user']['channel'][idxs]
+     flat_channels = original_ch.reshape((original_ch.shape[0], -1)).astype(np.csingle)
+     # Real parts first, imaginary parts second
+     flat_channels_complex = np.hstack((flat_channels.real, flat_channels.imag))
+
+     # Create patches
+     n_patches = flat_channels_complex.shape[1] // patch_size
+     patch = np.zeros((len(flat_channels_complex), n_patches, patch_size))
+     for idx in range(n_patches):
+         patch[:, idx, :] = flat_channels_complex[:, idx * patch_size:(idx + 1) * patch_size]
+
+     return patch
+
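A quick shape check for patch_maker (self-contained; random data stands in for channels):

    import numpy as np
    from input_preprocess import patch_maker
    dummy = np.random.randn(10, 1, 32, 32) + 1j * np.random.randn(10, 1, 32, 32)
    p = patch_maker(dummy)
    print(p.shape)  # (10, 128, 16): 2 * 32 * 32 real values per user, cut into 16-element patches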
+
+ #%% Data Generation for Scenario Areas
+ def DeepMIMO_data_gen(scenario):
+     """
+     Generates or loads data for a given scenario.
+
+     Args:
+         scenario (str): Scenario name.
+
+     Returns:
+         data (dict): Dataset of the first active basestation, uniformly sampled.
+     """
+
+     parameters, row_column_users, n_ant_bs, n_ant_ue, n_subcarriers = get_parameters(scenario)
+
+     deepMIMO_dataset = DeepMIMOv3.generate_data(parameters)
+     uniform_idxs = uniform_sampling(deepMIMO_dataset, [1, 1], len(parameters['user_rows']),
+                                     users_per_row=row_column_users[scenario]['n_per_row'])
+     data = select_by_idx(deepMIMO_dataset, uniform_idxs)[0]
+
+     return data
+
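Generating one scenario end to end (a sketch; it needs the downloaded scenario files and takes a while to ray-trace):

    from input_preprocess import DeepMIMO_data_gen
    data = DeepMIMO_data_gen('city_18_denver')
    print(data['user']['channel'].shape)  # (n_users, 1, 32, 32) with the defaults above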
+ #%%%
+ def get_parameters(scenario):
+
+     n_ant_bs = 32
+     n_ant_ue = 1
+     n_subcarriers = 32
+     scs = 30e3  # subcarrier spacing (Hz)
+
+     row_column_users = {
+         'city_18_denver': {
+             'n_rows': 85,
+             'n_per_row': 82
+         },
+         'city_15_indianapolis': {
+             'n_rows': 80,
+             'n_per_row': 79
+         },
+         'city_19_oklahoma': {
+             'n_rows': 82,
+             'n_per_row': 75
+         },
+         'city_12_fortworth': {
+             'n_rows': 86,
+             'n_per_row': 72
+         },
+         'city_11_santaclara': {
+             'n_rows': 47,
+             'n_per_row': 114
+         },
+         'city_7_sandiego': {
+             'n_rows': 71,
+             'n_per_row': 83
+         }}
+
+     parameters = DeepMIMOv3.default_params()
+     parameters['dataset_folder'] = './scenarios'
+     parameters['scenario'] = scenario
+
+     if scenario == 'O1_3p5':
+         parameters['active_BS'] = np.array([4])
+     elif scenario in ['city_18_denver', 'city_15_indianapolis']:
+         parameters['active_BS'] = np.array([3])
+     else:
+         parameters['active_BS'] = np.array([1])
+
+     if scenario == 'Boston5G_3p5':
+         parameters['user_rows'] = np.arange(row_column_users[scenario]['n_rows'][0],
+                                             row_column_users[scenario]['n_rows'][1])
+     else:
+         parameters['user_rows'] = np.arange(row_column_users[scenario]['n_rows'])
+     parameters['bs_antenna']['shape'] = np.array([n_ant_bs, 1])  # Horizontal, Vertical
+     parameters['bs_antenna']['rotation'] = np.array([0, 0, -135])  # (x, y, z)
+     parameters['ue_antenna']['shape'] = np.array([n_ant_ue, 1])
+     parameters['enable_BS2BS'] = False
+     parameters['OFDM']['subcarriers'] = n_subcarriers
+     parameters['OFDM']['selected_subcarriers'] = np.arange(n_subcarriers)
+
+     parameters['OFDM']['bandwidth'] = scs * n_subcarriers / 1e9  # in GHz
+     parameters['num_paths'] = 20
+
+     return parameters, row_column_users, n_ant_bs, n_ant_ue, n_subcarriers
+
+
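get_parameters can be inspected without generating any data; for instance, the configured bandwidth is the subcarrier spacing times the subcarrier count, expressed in GHz:

    from input_preprocess import get_parameters
    params, *_ = get_parameters('city_18_denver')
    print(params['OFDM']['bandwidth'])  # 0.00096 GHz, i.e. 30 kHz * 32 = 0.96 MHz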
+ #%% Sample Generation
+ def make_sample(user_idx, patch, word2id, n_patches, n_masks, patch_size, gen_raw=False):
+     """
+     Generates a sample for each user, including masking and tokenizing.
+
+     Args:
+         user_idx (int): Index of the user.
+         patch (numpy array): Patches data.
+         word2id (dict): Dictionary for special tokens.
+         n_patches (int): Number of patches.
+         n_masks (int): Number of masked positions per half (real/imaginary).
+         patch_size (int): Size of each patch.
+         gen_raw (bool): Whether to record masked positions without altering the inputs.
+
+     Returns:
+         sample (list): [input_ids, masked_tokens, masked_pos] for the user.
+     """
+
+     tokens = patch[user_idx]
+     input_ids = np.vstack((word2id['[CLS]'], tokens))
+
+     # Mask matching positions in the real and imaginary halves; +1 skips [CLS]
+     real_tokens_size = int(n_patches / 2)
+     masks_pos_real = np.random.choice(range(0, real_tokens_size), size=n_masks, replace=False)
+     masks_pos_imag = masks_pos_real + real_tokens_size
+     masked_pos = np.hstack((masks_pos_real, masks_pos_imag)) + 1
+
+     masked_tokens = []
+     for pos in masked_pos:
+         original_masked_tokens = input_ids[pos].copy()
+         masked_tokens.append(original_masked_tokens)
+         if not gen_raw:
+             rnd_num = np.random.rand()
+             if rnd_num < 0.1:  # 10%: replace with random noise
+                 input_ids[pos] = np.random.rand(patch_size)
+             elif rnd_num < 0.9:  # 80%: replace with [MASK]; remaining 10%: left unchanged
+                 input_ids[pos] = word2id['[MASK]']
+
+     return [input_ids, masked_tokens, masked_pos]
+
+
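For orientation, the layout of one returned sample (a sketch; the shapes assume the default 128-patch setup):

    # input_ids:     (129, 16) array; row 0 is [CLS], rows 1..128 are patches,
    #                some overwritten by [MASK]/noise when gen_raw=False
    # masked_tokens: original patch values at the masked positions
    # masked_pos:    the masked row indices (offset by 1 to skip [CLS])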
+ #%% Sampling and Data Selection
+ def uniform_sampling(dataset, sampling_div, n_rows, users_per_row):
+     """
+     Performs uniform sampling on the dataset.
+
+     Args:
+         dataset (dict): DeepMIMO dataset.
+         sampling_div (list): Step sizes along the [column, row] dimensions.
+         n_rows (int): Number of rows for user selection.
+         users_per_row (int): Number of users per row.
+
+     Returns:
+         uniform_idxs (numpy array): Indices of the selected samples.
+     """
+     cols = np.arange(users_per_row, step=sampling_div[0])
+     rows = np.arange(n_rows, step=sampling_div[1])
+     uniform_idxs = np.array([j + i * users_per_row for i in rows for j in cols])
+
+     return uniform_idxs
+
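A small worked example of the index arithmetic (dataset is unused by the function, so None suffices):

    from input_preprocess import uniform_sampling
    idxs = uniform_sampling(None, [2, 1], n_rows=2, users_per_row=4)
    print(idxs)  # [0 2 4 6]: every second column in each of the two rows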
+ def select_by_idx(dataset, idxs):
+     """
+     Selects a subset of the dataset based on the provided indices.
+
+     Args:
+         dataset (dict): Dataset to trim.
+         idxs (numpy array): Indices of users to select.
+
+     Returns:
+         dataset_t (list): Trimmed dataset based on selected indices.
+     """
+     dataset_t = []  # Trimmed dataset
+     for bs_idx in range(len(dataset)):
+         # Keep the basestation location; select only the requested users
+         dataset_t.append({})
+         dataset_t[bs_idx]['location'] = dataset[bs_idx]['location']
+         dataset_t[bs_idx]['user'] = {k: dataset[bs_idx]['user'][k][idxs] for k in dataset[bs_idx]['user']}
+
+     return dataset_t
+
+ #%% Save and Load Utilities
+ def save_var(var, path):
+     """
+     Saves a variable to a pickle file.
+
+     Args:
+         var (object): Variable to be saved.
+         path (str): Path to save the file.
+
+     Returns:
+         None
+     """
+     path_full = path if path.endswith(('.p', '.pickle')) else (path + '.pickle')
+     with open(path_full, 'wb') as handle:
+         pickle.dump(var, handle)
+
+ def load_var(path):
+     """
+     Loads a variable from a pickle file.
+
+     Args:
+         path (str): Path of the file to load.
+
+     Returns:
+         var (object): Loaded variable.
+     """
+     path_full = path if path.endswith(('.p', '.pickle')) else (path + '.pickle')
+     with open(path_full, 'rb') as handle:
+         var = pickle.load(handle)
+
+     return var
+
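Round-trip sketch for the pickle helpers:

    from input_preprocess import save_var, load_var
    save_var({'snr_db': 10}, '/tmp/demo')   # writes /tmp/demo.pickle
    print(load_var('/tmp/demo')['snr_db'])  # 10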
+ #%%
+
+ def label_gen(task, data, scenario, n_beams=64):
+
+     idxs = np.where(data['user']['LoS'] != -1)[0]
+
+     if task == 'LoS/NLoS Classification':
+         label = data['user']['LoS'][idxs]
+     elif task == 'Beam Prediction':
+         parameters, row_column_users, n_ant_bs, n_ant_ue, n_subcarriers = get_parameters(scenario)
+         n_users = len(data['user']['channel'])
+         n_subbands = 1
+         fov = 120  # field of view (degrees)
+
+         # Setup Beamformers
+         beam_angles = np.around(np.arange(-fov/2, fov/2+.1, fov/(n_beams-1)), 2)
+
+         F1 = np.array([steering_vec(parameters['bs_antenna']['shape'],
+                                     phi=azi*np.pi/180,
+                                     kd=2*np.pi*parameters['bs_antenna']['spacing']).squeeze()
+                        for azi in beam_angles])
+
+         full_dbm = np.zeros((n_beams, n_subbands, n_users), dtype=float)
+         for ue_idx in tqdm(range(n_users), desc='Computing the channel for each user'):
+             if data['user']['LoS'][ue_idx] == -1:
+                 full_dbm[:, :, ue_idx] = np.nan
+             else:
+                 chs = F1 @ data['user']['channel'][ue_idx]
+                 full_linear = np.abs(np.mean(chs.squeeze().reshape((n_beams, n_subbands, -1)), axis=-1))
+                 full_dbm[:, :, ue_idx] = np.around(20*np.log10(full_linear) + 30, 1)
+
+         # Strongest beam per user; users without a channel stay NaN and are
+         # dropped below when indexing with idxs
+         best_beams = np.argmax(np.mean(full_dbm, axis=1), axis=0)
+         best_beams = best_beams.astype(float)
+         best_beams[np.isnan(full_dbm[0, 0, :])] = np.nan
+         # max_bf_pwr = np.max(np.mean(full_dbm, axis=1), axis=0)
+
+         label = best_beams[idxs]
+
+     return label.astype(int)
+
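A hypothetical labeling call, assuming data came from DeepMIMO_data_gen('city_18_denver'):

    labels = label_gen('Beam Prediction', data, 'city_18_denver', n_beams=64)
    # labels[i] in 0..63: index of the strongest of 64 beams spanning +/-60 degrees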
+ def steering_vec(array, phi=0, theta=0, kd=np.pi):
+     # phi = azimuth, theta = elevation (radians)
+     idxs = DeepMIMOv3.ant_indices(array)
+     resp = DeepMIMOv3.array_response(idxs, phi, theta + np.pi/2, kd)
+     return resp / np.linalg.norm(resp)  # unit-norm array response
+
+
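The steering vector is unit-norm by construction, which a quick check confirms:

    import numpy as np
    from input_preprocess import steering_vec
    sv = steering_vec(np.array([32, 1]), phi=np.pi/6)  # beam at 30 degrees azimuth
    print(np.linalg.norm(sv))  # 1.0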
+ def label_prepend(deepmimo_data, preprocessed_chs, task, scenario_idxs, n_beams=64):
+     # Append a task label to each sample; label order matches the cleaned
+     # (LoS != -1) user order produced by tokenizer
+     labels = []
+     for scenario_idx in scenario_idxs:
+         scenario_name = scenarios_list()[scenario_idx]
+         # data = DeepMIMO_data_gen(scenario_name)
+         data = deepmimo_data[scenario_idx]
+         labels.extend(label_gen(task, data, scenario_name, n_beams=n_beams))
+
+     preprocessed_chs = [preprocessed_chs[i] + [labels[i]] for i in range(len(preprocessed_chs))]
+
      return preprocessed_chs
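Putting it together, an end-to-end sketch of the intended pipeline (assumes the scenario files are downloaded; the task and indices are illustrative):

    from input_preprocess import DeepMIMO_data_gen, tokenizer, label_prepend
    names = ['city_18_denver']
    deepmimo_data = [DeepMIMO_data_gen(n) for n in names]
    chs = tokenizer(selected_scenario_names=names, gen_raw=True)
    chs = label_prepend(deepmimo_data, chs, 'LoS/NLoS Classification', scenario_idxs=[0])
    # each element of chs is now [input_ids, masked_tokens, masked_pos, label]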