marcos_client/marcompile.py at master · mriLab-i3M/marcos_client · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
#!/usr/bin/env python3
# Basic CSV -> machine code compiler for marga

import numpy as np
import warnings
from marmachine import *
try:
    from local_config import grad_board
except ModuleNotFoundError:
    grad_board = "gpa-fhdo"

# Buffer indices that carry gradient data words (labelled GRAD_LSB = 1 and
# GRAD_MSB = 2 in col2buf). Changes targeting these buffers are collected in a
# separate gradient changelist so that cl2bin can apply its gradient-specific
# timing handling to them.
grad_data_bufs = (1, 2)

# Removed-instruction warnings collected by cl2bin are only emitted when more
# than this many of them have accumulated during one compilation.
max_removed_instructions = 1000

def debug_print(*args, **kwargs):
    """Debug output hook; accepts the same arguments as print().

    Disabled by default, so every call is a no-op. Re-enable the print
    call below to trace the compiler.
    """
    # print(*args, **kwargs)
    return None

def col2buf(col_idx, value):
    """Translate one sequence column into the buffer writes that realise it.

    Parameters
    ----------
    col_idx : int
        Column index in the range 1..32 (column 0 holds the clock cycles and
        therefore has no buffer of its own).
    value : int or integer array
        New value(s) of that column. Arrays are accepted and are processed
        element-wise.

    Returns
    -------
    tuple
        ``(buffer indices, values, value masks)``, each converted with
        ``np.uint16``. A column may map onto one or two buffers. The masks
        specify which bits of each buffer are actually relevant to the column.

    Raises
    ------
    RuntimeError
        If a gradient column belonging to the board that is *not* selected by
        ``grad_board`` is used.
    ValueError
        If ``grad_board`` is unknown, or if ``col_idx`` does not correspond to
        any known column.
    """
    if col_idx in (1, 2, 3, 4): # TX
        buf_idx = col_idx + 4, # TX0_I, TX0_Q, TX1_I, TX1_Q
        val = value,
        mask = 0xffff,
    elif col_idx in (5, 6, 7, 8, 9, 10, 11, 12): # grad
        # Only encode value and channel into words here.  Precise
        # timing and broadcast logic will be handled at the next stage
        if grad_board == "gpa-fhdo":
            if col_idx in (9, 10, 11, 12):
                raise RuntimeError("GPA-FHDO is selected, but you are trying to control OCRA1")
            grad_chan = col_idx - 5
            val_full = value | 0x80000 | ( grad_chan << 16 ) | (grad_chan << 25)
        elif grad_board == "ocra1":
            if col_idx in (5, 6, 7, 8):
                raise RuntimeError("OCRA1 is selected, but you are trying to control GPA-FHDO")
            grad_chan = col_idx - 9
            val_full = value << 2 | 0x00100000 | (grad_chan << 25) | 0x01000000 # always broadcast by default
        else:
            raise ValueError("Unknown grad board")

        # the 32-bit gradient word is split over two 16-bit buffers
        buf_idx = 2, 1 # GRAD_MSB, GRAD_LSB
        val = val_full >> 16, val_full & 0xffff
        mask = 0xffff, 0xffff
    elif col_idx in (13, 14): # RX rate
        buf_idx = col_idx - 10, # RX0_RATE, RX1_RATE
        val = value,
        mask = 0xffff,
    elif col_idx in (15, 16): # RX rate valid
        buf_idx = 16, # RX_CTRL
        bit_idx = col_idx - 15
        val = value << (4 + bit_idx),
        mask = 0x1 << (4 + bit_idx),
    elif col_idx in (17, 18): # RX resets, active low
        buf_idx = 16, # RX_CTRL
        bit_idx = col_idx - 17
        val = value << (6 + bit_idx),
        mask = 0x1 << (6 + bit_idx),
    elif col_idx in (19, 20): # RX enables (rx0_en, rx1_en)
        buf_idx = 16, # RX_CTRL
        bit_idx = col_idx - 19
        val = value << (8 + bit_idx),
        mask = 0x1 << (8 + bit_idx),
    elif col_idx in (21, 22, 23): # TX/RX gates, external trig
        buf_idx = 15, # GATES_LEDS
        bit_idx = col_idx - 21
        val = value << bit_idx,
        mask = 0x1 << bit_idx,
    elif col_idx == 24: # LEDs
        buf_idx = 15, # GATES_LEDS
        val = value << 8,
        mask = 0xff00,
    elif col_idx in (25, 26, 27): # LO freqs
        lo_lsb_buf = 9 + 2*(col_idx - 25) # 9, 11 or 13
        buf_idx = lo_lsb_buf, lo_lsb_buf + 1 # DDS[0,1,2]_PHASE_LSB, DDS[0,1,2]_PHASE_MSB
        val = value & 0xffff, value >> 16
        # top bit of the MSB buffer is left to the LO phase reset columns
        mask = 0xffff, 0x7fff
    elif col_idx in (28, 29, 30): # LO phase reset
        lo_msb_buf = 10 + 2*(col_idx - 28) # DDS[0,1,2]_PHASE_MSB
        buf_idx = lo_msb_buf,
        val = value << 15,
        mask = 0x8000,
    elif col_idx in (31, 32): # LO source for RX demodulation
        buf_idx = 16, # RX_CTRL
        bit_idx = (col_idx - 31) * 2
        val = value << bit_idx,
        mask = 0x0003 << bit_idx,
    else:
        # Previously an unknown column fell through and crashed with an
        # UnboundLocalError at the return statement below.
        raise ValueError("Unknown column index {}".format(col_idx))

    return np.uint16(buf_idx), np.uint16(val), np.uint16(mask)

def csv2bin(path, quick_start=False, initial_bufs=np.zeros(MARGA_BUFS, dtype=np.uint16), latencies = np.zeros(MARGA_BUFS, dtype=np.int32)):
    """Compile a sequence CSV file into a list of binary instructions.

    path: CSV file to read. The first row is a header whose last entry
    must be ' csv_version_0.2'; the first column of every data row is the
    clock time and the remaining columns are the output values.
    quick_start: strip out the initial RAM-writing dead time if the CSV
    was generated by the simulator or similar
    initial_bufs: starting state of output buffers, to track with
    instructions
    latencies: inherent buffer latencies to take into account, indexed
    by buffer. Latencies are primarily relevant to the gradients, but
    can be adjusted to suit various other external hardware effects
    like slow RF amps, very long cables etc

    Returns whatever cl2bin() produces for the extracted changes.
    """

    data = np.loadtxt(path, skiprows=1, delimiter=',', comments='#').astype(np.uint32)
    with open(path, 'r') as csvf:
        cols = csvf.readline().strip().split(',')[1:]

    assert cols[-1] == ' csv_version_0.2', "Wrong CSV format"

    if quick_start:
        # shift all rows after the first so that the second row happens at
        # tick 10, removing the dead time taken up by simulated memory writes
        data[1:, 0] = data[1:, 0] - data[1, 0] + 10

    # One boolean row per pair of consecutive CSV rows: True wherever an
    # output column differs from the row before it
    data_diff = data[:-1,1:] != data[1:,1:]

    changelist = []
    changelist_grad = []

    for row, row_changed in enumerate(data_diff, start=1):
        clocktime = data[row, 0]
        # +1 converts from output-column offset back to CSV column index
        for col_idx in np.where(row_changed)[0] + 1:
            buf_idces, vals, masks = col2buf(col_idx, data[row][col_idx])
            for bi, v, m in zip(buf_idces, vals, masks):
                # gradient data goes to its own list; everything else is direct
                target = changelist_grad if bi in grad_data_bufs else changelist
                target.append( (clocktime - latencies[bi], bi, v, m) )

    return cl2bin(changelist, changelist_grad, initial_bufs)

def dict2bin(sd, initial_bufs=np.zeros(MARGA_BUFS, dtype=np.uint16), latencies = np.zeros(MARGA_BUFS, dtype=np.int32)):
    """Compile a sequence dictionary into a list of binary instructions.

    sd: sequence dictionary mapping an output name to a pair of
    (times, values), for example:

     {'tx0_i': ( np.array([100, 102, 304, 506]), np.array([1, 200, 65535, 20000]) ),
      'fhdo_vx': ( np.array([3000, 4500, 5900, 7000]), np.array([1, 2, 55555, 33333]) ),
      'fhdo_vy': ( np.array([10000, 12000, 14000, 16000]), np.array([1, 2, 55555, 33333]) ) }

    The values use the same binary format as in the CSV file.

    initial_bufs: starting state of output buffers, passed on to cl2bin()

    latencies: inherent buffer latencies to take into account, indexed by
    buffer. For outputs spanning several buffers the latency of the first
    buffer returned by col2buf() is applied to all of them. Latencies are
    primarily relevant to the gradients, but can be adjusted to suit
    various other external hardware effects like slow RF amps, very long
    cables etc
    """

    # position in this list == column index understood by col2buf()
    col_arr = ['clock cycles', 'tx0_i', 'tx0_q', 'tx1_i', 'tx1_q', 'fhdo_vx', 'fhdo_vy', 'fhdo_vz', 'fhdo_vz2',
               'ocra1_vx', 'ocra1_vy', 'ocra1_vz', 'ocra1_vz2', 'rx0_rate', 'rx1_rate',
               'rx0_rate_valid', 'rx1_rate_valid', 'rx0_rst_n', 'rx1_rst_n', 'rx0_en', 'rx1_en',
               'tx_gate', 'rx_gate', 'trig_out', 'leds',
               'lo0_freq', 'lo1_freq', 'lo2_freq', 'lo0_rst', 'lo1_rst', 'lo2_rst',
               'rx0_lo', 'rx1_lo', ] # TODO: these two rows aren't yet in the CSV and thus aren't tested by test_marga_model.py

    changelist = []
    changelist_grad = []

    for name, times_and_values in sd.items():
        column = col_arr.index(name)
        grad_events = []
        # values may be a single element or an array
        buf_idces, values, masks = col2buf(column, times_and_values[1])
        shifted_times = times_and_values[0] - latencies[buf_idces[0]]
        for bi, buf_values, m in zip(buf_idces, values, masks):
            destination = grad_events if bi in grad_data_bufs else changelist
            destination.extend( (t, bi, v, m) for t, v in zip(shifted_times, buf_values) )

        # Sorting per output keeps coupled LSB/MSB pairs together in case
        # multiple events occur on different channels simultaneously
        if grad_events:
            grad_events.sort(key=lambda event: event[0])
            changelist_grad.extend(grad_events)

    return cl2bin(changelist, changelist_grad, initial_bufs)

def cl2bin(changelist, changelist_grad,
           initial_bufs=np.zeros(MARGA_BUFS, dtype=np.uint16)):

    """Central compilation function; accept in two changelists,
    changelist for all the direct-buffer outputs (TX, most configurable
    parameters, etc) and the other, changelist_grad, for the outputs used
    to control hardware with non-trivial internal timing behaviour
    (currently only the gradient boards). Also accepts non-default initial
    values to program the buffers to.

    Every change is a tuple of (time, buffer index, value, mask).
    changelist_grad is consumed in consecutive pairs (elements 0 and 1, 2
    and 3, ...), which are kept together when sorting by time.

    Note that changelist is extended and sorted in place. It must contain
    at least one change once the gradient changes have been merged in.

    Returns a list of instruction words: first one buffer write per entry
    of initial_bufs, then the waits/nops/buffer writes implementing the
    changes, and lastly an IFINISH instruction.

    May emit MarGradWarning (gradient updates too closely spaced for the
    SPI divider taken from initial_bufs[0]) and MarRemovedInstructionWarning
    (changes that have no effect, reported only when more than
    max_removed_instructions of them were found).
    """

    # Process the grad changelist, depending on what GPA is being used etc
    # Sort in pairs of changes, because otherwise channels can get mixed up
    changelist_grad_paired = [ [k, m] for k, m in zip(changelist_grad[::2], changelist_grad[1::2]) ]
    sortfn = lambda change: change[0]
    # changelist_grad.sort(key=sortfn) # sort by time
    sortfn_paired = lambda change: change[0][0]
    changelist_grad_paired.sort(key=sortfn_paired) # sort by time
    changelist_grad = [k for sl in changelist_grad_paired for k in sl] # https://stackabuse.com/python-how-to-flatten-list-of-lists/

    t_last = [0, 0] # no updates have previously happened; [LSB, MSB]
    spi_div = (initial_bufs[0] & 0xfc) >> 2 # bits 2-7 of buffer 0
    changelist_grad_shifted = []
    num_chgs = [0, 0] # [LSB, MSB]
    grad_vals = [initial_bufs[1], initial_bufs[2]] # [LSB, MSB] current output data
    grad_vals_old = [0, 0] # [LSB, MSB] previous output data

    for c in changelist_grad:
        t = c[0]
        debug_print("t: ", t, " t_last: ", t_last, "num_chgs: ", num_chgs, " c: ", c)
        idx = c[1] - 1 # 0 for LSB, 1 for MSB
        msb = idx == 1
        data = c[2]
        # if data == grad_vals[idx]: # no actual change to buffer output
        #     continue # skip this change
        # else:
        #     grad_vals_old[idx] = grad_vals[idx]
        #     grad_vals[idx] = data # update the last known buffer value

        if t == t_last[idx]:
            num_chgs[idx] += 1
            # assume the changes in changelist_grad are paired with LSBs/MSBs matching each other's grad channels stored sequentially,
            # and that for each event, the MSB update is first
            if grad_board == "ocra1": # simultaneous with another grad update
                if msb:
                    if num_chgs[1]: # MSB buffer and not the first grad event on this timestep
                        # turn broadcast off if this isn't the first grad event on this timestep
                        data = data & ~np.uint16(0x0100)
                        # return LSB back to old values, since this one is now done in the past
                        grad_vals[:] = grad_vals_old # revert the last known buffer values
                # else:
                #     if data == grad_vals[idx]: # no actual change to buffer output compared to earlier LSB at this timestep
                #         continue # skip this change

                # move non-broadcast events back in time, so that synchronisation will be done in ocra1_iface core
                changelist_grad_shifted.append( (c[0]-num_chgs[idx], c[1], data, c[3]) )
                num_chgs[idx] += 1
            elif grad_board == "gpa-fhdo":
                # don't do anything; currently will cause an error
                # later since multiple events can't happen at the same
                # time for GPA-FHDO
                changelist_grad_shifted.append(c)
        else:
            if t - t_last[idx] < 24 * (1 + spi_div) + 2: #
                warnings.warn("Gradient updates are too frequent for selected SPI divider. Missed samples are likely!", MarGradWarning)

            # if data == grad_vals[idx]: # no actual change to buffer output
            #     continue # skip this change

            t_last[idx] = t
            grad_vals[idx] = data # update the last known buffer value
            changelist_grad_shifted.append(c)
            num_chgs = [0, 0]

    changelist += changelist_grad_shifted
    changelist.sort(key=sortfn) # sort by time

    # Track removed instruction events, but only warn when the number exceeds a minimum
    removed_instruction_warnings = []

    # Process and combine the change list into discrete sets of operations at each time, i.e. an output list
    def cl2ol(changelist):
        current_bufs = initial_bufs.copy() # tracked buffer state; the caller's array is left untouched
        current_time = changelist[0][0]
        unique_times = []
        unique_changes = []
        change_masks = np.zeros(MARGA_BUFS, dtype=np.uint16) # bits already written during this timestep
        changed = np.zeros(MARGA_BUFS, dtype=bool) # buffers written during this timestep

        def close_timestep(time):
            # emit one output-list entry for the timestep and reset the per-timestep trackers
            ch_idces = np.where(changed)[0]
            # buf_time_offsets = np.zeros(MARGA_BUFS, dtype=int32)
            buf_time_offsets = 0
            unique_changes.append( [time, ch_idces, current_bufs[ch_idces], buf_time_offsets] )
            change_masks[:] = np.zeros(MARGA_BUFS, dtype=np.uint16)
            changed[:] = np.zeros(MARGA_BUFS, dtype=bool)

        for time, buf, val, mask in changelist:
            if time != current_time:
                close_timestep(current_time)
                current_time = time
            buf_diff = (current_bufs[buf] ^ val) & mask # relevant bits that would actually change
            assert buf_diff & change_masks[buf] == 0, "Tried to set a buffer to two values at once"
            if buf_diff == 0:
                if buf not in (1, 2):
                    # gradient buffers will have unneeded instructions
                    # all the time, so not worth warning the user for
                    # those
                    removed_instruction_warnings.append( "Instruction at tick {:d}, buffer {:d}, value 0x{:04x}, mask 0x{:04x} will have no effect. Skipping...".format(time, buf, val, mask) )
                continue
            val_masked = val & mask
            old_val_unmasked = current_bufs[buf] & ~mask
            new_val = old_val_unmasked | val_masked
            change_masks[buf] |= mask
            current_bufs[buf] = new_val
            changed[buf] = True

        close_timestep(current_time)

        return unique_changes

    changes = cl2ol(changelist)

    # warn about all the removed instructions if there are more than a maximum number
    if len(removed_instruction_warnings) > max_removed_instructions:
        for riw in removed_instruction_warnings:
            warnings.warn(riw, MarRemovedInstructionWarning)
        warnings.warn("NOTE: Fewer than {:d} removed-instruction warnings will not be printed -- keep this in mind when searching for the root cause.".format(max_removed_instructions))

    # Process time offsets
    for ch, ch_prev in zip( reversed(changes[1:]), reversed(changes[:-1]) ):
        # does the current timestep need to output more data than can
        # fit into the time gap since the previous timestep?
        timestep = np.int32(ch[0] - ch_prev[0])
        timediff = np.int32(ch[1].size - timestep)
        # if timestep < ch[1].size: # not enough time

        if timediff > 0:
            ch_prev[0] -= timediff # move prev. event into the past
            ch_prev[3] = timediff # make prev. event's buffers output in its future

    # convert to differential timesteps
    last_time = 0
    for ch in changes:
        ch0 = ch[0]
        ch[0] = ch0 - last_time
        last_time = ch0

    # Interpretation of each element of changes list:
    # [time when all instructions for this change will have completed,
    #  buffers that need to be changed,
    #  values to set the buffers to,
    #  the delay until the buffers will output their values]

    ### Write out instructions

    # Write out initial buffer values
    bdata = []
    # reversed order, so that grad board is enabled last of all (to avoid spurious initial transfer)
    for k, ib in enumerate(reversed(initial_bufs)):
        bdata.append(instb(MARGA_BUFS-1-k, k, ib))

    last_buf_time_left = np.zeros(MARGA_BUFS, dtype=np.int32)
    buf_time_left = np.zeros(MARGA_BUFS, dtype=np.int32)
    # buf_empty_time = np.zeros(MARGA_BUFS, dtype=np.int32)
    debug_print("changes:")
    for k in changes:
        debug_print(k)

    for event in changes:
        b_instrs = event[1].size
        dtime = event[0]

        # soak up any extra time which is in excess of what the instructions need to execute synchronously
        excess_dtime = dtime - b_instrs
        excess_dtime_tmp = excess_dtime
        while excess_dtime_tmp > 2: # delay of 3 or more cycles needed
            wait_time = min(excess_dtime_tmp, COUNTER_MAX + 3) # delay for the time instruction
            bdata.append(insta(IWAIT, wait_time - 3))
            excess_dtime_tmp -= wait_time
            debug_print("i wait ", wait_time - 3)
        if excess_dtime_tmp: # final delay of 1 or 2 cycles
            # Only pad the cycles still outstanding after the waits above.
            # Looping over the full excess here would repeat the whole
            # delay as nops whenever a maximum-length wait left a
            # remainder of 1 or 2 cycles.
            for k in range(excess_dtime_tmp):
                debug_print("i nop")
                bdata.append(insta(INOP, 0))

        # time left after delays from nops or waits
        # dtime_eff could be increased later with a more advanced
        # compiler, to make the buffers bear more of the internal
        # delays
        # dtime_eff = b_instrs

        # count down the times until each channel buffer will be empty
        buf_time_left -= excess_dtime
        buf_time_left[buf_time_left < 0] = 0
        this_time_offset = event[3]
        debug_print("--- dtime {:2d}, this_time_offset: {:2d}, b_instrs: {:2d}, lbtl: ".format(dtime, this_time_offset, b_instrs), last_buf_time_left[5:9])
        for m, (ind, dat) in enumerate(zip(event[1], event[2])):
            execution_delay = b_instrs - m - 1 #+ time - 2
            btli = buf_time_left[ind]
            buf_empty = btli <= m
            if buf_empty: # buffer empty for this instruction; need an appropriate delay only for sync
                # (check against m since with successive cycles, remaining buffers will empty out)
                extra_delay = execution_delay + this_time_offset
                buf_time_left[ind] = this_time_offset + b_instrs
            else:
                # buffer already not empty on this cycle
                extra_delay = this_time_offset - btli + b_instrs - 1
                buf_time_left[ind] += extra_delay + 1

            debug_print("bti={:d} btli={:d} m={:d} empty={:d} edel={:d} instb i {:d} del {:d} dat {:d}".format(
                buf_time_left[ind], btli, m, buf_empty, execution_delay, ind, extra_delay, dat))
            bdata.append(instb(ind, extra_delay, dat))

        buf_time_left -= b_instrs # take into account execution time of this timestep

    # Finish sequence
    bdata.append(insta(IFINISH, 0))
    return bdata

# CIC_SLOWEST_RATE rounded up to the next power of two (1 << ceil(log2(rate)));
# cic_words() uses it as the reference rate for its gain calculation.
CIC_SLOWEST_RATE_NEAREST_POW2 = 1 << np.ceil(np.log2(CIC_SLOWEST_RATE)).astype(int)

def cic_words(rate, set_cic_shift=False):
    """Calculate the data words to transfer to the CIC for a given rate.

    rate: decimation rate (scalar or array); every element must lie
    between CIC_FASTEST_RATE and CIC_SLOWEST_RATE inclusive.
    set_cic_shift: when true, the gain-shift word is returned ahead of
    the rate word; otherwise only the rate word is returned.

    Returns (words, excess_factor), where words is a tuple of data words
    and excess_factor is the fractional part of the gain (a factor
    2**fraction) that is not covered by the integer shift.
    """
    in_range = (CIC_FASTEST_RATE <= rate) & (rate <= CIC_SLOWEST_RATE)
    assert np.all( in_range ), "RX rate outside valid range"

    # Floating-point gain relative to the slowest rate rounded up to a power of two
    log2_gain = CIC_STAGES * np.log2( CIC_SLOWEST_RATE_NEAREST_POW2 / rate )
    shift = np.int32(log2_gain) # integer part of the gain exponent

    # the field above the data bits selects the word type: 1 for shift, 0 for rate
    shift_word = (1 << CIC_RATE_DATAWIDTH) | shift
    rate_word = (0 << CIC_RATE_DATAWIDTH) | rate
    excess_factor = 2**(log2_gain - shift)

    words = (shift_word, rate_word) if set_cic_shift else (rate_word,)
    return words, excess_factor

if __name__ == "__main__":
    # Manual run: compile the CSV found at a fixed temporary location
    default_csv = "/tmp/marga.csv"
    csv2bin(default_csv)