-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathextrn.mac
More file actions
536 lines (447 loc) · 21.4 KB
/
extrn.mac
File metadata and controls
536 lines (447 loc) · 21.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
; Copyright 1995-2021 Mersenne Research, Inc. All rights reserved
; Author: George Woltman
; Email: woltman@alum.mit.edu
;
; Common EXTRN definitions for the assembly language files. Well, the EXTRN's to global variables have been replaced
; by a C structure so we can support multi-threading. The definitions here must match the structure defined in gwtables.h.
; Typecast shorthands. PPTR is the pointer-sized cast, so its width follows
; the build target: QWORD in 64-bit mode, DWORD in 32-bit mode.
IFDEF X86_64
PPTR EQU <QWORD PTR>
ELSE
PPTR EQU <DWORD PTR>
ENDIF
; Fixed-width casts, identical in both modes.
BPTR EQU <BYTE PTR>
DPTR EQU <DWORD PTR>
QPTR EQU <QWORD PTR>
XPTR EQU <XMMWORD PTR>
YPTR EQU <YMMWORD PTR>
ZPTR EQU <ZMMWORD PTR>
; Base address expression used to reach the asm_data structure.
; 64-bit mode: one of the extra 8 registers (r11) holds the base. The rsp
; trick cannot be used there because it violates the Windows exception
; handling stack unwind mechanism.
; 32-bit mode: we are so starved for registers that the stack pointer
; (plus push_amt) is used to access the asm_data.
IFDEF X86_64
AD_BASE EQU <r11>
ELSE
AD_BASE EQU <rsp+push_amt>
ENDIF
; This structure defines the C / ASM code data interface. A pointer to
; this structure is passed to nearly every asm routine.
; Each name below is a text equate that expands to a typed memory operand
; relative to AD_BASE. The offsets must stay in step with the C structure
; in gwtables.h.
; First group: 16 pointer-sized slots (indexes 0-15), each scaled by SZPTR.
; Pointer to the entire structure
; NOTE: ASM_DATA and DESTARG both name slot 0.
ASM_DATA EQU PPTR [AD_BASE+0*SZPTR]
DESTARG EQU PPTR [AD_BASE+0*SZPTR]
DIST_TO_FFTSRCARG EQU PPTR [AD_BASE+1*SZPTR]
DIST_TO_MULSRCARG EQU PPTR [AD_BASE+2*SZPTR]
NORMRTN EQU PPTR [AD_BASE+3*SZPTR]
SAVED_RSP EQU PPTR [AD_BASE+4*SZPTR]
SRCARG EQU PPTR [AD_BASE+5*SZPTR]
SRC2ARG EQU PPTR [AD_BASE+6*SZPTR]
DEST2ARG EQU PPTR [AD_BASE+7*SZPTR]
DATA_ADDR EQU PPTR [AD_BASE+8*SZPTR]
DATA_PREFETCH EQU PPTR [AD_BASE+9*SZPTR]
PREMULT_ADDR EQU PPTR [AD_BASE+10*SZPTR]
PREMULT_PREFETCH EQU PPTR [AD_BASE+11*SZPTR]
PASS1_WAKE_UP_THREADS EQU PPTR [AD_BASE+12*SZPTR]
PASS1_PRE_CARRIES EQU PPTR [AD_BASE+13*SZPTR]
PASS1_POST_CARRIES EQU PPTR [AD_BASE+14*SZPTR]
PASS1_GET_NEXT_BLOCK EQU PPTR [AD_BASE+15*SZPTR]
; Fixed-size scalar members. They sit after 16 pointer-sized slots, so every
; offset here is 16*SZPTR plus a byte displacement (+0 through +127).
; 8-byte and 4-byte members at displacements +0..+23.
DBLARG EQU QPTR [AD_BASE+16*SZPTR]
NUMARG EQU DPTR [AD_BASE+16*SZPTR+8]
FFTLEN EQU DPTR [AD_BASE+16*SZPTR+12]
MAXERR EQU QPTR [AD_BASE+16*SZPTR+16]
; One-byte members at displacements +24..+35.
ALL_COMPLEX_FFT EQU BPTR [AD_BASE+16*SZPTR+24]
B_IS_2 EQU BPTR [AD_BASE+16*SZPTR+25]
RATIONAL_FFT EQU BPTR [AD_BASE+16*SZPTR+26]
ZERO_PADDED_FFT EQU BPTR [AD_BASE+16*SZPTR+27]
ZPAD_TYPE EQU BPTR [AD_BASE+16*SZPTR+28]
ffttype EQU BPTR [AD_BASE+16*SZPTR+29]
TOP_CARRY_NEEDS_ADJUSTING EQU BPTR [AD_BASE+16*SZPTR+30]
SPREAD_CARRY_OVER_EXTRA_WORDS EQU BPTR [AD_BASE+16*SZPTR+31]
zero_fft EQU BPTR [AD_BASE+16*SZPTR+32]
const_fft EQU BPTR [AD_BASE+16*SZPTR+33]
add_sub_smallmul_op EQU BPTR [AD_BASE+16*SZPTR+34]
mul4_opcode EQU BPTR [AD_BASE+16*SZPTR+35]
; Displacements +36..+39 have no equate (the 4 unused chars recorded below);
; 4-byte and 8-byte members resume at +40.
;; UNUSED_CHARS[4] EQU BPTR [AD_BASE+16*SZPTR+36]
ADDIN_ROW EQU DPTR [AD_BASE+16*SZPTR+40]
ADDIN_OFFSET EQU DPTR [AD_BASE+16*SZPTR+44]
ADDIN_VALUE EQU QPTR [AD_BASE+16*SZPTR+48]
ttmp_ff_inv EQU QPTR [AD_BASE+16*SZPTR+56]
; Consecutive 4-byte members from +64 through +124.
THREAD_NUM EQU DPTR [AD_BASE+16*SZPTR+64]
THIS_BLOCK EQU DPTR [AD_BASE+16*SZPTR+68]
NEXT_BLOCK EQU DPTR [AD_BASE+16*SZPTR+72]
LAST_PASS1_BLOCK EQU DPTR [AD_BASE+16*SZPTR+76]
normcount1 EQU DPTR [AD_BASE+16*SZPTR+80]
count1 EQU DPTR [AD_BASE+16*SZPTR+84]
count2 EQU DPTR [AD_BASE+16*SZPTR+88]
count3 EQU DPTR [AD_BASE+16*SZPTR+92]
count4 EQU DPTR [AD_BASE+16*SZPTR+96]
count5 EQU DPTR [AD_BASE+16*SZPTR+100]
addcount1 EQU DPTR [AD_BASE+16*SZPTR+104]
normval1 EQU DPTR [AD_BASE+16*SZPTR+108]
normval4 EQU DPTR [AD_BASE+16*SZPTR+112]
cache_line_multiplier EQU DPTR [AD_BASE+16*SZPTR+116]
BIGLIT_INCR2 EQU DPTR [AD_BASE+16*SZPTR+120] ;; Not used by AVX code
BIGLIT_INCR4 EQU DPTR [AD_BASE+16*SZPTR+124] ;; Not used by AVX code
; Second group of pointer-sized slots. The slot index continues at 16 and
; runs through 47; the constant +128 displacement accounts for the 128 bytes
; of fixed-size members that sit between the two pointer groups.
carries EQU PPTR [AD_BASE+16*SZPTR+128]
norm_grp_mults EQU PPTR [AD_BASE+17*SZPTR+128]
norm_col_mults EQU PPTR [AD_BASE+18*SZPTR+128]
norm_biglit_array EQU PPTR [AD_BASE+19*SZPTR+128]
norm_ptr1 EQU PPTR [AD_BASE+20*SZPTR+128]
norm_ptr2 EQU PPTR [AD_BASE+21*SZPTR+128] ;; Not used by AVX code
normblkdst EQU PPTR [AD_BASE+22*SZPTR+128]
normblkdst8 EQU PPTR [AD_BASE+23*SZPTR+128]
normval2 EQU PPTR [AD_BASE+24*SZPTR+128]
normval3 EQU PPTR [AD_BASE+25*SZPTR+128]
scratch_area EQU PPTR [AD_BASE+26*SZPTR+128]
plus1_premults EQU PPTR [AD_BASE+27*SZPTR+128]
fourKBgapsize EQU PPTR [AD_BASE+28*SZPTR+128]
pass2gapsize EQU PPTR [AD_BASE+29*SZPTR+128]
pass1blkdst EQU PPTR [AD_BASE+30*SZPTR+128]
pass2blkdst EQU PPTR [AD_BASE+31*SZPTR+128]
compressed_biglits EQU PPTR [AD_BASE+32*SZPTR+128]
compressed_fudges EQU PPTR [AD_BASE+33*SZPTR+128]
GWDATA EQU PPTR [AD_BASE+34*SZPTR+128]
PASS2_WAKE_UP_THREADS EQU PPTR [AD_BASE+35*SZPTR+128]
PASS2_GET_NEXT_BLOCK EQU PPTR [AD_BASE+36*SZPTR+128]
THREAD_WORK_ROUTINE EQU PPTR [AD_BASE+37*SZPTR+128]
xsincos_complex EQU PPTR [AD_BASE+38*SZPTR+128]
sincos1 EQU PPTR [AD_BASE+39*SZPTR+128] ;; Not used in AVX-512
sincos2 EQU PPTR [AD_BASE+40*SZPTR+128]
sincos3 EQU PPTR [AD_BASE+41*SZPTR+128]
sincos4 EQU PPTR [AD_BASE+42*SZPTR+128] ;; could be moved to UNION area like sincos6, sincos7, etc.
sincos5 EQU PPTR [AD_BASE+43*SZPTR+128] ;; could be moved to UNION area like sincos6, sincos7, etc.
; Slots 44 and 45 are kept as commented-out placeholders so the slot
; numbering of the equates that follow stays visible.
;;hyperthread_id EQU PPTR [AD_BASE+44*SZPTR+128] ;; Only referenced in C code
;;hyperthread_work_to_do EQU PPTR [AD_BASE+45*SZPTR+128] ;; Only referenced in C code
SRC3ARG EQU PPTR [AD_BASE+46*SZPTR+128]
ASM_TIMERS EQU PPTR [AD_BASE+47*SZPTR+128]
; Members that follow the 48 pointer-sized slots: every offset here is
; 48*SZPTR plus a byte displacement.
; COPYZERO is at +128 and the next equate (K) is at +160, so the 32 bytes in
; between are reachable only through COPYZERO.
; NOTE(review): presumably COPYZERO is an array of 4-byte values -- confirm in gwtables.h.
COPYZERO EQU DPTR [AD_BASE+48*SZPTR+128]
; 8-byte members at +160..+248.
K EQU QPTR [AD_BASE+48*SZPTR+160]
INVERSE_K EQU QPTR [AD_BASE+48*SZPTR+168]
TWO_TO_17 EQU QPTR [AD_BASE+48*SZPTR+176]
CARRY_ADJUST1 EQU QPTR [AD_BASE+48*SZPTR+184]
CARRY_ADJUST2 EQU QPTR [AD_BASE+48*SZPTR+192]
CARRY_ADJUST3 EQU QPTR [AD_BASE+48*SZPTR+200]
CARRY_ADJUST4 EQU QPTR [AD_BASE+48*SZPTR+208]
CARRY_ADJUST5 EQU QPTR [AD_BASE+48*SZPTR+216]
CARRY_ADJUST6 EQU QPTR [AD_BASE+48*SZPTR+224]
CARRY_ADJUST7 EQU QPTR [AD_BASE+48*SZPTR+232]
CARRY_ADJUST1_HI EQU QPTR [AD_BASE+48*SZPTR+240]
CARRY_ADJUST1_LO EQU QPTR [AD_BASE+48*SZPTR+248]
; Six 4-byte members at +256..+276.
HIGH_WORD1_OFFSET EQU DPTR [AD_BASE+48*SZPTR+256]
HIGH_WORD2_OFFSET EQU DPTR [AD_BASE+48*SZPTR+260]
HIGH_WORD3_OFFSET EQU DPTR [AD_BASE+48*SZPTR+264]
HIGH_SCRATCH1_OFFSET EQU DPTR [AD_BASE+48*SZPTR+268]
HIGH_SCRATCH2_OFFSET EQU DPTR [AD_BASE+48*SZPTR+272]
HIGH_SCRATCH3_OFFSET EQU DPTR [AD_BASE+48*SZPTR+276]
; ZPAD_* groups, listed from index 6 down to 1. Groups 6 through 2 each hold
; five 8-byte members (INVERSE_K, K_HI, K_MID, K_LO, SHIFT); group 1 holds
; four because it has no K_MID member.
ZPAD_INVERSE_K6 EQU QPTR [AD_BASE+48*SZPTR+280]
ZPAD_K6_HI EQU QPTR [AD_BASE+48*SZPTR+288]
ZPAD_K6_MID EQU QPTR [AD_BASE+48*SZPTR+296]
ZPAD_K6_LO EQU QPTR [AD_BASE+48*SZPTR+304]
ZPAD_SHIFT6 EQU QPTR [AD_BASE+48*SZPTR+312]
ZPAD_INVERSE_K5 EQU QPTR [AD_BASE+48*SZPTR+320]
ZPAD_K5_HI EQU QPTR [AD_BASE+48*SZPTR+328]
ZPAD_K5_MID EQU QPTR [AD_BASE+48*SZPTR+336]
ZPAD_K5_LO EQU QPTR [AD_BASE+48*SZPTR+344]
ZPAD_SHIFT5 EQU QPTR [AD_BASE+48*SZPTR+352]
ZPAD_INVERSE_K4 EQU QPTR [AD_BASE+48*SZPTR+360]
ZPAD_K4_HI EQU QPTR [AD_BASE+48*SZPTR+368]
ZPAD_K4_MID EQU QPTR [AD_BASE+48*SZPTR+376]
ZPAD_K4_LO EQU QPTR [AD_BASE+48*SZPTR+384]
ZPAD_SHIFT4 EQU QPTR [AD_BASE+48*SZPTR+392]
ZPAD_INVERSE_K3 EQU QPTR [AD_BASE+48*SZPTR+400]
ZPAD_K3_HI EQU QPTR [AD_BASE+48*SZPTR+408]
ZPAD_K3_MID EQU QPTR [AD_BASE+48*SZPTR+416]
ZPAD_K3_LO EQU QPTR [AD_BASE+48*SZPTR+424]
ZPAD_SHIFT3 EQU QPTR [AD_BASE+48*SZPTR+432]
ZPAD_INVERSE_K2 EQU QPTR [AD_BASE+48*SZPTR+440]
ZPAD_K2_HI EQU QPTR [AD_BASE+48*SZPTR+448]
ZPAD_K2_MID EQU QPTR [AD_BASE+48*SZPTR+456]
ZPAD_K2_LO EQU QPTR [AD_BASE+48*SZPTR+464]
ZPAD_SHIFT2 EQU QPTR [AD_BASE+48*SZPTR+472]
ZPAD_INVERSE_K1 EQU QPTR [AD_BASE+48*SZPTR+480]
ZPAD_K1_HI EQU QPTR [AD_BASE+48*SZPTR+488]
ZPAD_K1_LO EQU QPTR [AD_BASE+48*SZPTR+496]
ZPAD_SHIFT1 EQU QPTR [AD_BASE+48*SZPTR+504]
; Seven consecutive 8-byte members, ZPAD0 through ZPAD6.
ZPAD0 EQU QPTR [AD_BASE+48*SZPTR+512]
ZPAD1 EQU QPTR [AD_BASE+48*SZPTR+520]
ZPAD2 EQU QPTR [AD_BASE+48*SZPTR+528]
ZPAD3 EQU QPTR [AD_BASE+48*SZPTR+536]
ZPAD4 EQU QPTR [AD_BASE+48*SZPTR+544]
ZPAD5 EQU QPTR [AD_BASE+48*SZPTR+552]
ZPAD6 EQU QPTR [AD_BASE+48*SZPTR+560]
; +568 has no equate; the 8 bytes there are recorded as unused.
;;UNUSED_DOUBLES[1] EQU QPTR [AD_BASE+48*SZPTR+568]
;; AVX-512 counts. Renamed from the counts above for better readability.
;; NOTE(review): the only rename present is normblkdst4, an alias for the
;; pointer-sized norm_ptr2 slot rather than a count -- confirm this heading
;; is still accurate.
;; The alias is defined only when X86_64 is defined.
IFDEF X86_64
normblkdst4 EQU norm_ptr2 ;; Distance between 4 normalize blocks. Used in one-pass AVX-512 FFTs.
ENDIF
; The union begins here
; UNION_BASE is the address expression for the first byte of the union:
; 48 pointer-sized slots plus 576 bytes past AD_BASE. The AVX-512, AVX, SSE2
; and x87 groups that follow are all laid out from this same base, so their
; offsets overlap one another.
UNION_BASE EQU AD_BASE+48*SZPTR+576
; Values only used in AVX-512 ffts
; These equates exist only when X86_64 is defined.
; Layout: 4 pointer-sized slots (indexes 0-3), then members addressed as
; 4*SZPTR plus an 8-byte index.
IFDEF X86_64
ZMM_SRC_INCR EQU PPTR [UNION_BASE+0*SZPTR+0*8]
ZMM_PASS2_ROUTINE EQU PPTR [UNION_BASE+1*SZPTR+0*8]
ZMM_CARRIES_ROUTINE EQU PPTR [UNION_BASE+2*SZPTR+0*8]
ZMM_OP_CARRIES_ROUTINE EQU PPTR [UNION_BASE+3*SZPTR+0*8]
ZMM_MINUS_C EQU QPTR [UNION_BASE+4*SZPTR+0*8]
ZMM_MULCONST EQU QPTR [UNION_BASE+4*SZPTR+1*8]
ZMM_MINUS_C_TIMES_MULCONST EQU QPTR [UNION_BASE+4*SZPTR+2*8]
; Byte-typed access to the 8-byte slot at index 3.
ZMM_FIRST_BIGLIT_VALUES EQU BPTR [UNION_BASE+4*SZPTR+3*8]
ZMM_RNDVAL EQU QPTR [UNION_BASE+4*SZPTR+4*8]
ZMM_ABSVAL EQU QPTR [UNION_BASE+4*SZPTR+5*8]
ZMM_LARGE_BASE EQU QPTR [UNION_BASE+4*SZPTR+6*8]
ZMM_LARGE_BASE_INVERSE EQU QPTR [UNION_BASE+4*SZPTR+7*8]
ZMM_RNDVAL_TIMES_LARGE_BASE EQU QPTR [UNION_BASE+4*SZPTR+8*8]
ZMM_RNDVAL_OVER_LARGE_BASE EQU QPTR [UNION_BASE+4*SZPTR+9*8]
ZMM_SMALL_BASE EQU QPTR [UNION_BASE+4*SZPTR+10*8]
ZMM_SMALL_BASE_INVERSE EQU QPTR [UNION_BASE+4*SZPTR+11*8]
ZMM_RNDVAL_TIMES_SMALL_BASE EQU QPTR [UNION_BASE+4*SZPTR+12*8]
ZMM_RNDVAL_OVER_SMALL_BASE EQU QPTR [UNION_BASE+4*SZPTR+13*8]
ZMM_K_LO EQU QPTR [UNION_BASE+4*SZPTR+14*8]
ZMM_K_HI_OVER_SMALL_BASE EQU QPTR [UNION_BASE+4*SZPTR+15*8]
ZMM_K_HI_OVER_LARGE_BASE EQU QPTR [UNION_BASE+4*SZPTR+16*8]
ZMM_K_TIMES_MULCONST_LO EQU QPTR [UNION_BASE+4*SZPTR+17*8]
ZMM_K_TIMES_MULCONST_HI_OVER_SMALL_BASE EQU QPTR [UNION_BASE+4*SZPTR+18*8]
ZMM_K_TIMES_MULCONST_HI_OVER_LARGE_BASE EQU QPTR [UNION_BASE+4*SZPTR+19*8]
; ZMM_383_707_383_1 is a 32-byte (YMMWORD) view starting at the same address
; as ZMM_ONE, so it spans the four 8-byte members at indexes 20-23.
ZMM_383_707_383_1 EQU YPTR [UNION_BASE+4*SZPTR+20*8]
ZMM_ONE EQU QPTR [UNION_BASE+4*SZPTR+20*8]
ZMM_P383 EQU QPTR [UNION_BASE+4*SZPTR+21*8]
ZMM_SQRTHALF EQU QPTR [UNION_BASE+4*SZPTR+22*8]
ZMM_P383_1 EQU QPTR [UNION_BASE+4*SZPTR+23*8]
ZMM_TWO EQU QPTR [UNION_BASE+4*SZPTR+24*8]
ZMM_HALF EQU QPTR [UNION_BASE+4*SZPTR+25*8]
ZMM_P924_P383 EQU QPTR [UNION_BASE+4*SZPTR+26*8]
ZMM_P981_P195 EQU QPTR [UNION_BASE+4*SZPTR+27*8]
ZMM_P195 EQU QPTR [UNION_BASE+4*SZPTR+28*8]
ZMM_P831_P556 EQU QPTR [UNION_BASE+4*SZPTR+29*8]
ZMM_P556_P195 EQU QPTR [UNION_BASE+4*SZPTR+30*8]
ZMM_SQRT2 EQU QPTR [UNION_BASE+4*SZPTR+31*8]
ZMM_P866 EQU QPTR [UNION_BASE+4*SZPTR+32*8]
ZMM_P259_P707 EQU QPTR [UNION_BASE+4*SZPTR+33*8]
ZMM_P966_P707 EQU QPTR [UNION_BASE+4*SZPTR+34*8]
ZMM_P309 EQU QPTR [UNION_BASE+4*SZPTR+35*8]
ZMM_P809 EQU QPTR [UNION_BASE+4*SZPTR+36*8]
ZMM_P951 EQU QPTR [UNION_BASE+4*SZPTR+37*8]
ZMM_P588_P951 EQU QPTR [UNION_BASE+4*SZPTR+38*8]
ZMM_P623 EQU QPTR [UNION_BASE+4*SZPTR+39*8]
ZMM_P901 EQU QPTR [UNION_BASE+4*SZPTR+40*8]
ZMM_P975 EQU QPTR [UNION_BASE+4*SZPTR+41*8]
ZMM_P223 EQU QPTR [UNION_BASE+4*SZPTR+42*8]
ZMM_P434_P975 EQU QPTR [UNION_BASE+4*SZPTR+43*8]
ZMM_P782_P975 EQU QPTR [UNION_BASE+4*SZPTR+44*8]
ZMM_P901_P975 EQU QPTR [UNION_BASE+4*SZPTR+45*8]
ZMM_P623_P975 EQU QPTR [UNION_BASE+4*SZPTR+46*8]
ZMM_P223_P975 EQU QPTR [UNION_BASE+4*SZPTR+47*8]
ZMM_P1_P975 EQU QPTR [UNION_BASE+4*SZPTR+48*8]
ZMM_B EQU QPTR [UNION_BASE+4*SZPTR+49*8] ;; Used to calculate fudged group multiplier inverses
ZMM_ONE_OVER_B EQU QPTR [UNION_BASE+4*SZPTR+50*8] ;; Used to calculate fudged group multipliers
ZMM_PERMUTE1 EQU QPTR [UNION_BASE+4*SZPTR+51*8] ;; 0x0c040e0608000a02 = 8+4 0+4 8+6 0+6 8+0 0+0 8+2 0+2
ZMM_PERMUTE2 EQU QPTR [UNION_BASE+4*SZPTR+52*8] ;; 0x0d050f0709010b03 = 8+5 0+5 8+7 0+7 8+1 0+1 8+3 0+3
; Indexes 53-59 have no equate. The temporaries start at index 60 and are
; spaced 8 indexes (64 bytes, one ZMMWORD) apart. ZMM_TMPS and ZMM_TMP1 name
; the same address.
ZMM_TMPS EQU ZPTR [UNION_BASE+4*SZPTR+60*8]
ZMM_TMP1 EQU ZPTR [UNION_BASE+4*SZPTR+60*8]
ZMM_TMP2 EQU ZPTR [UNION_BASE+4*SZPTR+68*8]
ZMM_TMP3 EQU ZPTR [UNION_BASE+4*SZPTR+76*8]
ZMM_TMP4 EQU ZPTR [UNION_BASE+4*SZPTR+84*8]
ZMM_TMP5 EQU ZPTR [UNION_BASE+4*SZPTR+92*8]
ZMM_TMP6 EQU ZPTR [UNION_BASE+4*SZPTR+100*8]
ZMM_TMP7 EQU ZPTR [UNION_BASE+4*SZPTR+108*8]
ZMM_TMP8 EQU ZPTR [UNION_BASE+4*SZPTR+116*8]
ENDIF
; Values only used in AVX ffts
; These equates are not wrapped in an IFDEF, so they are defined in both
; 32-bit and 64-bit builds.
; Layout: 16 pointer-sized slots (indexes 0-15), then members addressed as
; 16*SZPTR plus an 8-byte index. YPTR members are spaced 4 indexes
; (32 bytes, one YMMWORD) apart.
YMM_SRC_INCR1 EQU PPTR [UNION_BASE+0*SZPTR+0*8]
YMM_SRC_INCR2 EQU PPTR [UNION_BASE+1*SZPTR+0*8]
YMM_SRC_INCR3 EQU PPTR [UNION_BASE+2*SZPTR+0*8]
YMM_SRC_INCR4 EQU PPTR [UNION_BASE+3*SZPTR+0*8]
YMM_SRC_INCR5 EQU PPTR [UNION_BASE+4*SZPTR+0*8]
YMM_SRC_INCR6 EQU PPTR [UNION_BASE+5*SZPTR+0*8]
YMM_SRC_INCR7 EQU PPTR [UNION_BASE+6*SZPTR+0*8]
YMM_NORM_INCR1 EQU PPTR [UNION_BASE+7*SZPTR+0*8]
YMM_NORM_INCR2 EQU PPTR [UNION_BASE+8*SZPTR+0*8]
YMM_NORM_INCR3 EQU PPTR [UNION_BASE+9*SZPTR+0*8]
YMM_NORM_INCR4 EQU PPTR [UNION_BASE+10*SZPTR+0*8]
YMM_NORM_INCR5 EQU PPTR [UNION_BASE+11*SZPTR+0*8]
YMM_NORM_INCR6 EQU PPTR [UNION_BASE+12*SZPTR+0*8]
YMM_NORM_INCR7 EQU PPTR [UNION_BASE+13*SZPTR+0*8]
YMM_CARRIES_ROUTINE EQU PPTR [UNION_BASE+14*SZPTR+0*8]
YMM_PASS2_ROUTINE EQU PPTR [UNION_BASE+15*SZPTR+0*8]
YMM_MINUS_C EQU YPTR [UNION_BASE+16*SZPTR+0*8]
YMM_HALF EQU YPTR [UNION_BASE+16*SZPTR+4*8]
YMM_SQRTHALF EQU YPTR [UNION_BASE+16*SZPTR+8*8]
;UNUSED_YMM_DOUBLES EQU YPTR [UNION_BASE+16*SZPTR+12*8]
YMM_MAXERR EQU YPTR [UNION_BASE+16*SZPTR+16*8]
YMM_ABSVAL EQU YPTR [UNION_BASE+16*SZPTR+20*8]
YMM_BIGVAL EQU YPTR [UNION_BASE+16*SZPTR+24*8]
YMM_BIGBIGVAL EQU YPTR [UNION_BASE+16*SZPTR+28*8]
YMM_NORM012_FF EQU YPTR [UNION_BASE+16*SZPTR+32*8]
YMM_MULCONST EQU YPTR [UNION_BASE+16*SZPTR+36*8]
YMM_K_LO EQU YPTR [UNION_BASE+16*SZPTR+40*8]
YMM_K_HI EQU YPTR [UNION_BASE+16*SZPTR+44*8]
YMM_K_TIMES_MULCONST_LO EQU YPTR [UNION_BASE+16*SZPTR+48*8]
YMM_K_TIMES_MULCONST_HI EQU YPTR [UNION_BASE+16*SZPTR+52*8]
YMM_MINUS_C_TIMES_MULCONST EQU YPTR [UNION_BASE+16*SZPTR+56*8]
; Byte-typed access at index 60; indexes 61-67 are recorded as unused below.
YMM_FIRST_BIGLIT_VALUES EQU BPTR [UNION_BASE+16*SZPTR+60*8]
;UNUSED_YMM_DOUBLES[3] EQU YPTR [UNION_BASE+16*SZPTR+61*8]
;UNUSED_YMM_DOUBLES2[4] EQU YPTR [UNION_BASE+16*SZPTR+64*8]
YMM_P924_P383 EQU YPTR [UNION_BASE+16*SZPTR+68*8]
YMM_P383 EQU YPTR [UNION_BASE+16*SZPTR+72*8]
YMM_P924 EQU YPTR [UNION_BASE+16*SZPTR+76*8]
YMM_P866 EQU YPTR [UNION_BASE+16*SZPTR+80*8]
YMM_P588 EQU YPTR [UNION_BASE+16*SZPTR+84*8]
YMM_P309 EQU YPTR [UNION_BASE+16*SZPTR+88*8]
YMM_P809 EQU YPTR [UNION_BASE+16*SZPTR+92*8]
YMM_P951 EQU YPTR [UNION_BASE+16*SZPTR+96*8]
YMM_P588_P951 EQU YPTR [UNION_BASE+16*SZPTR+100*8]
YMM_ONE EQU YPTR [UNION_BASE+16*SZPTR+104*8]
YMM_TWO EQU YPTR [UNION_BASE+16*SZPTR+108*8]
YMM_P975 EQU YPTR [UNION_BASE+16*SZPTR+112*8]
YMM_P782 EQU YPTR [UNION_BASE+16*SZPTR+116*8]
YMM_P623 EQU YPTR [UNION_BASE+16*SZPTR+120*8]
YMM_P901 EQU YPTR [UNION_BASE+16*SZPTR+124*8]
YMM_P434 EQU YPTR [UNION_BASE+16*SZPTR+128*8]
YMM_P223 EQU YPTR [UNION_BASE+16*SZPTR+132*8]
YMM_P975_P434 EQU YPTR [UNION_BASE+16*SZPTR+136*8]
YMM_P782_P434 EQU YPTR [UNION_BASE+16*SZPTR+140*8]
YMM_P259 EQU YPTR [UNION_BASE+16*SZPTR+144*8]
YMM_P966 EQU YPTR [UNION_BASE+16*SZPTR+148*8]
YMM_P259_P707 EQU YPTR [UNION_BASE+16*SZPTR+152*8]
YMM_P966_P707 EQU YPTR [UNION_BASE+16*SZPTR+156*8]
; The two LIMIT equates name the first element of larger regions: each is
; followed by 192 indexes (1536 bytes) before the next equate.
YMM_LIMIT_BIGMAX EQU YPTR [UNION_BASE+16*SZPTR+160*8]
YMM_LIMIT_INVERSE EQU YPTR [UNION_BASE+16*SZPTR+352*8]
; Temporaries, one YMMWORD each. YMM_TMPS and YMM_TMP1 name the same address.
YMM_TMPS EQU YPTR [UNION_BASE+16*SZPTR+544*8]
YMM_TMP1 EQU YPTR [UNION_BASE+16*SZPTR+544*8]
YMM_TMP2 EQU YPTR [UNION_BASE+16*SZPTR+548*8]
YMM_TMP3 EQU YPTR [UNION_BASE+16*SZPTR+552*8]
YMM_TMP4 EQU YPTR [UNION_BASE+16*SZPTR+556*8]
YMM_TMP5 EQU YPTR [UNION_BASE+16*SZPTR+560*8]
YMM_TMP6 EQU YPTR [UNION_BASE+16*SZPTR+564*8]
YMM_TMP7 EQU YPTR [UNION_BASE+16*SZPTR+568*8]
YMM_TMP8 EQU YPTR [UNION_BASE+16*SZPTR+572*8]
; Values only used in SSE2 ffts
; Layout: 16 pointer-sized slots (indexes 0-7 have equates, 8-15 are
; recorded as C-only or unused), then members addressed as 16*SZPTR plus an
; 8-byte index. XPTR members are spaced 2 indexes (16 bytes, one XMMWORD)
; apart.
sincos6 EQU PPTR [UNION_BASE+0*SZPTR] ;; Used in x87 FFTs too!
sincos7 EQU PPTR [UNION_BASE+1*SZPTR] ;; Used in x87 FFTs too!
sincos8 EQU PPTR [UNION_BASE+2*SZPTR] ;; Used in x87 FFTs too!
sincos9 EQU PPTR [UNION_BASE+3*SZPTR] ;; Used in x87 FFTs too!
sincos10 EQU PPTR [UNION_BASE+4*SZPTR] ;; Used in x87 FFTs too!
sincos11 EQU PPTR [UNION_BASE+5*SZPTR]
ZPAD_WORD5_OFFSET EQU PPTR [UNION_BASE+6*SZPTR]
ZPAD_WORD5_RBP_OFFSET EQU PPTR [UNION_BASE+7*SZPTR]
;;xmm_pass2_premults EQU PPTR [UNION_BASE+8*SZPTR] ;; Only referenced in C code
;;UNUSED_XMM_PTRS[7] EQU PPTR [UNION_BASE+9*SZPTR]
XMM_TWO EQU XPTR [UNION_BASE+16*SZPTR+0*8]
XMM_HALF EQU XPTR [UNION_BASE+16*SZPTR+2*8]
XMM_SQRTHALF EQU XPTR [UNION_BASE+16*SZPTR+4*8]
XMM_SUMOUT EQU XPTR [UNION_BASE+16*SZPTR+6*8]
XMM_MAXERR EQU XPTR [UNION_BASE+16*SZPTR+8*8]
XMM_ABSVAL EQU XPTR [UNION_BASE+16*SZPTR+10*8]
XMM_BIGVAL EQU XPTR [UNION_BASE+16*SZPTR+12*8]
XMM_BIGBIGVAL EQU XPTR [UNION_BASE+16*SZPTR+14*8]
XMM_BIGVAL_NEG EQU XPTR [UNION_BASE+16*SZPTR+16*8]
XMM_MINUS_C EQU XPTR [UNION_BASE+16*SZPTR+18*8]
XMM_NORM012_FF EQU XPTR [UNION_BASE+16*SZPTR+20*8]
XMM_MULCONST EQU XPTR [UNION_BASE+16*SZPTR+22*8]
XMM_K_LO EQU XPTR [UNION_BASE+16*SZPTR+24*8]
XMM_K_HI EQU XPTR [UNION_BASE+16*SZPTR+26*8]
XMM_K_TIMES_MULCONST_LO EQU XPTR [UNION_BASE+16*SZPTR+28*8]
XMM_K_TIMES_MULCONST_HI EQU XPTR [UNION_BASE+16*SZPTR+30*8]
XMM_MINUS_C_TIMES_MULCONST EQU XPTR [UNION_BASE+16*SZPTR+32*8]
XMM_P309 EQU XPTR [UNION_BASE+16*SZPTR+34*8] ; Values used in 20-reals
XMM_P809 EQU XPTR [UNION_BASE+16*SZPTR+36*8]
XMM_P951 EQU XPTR [UNION_BASE+16*SZPTR+38*8]
XMM_P588 EQU XPTR [UNION_BASE+16*SZPTR+40*8]
XMM_P618 EQU XPTR [UNION_BASE+16*SZPTR+42*8] ; Values used in PFA-5 (old v25 home-grown FFTs)
XMM_M809 EQU XPTR [UNION_BASE+16*SZPTR+44*8]
XMM_M262 EQU XPTR [UNION_BASE+16*SZPTR+46*8]
XMM_M382 EQU XPTR [UNION_BASE+16*SZPTR+48*8]
XMM_M162 EQU XPTR [UNION_BASE+16*SZPTR+50*8]
XMM_P866 EQU XPTR [UNION_BASE+16*SZPTR+52*8] ; Values used in PFA-6
XMM_P924 EQU XPTR [UNION_BASE+16*SZPTR+54*8]
XMM_P383 EQU XPTR [UNION_BASE+16*SZPTR+56*8]
XMM_M358 EQU XPTR [UNION_BASE+16*SZPTR+58*8] ; Values used in old PFA-7
XMM_P404 EQU XPTR [UNION_BASE+16*SZPTR+60*8]
XMM_P445 EQU XPTR [UNION_BASE+16*SZPTR+62*8]
XMM_P180 EQU XPTR [UNION_BASE+16*SZPTR+64*8]
XMM_P975 EQU XPTR [UNION_BASE+16*SZPTR+66*8] ; Values used in new and old PFA-7
XMM_P623 EQU XPTR [UNION_BASE+16*SZPTR+68*8]
XMM_P901 EQU XPTR [UNION_BASE+16*SZPTR+70*8] ; Values used in new PFA-7 (28-reals)
XMM_P782 EQU XPTR [UNION_BASE+16*SZPTR+72*8]
XMM_P434 EQU XPTR [UNION_BASE+16*SZPTR+74*8]
XMM_P223 EQU XPTR [UNION_BASE+16*SZPTR+76*8]
;;UNUSED_XMM_DOUBLES EQU XPTR [UNION_BASE+16*SZPTR+78*8]
; Temporaries, one XMMWORD each.
XMM_TMP1 EQU XPTR [UNION_BASE+16*SZPTR+80*8]
XMM_TMP2 EQU XPTR [UNION_BASE+16*SZPTR+82*8]
XMM_TMP3 EQU XPTR [UNION_BASE+16*SZPTR+84*8]
XMM_TMP4 EQU XPTR [UNION_BASE+16*SZPTR+86*8]
XMM_TMP5 EQU XPTR [UNION_BASE+16*SZPTR+88*8]
XMM_TMP6 EQU XPTR [UNION_BASE+16*SZPTR+90*8]
XMM_TMP7 EQU XPTR [UNION_BASE+16*SZPTR+92*8]
XMM_TMP8 EQU XPTR [UNION_BASE+16*SZPTR+94*8]
; The remaining equates name the first element of larger regions: the three
; LIMIT regions are 96 indexes (768 bytes) apart, and the two FUDGE regions
; are 32 indexes (256 bytes) apart. The extent of XMM_COL_MULTS is not
; visible from this file.
XMM_LIMIT_BIGMAX EQU XPTR [UNION_BASE+16*SZPTR+96*8]
XMM_LIMIT_INVERSE EQU XPTR [UNION_BASE+16*SZPTR+192*8]
XMM_LIMIT_BIGMAX_NEG EQU XPTR [UNION_BASE+16*SZPTR+288*8]
XMM_TTP_FUDGE EQU XPTR [UNION_BASE+16*SZPTR+384*8]
XMM_TTMP_FUDGE EQU XPTR [UNION_BASE+16*SZPTR+416*8]
XMM_COL_MULTS EQU XPTR [UNION_BASE+16*SZPTR+448*8]
; Values only used in x87 ffts
; Layout: 16 pointer-sized slots, then members addressed as 16*SZPTR plus an
; 8-byte index. Slots 0-4 are the sincos6..sincos10 equates, which are
; defined with the SSE2 group and therefore only listed here as comments.
;;sincos6 EQU PPTR [UNION_BASE+0*SZPTR] ;; Defined in XMM union area
;;sincos7 EQU PPTR [UNION_BASE+1*SZPTR] ;; Defined in XMM union area
;;sincos8 EQU PPTR [UNION_BASE+2*SZPTR] ;; Defined in XMM union area
;;sincos9 EQU PPTR [UNION_BASE+3*SZPTR] ;; Defined in XMM union area
;;sincos10 EQU PPTR [UNION_BASE+4*SZPTR] ;; Defined in XMM union area
zpad_addr EQU PPTR [UNION_BASE+5*SZPTR] ;; Next ZPAD value to write
pass2_premults EQU PPTR [UNION_BASE+6*SZPTR]
; POSTFFT is a single byte at the start of slot 7; the rest of that slot and
; slots 8-15 are recorded as unused.
POSTFFT EQU BPTR [UNION_BASE+7*SZPTR]
;;UNUSED_X87_CHARS[3] EQU PPTR [UNION_BASE+7*SZPTR+1]
;;UNUSED_X87_PTRS[8] EQU PPTR [UNION_BASE+8*SZPTR]
SQRTHALF EQU QPTR [UNION_BASE+16*SZPTR+0*8]
SUMOUT EQU QPTR [UNION_BASE+16*SZPTR+1*8]
; BIGVAL and BIGBIGVAL are 4-byte members sharing 8-byte index 2.
BIGVAL EQU DPTR [UNION_BASE+16*SZPTR+2*8]
BIGBIGVAL EQU DPTR [UNION_BASE+16*SZPTR+2*8+4]
MINUS_C EQU QPTR [UNION_BASE+16*SZPTR+3*8]
NORM012_FF EQU QPTR [UNION_BASE+16*SZPTR+4*8]
MULCONST EQU QPTR [UNION_BASE+16*SZPTR+5*8]
K_TIMES_MULCONST_LO EQU QPTR [UNION_BASE+16*SZPTR+6*8]
K_TIMES_MULCONST_HI EQU QPTR [UNION_BASE+16*SZPTR+7*8]
MINUS_C_TIMES_MULCONST EQU QPTR [UNION_BASE+16*SZPTR+8*8]
K_TIMES_MULCONST_HI_1 EQU QPTR [UNION_BASE+16*SZPTR+9*8]
K_TIMES_MULCONST_HI_2 EQU QPTR [UNION_BASE+16*SZPTR+10*8]
K_HI EQU QPTR [UNION_BASE+16*SZPTR+11*8]
K_LO EQU QPTR [UNION_BASE+16*SZPTR+12*8]
K_HI_1 EQU QPTR [UNION_BASE+16*SZPTR+13*8]
K_HI_2 EQU QPTR [UNION_BASE+16*SZPTR+14*8]
ALT_K_HI EQU QPTR [UNION_BASE+16*SZPTR+15*8]
ALT_K_LO EQU QPTR [UNION_BASE+16*SZPTR+16*8]
P309 EQU QPTR [UNION_BASE+16*SZPTR+17*8]
M809 EQU QPTR [UNION_BASE+16*SZPTR+18*8]
M262 EQU QPTR [UNION_BASE+16*SZPTR+19*8]
M382 EQU QPTR [UNION_BASE+16*SZPTR+20*8]
P951 EQU QPTR [UNION_BASE+16*SZPTR+21*8]
P588 EQU QPTR [UNION_BASE+16*SZPTR+22*8]
M162 EQU QPTR [UNION_BASE+16*SZPTR+23*8]
P618 EQU QPTR [UNION_BASE+16*SZPTR+24*8]
P623 EQU QPTR [UNION_BASE+16*SZPTR+25*8]
M358 EQU QPTR [UNION_BASE+16*SZPTR+26*8]
P404 EQU QPTR [UNION_BASE+16*SZPTR+27*8]
P975 EQU QPTR [UNION_BASE+16*SZPTR+28*8]
P445 EQU QPTR [UNION_BASE+16*SZPTR+29*8]
P180 EQU QPTR [UNION_BASE+16*SZPTR+30*8]
M223 EQU QPTR [UNION_BASE+16*SZPTR+31*8]
M901 EQU QPTR [UNION_BASE+16*SZPTR+32*8]
M691 EQU QPTR [UNION_BASE+16*SZPTR+33*8]
P866 EQU QPTR [UNION_BASE+16*SZPTR+34*8]
P433 EQU QPTR [UNION_BASE+16*SZPTR+35*8]
P577 EQU QPTR [UNION_BASE+16*SZPTR+36*8]
;;UNUSED_X87_DOUBLES[3] EQU QPTR [UNION_BASE+16*SZPTR+37*8]
; Four 4-byte members packed two per 8-byte index (indexes 40 and 41).
P25 EQU DPTR [UNION_BASE+16*SZPTR+40*8]
P75 EQU DPTR [UNION_BASE+16*SZPTR+40*8+4]
P3 EQU DPTR [UNION_BASE+16*SZPTR+41*8]
HALF EQU DPTR [UNION_BASE+16*SZPTR+41*8+4]
; Six 8-byte temporaries.
TMP1 EQU QPTR [UNION_BASE+16*SZPTR+42*8]
TMP2 EQU QPTR [UNION_BASE+16*SZPTR+43*8]
TMP3 EQU QPTR [UNION_BASE+16*SZPTR+44*8]
TMP4 EQU QPTR [UNION_BASE+16*SZPTR+45*8]
TMP5 EQU QPTR [UNION_BASE+16*SZPTR+46*8]
TMP6 EQU QPTR [UNION_BASE+16*SZPTR+47*8]
; The remaining equates name the first element of larger regions spaced
; 32 indexes (256 bytes) apart. The extent of TTMP_FUDGE is not visible from
; this file.
LIMIT_BIGMAX EQU QPTR [UNION_BASE+16*SZPTR+48*8]
LIMIT_INVERSE EQU QPTR [UNION_BASE+16*SZPTR+80*8]
LIMIT_BIGMAX_NEG EQU QPTR [UNION_BASE+16*SZPTR+112*8]
TTP_FUDGE EQU QPTR [UNION_BASE+16*SZPTR+144*8]
TTMP_FUDGE EQU QPTR [UNION_BASE+16*SZPTR+176*8]