Skip to content

Commit 3964391

Browse files
committed
Restore assembly code for spilling callee-save registers.
This code was previously removed in change 194595 but the result was not reliable as noted in job004158.
1 parent d103d55 commit 3964391

4 files changed

Lines changed: 114 additions & 99 deletions

File tree

code/ss.h

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,17 @@
1414
#include "mpm.h"
1515

1616

17-
/* StackContext -- some of the mutator's state
18-
*
19-
* The jumpBuffer is used to capture most of the mutator's state on
20-
* entry to the MPS, but can't capture it all. See
21-
* <design/stack-scan#.sol.setjmp.scan>.
22-
*/
17+
/* StackContext -- some of the mutator's state */
2318

24-
#include <setjmp.h>
25-
26-
typedef struct StackContextStruct {
27-
jmp_buf jumpBuffer;
28-
} StackContextStruct;
19+
typedef struct StackContextStruct StackContextStruct;
2920

3021

3122
/* StackHot -- capture a hot stack pointer
3223
*
3324
* Sets *stackOut to a stack pointer that includes the current frame.
3425
*/
3526

27+
ATTRIBUTE_NOINLINE
3628
void StackHot(void **stackOut);
3729

3830

@@ -59,6 +51,46 @@ void StackHot(void **stackOut);
5951

6052
/* STACK_CONTEXT_SAVE -- save the callee-saves and stack pointer */
6153

54+
#if (defined(MPS_BUILD_GC) || defined(MPS_BUILD_LL)) && defined(MPS_ARCH_I3)
55+
56+
struct StackContextStruct {
57+
Word calleeSave[4];
58+
};
59+
60+
#define STACK_CONTEXT_SAVE(sc) \
61+
BEGIN \
62+
Word *_save = (sc)->calleeSave; \
63+
__asm__ volatile ("mov %%ebx, %0" : "=m" (_save[0])); \
64+
__asm__ volatile ("mov %%esi, %0" : "=m" (_save[1])); \
65+
__asm__ volatile ("mov %%edi, %0" : "=m" (_save[2])); \
66+
__asm__ volatile ("mov %%ebp, %0" : "=m" (_save[3])); \
67+
END
68+
69+
#elif (defined(MPS_BUILD_GC) || defined(MPS_BUILD_LL)) && defined(MPS_ARCH_I6)
70+
71+
struct StackContextStruct {
72+
Word calleeSave[6];
73+
};
74+
75+
#define STACK_CONTEXT_SAVE(sc) \
76+
BEGIN \
77+
Word *_save = (sc)->calleeSave; \
78+
__asm__ volatile ("mov %%rbp, %0" : "=m" (_save[0])); \
79+
__asm__ volatile ("mov %%rbx, %0" : "=m" (_save[1])); \
80+
__asm__ volatile ("mov %%r12, %0" : "=m" (_save[2])); \
81+
__asm__ volatile ("mov %%r13, %0" : "=m" (_save[3])); \
82+
__asm__ volatile ("mov %%r14, %0" : "=m" (_save[4])); \
83+
__asm__ volatile ("mov %%r15, %0" : "=m" (_save[5])); \
84+
END
85+
86+
#else /* jmp_buf platforms */
87+
88+
#include <setjmp.h>
89+
90+
struct StackContextStruct {
91+
jmp_buf jumpBuffer;
92+
};
93+
6294
#if defined(MPS_OS_XC)
6395

6496
/* We call _setjmp rather than setjmp because we can be confident what
@@ -74,7 +106,9 @@ void StackHot(void **stackOut);
74106

75107
#define STACK_CONTEXT_SAVE(sc) ((void)setjmp((sc)->jumpBuffer))
76108

77-
#endif /* platform defines */
109+
#endif /* jmp_buf platforms */
110+
111+
#endif /* platform specific code */
78112

79113

80114
/* StackScan -- scan the mutator's stack and registers
@@ -84,7 +118,7 @@ void StackHot(void **stackOut);
84118
*/
85119

86120
extern Res StackScan(ScanState ss, void *stackCold,
87-
mps_area_scan_t scan_area, void *closure);
121+
mps_area_scan_t scan_area, void *closure);
88122

89123

90124
#endif /* ss_h */

design/stack-scan-areas.svg

Lines changed: 11 additions & 12 deletions
Loading

design/stack-scan.txt

Lines changed: 52 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,6 @@ unnecessary pinning and zone pollution; see job003525_.)
7878
_`.req.setjmp`: The implementation must follow the C Standard in its
7979
use of the ``setjmp()`` macro. (So that it is reliable and portable.)
8080

81-
_`.req.assembly.not`: The implementation should not use assembly
82-
language. (So that it can be developed in tools like Microsoft Visual
83-
Studio that don't support this.)
84-
8581

8682
Design
8783
------
@@ -91,12 +87,12 @@ and stack must be recorded when the mutator enters the MPS, if there
9187
is a possibility that the MPS might need to know the mutator context.
9288

9389
_`.sol.entry-points.fragile`: The analysis of which entry points might
94-
need to save the context (see `.analysis.entry-points`_ below) is fragile.
95-
It might be incorrect now, or become incomplete if we refactor the
96-
internals of tracing and polling. As a defence against errors of this
97-
form, ``StackScan()`` asserts that the context was saved, but if the
98-
client program continues from the assertion, it saves the context
99-
anyway and continues.
90+
need to save the context (see `.analysis.entry-points`_ below) is
91+
fragile. It might be incorrect now, or become incomplete if we
92+
refactor the internals of tracing and polling. As a defence against
93+
errors of this form, ``StackScan()`` asserts that the context was
94+
saved, but if the client program continues from the assertion, it
95+
saves the context anyway and continues.
10096

10197
_`.sol.registers`: Implementations spill the root registers onto the
10298
stack so that they can be scanned there.
@@ -108,69 +104,23 @@ _`.sol.registers.root.justify`: The caller-save registers will have
108104
been spilled onto the stack by the time the MPS is entered, so will be
109105
scanned by the stack scan.
110106

111-
_`.sol.setjmp`: The values in callee-save registers can be found by
112-
invoking ``setjmp()``. This forces any of the caller's callee-save
113-
registers into either the ``jmp_buf`` or the current stack frame.
114-
115-
_`.sol.setjmp.scan`: Although we might be able to decode the jump
116-
buffer in a platform-dependent way, it's hard to guarantee that an
117-
uncooperative compiler won't temporarily store a reference in any
118-
register or stack location. We must conservatively scan the whole of
119-
both.
120-
121-
_`.sol.setjmp.justify`: The [C1990]_ standard specifies that
122-
``jmp_buf``:
123-
124-
is an array type suitable for holding the information needed to
125-
restore a calling environment. The environment of a call to the
126-
``setjmp()`` macro consists of information sufficient for a call
127-
to the ``longjmp()`` function to return execution to the correct
128-
block and invocation of that block, were it called recursively.
129-
130-
We believe that any reasonable implementation of ``setjmp()`` must
131-
copy the callee-save registers either into the jump buffer or into the
132-
stack frame that invokes it in order to work as described. Otherwise,
133-
once the callee-save registers have been overwritten by other function
134-
calls, a ``longjmp()`` would result in the callee-save registers
135-
having the wrong values. A ``longjmp()`` can come from anywhere, and
136-
so the function using ``setjmp()`` can't rely on callee-save registers
137-
being saved by callees.
138-
139-
_`.sol.stack.hot`: We could decode the frame of the function that
140-
invokes ``setjmp()`` from the jump buffer in a platform-specific way,
141-
but we can do something simpler (if more hacky) by calling the stub
142-
function ``StackHot()`` which takes the address of its argument. So
143-
long as this stub function is not inlined into the caller, then on all
144-
supported platforms this yields a pointer that is pretty much at the
145-
hot end of the frame.
146-
147-
_`.sol.stack.hot.noinline`: The reason that ``StackHot()`` must not be
148-
inlined is that after inlining, the compiler might place ``stackOut``
149-
at a colder stack address than the ``StackContextStruct``, causing the
150-
latter not to be scanned. See `mail.gdr.2018-07-11.09-48`_.
151-
152-
.. _mail.gdr.2018-07-11.09-48: https://info.ravenbrook.com/mail/2018/07/11/09-48-49/0/
153-
154-
_`.sol.stack.nest`: We can take care of scanning the jump buffer
155-
itself by storing it in the same stack frame. That way a scan from the
156-
hot end determined by `.sol.stack.hot`_ to the cold end will contain
157-
all of the roots.
158-
159-
_`.sol.stack.platform`: As of version 1.115, all supported platforms
160-
are *full* and *descending* so the implementation in ``StackScan()``
161-
assumes this. New platforms must check this assumption.
162-
163-
_`.sol.xc.alternative`: On macOS, we could use ``getcontext()`` from
164-
libunwind (see here_), but that produces deprecation warnings and
165-
introduces a dependency on that library.
107+
_`.sol.registers.root.i3`: On IA-32, the callee-save registers are
108+
EBX, ESI, EDI and EBP [Fog]_.
166109

167-
.. _here: https://stackoverflow.com/questions/3592914/
110+
_`.sol.registers.root.i6`: On x86-64, the callee-save registers are
111+
RBP, RBX, R12, R13, R14, and R15 [Fog]_.
168112

113+
_`.sol.assembler`: On platforms that support inline assembler, it is
114+
straightforward to copy the callee-save registers into the
115+
``StackContextStruct`` using assembler instructions.
169116

170-
Analysis
171-
--------
117+
_`.sol.setjmp`: On platforms that do not support inline assembler (in
118+
particular, Microsoft Visual C/C++) or where we do not know the
119+
callee-save registers (the generic or "ANSI" platform), the
120+
``StackContextStruct`` contains a ``jmp_buf`` structure, and the
121+
values in callee-save registers are saved by invoking ``setjmp()``.
172122

173-
_`.analysis.setjmp`: The [C1990]_ standard says:
123+
The [C1990]_ standard says:
174124

175125
An invocation of the ``setjmp`` macro shall appear only in one of
176126
the following contexts:
@@ -195,6 +145,39 @@ And the [C1999]_ standard adds:
195145
If the invocation appears in any other context, the behavior is
196146
undefined.
197147

148+
_`.sol.stack.hot`: We could decode the ``StackContextStruct`` in a
149+
platform-specific way, but we can do something simpler (if more hacky)
150+
by calling the stub function ``StackHot()`` which takes the address of
151+
its argument. So long as this stub function is not inlined into the
152+
caller, then on all supported platforms this yields a pointer that is
153+
pretty much at the hot end of the stack.
154+
155+
_`.sol.stack.hot.noinline`: The reason that ``StackHot()`` must not be
156+
inlined is that after inlining, the compiler might place ``stackOut``
157+
at a colder stack address than the ``StackContextStruct``, causing the
158+
latter not to be scanned. See `mail.gdr.2018-07-11.09-48`_.
159+
160+
.. _mail.gdr.2018-07-11.09-48: https://info.ravenbrook.com/mail/2018/07/11/09-48-49/0/
161+
162+
_`.sol.stack.nest`: We can take care of scanning the
163+
``StackContextStruct`` itself by storing it in the same stack frame.
164+
That way a scan from the hot end determined by `.sol.stack.hot`_ to
165+
the cold end will contain all of the roots.
166+
167+
_`.sol.stack.platform`: As of version 1.115, all supported platforms
168+
are *full* and *descending* so the implementation in ``StackScan()``
169+
assumes this. New platforms must check this assumption.
170+
171+
_`.sol.xc.alternative`: On macOS, we could use ``getcontext()`` from
172+
libunwind (see here_), but that produces deprecation warnings and
173+
introduces a dependency on that library.
174+
175+
.. _here: https://stackoverflow.com/questions/3592914/
176+
177+
178+
Analysis
179+
--------
180+
198181
_`.analysis.entry-points`: Here's a reverse call graph (in the master
199182
sources at changelevel 189652) showing which entry points might call
200183
``StackScan()`` and so need to record the stack context::

manual/source/topic/porting.rst

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,10 @@ usable.
100100
:term:`registers` and :term:`control stack` of the thread that
101101
entered the MPS.
102102

103-
See :ref:`design-stack-scan` for the design, ``ss.h`` for the
104-
interface, and ``ss.c`` for a generic implementation that makes
105-
assumptions about the platform (in particular, that the stack grows
106-
downwards and :c:func:`setjmp` reliably captures the registers; see
107-
the design for details).
103+
See :ref:`design-stack-scan` for the design, and ``ss.h`` and
104+
``ss.c`` for the implementation. There is a generic implementation
105+
that relies on :c:func:`setjmp` to capture the callee-save
106+
registers.
108107

109108
#. The **thread manager** module suspends and resumes :term:`threads`,
110109
so that the MPS can gain exclusive access to :term:`memory (2)`,

0 commit comments

Comments
 (0)