Skip to content

Commit 87bdeae

Browse files
authored
Merge pull request #5470 from andralex/nomoresharedzisinparallelism
Eliminate static shared this from std/parallelism.d
merged-on-behalf-of: unknown
2 parents b5e6365 + 4f9b7ef commit 87bdeae

1 file changed

Lines changed: 116 additions & 72 deletions

File tree

std/parallelism.d

Lines changed: 116 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -99,86 +99,80 @@ else version(NetBSD)
9999
version = useSysctlbyname;
100100
}
101101

102-
103-
version(Windows)
104-
{
105-
// BUGS: Only works on Windows 2000 and above.
106-
shared static this()
107-
{
108-
import core.sys.windows.windows : SYSTEM_INFO, GetSystemInfo;
109-
import std.algorithm.comparison : max;
110-
111-
SYSTEM_INFO si;
112-
GetSystemInfo(&si);
113-
totalCPUs = max(1, cast(uint) si.dwNumberOfProcessors);
114-
}
115-
116-
}
117-
else version(linux)
118-
{
119-
shared static this()
120-
{
121-
import core.sys.posix.unistd : _SC_NPROCESSORS_ONLN, sysconf;
122-
totalCPUs = cast(uint) sysconf(_SC_NPROCESSORS_ONLN);
123-
}
124-
}
125-
else version(Solaris)
126-
{
127-
shared static this()
128-
{
129-
import core.sys.posix.unistd : _SC_NPROCESSORS_ONLN, sysconf;
130-
totalCPUs = cast(uint) sysconf(_SC_NPROCESSORS_ONLN);
131-
}
132-
}
133-
else version(useSysctlbyname)
102+
/*
103+
A lazily initialized global constant. The underlying value is a shared global
104+
statically initialized to `outOfBandValue` which must not be a legit value of
105+
the constant. Upon the first call the situation is detected and the global is
106+
initialized by calling `initializer`. The initializer is assumed to be pure
107+
(even if not marked as such), i.e. return the same value upon repeated calls.
108+
For that reason, no special precautions are taken so `initializer` may be called
109+
more than one time leading to benign races on the cached value.
110+
111+
In the quiescent state the cost of the function is an atomic load from a global.
112+
113+
Params:
114+
T = The type of the pseudo-constant (may be qualified)
115+
outOfBandValue = A value that cannot be valid, it is used for initialization
116+
initializer = The function performing initialization; must be `nothrow`
117+
118+
Returns:
119+
The lazily initialized value
120+
*/
121+
package @property pure
122+
T lazilyInitializedConstant(T, alias outOfBandValue, alias initializer)()
123+
if (is(Unqual!T : T)
124+
&& is(typeof(initializer()) : T)
125+
&& is(typeof(outOfBandValue) : T))
134126
{
135-
extern(C) int sysctlbyname(
136-
const char *, void *, size_t *, void *, size_t
137-
);
138-
139-
shared static this()
140-
{
141-
version(OSX)
142-
{
143-
auto nameStr = "machdep.cpu.core_count\0".ptr;
144-
}
145-
else version(FreeBSD)
146-
{
147-
auto nameStr = "hw.ncpu\0".ptr;
148-
}
149-
else version(NetBSD)
127+
static T impl() nothrow
128+
{
129+
// Thread-local cache
130+
static Unqual!T tls = outOfBandValue;
131+
auto local = tls;
132+
// Shortest path, no atomic operations
133+
if (local != outOfBandValue) return local;
134+
// Process-level cache
135+
static shared Unqual!T result = outOfBandValue;
136+
// Initialize both process-level cache and tls
137+
local = atomicLoad(result);
138+
if (local == outOfBandValue)
150139
{
151-
auto nameStr = "hw.ncpu\0".ptr;
140+
local = initializer();
141+
atomicStore(result, local);
152142
}
153-
154-
uint ans;
155-
size_t len = uint.sizeof;
156-
sysctlbyname(nameStr, &ans, &len, null, 0);
157-
totalCPUs = ans;
143+
tls = local;
144+
return local;
158145
}
159146

147+
import std.traits : SetFunctionAttributes;
148+
alias Fun = SetFunctionAttributes!(typeof(&impl), "D",
149+
functionAttributes!(typeof(&impl)) | FunctionAttribute.pure_);
150+
auto purified = (() @trusted => cast(Fun) &impl)();
151+
return purified();
160152
}
161-
else
162-
{
163-
static assert(0, "Don't know how to get N CPUs on this OS.");
164-
}
165153

166-
immutable size_t cacheLineSize;
167-
shared static this()
154+
// Returns the size of a cache line.
155+
alias cacheLineSize =
156+
lazilyInitializedConstant!(immutable(size_t), size_t.max, cacheLineSizeImpl);
157+
158+
private size_t cacheLineSizeImpl() @nogc nothrow @trusted
168159
{
160+
size_t result = 0;
169161
import core.cpuid : datacache;
170-
size_t lineSize = 0;
171-
foreach (cachelevel; datacache)
162+
foreach (ref const cachelevel; datacache)
172163
{
173-
if (cachelevel.lineSize > lineSize && cachelevel.lineSize < uint.max)
164+
if (cachelevel.lineSize > result && cachelevel.lineSize < uint.max)
174165
{
175-
lineSize = cachelevel.lineSize;
166+
result = cachelevel.lineSize;
176167
}
177168
}
178-
179-
cacheLineSize = lineSize;
169+
return result;
180170
}
181171

172+
@nogc @safe nothrow unittest
173+
{
174+
assert(cacheLineSize == cacheLineSizeImpl);
175+
}
182176

183177
/* Atomics code. These forward to core.atomic, but are written like this
184178
for two reasons:
@@ -945,11 +939,64 @@ if (is(typeof(fun(args))) && isSafeTask!F)
945939
return ret;
946940
}
947941

942+
version(useSysctlbyname)
943+
private extern(C) int sysctlbyname(
944+
const char *, void *, size_t *, void *, size_t
945+
) @nogc nothrow;
946+
948947
/**
949948
The total number of CPU cores available on the current machine, as reported by
950949
the operating system.
951950
*/
952-
immutable uint totalCPUs;
951+
alias totalCPUs =
952+
lazilyInitializedConstant!(immutable(uint), uint.max, totalCPUsImpl);
953+
954+
uint totalCPUsImpl() @nogc nothrow @trusted
955+
{
956+
version(Windows)
957+
{
958+
// BUGS: Only works on Windows 2000 and above.
959+
import core.sys.windows.windows : SYSTEM_INFO, GetSystemInfo;
960+
import std.algorithm.comparison : max;
961+
SYSTEM_INFO si;
962+
GetSystemInfo(&si);
963+
return max(1, cast(uint) si.dwNumberOfProcessors);
964+
}
965+
else version(linux)
966+
{
967+
import core.sys.posix.unistd : _SC_NPROCESSORS_ONLN, sysconf;
968+
return cast(uint) sysconf(_SC_NPROCESSORS_ONLN);
969+
}
970+
else version(Solaris)
971+
{
972+
import core.sys.posix.unistd : _SC_NPROCESSORS_ONLN, sysconf;
973+
return cast(uint) sysconf(_SC_NPROCESSORS_ONLN);
974+
}
975+
else version(useSysctlbyname)
976+
{
977+
version(OSX)
978+
{
979+
auto nameStr = "machdep.cpu.core_count\0".ptr;
980+
}
981+
else version(FreeBSD)
982+
{
983+
auto nameStr = "hw.ncpu\0".ptr;
984+
}
985+
else version(NetBSD)
986+
{
987+
auto nameStr = "hw.ncpu\0".ptr;
988+
}
989+
990+
uint result;
991+
size_t len = result.sizeof;
992+
sysctlbyname(nameStr, &result, &len, null, 0);
993+
return result;
994+
}
995+
else
996+
{
997+
static assert(0, "Don't know how to get N CPUs on this OS.");
998+
}
999+
}
9531000

9541001
/*
9551002
This class serves two purposes:
@@ -3294,11 +3341,7 @@ terminating the main thread.
32943341
}());
32953342
}
32963343

3297-
private shared uint _defaultPoolThreads;
3298-
shared static this()
3299-
{
3300-
atomicStore(_defaultPoolThreads, totalCPUs - 1);
3301-
}
3344+
private shared uint _defaultPoolThreads = uint.max;
33023345

33033346
/**
33043347
These properties get and set the number of worker threads in the $(D TaskPool)
@@ -3308,7 +3351,8 @@ number of worker threads in the instance returned by $(D taskPool).
33083351
*/
33093352
@property uint defaultPoolThreads() @trusted
33103353
{
3311-
return atomicLoad(_defaultPoolThreads);
3354+
const local = atomicLoad(_defaultPoolThreads);
3355+
return local < uint.max ? local : totalCPUs - 1;
33123356
}
33133357

33143358
/// Ditto

0 commit comments

Comments
 (0)