-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.bbl
More file actions
208 lines (175 loc) · 8.19 KB
/
Copy pathmain.bbl
File metadata and controls
208 lines (175 loc) · 8.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
\begin{thebibliography}{10}
\bibitem{Hoard}
E.~D. Berger, K.~S. McKinley, R.~D. Blumofe, and P.~R. Wilson.
\newblock Hoard: a scalable memory allocator for multithreaded applications.
\newblock In {\em ASPLOS-IX: Proceedings of the ninth international conference
on Architectural support for programming languages and operating systems},
pages 117--128, New York, NY, USA, 2000. ACM Press.
\bibitem{Berger:2001:CHM:378795.378821}
E.~D. Berger, B.~G. Zorn, and K.~S. McKinley.
\newblock Composing high-performance memory allocators.
\newblock In {\em Proceedings of the ACM SIGPLAN 2001 Conference on Programming
Language Design and Implementation}, PLDI '01, pages 114--124, New York, NY,
USA, 2001. ACM.
\bibitem{parsec}
C.~Bienia.
\newblock {\em Benchmarking Modern Multiprocessors}.
\newblock PhD thesis, Princeton University, January 2011.
\bibitem{falseshare:effect}
W.~J. Bolosky and M.~L. Scott.
\newblock False sharing and its effect on shared memory performance.
\newblock In {\em {SEDMS IV}: USENIX Symposium on Experiences with Distributed
and Multiprocessor Systems}, pages 57--71, Berkeley, CA, USA, 1993. USENIX
Association.
\bibitem{DBLP:conf/sc/BuckH04}
B.~R. Buck and J.~K. Hollingsworth.
\newblock Data centric cache measurement on the {Intel Itanium} 2 processor.
\newblock In {\em {SC '04}: Proc. of the 2004 ACM/IEEE Conf. on
Supercomputing}, page~58, Washington, DC, USA, 2004. IEEE Computer Society.
\bibitem{AMDIBS:07}
P.~J. Drongowski.
\newblock Instruction-based sampling: A new performance analysis technique for
{AMD} family 10h processors.
\newblock \url{http://developer.amd.com/Assets/AMD\_IBS\_paper\_EN.pdf},
November 2007.
\newblock Last accessed: Dec. 13, 2013.
\bibitem{gprof}
{Gprof community}.
\newblock {GNU} gprof.
\newblock \url{https://sourceware.org/binutils/docs/gprof/}.
\bibitem{DBLP:conf/sigplan/GrahamKM82}
S.~L. Graham, P.~B. Kessler, and M.~K. McKusick.
\newblock gprof: a call graph execution profiler.
\newblock In {\em SIGPLAN Symposium on Compiler Construction}, pages 120--126,
1982.
\bibitem{falseshare:binaryinstrumentation1}
S.~M. G\"{u}nther and J.~Weidendorfer.
\newblock Assessing cache false sharing effects by dynamic binary
instrumentation.
\newblock In {\em WBIA '09: Proceedings of the Workshop on Binary
Instrumentation and Applications}, pages 26--33, New York, NY, USA, 2009.
ACM.
\bibitem{rtdsc}
Intel.
\newblock Using the {RDTSC} instruction for performance monitoring.
\newblock \url{https://www.ccsl.carleton.ca/~jamuir/rdtscpm1.pdf}.
\bibitem{Intel:VTune}
{Intel Corporation}.
\newblock Intel {VTune} performance analyzer.
\newblock \url{http://www.intel.com/software/products/vtune}.
\bibitem{detect:ptu}
{Intel Corporation}.
\newblock {\em Intel Performance Tuning Utility 3.2 Update}, November 2008.
\bibitem{detect:intel}
{Intel Corporation}.
\newblock Avoiding and identifying false sharing among threads.
\newblock
\url{http://software.intel.com/en-us/articles/avoiding-and-identifying-false-sharing-among-threads/},
February 2010.
\bibitem{IntelArch:PEBS:Sept09}
{Intel Corporation}.
\newblock {Intel 64 and IA-32} architectures software developer's manual,
{Volume 3B}: System programming guide, {Part 2, Number 253669-032}, June
2010.
\bibitem{DBLP:conf/sc/ItzkowitzWAK03}
M.~Itzkowitz, B.~J.~N. Wylie, C.~Aoki, and N.~Kosche.
\newblock Memory profiling using hardware counters.
\newblock In {\em SC '03: Proc. of the 2003 ACM/IEEE Conf. on Supercomputing},
page~17, Washington, DC, USA, 2003. IEEE Computer Society.
\bibitem{mldetect}
S.~Jayasena, S.~Amarasinghe, A.~Abeyweera, G.~Amarasinghe, H.~De~Silva,
S.~Rathnayake, X.~Meng, and Y.~Liu.
\newblock Detection of false sharing using machine learning.
\newblock In {\em Proceedings of SC13: International Conference for High
Performance Computing, Networking, Storage and Analysis}, SC '13, pages
30:1--30:9, New York, NY, USA, 2013. ACM.
\bibitem{oprofile}
J.~Levon and P.~Elie.
\newblock {OProfile}: A system profiler for {Linux}, 2004.
\bibitem{falseshare:binaryinstrumentation2}
C.-L. Liu.
\newblock False sharing analysis for multithreaded programs.
\newblock Master's thesis, National Chung Cheng University, July 2009.
\bibitem{Sheriff}
T.~Liu and E.~D. Berger.
\newblock Sheriff: precise detection and automatic mitigation of false sharing.
\newblock In {\em Proceedings of the 2011 ACM international conference on
Object oriented programming systems languages and applications}, OOPSLA '11,
pages 3--18, New York, NY, USA, 2011. ACM.
\bibitem{Predator}
T.~Liu, C.~Tian, Z.~Hu, and E.~D. Berger.
\newblock Predator: Predictive false sharing detection.
\newblock In {\em Proceedings of 19th ACM SIGPLAN Symposium on Principles and
Practice of Parallel Programming}, PPOPP'14, New York, NY, USA, 2014. ACM.
\bibitem{ibs-sc}
X.~Liu and J.~M. {Mellor-Crummey}.
\newblock A data-centric profiler for parallel programs.
\newblock In {\em Proc. of the 2013 ACM/IEEE Conference on Supercomputing},
Denver, CO, USA, 2013.
\bibitem{ibs-pact}
X.~Liu, K.~Sharma, and J.~M. {Mellor-Crummey}.
\newblock {ArrayTool}: A lightweight profiler to guide array regrouping.
\newblock In {\em Proceedings of the 23rd International Conference on Parallel
Architectures and Compilation}, PACT '14, pages 405--416, New York, NY, USA,
2014. ACM.
\bibitem{ibs-sc2}
X.~Liu and B.~Wu.
\newblock {ScaAnalyzer}: A tool to identify memory scalability bottlenecks in
parallel programs.
\newblock In {\em Proc. of the 2015 ACM/IEEE Conference on Supercomputing},
Austin, TX, USA, 2015.
\bibitem{OSdetection}
M.~Nanavati, M.~Spear, N.~Taylor, S.~Rajagopalan, D.~T. Meyer, W.~Aiello, and
A.~Warfield.
\newblock Whose cache line is it anyway?: operating system support for live
detection and repair of false sharing.
\newblock In {\em Proceedings of the 8th ACM European Conference on Computer
Systems}, EuroSys '13, pages 141--154, New York, NY, USA, 2013. ACM.
\bibitem{MESI}
M.~S. Papamarcos and J.~H. Patel.
\newblock A low-overhead coherence solution for multiprocessors with private
cache memories.
\newblock In {\em Proceedings of the 11th Annual International Symposium on
Computer Architecture}, ISCA '84, pages 348--354, New York, NY, USA, 1984.
ACM.
\bibitem{DProf}
A.~Pesterev, N.~Zeldovich, and R.~T. Morris.
\newblock Locating cache performance bottlenecks using data profiling.
\newblock In {\em EuroSys '10: Proceedings of the 5th European conference on
Computer systems}, pages 335--348, New York, NY, USA, 2010. ACM.
\bibitem{phoenix-hpca}
C.~Ranger, R.~Raghuraman, A.~Penmetsa, G.~Bradski, and C.~Kozyrakis.
\newblock Evaluating {MapReduce} for multi-core and multiprocessor systems.
\newblock In {\em HPCA '07: Proceedings of the 2007 IEEE 13th International
Symposium on High Performance Computer Architecture}, pages 13--24,
Washington, DC, USA, 2007. IEEE Computer Society.
\bibitem{falseshare:simulator}
M.~Schindewolf.
\newblock Analysis of cache misses using {SIMICS}.
\newblock Master's thesis, Institute for Computing Systems Architecture,
University of Edinburgh, 2007.
\bibitem{Dramon}
W.~Wang, T.~Dey, J.~Davidson, and M.~Soffa.
\newblock {DraMon}: Predicting memory bandwidth usage of multi-threaded
programs with high accuracy and low overhead.
\newblock In {\em High Performance Computer Architecture (HPCA), 2014 IEEE 20th
International Symposium on}, pages 380--391, Feb 2014.
\bibitem{Wicaksono11detectingfalse}
B.~Wicaksono, M.~Tolubaeva, and B.~Chapman.
\newblock Detecting false sharing in {OpenMP} applications using the {DARWIN}
framework.
\newblock In {\em Proceedings of International Workshop on Languages and
Compilers for Parallel Computing}, 2011.
\bibitem{openmp}
B.~Wicaksono, M.~Tolubaeva, and B.~Chapman.
\newblock Detecting false sharing in {OpenMP} applications using the {DARWIN}
framework.
\newblock In S.~Rajopadhye and M.~Mills~Strout, editors, {\em Languages and
Compilers for Parallel Computing}, volume 7146 of {\em Lecture Notes in
Computer Science}, pages 283--297. Springer Berlin Heidelberg, 2013.
\bibitem{qinzhao}
Q.~Zhao, D.~Koh, S.~Raza, D.~Bruening, W.-F. Wong, and S.~Amarasinghe.
\newblock Dynamic cache contention detection in multi-threaded applications.
\newblock In {\em The International Conference on Virtual Execution
Environments}, Newport Beach, CA, Mar 2011.
\end{thebibliography}