HeyangQin.github.io/index.html at master · HeyangQin/HeyangQin.github.io · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
<!DOCTYPE html>
<html lang="en">

<head>
  <!-- Encoding and viewport come first so the character set is declared before any other head content. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
  <meta name="description" content="Website of Heyang Qin, Senior Researcher at Microsoft">
  <meta name="author" content="Heyang Qin">

  <title>Heyang Qin - Microsoft</title>

  <!-- Global site tag (gtag.js) - Google Analytics -->
  <!-- NOTE(review): the "UA-" prefix denotes a Universal Analytics property; confirm it still collects data or switch to a GA4 measurement ID. -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=UA-27141560-1"></script>
  <script>
    window.dataLayer = window.dataLayer || [];
    function gtag() { dataLayer.push(arguments); }
    gtag('js', new Date());

    gtag('config', 'UA-27141560-1');
  </script>

  <!-- Bootstrap core CSS -->
  <link href="vendor/bootstrap/css/bootstrap.min.css" rel="stylesheet">

  <!-- Custom fonts for this template -->
  <link href="https://fonts.googleapis.com/css?family=Saira+Extra+Condensed:100,200,300,400,500,600,700,800,900"
    rel="stylesheet">
  <link href="https://fonts.googleapis.com/css?family=Open+Sans:300,300i,400,400i,600,600i,700,700i,800,800i"
    rel="stylesheet">
  <link href="vendor/font-awesome/css/font-awesome.min.css" rel="stylesheet">
  <link href="vendor/devicons/css/devicons.min.css" rel="stylesheet">
  <link href="vendor/simple-line-icons/css/simple-line-icons.css" rel="stylesheet">

  <!-- Custom styles for this template (loaded last so it can override the vendor styles above) -->
  <link href="css/resume.min.css" rel="stylesheet">

</head>


<body id="page-top">

  <!--
    Side navigation. Each link targets the id of a section further down the page.
    The brand link shows a text label below the lg breakpoint and the profile
    picture from lg upwards.
  -->
  <nav class="navbar navbar-expand-lg navbar-dark bg-primary fixed-top" id="sideNav">
    <a class="navbar-brand js-scroll-trigger" href="#page-top">
      <span class="d-block d-lg-none">Heyang Qin - Azure OpenAI</span>
      <span class="d-none d-lg-block">
        <!-- The image is the only content of this link on large screens, so it needs a non-empty alt. -->
        <img class="img-fluid img-profile rounded-circle mx-auto mb-2" src="img/profile.jpg" alt="Heyang Qin">
      </span>
    </a>
    <!--
      Toggler for the collapsed menu. Without it the .collapse list below stays
      hidden on screens narrower than lg and the section links cannot be reached.
      Uses Bootstrap 4 data attributes; assumes the vendored Bootstrap is v4
      (the page uses v4 utilities such as mr-auto and text-md-right). TODO confirm.
    -->
    <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent"
      aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
      <span class="navbar-toggler-icon"></span>
    </button>
    <div class="collapse navbar-collapse" id="navbarSupportedContent">
      <ul class="navbar-nav">
        <li class="nav-item"><a class="nav-link js-scroll-trigger" href="#about">About</a></li>
        <li class="nav-item"><a class="nav-link js-scroll-trigger" href="#experience">Experience</a></li>
        <li class="nav-item"><a class="nav-link js-scroll-trigger" href="#research">Research</a></li>
        <li class="nav-item"><a class="nav-link js-scroll-trigger" href="#publications">Publications</a></li>
        <li class="nav-item"><a class="nav-link js-scroll-trigger" href="#education">Education</a></li>
        <li class="nav-item"><a class="nav-link js-scroll-trigger" href="#interests">Interests</a></li>
      </ul>
    </div>
  </nav>

  <div class="container-fluid p-0 bg">

    <!--
      About: name, contact addresses, a short biography and a row of icon links
      (CV, Google Scholar, GitHub, LinkedIn).
    -->
    <section class="resume-section p-3 p-lg-5 d-flex flex-column" id="about">
      <div class="my-auto">
        <h1 class="mb-0">Heyang <span class="text-primary">Qin</span></h1>
        <div class="subheading mb-5">Senior Researcher · Microsoft Azure OpenAI ·
          <a href="mailto:heyangqin@microsoft.com">heyangqin@microsoft.com</a> ·
          <a href="mailto:qysnn1@gmail.com">qysnn1@gmail.com</a>
        </div>
        <p class="mb-5">
          I am a Senior Researcher at <strong>Microsoft Azure OpenAI</strong>, where I focus on building
          high-performance inference infrastructure for large-scale foundation models. My work involves optimizing
          inference engines, implementing advanced distributed inference logic, and supporting next-generation flagship
          models on both NVIDIA and AMD hardware.
        </p>
        <p class="mb-5">
          Previously, I was a core member of the <strong>DeepSpeed</strong> team, contributing to industry-leading
          scalable training and inference systems including DeepSpeed-ZeRO++, ZeRO3, and DeepSpeed-FastGen. I earned my
          Ph.D. from the University of Nevada, Reno in 2022. My career is dedicated to bridging the gap between massive
          model scale and hardware efficiency.
        </p>
        <!--
          Icon links. Each link contains only an image, so the alt text is the
          link's accessible name. The former "height:NNx" declarations were
          invalid CSS and were dropped by browsers; they are removed here so the
          rendered size (fixed width, natural aspect ratio) stays as it was.
        -->
        <ul class="list-inline list-social-icons mb-0">
          <li class="list-inline-item"><a href="CV.pdf" target="_blank" rel="noopener"><span
                class="fa-stack fa-lg"><img src="img/cv icon.png" alt="CV (PDF)"
                  style="width:52px;border:0;"></span></a></li>
          <li class="list-inline-item"><a href="https://scholar.google.com/citations?user=0azIceIAAAAJ"
              target="_blank" rel="noopener"><span class="fa-stack fa-lg"><img src="img/google scholar icon 2.png"
                  alt="Google Scholar profile" style="width:56px;border:0;"></span></a></li>
          <li class="list-inline-item"><a href="https://github.com/HeyangQin" target="_blank" rel="noopener"><span
                class="fa-stack fa-lg"><img src="img/github icon.png" alt="GitHub profile"
                  style="width:56px;border:0;"></span></a></li>
          <li class="list-inline-item"><a href="https://www.linkedin.com/in/heyangqin/en" target="_blank"
              rel="noopener"><span class="fa-stack fa-lg"><img src="img/linkedin icon.png" alt="LinkedIn profile"
                  style="width:58px;border:0;"></span></a></li>
        </ul>
      </div>
    </section>

    <hr class="m-0">

    <!--
      Experience: one .resume-item per position, newest first. Each item pairs
      a description column (.resume-content) with a date column (.resume-date).
    -->
    <section class="resume-section p-3 p-lg-5 d-flex flex-column" id="experience">
      <div class="my-auto">
        <h2 class="mb-5">Experience</h2>

        <div class="resume-item d-flex flex-column flex-md-row mb-5">
          <div class="resume-content mr-auto">
            <h3 class="mb-0">Senior Researcher</h3>
            <div class="subheading mb-3">Microsoft (Azure OpenAI)</div>
            <p>Developing next-generation inference infrastructure for flagship foundation models (including GPT-4 and
              GPT-5 series).</p>
            <ul>
              <li>Architected and implemented high-performance resharding logic for large-scale inference on AMD GPU
                clusters.</li>
              <li>Engineered custom kernels and execution graph optimizations to reduce TTFT and increase serving
                throughput.</li>
              <li>Collaborating on the deployment, scalability, and reliability of flagship models across global-scale
                data centers.</li>
            </ul>
          </div>
          <div class="resume-date text-md-right"><span class="text-primary">June 2024 - Present</span></div>
        </div>

        <div class="resume-item d-flex flex-column flex-md-row mb-5">
          <div class="resume-content mr-auto">
            <h3 class="mb-0">Researcher / Research Intern</h3>
            <div class="subheading mb-3">Microsoft (DeepSpeed Team)</div>
            <ul>
              <li><strong>DeepSpeed-ZeRO++:</strong> Lead author of the ZeRO++ optimization suite, reducing
                communication volume by up to 4x for LLM training.</li>
              <li><strong>DeepSpeed-FastGen:</strong> Contributed to the development of FastGen and MII, optimizing
                system-level inference efficiency and KV-cache management.</li>
              <li><strong>System Optimization:</strong> Developed communication primitives and applied system
                optimizations to resolve bottlenecks in distributed training and inference.</li>
            </ul>
          </div>
          <div class="resume-date text-md-right"><span class="text-primary">October 2020 - June 2024</span></div>
        </div>
      </div>
    </section>

    <hr class="m-0">

    <!--
      Research: one .resume-item per project, each with a bold title line and a
      summary paragraph. Titles use "font-size: large", which is the CSS
      equivalent of the obsolete font element with size="4" that was here
      before, so the rendered size is unchanged.
    -->
    <section class="resume-section p-3 p-lg-5 d-flex flex-column" id="research">
      <div class="my-auto">
        <h2 class="mb-5">Research</h2>

        <div class="resume-item d-flex flex-column flex-md-row mb-5">
          <div class="resume-content mr-auto">
            <p><b style="font-size: large;">ZeRO++: Extremely Efficient Large Scale Training Based on ZeRO Optimizer</b></p>
            <p>ZeRO++ is a set of communication optimization strategies built on top of DeepSpeed ZeRO-3. It introduces
              quantized weights, hierarchical partitioning, and communication-efficient gradients to address
              communication bottlenecks in large-scale LLM training. This work enables efficient training of
              trillion-parameter models even on clusters with limited cross-node bandwidth.</p>
          </div>
        </div>

        <div class="resume-item d-flex flex-column flex-md-row mb-5">
          <div class="resume-content mr-auto">
            <p><b style="font-size: large;">SimiGrad: Fine-Grained Adaptive Batching for Large Scale Training</b></p>
            <p>Large scale training requires massive parallelism, where large batch training is key but often costs
              generalization performance. We propose SimiGrad, a fully automated and lightweight adaptive batching
              methodology. By leveraging a representation of critical gradient noise information, we achieved a
              record-breaking batch size of 78k in BERT-Large pretraining while maintaining state-of-the-art model
              performance.</p>
          </div>
        </div>

        <div class="resume-item d-flex flex-column flex-md-row mb-5">
          <div class="resume-content mr-auto">
            <p><b style="font-size: large;">Region Based Reinforcement Learning (RRL) Scheduling for MLaaS</b></p>
            <p>Parallelism settings in Machine Learning as a Service (MLaaS) have a critical impact on performance. We
              propose a region-based reinforcement learning (RRL) approach that can converge to near-optimal
              configurations orders of magnitude faster than traditional RL. This was further expanded into RRL Plus,
              using Bayesian optimization to automatically adjust region sizes for optimal serving efficiency.</p>
          </div>
        </div>
      </div>
    </section>

    <hr class="m-0">

    <!--
      Selected publications, newest first. Each entry is one paragraph holding
      authors, linked title, venue and optional slides.
      The content is wrapped in div.my-auto like the other sections. The
      wrapper's opening tag was missing, so the closing div at the end of this
      section used to close the section and the page-level container early.
    -->
    <section class="resume-section p-3 p-lg-5 d-flex flex-column" id="publications">
      <div class="my-auto">

        <h2 class="mb-5">Selected Publications</h2>

        <!-- NOTE(review): "Twelfth" and "2023" may not agree; the twelfth ICLR is believed to be ICLR 2024. Confirm the venue year. -->
        <p>Heyang Qin*, Guanhua Wang*, Sam Ade Jacobs, Connor Holmes, Samyam Rajbhandari, Olatunji Ruwase, Feng Yan, Lei
          Yang, Yuxiong He, <a href="ZeRO++.pdf" target="_blank" rel="noopener"><u>ZeRO++: Extremely Efficient Collective Communication
              for Large Model Training</u></a>, <i>The Twelfth International Conference on Learning Representations, 2023
            (<b>ICLR 2023</b>)</i>.</p>


        <p>Heyang Qin, Samyam Rajbhandari, Olatunji Ruwase, Feng Yan, Lei Yang, Yuxiong He, <a
            href="2021_NIPS_SimiGrad.pdf" target="_blank" rel="noopener"><u>SimiGrad: Fine-Grained Adaptive Batching for Large Scale
              Training using Gradient Similarity Measurement</u></a>, <i>in Proceedings of the Neural Information
            Processing Systems 2021 (<b>NeurIPS 2021</b>)</i>, Virtual, December, 2021 (<b>Acceptance rate:
            2371/9122=26%</b>). [<a href="NIPS 2021 SimiGrad.pptx" target="_blank" rel="noopener"><u>Slides</u></a>]</p>

        <p>Heyang Qin, Syed Zawad, Yanqi Zhou, Sanjay Padhi, Lei Yang, and Feng Yan, Reinforcement Learning Empowered
          MLaaS Scheduling for Serving Intelligent Internet of Things, <i>IEEE Internet of Things Journal</i>, 2020
          (<b>Impact factor: 9.515</b>).</p>

        <p>Heyang Qin, Syed Zawad, Yanqi Zhou, Lei Yang, Dongfang Zhao, Feng Yan, <a href="QinZZYZ019.pdf"
            target="_blank" rel="noopener"><u>Swift Machine Learning Model Serving Scheduling: A Region Based Reinforcement Learning
              Approach</u></a>, <i>in Proceedings of the International Conference for High Performance Computing,
            Networking, Storage and Analysis (<b>SC 2019</b>)</i>, Denver, CO, USA, Nov, 2019 (<b>Acceptance rate:
            78/344=22%</b>). [<a href="SC19 Presentation Heyang Qin.pptx" target="_blank" rel="noopener"><u>Slides</u></a>]</p>

      </div>
    </section>

  <hr class="m-0">

  <!--
    Education: one .resume-item per degree, newest first, with the institution
    and degree on the left and the years on the right.
  -->
  <section class="resume-section p-3 p-lg-5 d-flex flex-column" id="education">
    <div class="my-auto">
      <h2 class="mb-5">Education</h2>
      <div class="resume-item d-flex flex-column flex-md-row mb-5">
        <div class="resume-content mr-auto">
          <h3 class="mb-0">University of Nevada, Reno</h3>
          <div class="subheading mb-3">Ph.D. in Computer Science and Engineering</div>
          <!-- The ampersand is written as an entity so it is never read as the start of a character reference. -->
          <p>GPA: 4.00 | Advisors: Dr. Feng Yan &amp; Dr. Lei Yang</p>
        </div>
        <div class="resume-date text-md-right"><span class="text-primary">2017 - 2022</span></div>
      </div>
      <div class="resume-item d-flex flex-column flex-md-row">
        <div class="resume-content mr-auto">
          <h3 class="mb-0">University of Electronic Science and Technology of China</h3>
          <div class="subheading mb-3">Bachelor in Automation Engineering</div>
        </div>
        <div class="resume-date text-md-right"><span class="text-primary">2013 - 2017</span></div>
      </div>
    </div>
  </section>

  <hr class="m-0">

  <!-- Personal interests: a single free-text paragraph; target of the "Interests" nav link (#interests). -->
  <section class="resume-section p-3 p-lg-5 d-flex flex-column" id="interests">
    <div class="my-auto">
      <h2 class="mb-5">Personal Interests</h2>
      <p>I am an active contributor to the open-source AI community. Outside of research, I enjoy table tennis and
        volleyball. I also have a strong passion for archaic Chinese literature and detective fiction.</p>
    </div>
  </section>

  </div>

  <!--
    Vendor scripts first, then the page's own script. Keep this order:
    the Bootstrap bundle and the jQuery easing plugin presumably expect jQuery
    to be loaded already, and js/resume.min.js presumably relies on all three
    (verify against the script sources before reordering or adding defer/async).
  -->
  <script src="vendor/jquery/jquery.min.js"></script>
  <script src="vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
  <script src="vendor/jquery-easing/jquery.easing.min.js"></script>
  <script src="js/resume.min.js"></script>

</body>


</html>