diff --git a/adoc/chapters/architecture.adoc b/adoc/chapters/architecture.adoc
index cb45c8852..5edf4d98f 100644
--- a/adoc/chapters/architecture.adoc
+++ b/adoc/chapters/architecture.adoc
@@ -79,12 +79,16 @@ the SYCL features that will be used.
 A SYCL application runs on a <<sec:platformmodel, SYCL Platform>>.
 The application is structured in three scopes which specify the different
 sections; <<application-scope>>, <<command-group-scope>> and <<kernel-scope>>.
-The <<kernel-scope>> specifies a single kernel function that will be, or has
+The <<kernel-scope>> specifies a <<sycl-kernel-function>> that will be, or has
 been, compiled by a <<device-compiler>> and executed on a <<device>>.
-In this example <<kernel-scope>> is defined by lines 25 to 26.
+In this example <<kernel-scope>> is defined by lines 18 to 19, the body of the
+lambda passed to [code]#parallel_for#.
 The <<command-group-scope>> specifies a unit of work which is comprised of a
-<<sycl-kernel-function>> and <<accessor,accessors>>.
-In this example <<command-group-scope>> is defined by lines 20 to 28.
+<<sycl-kernel-function>> and any associated requirements.
+In this example the kernel is launched using the [code]#parallel_for# queue
+shortcut on lines 17 to 20, which implicitly defines the <<command-group-scope>>
+around the <<kernel-scope>>.
+All queue shortcuts can be found in <<sec:queue-class>>.
 The <<application-scope>> specifies all other code outside of a
 <<command-group-scope>>.
 These three scopes are used to control the application flow and the construction
@@ -112,29 +116,27 @@ In the case of [code]#parallel_for# the <<sycl-kernel-function>> will be
 executed over the given range from 0 to 1023.
 The different member functions to execute kernels can be found in
 <<subsec:invokingkernels>>.
-
-A <<command-group-scope>> is the syntactic scope wrapped by the construction of
-a <<command-group-function-object>> as seen on line 19.
-The <<command-group-function-object>> may invoke only a single
-<<sycl-kernel-function>>, and it takes a parameter of type command group
-[code]#handler#, which is constructed by the runtime.
-
 All the requirements for a kernel to execute are defined in this
 <<command-group-scope>>, as described in <<sec:executionmodel>>.
-In this case the constructor used for [code]#myQueue# on line 9 is the default
-constructor, which allows the queue to select the best underlying device to
-execute on, leaving the decision up to the runtime.
+Additionally, in this example, [code]#myQueue# is constructed on line 11 with
+the [code]#property::queue::in_order# property, which causes commands submitted
+to the queue to execute in the order they were enqueued.
+No <<device>> is specified in the queue constructor, so the constructor selects
+the best underlying device to execute on, leaving the decision up to the
+runtime.
 
 In SYCL, data that is required within a <<sycl-kernel-function>> must be
-contained within a <<buffer>>, <<image>>, or <<usm>> allocation, as described in
+contained within a <<usm>> allocation, <<image>>, or <<buffer>>, as described in
 <<sec:memory.model>>.
-We construct a buffer on line 16.
-Access to the <<buffer>> is controlled via an <<accessor>> which is constructed
-on line 21.
-The <<buffer>> is used to keep track of access to the data and the <<accessor>>
-is used to request access to the data on a queue, as well as to track the
-dependencies between <<sycl-kernel-function>>.
-In this example the <<accessor>> is used to write to the data buffer on line 26.
+In this example we use a USM device allocation, created with
+[code]#malloc_device# on line 14, which returns a pointer to memory accessible
+on the <<device>> associated with [code]#myQueue#.
+The kernel writes through this pointer on line 19.
+Because device allocations are not accessible from the host, the results are
+explicitly copied back to the host [code]#std::vector# using
+[code]#myQueue.copy# on line 23, and the call to [code]#myQueue.wait# on line 25
+ensures all enqueued operations have completed before the host reads the data.
+Finally, the device allocation is released with [code]#free# on line 32.
 
 
 [[sec:platformmodel]]
diff --git a/adoc/code/anatomy.cpp b/adoc/code/anatomy.cpp
index 702c9523e..815844ec0 100644
--- a/adoc/code/anatomy.cpp
+++ b/adoc/code/anatomy.cpp
@@ -3,37 +3,36 @@
 
 #include <iostream>
 #include <sycl/sycl.hpp>
-using namespace sycl;  // (optional) avoids need for "sycl::" before SYCL names
+#include <vector>
+using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names
 
 int main() {
-  int data[1024];  // Allocate data to be worked on
-
-  // Create a default queue to enqueue work to the default device
-  queue myQueue;
-
-  // By wrapping all the SYCL work in a {} block, we ensure
-  // all SYCL tasks must complete before exiting the block,
-  // because the destructor of resultBuf will wait
-  {
-    // Wrap our data variable in a buffer
-    buffer resultBuf{data, {1024}};
-
-    // Create a command group to issue commands to the queue
-    myQueue.submit([&](handler& cgh) {
-      // Request write access to the buffer without initialization
-      accessor writeResult{resultBuf, cgh, write_only, no_init};
-
-      // Enqueue a parallel_for task with 1024 work-items
-      cgh.parallel_for(1024, [=](id<1> idx) {
-        // Initialize each buffer element with its own rank number starting at 0
-        writeResult[idx] = idx;
-      });  // End of the kernel function
-    });    // End of our commands for this queue
-  }  // End of scope, so we wait for work producing resultBuf to complete
+  // Allocate host memory to store the results
+  std::vector<int> dataHost(1024);
+
+  // Create an in-order queue to enqueue work to the default device
+  queue myQueue{property::queue::in_order()};
+
+  // Allocate device memory to be worked on
+  int *dataDevice = malloc_device<int>(1024, myQueue);
+
+  // Enqueue a parallel_for task with 1024 work-items
+  myQueue.parallel_for(1024, [=](id<1> idx) {
+    // Initialize each element of the device allocation with its own index
+    dataDevice[idx] = idx;
+  }); // End of the kernel function
+
+  // Copy the results back to the host from the device
+  myQueue.copy(dataDevice, dataHost.data(), 1024);
+
+  myQueue.wait(); // Wait for the queue to finish executing all the tasks
 
   // Print result
   for (int i = 0; i < 1024; i++)
-    std::cout << "data[" << i << "] = " << data[i] << std::endl;
+    std::cout << "dataHost[" << i << "] = " << dataHost[i] << std::endl;
+
+  // Free device memory
+  free(dataDevice, myQueue);
 
   return 0;
 }