diff --git a/adoc/chapters/architecture.adoc b/adoc/chapters/architecture.adoc index cb45c8852..5edf4d98f 100644 --- a/adoc/chapters/architecture.adoc +++ b/adoc/chapters/architecture.adoc @@ -79,12 +79,16 @@ the SYCL features that will be used. A SYCL application runs on a <>. The application is structured in three scopes which specify the different sections; <>, <> and <>. -The <> specifies a single kernel function that will be, or has +The <> specifies a <> that will be, or has been, compiled by a <> and executed on a <>. -In this example <> is defined by lines 25 to 26. +In this example <> is defined by lines 18 to 19, the body of the +lambda passed to [code]#parallel_for#. The <> specifies a unit of work which is comprised of a -<> and <>. -In this example <> is defined by lines 20 to 28. +<> and any associated requirements. +In this example the kernel is launched using the [code]#parallel_for# queue +shortcut on lines 17 to 20, which implicitly defines the <> +around the <>. +All queue shortcuts can be found in <>. The <> specifies all other code outside of a <>. These three scopes are used to control the application flow and the construction @@ -112,29 +116,27 @@ In the case of [code]#parallel_for# the <> will be executed over the given range from 0 to 1023. The different member functions to execute kernels can be found in <>. - -A <> is the syntactic scope wrapped by the construction of -a <> as seen on line 19. -The <> may invoke only a single -<>, and it takes a parameter of type command group -[code]#handler#, which is constructed by the runtime. - All the requirements for a kernel to execute are defined in this <>, as described in <>. -In this case the constructor used for [code]#myQueue# on line 9 is the default -constructor, which allows the queue to select the best underlying device to -execute on, leaving the decision up to the runtime. 
+Additionally, in this example, [code]#myQueue# is constructed on line 11 with +the [code]#property::queue::in_order# property, which causes commands submitted +to the queue to execute in the order they were enqueued. +No <> is specified in the queue constructor, so the constructor selects +the best underlying device to execute on, leaving the decision up to the +runtime. In SYCL, data that is required within a <> must be -contained within a <>, <>, or <> allocation, as described in +contained within a <> allocation, <>, or <>, as described in <>. -We construct a buffer on line 16. -Access to the <> is controlled via an <> which is constructed -on line 21. -The <> is used to keep track of access to the data and the <> -is used to request access to the data on a queue, as well as to track the -dependencies between <>. -In this example the <> is used to write to the data buffer on line 26. +In this example we use a USM device allocation, created with +[code]#malloc_device# on line 14, which returns a pointer to memory accessible +on the <> associated with [code]#myQueue#. +The kernel writes through this pointer on line 19. +Because device allocations are not accessible from the host, the results are +explicitly copied back to the host [code]##std::vector## using +[code]#myQueue.copy# on line 23, and the call to [code]#myQueue.wait# on line 25 +ensures all enqueued operations have completed before the host reads the data. +Finally, the device allocation is released with [code]#free# on line 32. 
[[sec:platformmodel]] diff --git a/adoc/code/anatomy.cpp b/adoc/code/anatomy.cpp index 702c9523e..815844ec0 100644 --- a/adoc/code/anatomy.cpp +++ b/adoc/code/anatomy.cpp @@ -3,37 +3,36 @@ #include <iostream> #include <sycl/sycl.hpp> -using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names +#include <vector> +using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names int main() { - int data[1024]; // Allocate data to be worked on - - // Create a default queue to enqueue work to the default device - queue myQueue; - - // By wrapping all the SYCL work in a {} block, we ensure - // all SYCL tasks must complete before exiting the block, - // because the destructor of resultBuf will wait - { - // Wrap our data variable in a buffer - buffer resultBuf{data, {1024}}; - - // Create a command group to issue commands to the queue - myQueue.submit([&](handler& cgh) { - // Request write access to the buffer without initialization - accessor writeResult{resultBuf, cgh, write_only, no_init}; - - // Enqueue a parallel_for task with 1024 work-items - cgh.parallel_for(1024, [=](id<1> idx) { - // Initialize each buffer element with its own rank number starting at 0 - writeResult[idx] = idx; - }); // End of the kernel function - }); // End of our commands for this queue - } // End of scope, so we wait for work producing resultBuf to complete + // Allocate host memory to store the results + std::vector<int> dataHost(1024); + + // Create an in-order queue to enqueue work to the default device + queue myQueue{property::queue::in_order()}; + + // Allocate device memory to be worked on + int *dataDevice = malloc_device<int>(1024, myQueue); + + // Enqueue a parallel_for task with 1024 work-items + myQueue.parallel_for(1024, [=](id<1> idx) { + // Initialize each element of the device allocation with its own rank number starting at 0 + dataDevice[idx] = idx; + }); // End of the kernel function + + // Copy the results back to the host from the device + myQueue.copy(dataDevice, dataHost.data(), 1024); + + 
myQueue.wait(); // Wait for the queue to finish executing all the tasks // Print result for (int i = 0; i < 1024; i++) - std::cout << "data[" << i << "] = " << data[i] << std::endl; + std::cout << "dataHost[" << i << "] = " << dataHost[i] << std::endl; + + // Free device memory + free(dataDevice, myQueue); return 0; }