NVIDIA
diff --git a/‎docs/language.md‎
Lines changed: 151 additions & 0 deletions b/‎docs/language.md‎
Lines changed: 151 additions & 0 deletions
diff --git a/‎projects/com.nvidia.grcuda.test/src/com/nvidia/grcuda/test/DeviceTest.java‎
Lines changed: 150 additions & 0 deletions b/‎projects/com.nvidia.grcuda.test/src/com/nvidia/grcuda/test/DeviceTest.java‎
Lines changed: 150 additions & 0 deletions
diff --git a/‎projects/com.nvidia.grcuda/src/com/nvidia/grcuda/DeviceArray.java‎
Lines changed: 16 additions & 15 deletions b/‎projects/com.nvidia.grcuda/src/com/nvidia/grcuda/DeviceArray.java‎
Lines changed: 16 additions & 15 deletions
diff --git a/‎projects/com.nvidia.grcuda/src/com/nvidia/grcuda/GrCUDAContext.java‎
Lines changed: 4 additions & 0 deletions b/‎projects/com.nvidia.grcuda/src/com/nvidia/grcuda/GrCUDAContext.java‎
Lines changed: 4 additions & 0 deletions
@@ -263,6 +263,157 @@ buildkernel(
 
 See description in the [polyglot kernel launch](docs/launchkernel.md) documentation for details.
 
+### getdevices() and getdevice() Functions
+
+The `getdevices()` function returns an array that contains all visible
+CUDA devices. `getdevice(k)` returns the `k`-th visible device, with
+`k` ranging from 0 to the number of visible devices - 1.
+
+```text
+devices = getdevices()
+device = getdevice(deviceOrdinal)
+```
+
+`deviceOrdinal`: integer `k` that selects the `k`-th device, with `k`
+ranging from 0 to the number of visible devices - 1
+(see [cudaGetDevice](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html))
+
+Both functions return `Devices` objects which have the following members:
+
+Attribute `id`: the device ID (ordinal)
+
+Attribute `properties`: property object containing device attributes
+returned by the CUDA API functions `cudaDeviceGetAttribute()`,
+`cudaMemGetInfo()` and `cuDeviceGetName()`.
+
+Method `isCurrent()`: method returns true iff `id` is the device
+on which the currently active host thread executes device code.
+
+Method `setCurrent()`: method sets `id` as the device on which the
+currently active host thread should execute device code.
+
+**Example:**
+
+```Python
+devices = polyglot.eval(language='grcuda', string='getdevices()')
+device0 = polyglot.eval(language='grcuda', string='getdevice(0)')
+# identical to device0 = devices[0]
+
+for device in devices:
+    print('{}: {}, {} multiprocessors'.format(device.id,
+       device.properties.deviceName,
+       device.properties.multiProcessorCount))
+# example output
+# 0: TITAN V, 80 multiprocessors
+# 1: Quadro GP100, 56 multiprocessors
+device0.setCurrent()
+print(device0.isCurrent())  # true
+```
+
+Table: Device Property Names (see also
+[CUDA Runtime Documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html))
+
+| Property Name                             |
+|-------------------------------------------|
+| `asyncEngineCount`                        |
+| `canFlushRemoteWrites`                    |
+| `canMapHostMemory`                        |
+| `canUseHostPointerForRegisteredMem`       |
+| `clockRate`                               |
+| `computeCapabilityMajor`                  |
+| `computeCapabilityMinor`                  |
+| `computeMode`                             |
+| `computePreemptionSupported`              |
+| `concurrentKernels`                       |
+| `concurrentManagedAccess`                 |
+| `cooperativeLaunch`                       |
+| `cooperativeMultiDeviceLaunch`            |
+| `deviceName`                              |
+| `directManagedMemAccessFromHost`          |
+| `eccEnabled`                              |
+| `freeDeviceMemory`                        |
+| `globalL1CacheSupported`                  |
+| `globalMemoryBusWidth`                    |
+| `gpuOverlap`                              |
+| `hostNativeAtomicSupported`               |
+| `hostRegisterSupported`                   |
+| `integrated`                              |
+| `isMultiGpuBoard`                         |
+| `kernelExecTimeout`                       |
+| `l2CacheSize`                             |
+| `localL1CacheSupported`                   |
+| `managedMemory`                           |
+| `maxBlockDimX`                            |
+| `maxBlockDimY`                            |
+| `maxBlockDimZ`                            |
+| `maxGridDimX`                             |
+| `maxGridDimY`                             |
+| `maxGridDimZ`                             |
+| `maxPitch`                                |
+| `maxRegistersPerBlock`                    |
+| `maxRegistersPerMultiprocessor`           |
+| `maxSharedMemoryPerBlock`                 |
+| `maxSharedMemoryPerBlockOptin`            |
+| `maxSharedMemoryPerMultiprocessor`        |
+| `maxSurface1DLayeredLayers`               |
+| `maxSurface1DWidth`                       |
+| `maxSurface2DHeight`                      |
+| `maxSurface2DLayeredHeight`               |
+| `maxSurface2DLayeredLayers`               |
+| `maxSurface2DLayeredWidth`                |
+| `maxSurface2DWidth`                       |
+| `maxSurface3DDepth`                       |
+| `maxSurface3DHeight`                      |
+| `maxSurface3DWidth`                       |
+| `maxSurfaceCubemapLayeredLayers`          |
+| `maxSurfaceCubemapLayeredWidth`           |
+| `maxSurfaceCubemapWidth`                  |
+| `maxTexture1DLayeredLayers`               |
+| `maxTexture1DLayeredWidth`                |
+| `maxTexture1DLinearWidth`                 |
+| `maxTexture1DMipmappedWidth`              |
+| `maxTexture1DWidth`                       |
+| `maxTexture2DGatherHeight`                |
+| `maxTexture2DGatherWidth`                 |
+| `maxTexture2DHeight`                      |
+| `maxTexture2DLayeredHeight`               |
+| `maxTexture2DLayeredLayers`               |
+| `maxTexture2DLayeredWidth`                |
+| `maxTexture2DLinearHeight`                |
+| `maxTexture2DLinearPitch`                 |
+| `maxTexture2DLinearWidth`                 |
+| `maxTexture2DMipmappedHeight`             |
+| `maxTexture2DMipmappedWidth`              |
+| `maxTexture2DWidth`                       |
+| `maxTexture3DDepth`                       |
+| `maxTexture3DDepthAlt`                    |
+| `maxTexture3DHeight`                      |
+| `maxTexture3DHeightAlt`                   |
+| `maxTexture3DWidth`                       |
+| `maxTexture3DWidthAlt`                    |
+| `maxTextureCubemapLayeredLayers`          |
+| `maxTextureCubemapLayeredWidth`           |
+| `maxTextureCubemapWidth`                  |
+| `maxThreadsPerBlock`                      |
+| `maxThreadsPerMultiProcessor`             |
+| `memoryClockRate`                         |
+| `multiGpuBoardGroupID`                    |
+| `multiProcessorCount`                     |
+| `pageableMemoryAccess`                    |
+| `pageableMemoryAccessUsesHostPageTables`  |
+| `pciBusId`                                |
+| `pciDeviceId`                             |
+| `pciDomainId`                             |
+| `singleToDoublePrecisionPerfRatio`        |
+| `streamPrioritiesSupported`               |
+| `surfaceAlignment`                        |
+| `tccDriver`                               |
+| `textureAlignment`                        |
+| `texturePitchAlignment`                   |
+| `totalConstantMemory`                     |
+| `totalDeviceMemory`                       |
+| `unifiedAddressing`                       |
+| `warpSize`                                |
+
 ### DeviceArray Constructor Function
 
 In addition to arrays expression, device arrays can also be
 
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.nvidia.grcuda.test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import org.graalvm.polyglot.Context;
+import org.graalvm.polyglot.Value;
+import org.junit.Test;
+
+public class DeviceTest {
+
+    @Test
+    public void testDeviceCount() {
+        try (Context ctx = Context.newBuilder().allowAllAccess(true).build()) {
+            Value deviceCount = ctx.eval("grcuda", "cudaGetDeviceCount()");
+            assertTrue(deviceCount.isNumber());
+            assertTrue(deviceCount.asInt() > 0);
+        }
+    }
+
+    @Test
+    public void testGetDevicesLengthsMatchesDeviceCount() {
+        try (Context ctx = Context.newBuilder().allowAllAccess(true).build()) {
+            Value deviceCount = ctx.eval("grcuda", "cudaGetDeviceCount()");
+            assertTrue(deviceCount.isNumber());
+            assertTrue(deviceCount.asInt() > 0);
+            Value devices = ctx.eval("grcuda", "getdevices()");
+            assertEquals(deviceCount.asInt(), devices.getArraySize());
+        }
+    }
+
+    @Test
+    public void testGetDevicesMatchesAllGetDevice() {
+        try (Context ctx = Context.newBuilder().allowAllAccess(true).build()) {
+            Value devices = ctx.eval("grcuda", "getdevices()");
+            Value getDevice = ctx.eval("grcuda", "getdevice");
+            for (int i = 0; i < devices.getArraySize(); ++i) {
+                Value deviceFromArray = devices.getArrayElement(i);
+                Value deviceFromFunction = getDevice.execute(i);
+                assertEquals(i, deviceFromArray.getMember("id").asInt());
+                assertEquals(i, deviceFromFunction.getMember("id").asInt());
+            }
+        }
+    }
+
+    @Test
+    public void testCanReadSomeDeviceProperties() {
+        try (Context ctx = Context.newBuilder().allowAllAccess(true).build()) {
+            Value devices = ctx.eval("grcuda", "getdevices()");
+            for (int i = 0; i < devices.getArraySize(); ++i) {
+                Value device = devices.getArrayElement(i);
+                Value prop = device.getMember("properties");
+                // Sanity tests on some of the properties
+                // device name is a non-zero string
+                assertTrue(prop.getMember("deviceName").asString().length() > 0);
+
+                // compute capability is at least compute Kepler (3.0)
+                assertTrue(prop.getMember("computeCapabilityMajor").asInt() >= 3);
+
+                // there is at least one multiprocessors
+                assertTrue(prop.getMember("multiProcessorCount").asInt() > 0);
+
+                // there is some device memory
+                assertTrue(prop.getMember("totalDeviceMemory").asLong() > 0L);
+            }
+        }
+    }
+
+    @Test
+    public void testCanSelectDevice() {
+        try (Context ctx = Context.newBuilder().allowAllAccess(true).build()) {
+            Value devices = ctx.eval("grcuda", "getdevices()");
+            if (devices.getArraySize() > 1) {
+                Value firstDevice = devices.getArrayElement(0);
+                Value secondDevice = devices.getArrayElement(1);
+                secondDevice.invokeMember("setCurrent");
+                assertFalse(firstDevice.invokeMember("isCurrent").asBoolean());
+                assertTrue(secondDevice.invokeMember("isCurrent").asBoolean());
+
+                firstDevice.invokeMember("setCurrent");
+                assertTrue(firstDevice.invokeMember("isCurrent").asBoolean());
+                assertFalse(secondDevice.invokeMember("isCurrent").asBoolean());
+            } else {
+                // only one device available
+                Value device = devices.getArrayElement(0);
+                device.invokeMember("setCurrent");
+                assertTrue(device.invokeMember("isCurrent").asBoolean());
+            }
+        }
+    }
+
+    @Test
+    public void testDeviceMemoryAllocationReducesReportedFreeMemory() {
+        try (Context ctx = Context.newBuilder().allowAllAccess(true).build()) {
+            Value device = ctx.eval("grcuda", "getdevice(0)");
+            Value props = device.getMember("properties");
+            device.invokeMember("setCurrent");
+            long totalMemoryBefore = props.getMember("totalDeviceMemory").asLong();
+            long freeMemoryBefore = props.getMember("freeDeviceMemory").asLong();
+            assertTrue(freeMemoryBefore <= totalMemoryBefore);
+
+            // allocate memory on device (unmanaged)
+            long arraySizeBytes = freeMemoryBefore / 3;
+            Value cudaMalloc = ctx.eval("grcuda", "cudaMalloc");
+            Value cudaFree = ctx.eval("grcuda", "cudaFree");
+            Value gpuPointer = null;
+            try {
+                gpuPointer = cudaMalloc.execute(arraySizeBytes);
+                // After allocation total memory must be the same as before but
+                // the free memory must be lower by at least the amount of allocated bytes.
+                long totalMemoryAfter = props.getMember("totalDeviceMemory").asLong();
+                long freeMemoryAfter = props.getMember("freeDeviceMemory").asLong();
+                assertEquals(totalMemoryBefore, totalMemoryAfter);
+                assertTrue(freeMemoryAfter <= (freeMemoryBefore - arraySizeBytes));
+            } finally {
+                if (gpuPointer != null) {
+                    cudaFree.execute(gpuPointer);
+                }
+            }
+        }
+    }
+
+}
@@ -244,44 +244,45 @@ Object getMembers(boolean includeInternal) {
 
     @ExportMessage
     @SuppressWarnings("static-method")
-    boolean isMemberReadable(String member,
-                    @Shared("member") @Cached("createIdentityProfile()") ValueProfile memberProfile) {
-        return POINTER.equals(memberProfile.profile(member)) || COPY_FROM.equals(memberProfile.profile(member)) || COPY_TO.equals(memberProfile.profile(member));
+    boolean isMemberReadable(String memberName,
+                    @Shared("memberName") @Cached("createIdentityProfile()") ValueProfile memberProfile) {
+        String name = memberProfile.profile(memberName);
+        return POINTER.equals(name) || COPY_FROM.equals(name) || COPY_TO.equals(name);
     }
 
     @ExportMessage
-    Object readMember(String member,
-                    @Shared("member") @Cached("createIdentityProfile()") ValueProfile memberProfile) throws UnknownIdentifierException {
-        if (!isMemberReadable(member, memberProfile)) {
+    Object readMember(String memberName,
+                    @Shared("memberName") @Cached("createIdentityProfile()") ValueProfile memberProfile) throws UnknownIdentifierException {
+        if (!isMemberReadable(memberName, memberProfile)) {
             CompilerDirectives.transferToInterpreter();
-            throw UnknownIdentifierException.create(member);
+            throw UnknownIdentifierException.create(memberName);
         }
-        if (POINTER.equals(member)) {
+        if (POINTER.equals(memberName)) {
             return getPointer();
         }
-        if (COPY_FROM.equals(member)) {
+        if (COPY_FROM.equals(memberName)) {
             return new DeviceArrayCopyFunction(this, DeviceArrayCopyFunction.CopyDirection.FROM_POINTER);
         }
-        if (COPY_TO.equals(member)) {
+        if (COPY_TO.equals(memberName)) {
             return new DeviceArrayCopyFunction(this, DeviceArrayCopyFunction.CopyDirection.TO_POINTER);
         }
         CompilerDirectives.transferToInterpreter();
-        throw UnknownIdentifierException.create(member);
+        throw UnknownIdentifierException.create(memberName);
     }
 
     @ExportMessage
     @SuppressWarnings("static-method")
-    boolean isMemberInvocable(String member) {
-        return COPY_FROM.equals(member) || COPY_TO.equals(member);
+    boolean isMemberInvocable(String memberName) {
+        return COPY_FROM.equals(memberName) || COPY_TO.equals(memberName);
     }
 
     @ExportMessage
-    Object invokeMember(String member,
+    Object invokeMember(String memberName,
                     Object[] arguments,
                     @CachedLibrary(limit = "1") InteropLibrary interopRead,
                     @CachedLibrary(limit = "1") InteropLibrary interopExecute)
                     throws UnsupportedTypeException, ArityException, UnsupportedMessageException, UnknownIdentifierException {
-        return interopExecute.execute(interopRead.readMember(this, member), arguments);
+        return interopExecute.execute(interopRead.readMember(this, memberName), arguments);
     }
 
     @ExportMessage
 
@@ -34,6 +34,8 @@
 import com.nvidia.grcuda.functions.BuildKernelFunction;
 import com.nvidia.grcuda.functions.DeviceArrayFunction;
 import com.nvidia.grcuda.functions.FunctionTable;
+import com.nvidia.grcuda.functions.GetDeviceFunction;
+import com.nvidia.grcuda.functions.GetDevicesFunction;
 import com.nvidia.grcuda.gpu.CUDARuntime;
 import com.oracle.truffle.api.TruffleLanguage.Env;
 
@@ -56,6 +58,8 @@ public GrCUDAContext(Env env) {
         functionTable.registerFunction(new DeviceArrayFunction(cudaRuntime));
         functionTable.registerFunction(new BindKernelFunction(cudaRuntime));
         functionTable.registerFunction(new BuildKernelFunction(cudaRuntime));
+        functionTable.registerFunction(new GetDevicesFunction(cudaRuntime));
+        functionTable.registerFunction(new GetDeviceFunction(cudaRuntime));
     }
 
     public Env getEnv() {