From a3509f6cdf1bba2bf4bbf782065b5b582ba10729 Mon Sep 17 00:00:00 2001 From: n8vm Date: Tue, 16 Feb 2021 22:56:45 -0700 Subject: [PATCH 01/55] removing unified memory for framebuffers, as this hurts multigpu performance in non-nvlink configurations --- src/nvisii/nvisii.cpp | 253 ++++++++++++++++-------------------------- 1 file changed, 94 insertions(+), 159 deletions(-) diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 0e9079c5..0d635a38 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -263,62 +263,6 @@ int getDeviceCount() { return owlGetDeviceCount(OptixData.context); } -OWLModule moduleCreate(OWLContext context, const char* ptxCode) -{ - return owlModuleCreate(context, ptxCode); -} - -OWLBuffer managedMemoryBufferCreate(OWLContext context, OWLDataType type, size_t count, void* init) -{ - return owlManagedMemoryBufferCreate(context, type, count, init); -} - -OWLBuffer deviceBufferCreate(OWLContext context, OWLDataType type, size_t count, void* init) -{ - return owlDeviceBufferCreate(context, type, count, init); -} - -void bufferDestroy(OWLBuffer buffer) -{ - owlBufferDestroy(buffer); -} - -void bufferResize(OWLBuffer buffer, size_t newItemCount) { - owlBufferResize(buffer, newItemCount); -} - -const void* bufferGetPointer(OWLBuffer buffer, int deviceId) -{ - return owlBufferGetPointer(buffer, deviceId); -} - -void bufferUpload(OWLBuffer buffer, const void *hostPtr) -{ - owlBufferUpload(buffer, hostPtr); -} - -CUstream getStream(OWLContext context, int deviceId) -{ - return owlContextGetStream(context, deviceId); -} - -OptixDeviceContext getOptixContext(OWLContext context, int deviceID) -{ - return owlContextGetOptixContext(context, deviceID); -} - -void buildPrograms(OWLContext context) { - owlBuildPrograms(context); -} - -void buildPipeline(OWLContext context) { - owlBuildPipeline(context); -} - -void buildSBT(OWLContext context) { - owlBuildSBT(context); -} - OWLMissProg missProgCreate(OWLContext context, OWLModule module, 
const char *programName, size_t sizeOfVarStruct, OWLVarDecl *vars, size_t numVars) { return owlMissProgCreate(context, module, programName, sizeOfVarStruct, vars, numVars); @@ -488,12 +432,12 @@ void resizeOptixFrameBuffer(uint32_t width, uint32_t height) OD.LP.frameSize.x = width; OD.LP.frameSize.y = height; - bufferResize(OD.frameBuffer, width * height); - bufferResize(OD.normalBuffer, width * height); - bufferResize(OD.albedoBuffer, width * height); - bufferResize(OD.scratchBuffer, width * height); - bufferResize(OD.mvecBuffer, width * height); - bufferResize(OD.accumBuffer, width * height); + owlBufferResize(OD.frameBuffer, width * height); + owlBufferResize(OD.normalBuffer, width * height); + owlBufferResize(OD.albedoBuffer, width * height); + owlBufferResize(OD.scratchBuffer, width * height); + owlBufferResize(OD.mvecBuffer, width * height); + owlBufferResize(OD.accumBuffer, width * height); // Reconfigure denoiser optixDenoiserComputeMemoryResources(OD.denoiser, OD.LP.frameSize.x, OD.LP.frameSize.y, &OD.denoiserSizes); @@ -503,18 +447,18 @@ void resizeOptixFrameBuffer(uint32_t width, uint32_t height) #else scratchSizeInBytes = OD.denoiserSizes.withOverlapScratchSizeInBytes; #endif - bufferResize(OD.denoiserScratchBuffer, scratchSizeInBytes); - bufferResize(OD.denoiserStateBuffer, OD.denoiserSizes.stateSizeInBytes); + owlBufferResize(OD.denoiserScratchBuffer, scratchSizeInBytes); + owlBufferResize(OD.denoiserStateBuffer, OD.denoiserSizes.stateSizeInBytes); - auto cudaStream = getStream(OD.context, 0); + auto cudaStream = owlContextGetStream(OD.context, 0); optixDenoiserSetup ( OD.denoiser, (cudaStream_t) cudaStream, (unsigned int) OD.LP.frameSize.x, (unsigned int) OD.LP.frameSize.y, - (CUdeviceptr) bufferGetPointer(OD.denoiserStateBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserStateBuffer, 0), OD.denoiserSizes.stateSizeInBytes, - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) 
owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes ); @@ -551,7 +495,7 @@ void initializeOptix(bool headless) owlEnableMotionBlur(OD.context); owlContextSetRayTypeCount(OD.context, 2); cudaSetDevice(0); // OWL leaves the device as num_devices - 1 after the context is created. set it back to 0. - OD.module = moduleCreate(OD.context, ptxCode); + OD.module = owlModuleCreate(OD.context, ptxCode); /* Setup Optix Launch Params */ OWLVarDecl launchParamVars[] = { @@ -626,21 +570,12 @@ void initializeOptix(bool headless) initializeFrameBuffer(512, 512); } - if (numGPUsFound > 1) { - OD.frameBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.accumBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.normalBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.albedoBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.scratchBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.mvecBuffer = managedMemoryBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - } else { - OD.frameBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.accumBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.normalBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.albedoBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.scratchBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.mvecBuffer = deviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - } + OD.frameBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.accumBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.normalBuffer = 
owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.albedoBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.scratchBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.mvecBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); OD.LP.frameSize = glm::ivec2(512, 512); launchParamsSetBuffer(OD.launchParams, "frameBuffer", OD.frameBuffer); launchParamsSetBuffer(OD.launchParams, "normalBuffer", OD.normalBuffer); @@ -652,24 +587,24 @@ void initializeOptix(bool headless) /* Create Component Buffers */ // note, extra textures reserved for internal use - OD.entityBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(EntityStruct), Entity::getCount(), nullptr); - OD.transformBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(TransformStruct), Transform::getCount(), nullptr); - OD.cameraBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(CameraStruct), Camera::getCount(), nullptr); - OD.materialBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(MaterialStruct), Material::getCount(), nullptr); - OD.meshBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(MeshStruct), Mesh::getCount(), nullptr); - OD.lightBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(LightStruct), Light::getCount(), nullptr); - OD.textureBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(TextureStruct), Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); - OD.volumeBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(VolumeStruct), Volume::getCount(), nullptr); - OD.volumeHandlesBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Volume::getCount(), nullptr); - OD.lightEntitiesBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.surfaceInstanceToEntityBuffer = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.volumeInstanceToEntityBuffer = deviceBufferCreate(OD.context, 
OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.vertexListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.normalListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.tangentListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.texCoordListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.indexListsBuffer = deviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); - OD.textureObjectsBuffer = deviceBufferCreate(OD.context, OWL_TEXTURE, Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); + OD.entityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(EntityStruct), Entity::getCount(), nullptr); + OD.transformBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(TransformStruct), Transform::getCount(), nullptr); + OD.cameraBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(CameraStruct), Camera::getCount(), nullptr); + OD.materialBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(MaterialStruct), Material::getCount(), nullptr); + OD.meshBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(MeshStruct), Mesh::getCount(), nullptr); + OD.lightBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(LightStruct), Light::getCount(), nullptr); + OD.textureBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(TextureStruct), Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); + OD.volumeBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(VolumeStruct), Volume::getCount(), nullptr); + OD.volumeHandlesBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Volume::getCount(), nullptr); + OD.lightEntitiesBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); + OD.surfaceInstanceToEntityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); + OD.volumeInstanceToEntityBuffer = owlDeviceBufferCreate(OD.context, 
OWL_USER_TYPE(uint32_t), 1, nullptr); + OD.vertexListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.normalListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.tangentListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.texCoordListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.indexListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); + OD.textureObjectsBuffer = owlDeviceBufferCreate(OD.context, OWL_TEXTURE, Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); launchParamsSetBuffer(OD.launchParams, "entities", OD.entityBuffer); launchParamsSetBuffer(OD.launchParams, "transforms", OD.transformBuffer); @@ -784,15 +719,15 @@ void initializeOptix(bool headless) OD.rayGen = rayGenCreate(OD.context,OD.module,"rayGen", sizeof(RayGenData), rayGenVars,-1); owlRayGenSet1i(OD.rayGen, "deviceCount", numGPUsFound); - buildPrograms(OD.context); + owlBuildPrograms(OD.context); /* Temporary GAS. Required for certain older driver versions. 
*/ const int NUM_VERTICES = 1; vec3 vertices[NUM_VERTICES] = {{ 0.f, 0.f, 0.f }}; const int NUM_INDICES = 1; ivec3 indices[NUM_INDICES] = {{ 0, 0, 0 }}; - OWLBuffer vertexBuffer = deviceBufferCreate(OD.context,OWL_FLOAT4,NUM_VERTICES,vertices); - OWLBuffer indexBuffer = deviceBufferCreate(OD.context,OWL_INT3,NUM_INDICES,indices); + OWLBuffer vertexBuffer = owlDeviceBufferCreate(OD.context,OWL_FLOAT4,NUM_VERTICES,vertices); + OWLBuffer indexBuffer = owlDeviceBufferCreate(OD.context,OWL_INT3,NUM_INDICES,indices); OWLGeom trianglesGeom = geomCreate(OD.context,OD.trianglesGeomType); trianglesSetVertices(trianglesGeom,vertexBuffer,NUM_VERTICES,sizeof(vec4),0); trianglesSetIndices(trianglesGeom,indexBuffer, NUM_INDICES,sizeof(ivec3),0); @@ -819,8 +754,8 @@ void initializeOptix(bool headless) launchParamsSetGroup(OD.launchParams, "volumesIAS", volumesIAS); // Build *SBT* required to trace the groups - buildPipeline(OD.context); - buildSBT(OD.context); + owlBuildPipeline(OD.context); + owlBuildSBT(OD.context); // Setup denoiser configureDenoiser(OD.enableAlbedoGuide, OD.enableNormalGuide, OD.enableKernelPrediction); @@ -1222,11 +1157,11 @@ void updateComponents() if (m->getTriangleIndices().size() == 0) throw std::runtime_error("ERROR: indices is 0"); // Next, allocate resources for the new mesh. 
- OD.vertexLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec3), m->getVertices().size(), m->getVertices().data()); - OD.normalLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getNormals().size(), m->getNormals().data()); - OD.tangentLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getTangents().size(), m->getTangents().data()); - OD.texCoordLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(vec2), m->getTexCoords().size(), m->getTexCoords().data()); - OD.indexLists[m->getAddress()] = deviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), m->getTriangleIndices().size(), m->getTriangleIndices().data()); + OD.vertexLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec3), m->getVertices().size(), m->getVertices().data()); + OD.normalLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getNormals().size(), m->getNormals().data()); + OD.tangentLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec4), m->getTangents().size(), m->getTangents().data()); + OD.texCoordLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(vec2), m->getTexCoords().size(), m->getTexCoords().data()); + OD.indexLists[m->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), m->getTriangleIndices().size(), m->getTriangleIndices().data()); // Create geometry and build BLAS OD.surfaceGeomList[m->getAddress()] = geomCreate(OD.context, OD.trianglesGeomType); @@ -1236,13 +1171,13 @@ void updateComponents() groupBuildAccel(OD.surfaceBlasList[m->getAddress()]); } - bufferUpload(OD.vertexListsBuffer, OD.vertexLists.data()); - bufferUpload(OD.texCoordListsBuffer, OD.texCoordLists.data()); - bufferUpload(OD.indexListsBuffer, OD.indexLists.data()); - bufferUpload(OD.normalListsBuffer, OD.normalLists.data()); - bufferUpload(OD.tangentListsBuffer, OD.tangentLists.data()); + 
owlBufferUpload(OD.vertexListsBuffer, OD.vertexLists.data()); + owlBufferUpload(OD.texCoordListsBuffer, OD.texCoordLists.data()); + owlBufferUpload(OD.indexListsBuffer, OD.indexLists.data()); + owlBufferUpload(OD.normalListsBuffer, OD.normalLists.data()); + owlBufferUpload(OD.tangentListsBuffer, OD.tangentLists.data()); Mesh::updateComponents(); - bufferUpload(OptixData.meshBuffer, Mesh::getFrontStruct()); + owlBufferUpload(OptixData.meshBuffer, Mesh::getFrontStruct()); } // Manage Volumes: Build / Rebuild BLAS @@ -1394,8 +1329,8 @@ void updateComponents() } owlInstanceGroupSetTransforms(OD.surfacesIAS,0,(const float*)t0OwlSurfaceTransforms.data()); owlInstanceGroupSetTransforms(OD.surfacesIAS,1,(const float*)t1OwlSurfaceTransforms.data()); - bufferResize(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.size()); - bufferUpload(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.data()); + owlBufferResize(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.size()); + owlBufferUpload(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.data()); } // Set volume transforms to IAS, upload volume instance to entity map @@ -1408,8 +1343,8 @@ void updateComponents() } owlInstanceGroupSetTransforms(OD.volumesIAS,0,(const float*)t0OwlVolumeTransforms.data()); owlInstanceGroupSetTransforms(OD.volumesIAS,1,(const float*)t1OwlVolumeTransforms.data()); - bufferResize(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.size()); - bufferUpload(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.data()); + owlBufferResize(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.size()); + owlBufferUpload(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.data()); } // Build IAS @@ -1419,7 +1354,7 @@ void updateComponents() launchParamsSetGroup(OD.launchParams, "surfacesIAS", OD.surfacesIAS); // Now that IAS have changed, we need to rebuild SBT - buildSBT(OD.context); + owlBuildSBT(OD.context); // Release any old IAS (TODO, don't rebuild if 
entity edit doesn't effect IAS...) if (oldSurfaceIAS) {owlGroupRelease(oldSurfaceIAS);} @@ -1434,14 +1369,14 @@ void updateComponents() if (!entities[eid].getMesh()) continue; OD.lightEntities.push_back(eid); } - bufferResize(OptixData.lightEntitiesBuffer, OD.lightEntities.size()); - bufferUpload(OptixData.lightEntitiesBuffer, OD.lightEntities.data()); + owlBufferResize(OptixData.lightEntitiesBuffer, OD.lightEntities.size()); + owlBufferUpload(OptixData.lightEntitiesBuffer, OD.lightEntities.data()); OD.LP.numLightEntities = uint32_t(OD.lightEntities.size()); launchParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); // Finally, upload entity structs to the GPU. Entity::updateComponents(); - bufferUpload(OptixData.entityBuffer, Entity::getFrontStruct()); + owlBufferUpload(OptixData.entityBuffer, Entity::getFrontStruct()); } // Manage textures and materials @@ -1568,13 +1503,13 @@ void updateComponents() } Material::updateComponents(); - bufferUpload(OptixData.materialBuffer, OptixData.materialStructs.data()); + owlBufferUpload(OptixData.materialBuffer, OptixData.materialStructs.data()); } - bufferUpload(OD.textureObjectsBuffer, OD.textureObjects.data()); + owlBufferUpload(OD.textureObjectsBuffer, OD.textureObjects.data()); Texture::updateComponents(); memcpy(OptixData.textureStructs.data(), Texture::getFrontStruct(), Texture::getCount() * sizeof(TextureStruct)); - bufferUpload(OptixData.textureBuffer, OptixData.textureStructs.data()); + owlBufferUpload(OptixData.textureBuffer, OptixData.textureStructs.data()); } // Manage transforms @@ -1602,13 +1537,13 @@ void updateComponents() // Manage Cameras if (Camera::areAnyDirty()) { Camera::updateComponents(); - bufferUpload(OptixData.cameraBuffer, Camera::getFrontStruct()); + owlBufferUpload(OptixData.cameraBuffer, Camera::getFrontStruct()); } // Manage lights if (Light::areAnyDirty()) { Light::updateComponents(); - bufferUpload(OptixData.lightBuffer, Light::getFrontStruct()); + 
owlBufferUpload(OptixData.lightBuffer, Light::getFrontStruct()); } } @@ -1644,9 +1579,9 @@ void denoiseImage() { synchronizeDevices(); auto &OD = OptixData; - auto cudaStream = getStream(OD.context, 0); + auto cudaStream = owlContextGetStream(OD.context, 0); - CUdeviceptr frameBuffer = (CUdeviceptr) bufferGetPointer(OD.frameBuffer, 0); + CUdeviceptr frameBuffer = (CUdeviceptr) owlBufferGetPointer(OD.frameBuffer, 0); std::vector inputLayers; OptixImage2D colorLayer; @@ -1655,7 +1590,7 @@ void denoiseImage() { colorLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; colorLayer.pixelStrideInBytes = 4 * sizeof(float); colorLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - colorLayer.data = (CUdeviceptr) bufferGetPointer(OD.frameBuffer, 0); + colorLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.frameBuffer, 0); inputLayers.push_back(colorLayer); OptixImage2D albedoLayer; @@ -1664,7 +1599,7 @@ void denoiseImage() { albedoLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; albedoLayer.pixelStrideInBytes = 4 * sizeof(float); albedoLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - albedoLayer.data = (CUdeviceptr) bufferGetPointer(OD.albedoBuffer, 0); + albedoLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.albedoBuffer, 0); if (OD.enableAlbedoGuide) inputLayers.push_back(albedoLayer); OptixImage2D normalLayer; @@ -1673,7 +1608,7 @@ void denoiseImage() { normalLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; normalLayer.pixelStrideInBytes = 4 * sizeof(float); normalLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - normalLayer.data = (CUdeviceptr) bufferGetPointer(OD.normalBuffer, 0); + normalLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.normalBuffer, 0); if (OD.enableNormalGuide) inputLayers.push_back(normalLayer); OptixImage2D outputLayer = colorLayer; // can I get away with this? 
@@ -1692,8 +1627,8 @@ void denoiseImage() { OD.denoiser, cudaStream, &inputLayers[0], - (CUdeviceptr) bufferGetPointer(OD.hdrIntensityBuffer, 0), - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.hdrIntensityBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes)); } @@ -1703,31 +1638,31 @@ void denoiseImage() { OD.denoiser, cudaStream, &inputLayers[0], - (CUdeviceptr) bufferGetPointer(OD.colorAvgBuffer, 0), - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.colorAvgBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes)); } #endif params.denoiseAlpha = 0; // Don't touch alpha. params.blendFactor = 0.0f; // Show the denoised image only. - params.hdrIntensity = (CUdeviceptr) bufferGetPointer(OD.hdrIntensityBuffer, 0); + params.hdrIntensity = (CUdeviceptr) owlBufferGetPointer(OD.hdrIntensityBuffer, 0); #ifdef USE_OPTIX72 - params.hdrAverageColor = (CUdeviceptr) bufferGetPointer(OD.colorAvgBuffer, 0); + params.hdrAverageColor = (CUdeviceptr) owlBufferGetPointer(OD.colorAvgBuffer, 0); #endif OPTIX_CHECK(optixDenoiserInvoke( OD.denoiser, cudaStream, ¶ms, - (CUdeviceptr) bufferGetPointer(OD.denoiserStateBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserStateBuffer, 0), OD.denoiserSizes.stateSizeInBytes, inputLayers.data(), inputLayers.size(), /* inputOffsetX */ 0, /* inputOffsetY */ 0, &outputLayer, - (CUdeviceptr) bufferGetPointer(OD.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes )); @@ -1741,7 +1676,7 @@ void drawFrameBufferToWindow() auto &OD = OptixData; cudaGraphicsMapResources(1, &OD.cudaResourceTex); - const void* fbdevptr = bufferGetPointer(OD.frameBuffer,0); + const void* fbdevptr = owlBufferGetPointer(OD.frameBuffer,0); cudaArray_t array; cudaGraphicsSubResourceGetMappedArray(&array, OD.cudaResourceTex, 
0, 0); cudaMemcpyToArray(array, 0, 0, fbdevptr, OD.LP.frameSize.x * OD.LP.frameSize.y * sizeof(glm::vec4), cudaMemcpyDeviceToDevice); @@ -1857,9 +1792,9 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP if (!OptixData.colorAvgBuffer) OptixData.colorAvgBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), 4, nullptr); if (!OptixData.denoiserScratchBuffer) - OptixData.denoiserScratchBuffer = deviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); + OptixData.denoiserScratchBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); if (!OptixData.denoiserStateBuffer) - OptixData.denoiserStateBuffer = deviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); + OptixData.denoiserStateBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(void*), 1, nullptr); // Setup denoiser OptixDenoiserOptions options; @@ -1873,8 +1808,8 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP if (OptixData.denoiser) optixDenoiserDestroy(OptixData.denoiser); - auto optixContext = getOptixContext(OptixData.context, 0); - auto cudaStream = getStream(OptixData.context, 0); + auto optixContext = owlContextGetOptixContext(OptixData.context, 0); + auto cudaStream = owlContextGetStream(OptixData.context, 0); OPTIX_CHECK(optixDenoiserCreate(optixContext, &options, &OptixData.denoiser)); OptixDenoiserModelKind kind; @@ -1907,9 +1842,9 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP (cudaStream_t) cudaStream, (unsigned int) OptixData.LP.frameSize.x, (unsigned int) OptixData.LP.frameSize.y, - (CUdeviceptr) bufferGetPointer(OptixData.denoiserStateBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OptixData.denoiserStateBuffer, 0), OptixData.denoiserSizes.stateSizeInBytes, - (CUdeviceptr) bufferGetPointer(OptixData.denoiserScratchBuffer, 0), + (CUdeviceptr) owlBufferGetPointer(OptixData.denoiserScratchBuffer, 0), 
scratchSizeInBytes ); }); @@ -1922,7 +1857,7 @@ std::vector readFrameBuffer() { int num_devices = getDeviceCount(); synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*)bufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*)owlBufferGetPointer(OptixData.frameBuffer,0); for (uint32_t test = 0; test < frameBuffer.size(); test += 4) { frameBuffer[test + 0] = fb[test / 4].r; frameBuffer[test + 1] = fb[test / 4].g; @@ -1995,7 +1930,7 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); synchronizeDevices(); @@ -2127,7 +2062,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); OptixData.LP.renderDataMode = 0; @@ -2418,12 +2353,12 @@ void initializeInteractive( denoiseImage(); } } - // glm::vec4* samplePtr = (glm::vec4*) bufferGetPointer(OptixData.accumBuffer,0); - // glm::vec4* mvecPtr = (glm::vec4*) bufferGetPointer(OptixData.mvecBuffer,0); - // glm::vec4* t0AlbPtr = (glm::vec4*) bufferGetPointer(OptixData.scratchBuffer,0); - // glm::vec4* t1AlbPtr = (glm::vec4*) bufferGetPointer(OptixData.albedoBuffer,0); - // glm::vec4* fbPtr = (glm::vec4*) bufferGetPointer(OptixData.frameBuffer,0); - // glm::vec4* sPtr = (glm::vec4*) bufferGetPointer(OptixData.normalBuffer,0); + // glm::vec4* samplePtr = (glm::vec4*) owlBufferGetPointer(OptixData.accumBuffer,0); + // glm::vec4* mvecPtr = (glm::vec4*) 
owlBufferGetPointer(OptixData.mvecBuffer,0); + // glm::vec4* t0AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.scratchBuffer,0); + // glm::vec4* t1AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.albedoBuffer,0); + // glm::vec4* fbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); + // glm::vec4* sPtr = (glm::vec4*) owlBufferGetPointer(OptixData.normalBuffer,0); // int width = OptixData.LP.frameSize.x; // int height = OptixData.LP.frameSize.y; // reproject(samplePtr, t0AlbPtr, t1AlbPtr, mvecPtr, sPtr, fbPtr, width, height); From 0c46d4d6e9336bbbd39c2e524cc003ac19cae626 Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 17 Feb 2021 00:39:45 -0700 Subject: [PATCH 02/55] some fixes for multigpu setups. Some issues with different gpus rendering different images: --- src/nvisii/nvisii.cpp | 92 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 4 deletions(-) diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 0d635a38..3f415090 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -1575,6 +1575,80 @@ void updateLaunchParams() OptixData.LP.frameID ++; } +// Different GPUs have different local framebuffers. +// This function combines those framebuffers on the CPU, then uploads results to device 0. 
+void mergeFrameBuffers() { + int deviceCount = getDeviceCount(); + int width = OptixData.LP.frameSize.x; + int height = OptixData.LP.frameSize.y; + if (deviceCount <= 1) return; + + // synchronizeDevices(); + + std::vector fb_h(width * height); + std::vector fba_h(width * height); + std::vector fbn_h(width * height); + std::vector> fb_hd(deviceCount); + std::vector> fba_hd(deviceCount); + std::vector> fbn_hd(deviceCount); + for (uint32_t i = 0; i < deviceCount; ++i){ + fb_hd[i] = std::vector(width * height); + fba_hd[i] = std::vector(width * height); + fbn_hd[i] = std::vector(width * height); + void* fb_d = (void*)owlBufferGetPointer(OptixData.frameBuffer,i); + void* fba_d = (void*)owlBufferGetPointer(OptixData.albedoBuffer,i); + void* fbn_d = (void*)owlBufferGetPointer(OptixData.normalBuffer,i); + cudaMemcpyAsync((void*)fb_hd[i].data(), (void*)fb_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); + cudaMemcpyAsync((void*)fba_hd[i].data(), (void*)fba_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); + cudaMemcpyAsync((void*)fbn_hd[i].data(), (void*)fbn_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); + } + // synchronizeDevices(); + + // note, GPUs render 32xN strips + for (uint32_t y = 0; y < height; ++y) { + for (uint32_t x = 0; x < width; x += 32) { + if (x >= width) continue; + int deviceThatIsResponsible = (x>>5) % deviceCount; + { + glm::vec4* A = fb_h.data() + (y * width) + x; + glm::vec4* B = fb_hd[deviceThatIsResponsible].data() + (y * width) + x; + memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); + } + { + glm::vec4* A = fba_h.data() + (y * width) + x; + glm::vec4* B = fba_hd[deviceThatIsResponsible].data() + (y * width) + x; + memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); + } + { + glm::vec4* A = fbn_h.data() + (y * width) + x; + glm::vec4* B = fbn_hd[deviceThatIsResponsible].data() + (y * width) + x; + memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); + } + } + } + + // // note, GPUs 
render 32xN strips + // for (uint32_t y = 0; y < height; ++y) { + // for (uint32_t x = 0; x < width; x += 32) { + // int deviceThatIsResponsible = (x>>5) % deviceCount; + // glm::vec4* A = fb_h.data() + (y * width) + x; + // glm::vec4* B = ((glm::vec4*)fb_d[deviceThatIsResponsible]) + (y * width) + x; + // cudaMemcpyAsync((void*)A, (void*)B, min(32, int(width - x)) * sizeof(glm::vec4), cudaMemcpyDeviceToHost); + // } + // } + + + // cudaMemcpyAsync(fb_h.data(), fb_d[1], fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); + synchronizeDevices(); + void* fb_d = (void*)owlBufferGetPointer(OptixData.frameBuffer,0); + void* fba_d = (void*)owlBufferGetPointer(OptixData.albedoBuffer,0); + void* fbn_d = (void*)owlBufferGetPointer(OptixData.normalBuffer,0); + cudaMemcpyAsync(fb_d, fb_h.data(), fb_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); + cudaMemcpyAsync(fba_d, fba_h.data(), fba_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); + cudaMemcpyAsync(fbn_d, fbn_h.data(), fbn_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); + // synchronizeDevices(); +} + void denoiseImage() { synchronizeDevices(); @@ -1903,12 +1977,14 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi updateLaunchParams(); owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); - if (OptixData.enableDenoiser) - { - denoiseImage(); - } if (!NVISII.headlessMode) { + mergeFrameBuffers(); + if (OptixData.enableDenoiser) + { + denoiseImage(); + } + drawFrameBufferToWindow(); glfwSetWindowTitle(WindowData.window, (std::to_string(i) + std::string("/") + std::to_string(samplesPerPixel)).c_str()); @@ -1930,6 +2006,12 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi synchronizeDevices(); + mergeFrameBuffers(); + if (OptixData.enableDenoiser) + { + denoiseImage(); + } + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); 
cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); @@ -2049,6 +2131,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra updateLaunchParams(); owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); + mergeFrameBuffers(); // Dont run denoiser to raw data rendering // if (OptixData.enableDenoiser) // { @@ -2348,6 +2431,7 @@ void initializeInteractive( updateComponents(); updateLaunchParams(); owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); + mergeFrameBuffers(); if (OptixData.enableDenoiser) { denoiseImage(); From 81291c7c1602107b9e0b3ea66bddb465ce2842f1 Mon Sep 17 00:00:00 2001 From: n8vm Date: Sat, 20 Feb 2021 12:12:34 -0700 Subject: [PATCH 03/55] changing how max path bounces are handled to avoid long tails. --- src/nvisii/devicecode/path_tracer.cu | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index ed8344db..63b09de8 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -940,6 +940,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() uint8_t transparencyDepth = 0; uint8_t transmissionDepth = 0; uint8_t volumeDepth = 0; + int sampledBsdf = -1; + bool useBRDF = true; // direct here is used for final image clamping float3 directIllum = make_float3(0.f); @@ -1214,7 +1216,6 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float3 irradiance = make_float3(0.f); // If we hit a volume, use hybrid scattering to determine whether or not to use a BRDF or a phase function. 
- bool useBRDF = true; if (volPayload.tHit >= 0.f) { float opacity = mat.alpha; // would otherwise be sampled from a transfer function float grad_len = uv.y; @@ -1232,7 +1233,6 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // First, sample the BRDF / phase function so that we can use the sampled direction for MIS float3 w_i; float bsdfPDF; - int sampledBsdf = -1; float3 bsdf; if (useBRDF) { sample_disney_brdf( @@ -1554,11 +1554,14 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // of terminating, just so that we don't get black regions in our glass if (transmissionDepth >= LP.maxTransmissionDepth) continue; } while ( - diffuseDepth < LP.maxDiffuseDepth && - glossyDepth < LP.maxGlossyDepth && - // transmissionDepth < LP.maxTransmissionDepth && // see comment above - transparencyDepth < LP.maxTransparencyDepth && - volumeDepth < LP.maxVolumeDepth + // Terminate the path if the sampled BRDF's corresponding bounce depth exceeds the max bounce for that bounce type minus the overall path depth. + // This prevents long tails that can otherwise occur from mixing BRDF events + (!(sampledBsdf == DISNEY_DIFFUSE_BRDF && diffuseDepth > (LP.maxDiffuseDepth - (depth - 1)))) && + (!(sampledBsdf == DISNEY_GLOSSY_BRDF && glossyDepth > LP.maxGlossyDepth - (depth - 1)) ) && + (!(sampledBsdf == DISNEY_CLEARCOAT_BRDF && glossyDepth > LP.maxGlossyDepth - (depth - 1)) ) && + (!(useBRDF == false && volumeDepth > LP.maxVolumeDepth - (depth - 1))) && + (!(transparencyDepth > LP.maxTransparencyDepth - (depth - 1))) + // (!(sampledBsdf == DISNEY_TRANSMISSION_BRDF && transmissionDepth < LP.maxTransmissionDepth - (depth - 1)) ) && // see comment above ); // For segmentations, save heatmap metadata From f57bcc6b6210b6d9d6d9d097fc874f2530c36bb7 Mon Sep 17 00:00:00 2001 From: n8vm Date: Fri, 26 Feb 2021 14:18:48 -0700 Subject: [PATCH 04/55] working on multigpu perf --- CMakeLists.txt | 3 + include/nvisii/utilities/CMakeLists.txt | 1 + include/nvisii/utilities/work_distribution.h | 74 ++++ src/nvisii/devicecode/buffer.h | 2 
+- src/nvisii/devicecode/launch_params.h | 4 +- src/nvisii/devicecode/path_tracer.cu | 71 ++-- src/nvisii/nvisii.cpp | 403 ++++++++++--------- src/nvisii/nvisii.cu | 37 +- 8 files changed, 359 insertions(+), 236 deletions(-) create mode 100644 include/nvisii/utilities/work_distribution.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 9055fc1e..6b86d441 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -293,6 +293,9 @@ find_program(BIN2C bin2c /usr/local/cuda/bin) # optix 7 +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/include/nvisii/utilities/sutil/) +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/include/nvisii/utilities/) + if ($ENV{OPTIX_VERSION}) set(OPTION_OPTIX_VERSION $ENV{OPTIX_VERSION}) else() diff --git a/include/nvisii/utilities/CMakeLists.txt b/include/nvisii/utilities/CMakeLists.txt index ee81cbbe..84d88ba4 100644 --- a/include/nvisii/utilities/CMakeLists.txt +++ b/include/nvisii/utilities/CMakeLists.txt @@ -14,4 +14,5 @@ set(Utilities_HDR ${CMAKE_CURRENT_SOURCE_DIR}/singleton.h ${CMAKE_CURRENT_SOURCE_DIR}/version.h ${CMAKE_CURRENT_SOURCE_DIR}/procedural_sky.h + ${CMAKE_CURRENT_SOURCE_DIR}/work_distribution.h PARENT_SCOPE) diff --git a/include/nvisii/utilities/work_distribution.h b/include/nvisii/utilities/work_distribution.h new file mode 100644 index 00000000..5f1c5a56 --- /dev/null +++ b/include/nvisii/utilities/work_distribution.h @@ -0,0 +1,74 @@ +#pragma once + +#if defined(__CUDACC__) || defined(__CUDABE__) +# define SWD_HOSTDEVICE __host__ __device__ +# define SWD_INLINE __forceinline__ +# define CONST_STATIC_INIT( ... ) +#else +# define SWD_HOSTDEVICE +# define SWD_INLINE inline +# define CONST_STATIC_INIT( ... 
) = __VA_ARGS__ +#endif + +#include + +#include + +class StaticWorkDistribution +{ +public: + SWD_INLINE SWD_HOSTDEVICE void setRasterSize( int width, int height ) + { + m_width = width; + m_height = height; + } + + + SWD_INLINE SWD_HOSTDEVICE void setNumGPUs( int32_t num_gpus ) + { + m_num_gpus = num_gpus; + } + + + SWD_INLINE SWD_HOSTDEVICE int32_t numSamples( ) + { + const int tile_strip_width = TILE_WIDTH*m_num_gpus; + const int tile_strip_height = TILE_HEIGHT; + const int num_tile_strip_cols = m_width /tile_strip_width + ( m_width %tile_strip_width == 0 ? 0 : 1 ); + const int num_tile_strip_rows = m_height/tile_strip_height + ( m_height%tile_strip_height == 0 ? 0 : 1 ); + return num_tile_strip_rows*num_tile_strip_cols*TILE_WIDTH*TILE_HEIGHT; + } + + + SWD_INLINE SWD_HOSTDEVICE int2 getSamplePixel( int32_t gpu_idx, int32_t sample_idx ) + { + const int tile_strip_width = TILE_WIDTH*m_num_gpus; + const int tile_strip_height = TILE_HEIGHT; + const int num_tile_strip_cols = m_width /tile_strip_width + ( m_width % tile_strip_width == 0 ? 
0 : 1 ); + + const int tile_strip_idx = sample_idx / (TILE_WIDTH*TILE_HEIGHT ); + const int tile_strip_y = tile_strip_idx / num_tile_strip_cols; + const int tile_strip_x = tile_strip_idx - tile_strip_y * num_tile_strip_cols; + const int tile_strip_x_start = tile_strip_x * tile_strip_width; + const int tile_strip_y_start = tile_strip_y * tile_strip_height; + + const int tile_pixel_idx = sample_idx - ( tile_strip_idx * TILE_WIDTH*TILE_HEIGHT ); + const int tile_pixel_y = tile_pixel_idx / TILE_WIDTH; + const int tile_pixel_x = tile_pixel_idx - tile_pixel_y * TILE_WIDTH; + + const int tile_offset_x = ( gpu_idx + tile_strip_y % m_num_gpus ) % m_num_gpus * TILE_WIDTH; + + const int pixel_y = tile_strip_y_start + tile_pixel_y; + const int pixel_x = tile_strip_x_start + tile_pixel_x + tile_offset_x ; + return make_int2( pixel_x, pixel_y ); + } + + +private: + int32_t m_num_gpus = 0; + int32_t m_width = 0; + int32_t m_height = 0; + + static const int32_t TILE_WIDTH = 8; + static const int32_t TILE_HEIGHT = 4; +}; diff --git a/src/nvisii/devicecode/buffer.h b/src/nvisii/devicecode/buffer.h index 5464326e..8354d66f 100644 --- a/src/nvisii/devicecode/buffer.h +++ b/src/nvisii/devicecode/buffer.h @@ -35,4 +35,4 @@ class Buffer : public owl::device::Buffer #define GET(RETURN, TYPE, BUFFER, ADDRESS) \ if (BUFFER.data == nullptr) {::printf("Device Side Error on Line %d: buffer was nullptr.\n", __LINE__); asm("trap;");} \ if (ADDRESS >= BUFFER.count) {::printf("Device Side Error on Line %d: out of bounds access (address: %d, size %d).\n", __LINE__, ADDRESS, uint32_t(BUFFER.count)); asm("trap;");} \ -RETURN = ((TYPE*)BUFFER.data)[ADDRESS];\ +RETURN = ((TYPE*)BUFFER.data)[ADDRESS]; diff --git a/src/nvisii/devicecode/launch_params.h b/src/nvisii/devicecode/launch_params.h index ea2c2d51..ea5edb0f 100644 --- a/src/nvisii/devicecode/launch_params.h +++ b/src/nvisii/devicecode/launch_params.h @@ -19,10 +19,12 @@ #include "./buffer.h" struct LaunchParams { + Buffer sampleIndexBuffer; + 
glm::ivec2 frameSize; uint64_t frameID = 0; glm::vec4 *frameBuffer; - glm::vec4 *albedoBuffer; + uchar4 *albedoBuffer; glm::vec4 *normalBuffer; glm::vec4 *scratchBuffer; glm::vec4 *mvecBuffer; diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index ed8344db..8d99d060 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -644,7 +644,7 @@ float sampleTime(float xi) { } inline __device__ -owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transform, ivec2 pixelID, ivec2 frameSize, LCGRand &rng, float time) +owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transform, int2 pixelID, float2 frameSize, LCGRand &rng, float time) { auto &LP = optixLaunchParams; /* Generate camera rays */ @@ -665,7 +665,7 @@ owl::Ray generateRay(const CameraStruct &camera, const TransformStruct &transfor - vec2(LP.xPixelSamplingInterval[0], LP.yPixelSamplingInterval[0]) ) * vec2(lcg_randomf(rng),lcg_randomf(rng)); - vec2 inUV = (vec2(pixelID.x, pixelID.y) + aa) / vec2(frameSize); + vec2 inUV = (vec2(pixelID.x, pixelID.y) + aa) / make_vec2(frameSize); vec3 right = normalize(glm::column(viewinv, 0)); vec3 up = normalize(glm::column(viewinv, 1)); vec3 origin = glm::column(viewinv, 3); @@ -885,24 +885,19 @@ bool debugging() { OPTIX_RAYGEN_PROGRAM(rayGen)() { const RayGenData &self = owl::getProgramData(); - cudaTextureObject_t envTex = getEnvironmentTexture(); - auto &LP = optixLaunchParams; auto launchIndex = optixGetLaunchIndex().x; auto launchDim = optixGetLaunchDimensions().x; - auto pixelID = ivec2(launchIndex % LP.frameSize.x, launchIndex / LP.frameSize.x); + + GET(const int2 pixelID, int2, LP.sampleIndexBuffer, launchIndex); + + // Work distribution might assign tiles that cross over image boundary + if( pixelID.x > LP.frameSize.x-1 || pixelID.y > LP.frameSize.y-1 ) return; + + cudaTextureObject_t envTex = getEnvironmentTexture(); bool debug = (pixelID.x == 
int(LP.frameSize.x / 2) && pixelID.y == int(LP.frameSize.y / 2)); - float tmax = 1e20f; //todo: customize depending on scene bounds //glm::distance(LP.sceneBBMin, LP.sceneBBMax); - /* compute who is repsonible for a given group of pixels */ - /* and if it's not us, just return. */ - /* (some other device will compute these pixels) */ - int deviceThatIsResponsible = (pixelID.x>>5) % self.deviceCount; - if (self.deviceIndex != deviceThatIsResponsible) { - return; - } - auto dims = ivec2(LP.frameSize.x, LP.frameSize.x); uint64_t start_clock = clock(); int numLights = LP.numLightEntities; @@ -924,7 +919,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // Trace an initial ray through the scene - surfRay = generateRay(camera, camera_transform, pixelID, LP.frameSize, rng, time); + surfRay = generateRay(camera, camera_transform, pixelID, make_float2(LP.frameSize), rng, time); surfRay.tmax = tmax; float3 accum_illum = make_float3(0.f); @@ -1593,12 +1588,12 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() auto fbOfs = pixelID.x+LP.frameSize.x * ((LP.frameSize.y - 1) - pixelID.y); float4* accumPtr = (float4*) LP.accumPtr; float4* fbPtr = (float4*) LP.frameBuffer; - float4* normalPtr = (float4*) LP.normalBuffer; - float4* albedoPtr = (float4*) LP.albedoBuffer; + // float4* normalPtr = (float4*) LP.normalBuffer; + // float4* albedoPtr = (float4*) LP.albedoBuffer; float4 prev_color = accumPtr[fbOfs]; - float4 prev_normal = normalPtr[fbOfs]; - float4 prev_albedo = albedoPtr[fbOfs]; + // float4 prev_normal = normalPtr[fbOfs]; + // float4 prev_albedo = albedoPtr[fbOfs]; float4 accum_color; if (LP.renderDataMode == RenderDataFlags::NONE) @@ -1612,27 +1607,27 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } - // compute screen space normal / albedo - vec4 oldAlbedo = make_vec4(prev_albedo); - vec4 oldNormal = make_vec4(prev_normal); - if (any(isnan(oldAlbedo))) oldAlbedo = vec4(0.f); - if (any(isnan(oldNormal))) oldNormal = vec4(0.f); - vec4 newAlbedo = vec4(primaryAlbedo.x, primaryAlbedo.y, primaryAlbedo.z, 1.f); - vec4 
accumAlbedo = (newAlbedo + float(LP.frameID) * oldAlbedo) / float(LP.frameID + 1); - vec4 newNormal = vec4(make_vec3(primaryNormal), 1.f); - if (!all(equal(make_vec3(primaryNormal), vec3(0.f, 0.f, 0.f)))) { - glm::quat r0 = glm::quat_cast(LP.viewT0); - glm::quat r1 = glm::quat_cast(LP.viewT1); - glm::quat rot = (glm::all(glm::equal(r0, r1))) ? r0 : glm::slerp(r0, r1, time); - vec3 tmp = normalize(glm::mat3_cast(rot) * make_vec3(primaryNormal)); - tmp = normalize(vec3(LP.proj * vec4(tmp, 0.f))); - newNormal = vec4(tmp, 1.f); - } - vec4 accumNormal = (newNormal + float(LP.frameID) * oldNormal) / float(LP.frameID + 1); + // // compute screen space normal / albedo + // vec4 oldAlbedo = make_vec4(prev_albedo); + // vec4 oldNormal = make_vec4(prev_normal); + // if (any(isnan(oldAlbedo))) oldAlbedo = vec4(0.f); + // if (any(isnan(oldNormal))) oldNormal = vec4(0.f); + // vec4 newAlbedo = vec4(primaryAlbedo.x, primaryAlbedo.y, primaryAlbedo.z, 1.f); + // vec4 accumAlbedo = (newAlbedo + float(LP.frameID) * oldAlbedo) / float(LP.frameID + 1); + // vec4 newNormal = vec4(make_vec3(primaryNormal), 1.f); + // if (!all(equal(make_vec3(primaryNormal), vec3(0.f, 0.f, 0.f)))) { + // glm::quat r0 = glm::quat_cast(LP.viewT0); + // glm::quat r1 = glm::quat_cast(LP.viewT1); + // glm::quat rot = (glm::all(glm::equal(r0, r1))) ? 
r0 : glm::slerp(r0, r1, time); + // vec3 tmp = normalize(glm::mat3_cast(rot) * make_vec3(primaryNormal)); + // tmp = normalize(vec3(LP.proj * vec4(tmp, 0.f))); + // newNormal = vec4(tmp, 1.f); + // } + // vec4 accumNormal = (newNormal + float(LP.frameID) * oldNormal) / float(LP.frameID + 1); // save data to frame buffers accumPtr[fbOfs] = accum_color; fbPtr[fbOfs] = accum_color; - albedoPtr[fbOfs] = make_float4(accumAlbedo); - normalPtr[fbOfs] = make_float4(accumNormal); + // albedoPtr[fbOfs] = make_float4(accumAlbedo); + // normalPtr[fbOfs] = make_float4(accumNormal); } diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 3f415090..487dead4 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -23,6 +23,7 @@ #define PBRLUT_IMPLEMENTATION #include #include +#include #include #include @@ -89,6 +90,8 @@ static struct OptixData { LaunchParams LP; GLuint imageTexID = -1; cudaGraphicsResource_t cudaResourceTex; + OWLBuffer sampleIndexBuffer; + OWLBuffer frameBuffer; OWLBuffer normalBuffer; OWLBuffer albedoBuffer; @@ -184,7 +187,7 @@ static struct NVISII { bool headlessMode; std::function callback; std::recursive_mutex callbackMutex; - + StaticWorkDistribution wd; } NVISII; void applyStyle() @@ -344,30 +347,6 @@ owl4x3f glmToOWL(glm::mat4 &xfm){ return oxfm; } -OWLLaunchParams launchParamsCreate(OWLContext context, size_t size, OWLVarDecl *vars, size_t numVars) -{ - return owlParamsCreate(context, size, vars, numVars); -} - -void launchParamsSetBuffer(OWLLaunchParams params, const char* varName, OWLBuffer buffer) -{ - owlParamsSetBuffer(params, varName, buffer); -} - -void launchParamsSetRaw(OWLLaunchParams params, const char* varName, const void* data) -{ - owlParamsSetRaw(params, varName, data); -} - -void launchParamsSetTexture(OWLLaunchParams params, const char* varName, OWLTexture texture) -{ - owlParamsSetTexture(params, varName, texture); -} - -void launchParamsSetGroup(OWLLaunchParams params, const char *varName, OWLGroup group) { - 
owlParamsSetGroup(params, varName, group); -} - void synchronizeDevices() { for (int i = 0; i < getDeviceCount(); i++) { @@ -426,9 +405,22 @@ void initializeFrameBuffer(int fbWidth, int fbHeight) { synchronizeDevices(); } +extern "C" void fillSamplesCUDA( + int32_t num_samples, + cudaStream_t stream, + int32_t gpu_idx, + int32_t num_gpus, + int32_t width, + int32_t height, + int2* samples ); + void resizeOptixFrameBuffer(uint32_t width, uint32_t height) { auto &OD = OptixData; + uint32_t numGPUs = owlGetDeviceCount(OD.context); + + NVISII.wd.setRasterSize( width, height ); + NVISII.wd.setNumGPUs( numGPUs ); OD.LP.frameSize.x = width; OD.LP.frameSize.y = height; @@ -439,6 +431,23 @@ void resizeOptixFrameBuffer(uint32_t width, uint32_t height) owlBufferResize(OD.mvecBuffer, width * height); owlBufferResize(OD.accumBuffer, width * height); + owlBufferResize(OD.sampleIndexBuffer, NVISII.wd.numSamples()); + + for (uint32_t i = 0; i < numGPUs; ++i) + { + cudaSetDevice( i ); + fillSamplesCUDA( + NVISII.wd.numSamples(), + owlContextGetStream(OD.context, i), + i, + numGPUs, + width, + height, + (int2*)owlBufferGetPointer(OD.sampleIndexBuffer, i) + ); + } + cudaSetDevice(0); + // Reconfigure denoiser optixDenoiserComputeMemoryResources(OD.denoiser, OD.LP.frameSize.x, OD.LP.frameSize.y, &OD.denoiserSizes); uint64_t scratchSizeInBytes; @@ -499,6 +508,7 @@ void initializeOptix(bool headless) /* Setup Optix Launch Params */ OWLVarDecl launchParamVars[] = { + { "sampleIndexBuffer", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, sampleIndexBuffer)}, { "frameSize", OWL_USER_TYPE(glm::ivec2), OWL_OFFSETOF(LaunchParams, frameSize)}, { "frameID", OWL_USER_TYPE(uint64_t), OWL_OFFSETOF(LaunchParams, frameID)}, { "frameBuffer", OWL_BUFPTR, OWL_OFFSETOF(LaunchParams, frameBuffer)}, @@ -563,27 +573,32 @@ void initializeOptix(bool headless) { "enableDomeSampling", OWL_USER_TYPE(bool), OWL_OFFSETOF(LaunchParams, enableDomeSampling)}, { /* sentinel to mark end of list */ } }; - OD.launchParams = 
launchParamsCreate(OD.context, sizeof(LaunchParams), launchParamVars, -1); + OD.launchParams = owlParamsCreate(OD.context, sizeof(LaunchParams), launchParamVars, -1); /* Create AOV Buffers */ if (!headless) { initializeFrameBuffer(512, 512); } - OD.frameBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + NVISII.wd.setRasterSize( 512, 512 ); + NVISII.wd.setNumGPUs( owlGetDeviceCount(OD.context) ); + OD.sampleIndexBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(int2), NVISII.wd.numSamples(), nullptr); + owlParamsSetBuffer(OD.launchParams, "sampleIndexBuffer", OD.sampleIndexBuffer); + + OD.frameBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); OD.accumBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.normalBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.albedoBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.scratchBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); - OD.mvecBuffer = owlDeviceBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512, nullptr); + OD.normalBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.albedoBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.scratchBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); + OD.mvecBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); OD.LP.frameSize = glm::ivec2(512, 512); - launchParamsSetBuffer(OD.launchParams, "frameBuffer", OD.frameBuffer); - launchParamsSetBuffer(OD.launchParams, "normalBuffer", OD.normalBuffer); - launchParamsSetBuffer(OD.launchParams, "albedoBuffer", OD.albedoBuffer); - launchParamsSetBuffer(OD.launchParams, "scratchBuffer", OD.scratchBuffer); - launchParamsSetBuffer(OD.launchParams, "mvecBuffer", OD.mvecBuffer); - 
launchParamsSetBuffer(OD.launchParams, "accumPtr", OD.accumBuffer); - launchParamsSetRaw(OD.launchParams, "frameSize", &OD.LP.frameSize); + owlParamsSetBuffer(OD.launchParams, "frameBuffer", OD.frameBuffer); + owlParamsSetBuffer(OD.launchParams, "normalBuffer", OD.normalBuffer); + owlParamsSetBuffer(OD.launchParams, "albedoBuffer", OD.albedoBuffer); + owlParamsSetBuffer(OD.launchParams, "scratchBuffer", OD.scratchBuffer); + owlParamsSetBuffer(OD.launchParams, "mvecBuffer", OD.mvecBuffer); + owlParamsSetBuffer(OD.launchParams, "accumPtr", OD.accumBuffer); + owlParamsSetRaw(OD.launchParams, "frameSize", &OD.LP.frameSize); /* Create Component Buffers */ // note, extra textures reserved for internal use @@ -606,24 +621,24 @@ void initializeOptix(bool headless) OD.indexListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); OD.textureObjectsBuffer = owlDeviceBufferCreate(OD.context, OWL_TEXTURE, Texture::getCount() + NUM_MAT_PARAMS * Material::getCount(), nullptr); - launchParamsSetBuffer(OD.launchParams, "entities", OD.entityBuffer); - launchParamsSetBuffer(OD.launchParams, "transforms", OD.transformBuffer); - launchParamsSetBuffer(OD.launchParams, "cameras", OD.cameraBuffer); - launchParamsSetBuffer(OD.launchParams, "materials", OD.materialBuffer); - launchParamsSetBuffer(OD.launchParams, "meshes", OD.meshBuffer); - launchParamsSetBuffer(OD.launchParams, "lights", OD.lightBuffer); - launchParamsSetBuffer(OD.launchParams, "textures", OD.textureBuffer); - launchParamsSetBuffer(OD.launchParams, "volumes", OD.volumeBuffer); - launchParamsSetBuffer(OD.launchParams, "lightEntities", OD.lightEntitiesBuffer); - launchParamsSetBuffer(OD.launchParams, "surfaceInstanceToEntity", OD.surfaceInstanceToEntityBuffer); - launchParamsSetBuffer(OD.launchParams, "volumeInstanceToEntity", OD.volumeInstanceToEntityBuffer); - launchParamsSetBuffer(OD.launchParams, "vertexLists", OD.vertexListsBuffer); - launchParamsSetBuffer(OD.launchParams, "normalLists", 
OD.normalListsBuffer); - launchParamsSetBuffer(OD.launchParams, "tangentLists", OD.tangentListsBuffer); - launchParamsSetBuffer(OD.launchParams, "texCoordLists", OD.texCoordListsBuffer); - launchParamsSetBuffer(OD.launchParams, "indexLists", OD.indexListsBuffer); - launchParamsSetBuffer(OD.launchParams, "textureObjects", OD.textureObjectsBuffer); - launchParamsSetBuffer(OD.launchParams, "volumeHandles", OD.volumeHandlesBuffer); + owlParamsSetBuffer(OD.launchParams, "entities", OD.entityBuffer); + owlParamsSetBuffer(OD.launchParams, "transforms", OD.transformBuffer); + owlParamsSetBuffer(OD.launchParams, "cameras", OD.cameraBuffer); + owlParamsSetBuffer(OD.launchParams, "materials", OD.materialBuffer); + owlParamsSetBuffer(OD.launchParams, "meshes", OD.meshBuffer); + owlParamsSetBuffer(OD.launchParams, "lights", OD.lightBuffer); + owlParamsSetBuffer(OD.launchParams, "textures", OD.textureBuffer); + owlParamsSetBuffer(OD.launchParams, "volumes", OD.volumeBuffer); + owlParamsSetBuffer(OD.launchParams, "lightEntities", OD.lightEntitiesBuffer); + owlParamsSetBuffer(OD.launchParams, "surfaceInstanceToEntity", OD.surfaceInstanceToEntityBuffer); + owlParamsSetBuffer(OD.launchParams, "volumeInstanceToEntity", OD.volumeInstanceToEntityBuffer); + owlParamsSetBuffer(OD.launchParams, "vertexLists", OD.vertexListsBuffer); + owlParamsSetBuffer(OD.launchParams, "normalLists", OD.normalListsBuffer); + owlParamsSetBuffer(OD.launchParams, "tangentLists", OD.tangentListsBuffer); + owlParamsSetBuffer(OD.launchParams, "texCoordLists", OD.texCoordListsBuffer); + owlParamsSetBuffer(OD.launchParams, "indexLists", OD.indexListsBuffer); + owlParamsSetBuffer(OD.launchParams, "textureObjects", OD.textureObjectsBuffer); + owlParamsSetBuffer(OD.launchParams, "volumeHandles", OD.volumeHandlesBuffer); uint32_t meshCount = Mesh::getCount(); OD.vertexLists.resize(meshCount); @@ -647,13 +662,13 @@ void initializeOptix(bool headless) OD.LP.environmentMapID = -1; OD.LP.environmentMapRotation = 
glm::quat(1,0,0,0); - launchParamsSetRaw(OD.launchParams, "environmentMapID", &OD.LP.environmentMapID); - launchParamsSetRaw(OD.launchParams, "environmentMapRotation", &OD.LP.environmentMapRotation); + owlParamsSetRaw(OD.launchParams, "environmentMapID", &OD.LP.environmentMapID); + owlParamsSetRaw(OD.launchParams, "environmentMapRotation", &OD.LP.environmentMapRotation); - launchParamsSetBuffer(OD.launchParams, "environmentMapRows", OD.environmentMapRowsBuffer); - launchParamsSetBuffer(OD.launchParams, "environmentMapCols", OD.environmentMapColsBuffer); - launchParamsSetRaw(OD.launchParams, "environmentMapWidth", &OD.LP.environmentMapWidth); - launchParamsSetRaw(OD.launchParams, "environmentMapHeight", &OD.LP.environmentMapHeight); + owlParamsSetBuffer(OD.launchParams, "environmentMapRows", OD.environmentMapRowsBuffer); + owlParamsSetBuffer(OD.launchParams, "environmentMapCols", OD.environmentMapColsBuffer); + owlParamsSetRaw(OD.launchParams, "environmentMapWidth", &OD.LP.environmentMapWidth); + owlParamsSetRaw(OD.launchParams, "environmentMapHeight", &OD.LP.environmentMapHeight); // OWLTexture GGX_E_AVG_LOOKUP = owlTexture2DCreate(OD.context, // OWL_TEXEL_FORMAT_R32F, @@ -673,22 +688,22 @@ void initializeOptix(bool headless) // launchParamsSetTexture(OD.launchParams, "GGX_E_LOOKUP", GGX_E_LOOKUP); OD.LP.numLightEntities = uint32_t(OD.lightEntities.size()); - launchParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); - launchParamsSetRaw(OD.launchParams, "domeLightIntensity", &OD.LP.domeLightIntensity); - launchParamsSetRaw(OD.launchParams, "domeLightExposure", &OD.LP.domeLightExposure); - launchParamsSetRaw(OD.launchParams, "domeLightColor", &OD.LP.domeLightColor); - launchParamsSetRaw(OD.launchParams, "directClamp", &OD.LP.directClamp); - launchParamsSetRaw(OD.launchParams, "indirectClamp", &OD.LP.indirectClamp); - launchParamsSetRaw(OD.launchParams, "maxDiffuseDepth", &OD.LP.maxDiffuseDepth); - launchParamsSetRaw(OD.launchParams, 
"maxGlossyDepth", &OD.LP.maxGlossyDepth); - launchParamsSetRaw(OD.launchParams, "maxTransparencyDepth", &OD.LP.maxTransparencyDepth); - launchParamsSetRaw(OD.launchParams, "maxTransmissionDepth", &OD.LP.maxTransmissionDepth); - launchParamsSetRaw(OD.launchParams, "maxVolumeDepth", &OD.LP.maxVolumeDepth); - launchParamsSetRaw(OD.launchParams, "numLightSamples", &OD.LP.numLightSamples); - launchParamsSetRaw(OD.launchParams, "seed", &OD.LP.seed); - launchParamsSetRaw(OD.launchParams, "xPixelSamplingInterval", &OD.LP.xPixelSamplingInterval); - launchParamsSetRaw(OD.launchParams, "yPixelSamplingInterval", &OD.LP.yPixelSamplingInterval); - launchParamsSetRaw(OD.launchParams, "timeSamplingInterval", &OD.LP.timeSamplingInterval); + owlParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); + owlParamsSetRaw(OD.launchParams, "domeLightIntensity", &OD.LP.domeLightIntensity); + owlParamsSetRaw(OD.launchParams, "domeLightExposure", &OD.LP.domeLightExposure); + owlParamsSetRaw(OD.launchParams, "domeLightColor", &OD.LP.domeLightColor); + owlParamsSetRaw(OD.launchParams, "directClamp", &OD.LP.directClamp); + owlParamsSetRaw(OD.launchParams, "indirectClamp", &OD.LP.indirectClamp); + owlParamsSetRaw(OD.launchParams, "maxDiffuseDepth", &OD.LP.maxDiffuseDepth); + owlParamsSetRaw(OD.launchParams, "maxGlossyDepth", &OD.LP.maxGlossyDepth); + owlParamsSetRaw(OD.launchParams, "maxTransparencyDepth", &OD.LP.maxTransparencyDepth); + owlParamsSetRaw(OD.launchParams, "maxTransmissionDepth", &OD.LP.maxTransmissionDepth); + owlParamsSetRaw(OD.launchParams, "maxVolumeDepth", &OD.LP.maxVolumeDepth); + owlParamsSetRaw(OD.launchParams, "numLightSamples", &OD.LP.numLightSamples); + owlParamsSetRaw(OD.launchParams, "seed", &OD.LP.seed); + owlParamsSetRaw(OD.launchParams, "xPixelSamplingInterval", &OD.LP.xPixelSamplingInterval); + owlParamsSetRaw(OD.launchParams, "yPixelSamplingInterval", &OD.LP.yPixelSamplingInterval); + owlParamsSetRaw(OD.launchParams, "timeSamplingInterval", 
&OD.LP.timeSamplingInterval); OWLVarDecl trianglesGeomVars[] = {{/* sentinel to mark end of list */}}; OD.trianglesGeomType = geomTypeCreate(OD.context, OWL_GEOM_TRIANGLES, sizeof(TrianglesGeomData), trianglesGeomVars,-1); @@ -738,7 +753,7 @@ void initializeOptix(bool headless) OWLGroup surfacesIAS = instanceGroupCreate(OD.context, 1); instanceGroupSetChild(surfacesIAS, 0, OD.placeholderGroup); groupBuildAccel(surfacesIAS); - launchParamsSetGroup(OD.launchParams, "surfacesIAS", surfacesIAS); + owlParamsSetGroup(OD.launchParams, "surfacesIAS", surfacesIAS); OWLGeom userGeom = owlGeomCreate(OD.context, OD.volumeGeomType); owlGeomSetPrimCount(userGeom, 1); @@ -751,7 +766,7 @@ void initializeOptix(bool headless) OWLGroup volumesIAS = instanceGroupCreate(OD.context, 1); instanceGroupSetChild(volumesIAS, 0, OD.placeholderUserGroup); groupBuildAccel(volumesIAS); - launchParamsSetGroup(OD.launchParams, "volumesIAS", volumesIAS); + owlParamsSetGroup(OD.launchParams, "volumesIAS", volumesIAS); // Build *SBT* required to trace the groups owlBuildPipeline(OD.context); @@ -1037,7 +1052,7 @@ void setIndirectLightingClamp(float clamp) { clamp = std::max(float(clamp), float(0.f)); OptixData.LP.indirectClamp = clamp; - launchParamsSetRaw(OptixData.launchParams, "indirectClamp", &OptixData.LP.indirectClamp); + owlParamsSetRaw(OptixData.launchParams, "indirectClamp", &OptixData.LP.indirectClamp); resetAccumulation(); } @@ -1045,7 +1060,7 @@ void setDirectLightingClamp(float clamp) { clamp = std::max(float(clamp), float(0.f)); OptixData.LP.directClamp = clamp; - launchParamsSetRaw(OptixData.launchParams, "directClamp", &OptixData.LP.directClamp); + owlParamsSetRaw(OptixData.launchParams, "directClamp", &OptixData.LP.directClamp); resetAccumulation(); } @@ -1062,11 +1077,11 @@ void setMaxBounceDepth( OptixData.LP.maxTransmissionDepth = transmissionDepth; OptixData.LP.maxVolumeDepth = volumeDepth; - launchParamsSetRaw(OptixData.launchParams, "maxDiffuseDepth", 
&OptixData.LP.maxDiffuseDepth); - launchParamsSetRaw(OptixData.launchParams, "maxGlossyDepth", &OptixData.LP.maxGlossyDepth); - launchParamsSetRaw(OptixData.launchParams, "maxTransparencyDepth", &OptixData.LP.maxTransparencyDepth); - launchParamsSetRaw(OptixData.launchParams, "maxTransmissionDepth", &OptixData.LP.maxTransmissionDepth); - launchParamsSetRaw(OptixData.launchParams, "maxVolumeDepth", &OptixData.LP.maxVolumeDepth); + owlParamsSetRaw(OptixData.launchParams, "maxDiffuseDepth", &OptixData.LP.maxDiffuseDepth); + owlParamsSetRaw(OptixData.launchParams, "maxGlossyDepth", &OptixData.LP.maxGlossyDepth); + owlParamsSetRaw(OptixData.launchParams, "maxTransparencyDepth", &OptixData.LP.maxTransparencyDepth); + owlParamsSetRaw(OptixData.launchParams, "maxTransmissionDepth", &OptixData.LP.maxTransmissionDepth); + owlParamsSetRaw(OptixData.launchParams, "maxVolumeDepth", &OptixData.LP.maxVolumeDepth); resetAccumulation(); } @@ -1081,7 +1096,7 @@ void setLightSampleCount(uint32_t count) std::string("Error: number of light samples must be between 1 and ") + std::to_string(MAX_LIGHT_SAMPLES)); OptixData.LP.numLightSamples = count; - launchParamsSetRaw(OptixData.launchParams, "numLightSamples", &OptixData.LP.numLightSamples); + owlParamsSetRaw(OptixData.launchParams, "numLightSamples", &OptixData.LP.numLightSamples); resetAccumulation(); } @@ -1089,15 +1104,15 @@ void samplePixelArea(vec2 xSampleInterval, vec2 ySampleInterval) { OptixData.LP.xPixelSamplingInterval = xSampleInterval; OptixData.LP.yPixelSamplingInterval = ySampleInterval; - launchParamsSetRaw(OptixData.launchParams, "xPixelSamplingInterval", &OptixData.LP.xPixelSamplingInterval); - launchParamsSetRaw(OptixData.launchParams, "yPixelSamplingInterval", &OptixData.LP.yPixelSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "xPixelSamplingInterval", &OptixData.LP.xPixelSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "yPixelSamplingInterval", &OptixData.LP.yPixelSamplingInterval); 
resetAccumulation(); } void sampleTimeInterval(vec2 sampleTimeInterval) { OptixData.LP.timeSamplingInterval = sampleTimeInterval; - launchParamsSetRaw(OptixData.launchParams, "timeSamplingInterval", &OptixData.LP.timeSamplingInterval); + owlParamsSetRaw(OptixData.launchParams, "timeSamplingInterval", &OptixData.LP.timeSamplingInterval); resetAccumulation(); } @@ -1349,9 +1364,9 @@ void updateComponents() // Build IAS groupBuildAccel(OD.volumesIAS); - launchParamsSetGroup(OD.launchParams, "volumesIAS", OD.volumesIAS); + owlParamsSetGroup(OD.launchParams, "volumesIAS", OD.volumesIAS); groupBuildAccel(OD.surfacesIAS); - launchParamsSetGroup(OD.launchParams, "surfacesIAS", OD.surfacesIAS); + owlParamsSetGroup(OD.launchParams, "surfacesIAS", OD.surfacesIAS); // Now that IAS have changed, we need to rebuild SBT owlBuildSBT(OD.context); @@ -1372,7 +1387,7 @@ void updateComponents() owlBufferResize(OptixData.lightEntitiesBuffer, OD.lightEntities.size()); owlBufferUpload(OptixData.lightEntitiesBuffer, OD.lightEntities.data()); OD.LP.numLightEntities = uint32_t(OD.lightEntities.size()); - launchParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); + owlParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); // Finally, upload entity structs to the GPU. 
Entity::updateComponents(); @@ -1549,105 +1564,107 @@ void updateComponents() void updateLaunchParams() { - launchParamsSetRaw(OptixData.launchParams, "frameID", &OptixData.LP.frameID); - launchParamsSetRaw(OptixData.launchParams, "frameSize", &OptixData.LP.frameSize); - launchParamsSetRaw(OptixData.launchParams, "cameraEntity", &OptixData.LP.cameraEntity); - launchParamsSetRaw(OptixData.launchParams, "domeLightIntensity", &OptixData.LP.domeLightIntensity); - launchParamsSetRaw(OptixData.launchParams, "domeLightExposure", &OptixData.LP.domeLightExposure); - launchParamsSetRaw(OptixData.launchParams, "domeLightColor", &OptixData.LP.domeLightColor); - launchParamsSetRaw(OptixData.launchParams, "renderDataMode", &OptixData.LP.renderDataMode); - launchParamsSetRaw(OptixData.launchParams, "renderDataBounce", &OptixData.LP.renderDataBounce); - launchParamsSetRaw(OptixData.launchParams, "enableDomeSampling", &OptixData.LP.enableDomeSampling); - launchParamsSetRaw(OptixData.launchParams, "seed", &OptixData.LP.seed); - launchParamsSetRaw(OptixData.launchParams, "proj", &OptixData.LP.proj); - launchParamsSetRaw(OptixData.launchParams, "viewT0", &OptixData.LP.viewT0); - launchParamsSetRaw(OptixData.launchParams, "viewT1", &OptixData.LP.viewT1); - - launchParamsSetRaw(OptixData.launchParams, "environmentMapID", &OptixData.LP.environmentMapID); - launchParamsSetRaw(OptixData.launchParams, "environmentMapRotation", &OptixData.LP.environmentMapRotation); - launchParamsSetBuffer(OptixData.launchParams, "environmentMapRows", OptixData.environmentMapRowsBuffer); - launchParamsSetBuffer(OptixData.launchParams, "environmentMapCols", OptixData.environmentMapColsBuffer); - launchParamsSetRaw(OptixData.launchParams, "environmentMapWidth", &OptixData.LP.environmentMapWidth); - launchParamsSetRaw(OptixData.launchParams, "environmentMapHeight", &OptixData.LP.environmentMapHeight); - launchParamsSetRaw(OptixData.launchParams, "sceneBBMin", &OptixData.LP.sceneBBMin); - 
launchParamsSetRaw(OptixData.launchParams, "sceneBBMax", &OptixData.LP.sceneBBMax); + owlParamsSetRaw(OptixData.launchParams, "frameID", &OptixData.LP.frameID); + owlParamsSetRaw(OptixData.launchParams, "frameSize", &OptixData.LP.frameSize); + owlParamsSetRaw(OptixData.launchParams, "cameraEntity", &OptixData.LP.cameraEntity); + owlParamsSetRaw(OptixData.launchParams, "domeLightIntensity", &OptixData.LP.domeLightIntensity); + owlParamsSetRaw(OptixData.launchParams, "domeLightExposure", &OptixData.LP.domeLightExposure); + owlParamsSetRaw(OptixData.launchParams, "domeLightColor", &OptixData.LP.domeLightColor); + owlParamsSetRaw(OptixData.launchParams, "renderDataMode", &OptixData.LP.renderDataMode); + owlParamsSetRaw(OptixData.launchParams, "renderDataBounce", &OptixData.LP.renderDataBounce); + owlParamsSetRaw(OptixData.launchParams, "enableDomeSampling", &OptixData.LP.enableDomeSampling); + owlParamsSetRaw(OptixData.launchParams, "seed", &OptixData.LP.seed); + owlParamsSetRaw(OptixData.launchParams, "proj", &OptixData.LP.proj); + owlParamsSetRaw(OptixData.launchParams, "viewT0", &OptixData.LP.viewT0); + owlParamsSetRaw(OptixData.launchParams, "viewT1", &OptixData.LP.viewT1); + + owlParamsSetRaw(OptixData.launchParams, "environmentMapID", &OptixData.LP.environmentMapID); + owlParamsSetRaw(OptixData.launchParams, "environmentMapRotation", &OptixData.LP.environmentMapRotation); + owlParamsSetBuffer(OptixData.launchParams, "environmentMapRows", OptixData.environmentMapRowsBuffer); + owlParamsSetBuffer(OptixData.launchParams, "environmentMapCols", OptixData.environmentMapColsBuffer); + owlParamsSetRaw(OptixData.launchParams, "environmentMapWidth", &OptixData.LP.environmentMapWidth); + owlParamsSetRaw(OptixData.launchParams, "environmentMapHeight", &OptixData.LP.environmentMapHeight); + owlParamsSetRaw(OptixData.launchParams, "sceneBBMin", &OptixData.LP.sceneBBMin); + owlParamsSetRaw(OptixData.launchParams, "sceneBBMax", &OptixData.LP.sceneBBMax); OptixData.LP.frameID ++; 
} -// Different GPUs have different local framebuffers. -// This function combines those framebuffers on the CPU, then uploads results to device 0. -void mergeFrameBuffers() { - int deviceCount = getDeviceCount(); - int width = OptixData.LP.frameSize.x; - int height = OptixData.LP.frameSize.y; - if (deviceCount <= 1) return; - - // synchronizeDevices(); - - std::vector fb_h(width * height); - std::vector fba_h(width * height); - std::vector fbn_h(width * height); - std::vector> fb_hd(deviceCount); - std::vector> fba_hd(deviceCount); - std::vector> fbn_hd(deviceCount); - for (uint32_t i = 0; i < deviceCount; ++i){ - fb_hd[i] = std::vector(width * height); - fba_hd[i] = std::vector(width * height); - fbn_hd[i] = std::vector(width * height); - void* fb_d = (void*)owlBufferGetPointer(OptixData.frameBuffer,i); - void* fba_d = (void*)owlBufferGetPointer(OptixData.albedoBuffer,i); - void* fbn_d = (void*)owlBufferGetPointer(OptixData.normalBuffer,i); - cudaMemcpyAsync((void*)fb_hd[i].data(), (void*)fb_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); - cudaMemcpyAsync((void*)fba_hd[i].data(), (void*)fba_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); - cudaMemcpyAsync((void*)fbn_hd[i].data(), (void*)fbn_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); - } - // synchronizeDevices(); - - // note, GPUs render 32xN strips - for (uint32_t y = 0; y < height; ++y) { - for (uint32_t x = 0; x < width; x += 32) { - if (x >= width) continue; - int deviceThatIsResponsible = (x>>5) % deviceCount; - { - glm::vec4* A = fb_h.data() + (y * width) + x; - glm::vec4* B = fb_hd[deviceThatIsResponsible].data() + (y * width) + x; - memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); - } - { - glm::vec4* A = fba_h.data() + (y * width) + x; - glm::vec4* B = fba_hd[deviceThatIsResponsible].data() + (y * width) + x; - memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); - } - { - glm::vec4* A = fbn_h.data() + (y * width) + x; - glm::vec4* B = 
fbn_hd[deviceThatIsResponsible].data() + (y * width) + x; - memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); - } - } - } - - // // note, GPUs render 32xN strips - // for (uint32_t y = 0; y < height; ++y) { - // for (uint32_t x = 0; x < width; x += 32) { - // int deviceThatIsResponsible = (x>>5) % deviceCount; - // glm::vec4* A = fb_h.data() + (y * width) + x; - // glm::vec4* B = ((glm::vec4*)fb_d[deviceThatIsResponsible]) + (y * width) + x; - // cudaMemcpyAsync((void*)A, (void*)B, min(32, int(width - x)) * sizeof(glm::vec4), cudaMemcpyDeviceToHost); - // } - // } - +// Update: This is still prohibitively slow. Official OptiX samples use host pinned memory. +// Moving to that approach... +// // Different GPUs have different local framebuffers. +// // This function combines those framebuffers on the CPU, then uploads results to device 0. +// void mergeFrameBuffers() { +// int deviceCount = getDeviceCount(); +// int width = OptixData.LP.frameSize.x; +// int height = OptixData.LP.frameSize.y; +// if (deviceCount <= 1) return; + +// // synchronizeDevices(); + +// std::vector fb_h(width * height); +// std::vector fba_h(width * height); +// std::vector fbn_h(width * height); +// std::vector> fb_hd(deviceCount); +// std::vector> fba_hd(deviceCount); +// std::vector> fbn_hd(deviceCount); +// for (uint32_t i = 0; i < deviceCount; ++i){ +// fb_hd[i] = std::vector(width * height); +// fba_hd[i] = std::vector(width * height); +// fbn_hd[i] = std::vector(width * height); +// void* fb_d = (void*)owlBufferGetPointer(OptixData.frameBuffer,i); +// void* fba_d = (void*)owlBufferGetPointer(OptixData.albedoBuffer,i); +// void* fbn_d = (void*)owlBufferGetPointer(OptixData.normalBuffer,i); +// cudaMemcpyAsync((void*)fb_hd[i].data(), (void*)fb_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); +// cudaMemcpyAsync((void*)fba_hd[i].data(), (void*)fba_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); +// cudaMemcpyAsync((void*)fbn_hd[i].data(), (void*)fbn_d, 
fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); +// } +// // synchronizeDevices(); + +// // note, GPUs render 32xN strips +// for (uint32_t y = 0; y < height; ++y) { +// for (uint32_t x = 0; x < width; x += 32) { +// if (x >= width) continue; +// int deviceThatIsResponsible = (x>>5) % deviceCount; +// { +// glm::vec4* A = fb_h.data() + (y * width) + x; +// glm::vec4* B = fb_hd[deviceThatIsResponsible].data() + (y * width) + x; +// memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); +// } +// { +// glm::vec4* A = fba_h.data() + (y * width) + x; +// glm::vec4* B = fba_hd[deviceThatIsResponsible].data() + (y * width) + x; +// memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); +// } +// { +// glm::vec4* A = fbn_h.data() + (y * width) + x; +// glm::vec4* B = fbn_hd[deviceThatIsResponsible].data() + (y * width) + x; +// memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); +// } +// } +// } - // cudaMemcpyAsync(fb_h.data(), fb_d[1], fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); - synchronizeDevices(); - void* fb_d = (void*)owlBufferGetPointer(OptixData.frameBuffer,0); - void* fba_d = (void*)owlBufferGetPointer(OptixData.albedoBuffer,0); - void* fbn_d = (void*)owlBufferGetPointer(OptixData.normalBuffer,0); - cudaMemcpyAsync(fb_d, fb_h.data(), fb_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); - cudaMemcpyAsync(fba_d, fba_h.data(), fba_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); - cudaMemcpyAsync(fbn_d, fbn_h.data(), fbn_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); - // synchronizeDevices(); -} +// // // note, GPUs render 32xN strips +// // for (uint32_t y = 0; y < height; ++y) { +// // for (uint32_t x = 0; x < width; x += 32) { +// // int deviceThatIsResponsible = (x>>5) % deviceCount; +// // glm::vec4* A = fb_h.data() + (y * width) + x; +// // glm::vec4* B = ((glm::vec4*)fb_d[deviceThatIsResponsible]) + (y * width) + x; +// // cudaMemcpyAsync((void*)A, (void*)B, min(32, int(width - x)) * 
sizeof(glm::vec4), cudaMemcpyDeviceToHost); +// // } +// // } + + +// // cudaMemcpyAsync(fb_h.data(), fb_d[1], fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); +// synchronizeDevices(); +// void* fb_d = (void*)owlBufferGetPointer(OptixData.frameBuffer,0); +// void* fba_d = (void*)owlBufferGetPointer(OptixData.albedoBuffer,0); +// void* fbn_d = (void*)owlBufferGetPointer(OptixData.normalBuffer,0); +// cudaMemcpyAsync(fb_d, fb_h.data(), fb_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); +// cudaMemcpyAsync(fba_d, fba_h.data(), fba_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); +// cudaMemcpyAsync(fbn_d, fbn_h.data(), fbn_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); +// // synchronizeDevices(); +// } void denoiseImage() { synchronizeDevices(); @@ -1976,10 +1993,9 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi } updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); + owlLaunch2D(OptixData.rayGen, NVISII.wd.numSamples(), 1, OptixData.launchParams); if (!NVISII.headlessMode) { - mergeFrameBuffers(); if (OptixData.enableDenoiser) { denoiseImage(); @@ -2006,7 +2022,6 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi synchronizeDevices(); - mergeFrameBuffers(); if (OptixData.enableDenoiser) { denoiseImage(); @@ -2130,8 +2145,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra } updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, OptixData.launchParams); - mergeFrameBuffers(); + owlLaunch2D(OptixData.rayGen, NVISII.wd.numSamples(), 1, OptixData.launchParams); // Dont run denoiser to raw data rendering // if (OptixData.enableDenoiser) // { @@ -2430,8 +2444,7 @@ void initializeInteractive( updateFrameBuffer(); updateComponents(); updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, OptixData.LP.frameSize.x * 
OptixData.LP.frameSize.y, 1, OptixData.launchParams); - mergeFrameBuffers(); + owlLaunch2D(OptixData.rayGen, NVISII.wd.numSamples(), 1, OptixData.launchParams); if (OptixData.enableDenoiser) { denoiseImage(); diff --git a/src/nvisii/nvisii.cu b/src/nvisii/nvisii.cu index 6a94855c..0d81d5d2 100644 --- a/src/nvisii/nvisii.cu +++ b/src/nvisii/nvisii.cu @@ -53,4 +53,39 @@ void reproject(glm::vec4 *sampleBuffer, glm::vec4 *t0AlbedoBuffer, glm::vec4 *t1 dim3 gridSize = dim3 (bx, by); _reproject<<>>(sampleBuffer, t0AlbedoBuffer, t1AlbedoBuffer, mvecBuffer, scratchBuffer, imageBuffer, true, width, height); _reproject<<>>(sampleBuffer, t0AlbedoBuffer, t1AlbedoBuffer, mvecBuffer, scratchBuffer, imageBuffer, false, width, height); -} \ No newline at end of file +} + +#include "work_distribution.h" + +extern "C" __global__ void fillSamples( + int gpu_idx, + int num_gpus, + int width, + int height, + int2* sample_indices ) +{ + StaticWorkDistribution wd; + wd.setRasterSize( width, height ); + wd.setNumGPUs( num_gpus ); + + const int sample_idx = blockIdx.x; + sample_indices[sample_idx] = wd.getSamplePixel( gpu_idx, sample_idx ); +} + + +extern "C" __host__ void fillSamplesCUDA( + int num_samples, + cudaStream_t stream, + int gpu_idx, + int num_gpus, + int width, + int height, + int2* sample_indices ) +{ + fillSamples<<>>( + gpu_idx, + num_gpus, + width, + height, + sample_indices ); +} From e0555a33a7dfab5c2fe0abe00f384ef31f0a5294 Mon Sep 17 00:00:00 2001 From: n8vm Date: Fri, 26 Feb 2021 17:34:51 -0700 Subject: [PATCH 05/55] some steps forward towards dynamic load balancing --- externals/owl | 2 +- include/nvisii/utilities/work_distribution.h | 2 +- src/nvisii/nvisii.cpp | 25 ++++++++++++++------ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/externals/owl b/externals/owl index da110c5e..97d3acd6 160000 --- a/externals/owl +++ b/externals/owl @@ -1 +1 @@ -Subproject commit da110c5e1453c17c8e3567d187a69b9bc6082943 +Subproject commit 
97d3acd6ad4d0e356f90959d34c8a94024a4fa70 diff --git a/include/nvisii/utilities/work_distribution.h b/include/nvisii/utilities/work_distribution.h index 5f1c5a56..b02e93b8 100644 --- a/include/nvisii/utilities/work_distribution.h +++ b/include/nvisii/utilities/work_distribution.h @@ -30,7 +30,7 @@ class StaticWorkDistribution } - SWD_INLINE SWD_HOSTDEVICE int32_t numSamples( ) + SWD_INLINE SWD_HOSTDEVICE int32_t numSamples( int32_t gpu_idx ) { const int tile_strip_width = TILE_WIDTH*m_num_gpus; const int tile_strip_height = TILE_HEIGHT; diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 487dead4..c3d0923b 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -430,14 +430,13 @@ void resizeOptixFrameBuffer(uint32_t width, uint32_t height) owlBufferResize(OD.scratchBuffer, width * height); owlBufferResize(OD.mvecBuffer, width * height); owlBufferResize(OD.accumBuffer, width * height); - - owlBufferResize(OD.sampleIndexBuffer, NVISII.wd.numSamples()); + owlBufferResize(OD.sampleIndexBuffer, width * height); for (uint32_t i = 0; i < numGPUs; ++i) { cudaSetDevice( i ); fillSamplesCUDA( - NVISII.wd.numSamples(), + NVISII.wd.numSamples(i), owlContextGetStream(OD.context, i), i, numGPUs, @@ -582,7 +581,7 @@ void initializeOptix(bool headless) NVISII.wd.setRasterSize( 512, 512 ); NVISII.wd.setNumGPUs( owlGetDeviceCount(OD.context) ); - OD.sampleIndexBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(int2), NVISII.wd.numSamples(), nullptr); + OD.sampleIndexBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(int2), 512*512, nullptr); owlParamsSetBuffer(OD.launchParams, "sampleIndexBuffer", OD.sampleIndexBuffer); OD.frameBuffer = owlHostPinnedBufferCreate(OD.context,OWL_USER_TYPE(glm::vec4),512*512); @@ -1993,7 +1992,10 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi } updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, NVISII.wd.numSamples(), 1, OptixData.launchParams); + for (uint32_t deviceID = 0; deviceID < 
owlGetDeviceCount(OptixData.context); deviceID++) { + owlAsyncLaunch2DOnDevice(OptixData.rayGen, NVISII.wd.numSamples(deviceID), 1, deviceID, OptixData.launchParams); + } + owlLaunchSync(OptixData.launchParams); if (!NVISII.headlessMode) { if (OptixData.enableDenoiser) @@ -2145,7 +2147,12 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra } updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, NVISII.wd.numSamples(), 1, OptixData.launchParams); + + for (uint32_t deviceID = 0; deviceID < owlGetDeviceCount(OptixData.context); deviceID++) { + owlAsyncLaunch2DOnDevice(OptixData.rayGen, NVISII.wd.numSamples(deviceID), 1, deviceID, OptixData.launchParams); + } + owlLaunchSync(OptixData.launchParams); + // Dont run denoiser to raw data rendering // if (OptixData.enableDenoiser) // { @@ -2444,7 +2451,11 @@ void initializeInteractive( updateFrameBuffer(); updateComponents(); updateLaunchParams(); - owlLaunch2D(OptixData.rayGen, NVISII.wd.numSamples(), 1, OptixData.launchParams); + for (uint32_t deviceID = 0; deviceID < owlGetDeviceCount(OptixData.context); deviceID++) { + owlAsyncLaunch2DOnDevice(OptixData.rayGen, NVISII.wd.numSamples(deviceID), 1, deviceID, OptixData.launchParams); + } + owlLaunchSync(OptixData.launchParams); + if (OptixData.enableDenoiser) { denoiseImage(); From 14cfdee1b59202180eadc681cee6ca0b8ffd3838 Mon Sep 17 00:00:00 2001 From: n8vm Date: Sun, 28 Feb 2021 21:17:54 -0700 Subject: [PATCH 06/55] fixes to load balancing. 
Fixes to frame buffer when cuda visible devices set to non-zero value --- include/nvisii/nvisii.h | 36 +- include/nvisii/utilities/CMakeLists.txt | 1 - include/nvisii/utilities/work_distribution.h | 74 ---- src/nvisii/devicecode/launch_params.h | 5 +- src/nvisii/devicecode/path_tracer.cu | 79 ++-- src/nvisii/nvisii.cpp | 410 ++++++++++++------- src/nvisii/nvisii.cu | 35 -- 7 files changed, 341 insertions(+), 299 deletions(-) delete mode 100644 include/nvisii/utilities/work_distribution.h diff --git a/include/nvisii/nvisii.h b/include/nvisii/nvisii.h index e7126249..d15f6220 100644 --- a/include/nvisii/nvisii.h +++ b/include/nvisii/nvisii.h @@ -300,10 +300,20 @@ void renderToFile(uint32_t width, uint32_t height, uint32_t samples_per_pixel, s * @param bounce The number of bounces required to reach the vertex whose metadata result should come from. A value of 0 * would save data for objects directly visible to the camera, a value of 1 would save reflections/refractions, etc. * @param options Indicates the data to return. Current possible values include - * "none" for rendering out raw path traced data, "depth" to render the distance between the previous path vertex to the current one, - * "position" for rendering out the world space position of the path vertex, "normal" for rendering out the world space normal of the - * path vertex, "entity_id" for rendering out the entity ID whose surface the path vertex hit, "denoise_normal" for rendering out - * the normal buffer supplied to the Optix denoiser, and "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser. 
+ * "none" for rendering out raw path traced data, + * "depth" to render the distance between the previous path vertex to the current one, + * "ray_direction" to render the direction that the ray was traced in world space, + * "position" for rendering out the world space position of the path vertex, + * "normal" for rendering out the world space normal of the path vertex, + * "entity_id" for rendering out the entity ID whose surface the path vertex hit, + * "base_color" for rendering out the surface base color, + * "texture_coordinates" for rendering out the texture coordinates of the hit surface, + * "screen_space_normal" for rendering out the normals of the hit surface in screen space, + * "diffuse_motion_vectors" for rendering out screen space motion vectors for moving objects, + * "denoise_normal" for rendering out the normal buffer supplied to the Optix denoiser, + * "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser, + * "heatmap" for rendering out the time it takes to render out each pixel, + * "device_id" for determining which GPU was used to render what pixel. * @param seed A seed used to initialize the random number generator. */ std::vector renderData( @@ -319,10 +329,20 @@ std::vector renderData( * @param bounce The number of bounces required to reach the vertex whose metadata result should come from. A value of 0 * would save data for objects directly visible to the camera, a value of 1 would save reflections/refractions, etc. * @param options Indicates the data to return. 
Current possible values include - * "none" for rendering out raw path traced data, "depth" to render the distance between the previous path vertex to the current one, - * "position" for rendering out the world space position of the path vertex, "normal" for rendering out the world space normal of the - * path vertex, "entity_id" for rendering out the entity ID whose surface the path vertex hit, "denoise_normal" for rendering out - * the normal buffer supplied to the Optix denoiser, and "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser. + * "none" for rendering out raw path traced data, + * "depth" to render the distance between the previous path vertex to the current one, + * "ray_direction" to render the direction that the ray was traced in world space, + * "position" for rendering out the world space position of the path vertex, + * "normal" for rendering out the world space normal of the path vertex, + * "entity_id" for rendering out the entity ID whose surface the path vertex hit, + * "base_color" for rendering out the surface base color, + * "texture_coordinates" for rendering out the texture coordinates of the hit surface, + * "screen_space_normal" for rendering out the normals of the hit surface in screen space, + * "diffuse_motion_vectors" for rendering out screen space motion vectors for moving objects, + * "denoise_normal" for rendering out the normal buffer supplied to the Optix denoiser, + * "denoise_albedo" for rendering out the albedo supplied to the Optix denoiser, + * "heatmap" for rendering out the time it takes to render out each pixel, + * "device_id" for determining which GPU was used to render what pixel. * @param file_path The path to use to save the file, including the extension. Supported extensions are EXR, HDR, and PNG * @param seed A seed used to initialize the random number generator. 
*/ diff --git a/include/nvisii/utilities/CMakeLists.txt b/include/nvisii/utilities/CMakeLists.txt index 84d88ba4..ee81cbbe 100644 --- a/include/nvisii/utilities/CMakeLists.txt +++ b/include/nvisii/utilities/CMakeLists.txt @@ -14,5 +14,4 @@ set(Utilities_HDR ${CMAKE_CURRENT_SOURCE_DIR}/singleton.h ${CMAKE_CURRENT_SOURCE_DIR}/version.h ${CMAKE_CURRENT_SOURCE_DIR}/procedural_sky.h - ${CMAKE_CURRENT_SOURCE_DIR}/work_distribution.h PARENT_SCOPE) diff --git a/include/nvisii/utilities/work_distribution.h b/include/nvisii/utilities/work_distribution.h deleted file mode 100644 index b02e93b8..00000000 --- a/include/nvisii/utilities/work_distribution.h +++ /dev/null @@ -1,74 +0,0 @@ -#pragma once - -#if defined(__CUDACC__) || defined(__CUDABE__) -# define SWD_HOSTDEVICE __host__ __device__ -# define SWD_INLINE __forceinline__ -# define CONST_STATIC_INIT( ... ) -#else -# define SWD_HOSTDEVICE -# define SWD_INLINE inline -# define CONST_STATIC_INIT( ... ) = __VA_ARGS__ -#endif - -#include - -#include - -class StaticWorkDistribution -{ -public: - SWD_INLINE SWD_HOSTDEVICE void setRasterSize( int width, int height ) - { - m_width = width; - m_height = height; - } - - - SWD_INLINE SWD_HOSTDEVICE void setNumGPUs( int32_t num_gpus ) - { - m_num_gpus = num_gpus; - } - - - SWD_INLINE SWD_HOSTDEVICE int32_t numSamples( int32_t gpu_idx ) - { - const int tile_strip_width = TILE_WIDTH*m_num_gpus; - const int tile_strip_height = TILE_HEIGHT; - const int num_tile_strip_cols = m_width /tile_strip_width + ( m_width %tile_strip_width == 0 ? 0 : 1 ); - const int num_tile_strip_rows = m_height/tile_strip_height + ( m_height%tile_strip_height == 0 ? 
0 : 1 ); - return num_tile_strip_rows*num_tile_strip_cols*TILE_WIDTH*TILE_HEIGHT; - } - - - SWD_INLINE SWD_HOSTDEVICE int2 getSamplePixel( int32_t gpu_idx, int32_t sample_idx ) - { - const int tile_strip_width = TILE_WIDTH*m_num_gpus; - const int tile_strip_height = TILE_HEIGHT; - const int num_tile_strip_cols = m_width /tile_strip_width + ( m_width % tile_strip_width == 0 ? 0 : 1 ); - - const int tile_strip_idx = sample_idx / (TILE_WIDTH*TILE_HEIGHT ); - const int tile_strip_y = tile_strip_idx / num_tile_strip_cols; - const int tile_strip_x = tile_strip_idx - tile_strip_y * num_tile_strip_cols; - const int tile_strip_x_start = tile_strip_x * tile_strip_width; - const int tile_strip_y_start = tile_strip_y * tile_strip_height; - - const int tile_pixel_idx = sample_idx - ( tile_strip_idx * TILE_WIDTH*TILE_HEIGHT ); - const int tile_pixel_y = tile_pixel_idx / TILE_WIDTH; - const int tile_pixel_x = tile_pixel_idx - tile_pixel_y * TILE_WIDTH; - - const int tile_offset_x = ( gpu_idx + tile_strip_y % m_num_gpus ) % m_num_gpus * TILE_WIDTH; - - const int pixel_y = tile_strip_y_start + tile_pixel_y; - const int pixel_x = tile_strip_x_start + tile_pixel_x + tile_offset_x ; - return make_int2( pixel_x, pixel_y ); - } - - -private: - int32_t m_num_gpus = 0; - int32_t m_width = 0; - int32_t m_height = 0; - - static const int32_t TILE_WIDTH = 8; - static const int32_t TILE_HEIGHT = 4; -}; diff --git a/src/nvisii/devicecode/launch_params.h b/src/nvisii/devicecode/launch_params.h index ea5edb0f..717b2cf3 100644 --- a/src/nvisii/devicecode/launch_params.h +++ b/src/nvisii/devicecode/launch_params.h @@ -19,7 +19,7 @@ #include "./buffer.h" struct LaunchParams { - Buffer sampleIndexBuffer; + Buffer assignmentBuffer; glm::ivec2 frameSize; uint64_t frameID = 0; @@ -113,7 +113,8 @@ enum RenderDataFlags : uint32_t { TRANSMISSION_INDIRECT_LIGHTING = 17, RAY_DIRECTION = 18, HEATMAP = 19, - TEXTURE_COORDINATES = 20 + TEXTURE_COORDINATES = 20, + DEVICE_ID = 21 }; #define MAX_LIGHT_SAMPLES 10 
diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index 8d99d060..905db055 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -872,6 +872,18 @@ void saveHeatmapRenderData( renderData = make_float3(relClock); } +__device__ +void saveDeviceAssignment( + float3 &renderData, + int bounce, + uint32_t deviceIndex +) +{ + auto &LP = optixLaunchParams; + if (LP.renderDataMode != RenderDataFlags::DEVICE_ID) return; + renderData = make_float3(deviceIndex); +} + __device__ bool debugging() { #ifndef DEBUGGING @@ -888,11 +900,21 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() auto &LP = optixLaunchParams; auto launchIndex = optixGetLaunchIndex().x; auto launchDim = optixGetLaunchDimensions().x; - - GET(const int2 pixelID, int2, LP.sampleIndexBuffer, launchIndex); - - // Work distribution might assign tiles that cross over image boundary + auto pixelID = make_int2(launchIndex % LP.frameSize.x, launchIndex / LP.frameSize.x); + + // Terminate thread if current pixel not assigned to this device + GET(float start, float, LP.assignmentBuffer, self.deviceIndex); + GET(float stop, float, LP.assignmentBuffer, self.deviceIndex + 1); + start *= (LP.frameSize.x * LP.frameSize.y); + stop *= (LP.frameSize.x * LP.frameSize.y); + + // if (launchIndex == 0) { + // printf("device %d start %f stop %f\n", self.deviceIndex, start, stop); + // } + if( pixelID.x > LP.frameSize.x-1 || pixelID.y > LP.frameSize.y-1 ) return; + if( (launchIndex < start) || (stop <= launchIndex) ) return; + // if (self.deviceIndex == 1) return; cudaTextureObject_t envTex = getEnvironmentTexture(); bool debug = (pixelID.x == int(LP.frameSize.x / 2) && pixelID.y == int(LP.frameSize.y / 2)); @@ -1559,6 +1581,9 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // For segmentations, save heatmap metadata saveHeatmapRenderData(renderData, depth, start_clock); + // Device assignment data + saveDeviceAssignment(renderData, depth, self.deviceIndex); + // clamp out any extreme 
fireflies glm::vec3 gillum = vec3(illum.x, illum.y, illum.z); glm::vec3 dillum = vec3(directIllum.x, directIllum.y, directIllum.z); @@ -1588,12 +1613,12 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() auto fbOfs = pixelID.x+LP.frameSize.x * ((LP.frameSize.y - 1) - pixelID.y); float4* accumPtr = (float4*) LP.accumPtr; float4* fbPtr = (float4*) LP.frameBuffer; - // float4* normalPtr = (float4*) LP.normalBuffer; - // float4* albedoPtr = (float4*) LP.albedoBuffer; + float4* normalPtr = (float4*) LP.normalBuffer; + float4* albedoPtr = (float4*) LP.albedoBuffer; float4 prev_color = accumPtr[fbOfs]; - // float4 prev_normal = normalPtr[fbOfs]; - // float4 prev_albedo = albedoPtr[fbOfs]; + float4 prev_normal = normalPtr[fbOfs]; + float4 prev_albedo = albedoPtr[fbOfs]; float4 accum_color; if (LP.renderDataMode == RenderDataFlags::NONE) @@ -1607,27 +1632,27 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } - // // compute screen space normal / albedo - // vec4 oldAlbedo = make_vec4(prev_albedo); - // vec4 oldNormal = make_vec4(prev_normal); - // if (any(isnan(oldAlbedo))) oldAlbedo = vec4(0.f); - // if (any(isnan(oldNormal))) oldNormal = vec4(0.f); - // vec4 newAlbedo = vec4(primaryAlbedo.x, primaryAlbedo.y, primaryAlbedo.z, 1.f); - // vec4 accumAlbedo = (newAlbedo + float(LP.frameID) * oldAlbedo) / float(LP.frameID + 1); - // vec4 newNormal = vec4(make_vec3(primaryNormal), 1.f); - // if (!all(equal(make_vec3(primaryNormal), vec3(0.f, 0.f, 0.f)))) { - // glm::quat r0 = glm::quat_cast(LP.viewT0); - // glm::quat r1 = glm::quat_cast(LP.viewT1); - // glm::quat rot = (glm::all(glm::equal(r0, r1))) ? 
r0 : glm::slerp(r0, r1, time); - // vec3 tmp = normalize(glm::mat3_cast(rot) * make_vec3(primaryNormal)); - // tmp = normalize(vec3(LP.proj * vec4(tmp, 0.f))); - // newNormal = vec4(tmp, 1.f); - // } - // vec4 accumNormal = (newNormal + float(LP.frameID) * oldNormal) / float(LP.frameID + 1); + // compute screen space normal / albedo + vec4 oldAlbedo = make_vec4(prev_albedo); + vec4 oldNormal = make_vec4(prev_normal); + if (any(isnan(oldAlbedo))) oldAlbedo = vec4(0.f); + if (any(isnan(oldNormal))) oldNormal = vec4(0.f); + vec4 newAlbedo = vec4(primaryAlbedo.x, primaryAlbedo.y, primaryAlbedo.z, 1.f); + vec4 accumAlbedo = (newAlbedo + float(LP.frameID) * oldAlbedo) / float(LP.frameID + 1); + vec4 newNormal = vec4(make_vec3(primaryNormal), 1.f); + if (!all(equal(make_vec3(primaryNormal), vec3(0.f, 0.f, 0.f)))) { + glm::quat r0 = glm::quat_cast(LP.viewT0); + glm::quat r1 = glm::quat_cast(LP.viewT1); + glm::quat rot = (glm::all(glm::equal(r0, r1))) ? r0 : glm::slerp(r0, r1, time); + vec3 tmp = normalize(glm::mat3_cast(rot) * make_vec3(primaryNormal)); + tmp = normalize(vec3(LP.proj * vec4(tmp, 0.f))); + newNormal = vec4(tmp, 1.f); + } + vec4 accumNormal = (newNormal + float(LP.frameID) * oldNormal) / float(LP.frameID + 1); // save data to frame buffers accumPtr[fbOfs] = accum_color; fbPtr[fbOfs] = accum_color; - // albedoPtr[fbOfs] = make_float4(accumAlbedo); - // normalPtr[fbOfs] = make_float4(accumNormal); + albedoPtr[fbOfs] = make_float4(accumAlbedo); + normalPtr[fbOfs] = make_float4(accumNormal); } diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index c3d0923b..f7a5478d 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -23,7 +23,6 @@ #define PBRLUT_IMPLEMENTATION #include #include -#include #include #include @@ -90,7 +89,8 @@ static struct OptixData { LaunchParams LP; GLuint imageTexID = -1; cudaGraphicsResource_t cudaResourceTex; - OWLBuffer sampleIndexBuffer; + bool resourceSharingSuccessful = true; + OWLBuffer assignmentBuffer; OWLBuffer 
frameBuffer; OWLBuffer normalBuffer; @@ -99,6 +99,10 @@ static struct OptixData { OWLBuffer mvecBuffer; OWLBuffer accumBuffer; + OWLBuffer combinedFrameBuffer; + OWLBuffer combinedNormalBuffer; + OWLBuffer combinedAlbedoBuffer; + OWLBuffer entityBuffer; OWLBuffer transformBuffer; OWLBuffer cameraBuffer; @@ -187,7 +191,10 @@ static struct NVISII { bool headlessMode; std::function callback; std::recursive_mutex callbackMutex; - StaticWorkDistribution wd; + + std::vector> events; + std::vector times; + std::vector weights; } NVISII; void applyStyle() @@ -347,14 +354,14 @@ owl4x3f glmToOWL(glm::mat4 &xfm){ return oxfm; } -void synchronizeDevices() +void synchronizeDevices(std::string error_string = "") { for (int i = 0; i < getDeviceCount(); i++) { cudaSetDevice(i); cudaDeviceSynchronize(); cudaError_t err = cudaPeekAtLastError(); if (err != 0) { - std::cout<< "ERROR: " << cudaGetErrorString(err)< signals(num_gpus); + float total_time = 0.f; + for (uint32_t i = 0; i < num_gpus; ++i) total_time += NVISII.times[i]; + for (uint32_t i = 0; i < num_gpus; ++i) signals[i] = NVISII.times[i] / float(total_time); + + std::vector p_error(num_gpus); + for (uint32_t i = 0; i < num_gpus; ++i) p_error[i] = target - signals[i]; + + // update weights + float pK = 1.f; + for (uint32_t i = 0; i < num_gpus; ++i) { + NVISII.weights[i] = max(NVISII.weights[i] + p_error[i], .001f); + } + + std::vector scan; + for (size_t i = 0; i <= num_gpus; ++i) { + if (i == 0) scan.push_back(0.f); + else scan.push_back(scan[i - 1] + NVISII.weights[i - 1]); + } + + // std::cout<<"Scan: "; + for (size_t i = 0; i <= num_gpus; ++i) { + scan[i] /= scan[num_gpus]; + // std::cout< fb_h(width * height); -// std::vector fba_h(width * height); -// std::vector fbn_h(width * height); -// std::vector> fb_hd(deviceCount); -// std::vector> fba_hd(deviceCount); -// std::vector> fbn_hd(deviceCount); -// for (uint32_t i = 0; i < deviceCount; ++i){ -// fb_hd[i] = std::vector(width * height); -// fba_hd[i] = 
std::vector(width * height); -// fbn_hd[i] = std::vector(width * height); -// void* fb_d = (void*)owlBufferGetPointer(OptixData.frameBuffer,i); -// void* fba_d = (void*)owlBufferGetPointer(OptixData.albedoBuffer,i); -// void* fbn_d = (void*)owlBufferGetPointer(OptixData.normalBuffer,i); -// cudaMemcpyAsync((void*)fb_hd[i].data(), (void*)fb_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); -// cudaMemcpyAsync((void*)fba_hd[i].data(), (void*)fba_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); -// cudaMemcpyAsync((void*)fbn_hd[i].data(), (void*)fbn_d, fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); -// } -// // synchronizeDevices(); - -// // note, GPUs render 32xN strips -// for (uint32_t y = 0; y < height; ++y) { -// for (uint32_t x = 0; x < width; x += 32) { -// if (x >= width) continue; -// int deviceThatIsResponsible = (x>>5) % deviceCount; -// { -// glm::vec4* A = fb_h.data() + (y * width) + x; -// glm::vec4* B = fb_hd[deviceThatIsResponsible].data() + (y * width) + x; -// memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); -// } -// { -// glm::vec4* A = fba_h.data() + (y * width) + x; -// glm::vec4* B = fba_hd[deviceThatIsResponsible].data() + (y * width) + x; -// memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); -// } -// { -// glm::vec4* A = fbn_h.data() + (y * width) + x; -// glm::vec4* B = fbn_hd[deviceThatIsResponsible].data() + (y * width) + x; -// memcpy(A, B, min(32, int(width - x)) * sizeof(glm::vec4)); -// } -// } -// } +void mergeFrameBuffers() { + // For multigpu setups, we currently render to zero-copy memory to merge on the host. 
+ // So for now, just upload those results to device 0's combined unified frame buffers on the device + owlBufferUpload(OptixData.combinedFrameBuffer, owlBufferGetPointer(OptixData.frameBuffer, 0)); + + if (OptixData.enableAlbedoGuide) { + owlBufferUpload(OptixData.combinedAlbedoBuffer, owlBufferGetPointer(OptixData.albedoBuffer, 0)); + } -// // // note, GPUs render 32xN strips -// // for (uint32_t y = 0; y < height; ++y) { -// // for (uint32_t x = 0; x < width; x += 32) { -// // int deviceThatIsResponsible = (x>>5) % deviceCount; -// // glm::vec4* A = fb_h.data() + (y * width) + x; -// // glm::vec4* B = ((glm::vec4*)fb_d[deviceThatIsResponsible]) + (y * width) + x; -// // cudaMemcpyAsync((void*)A, (void*)B, min(32, int(width - x)) * sizeof(glm::vec4), cudaMemcpyDeviceToHost); -// // } -// // } - - -// // cudaMemcpyAsync(fb_h.data(), fb_d[1], fb_h.size() * sizeof(glm::vec4), cudaMemcpyDeviceToHost); -// synchronizeDevices(); -// void* fb_d = (void*)owlBufferGetPointer(OptixData.frameBuffer,0); -// void* fba_d = (void*)owlBufferGetPointer(OptixData.albedoBuffer,0); -// void* fbn_d = (void*)owlBufferGetPointer(OptixData.normalBuffer,0); -// cudaMemcpyAsync(fb_d, fb_h.data(), fb_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); -// cudaMemcpyAsync(fba_d, fba_h.data(), fba_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); -// cudaMemcpyAsync(fbn_d, fbn_h.data(), fbn_h.size() * sizeof(glm::vec4), cudaMemcpyHostToDevice); -// // synchronizeDevices(); -// } + if (OptixData.enableNormalGuide) { + owlBufferUpload(OptixData.combinedNormalBuffer, owlBufferGetPointer(OptixData.normalBuffer, 0)); + } +} void denoiseImage() { synchronizeDevices(); @@ -1671,8 +1682,6 @@ void denoiseImage() { auto &OD = OptixData; auto cudaStream = owlContextGetStream(OD.context, 0); - CUdeviceptr frameBuffer = (CUdeviceptr) owlBufferGetPointer(OD.frameBuffer, 0); - std::vector inputLayers; OptixImage2D colorLayer; colorLayer.width = OD.LP.frameSize.x; @@ -1680,7 +1689,7 @@ void 
denoiseImage() { colorLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; colorLayer.pixelStrideInBytes = 4 * sizeof(float); colorLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - colorLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.frameBuffer, 0); + colorLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedFrameBuffer, 0); inputLayers.push_back(colorLayer); OptixImage2D albedoLayer; @@ -1689,7 +1698,7 @@ void denoiseImage() { albedoLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; albedoLayer.pixelStrideInBytes = 4 * sizeof(float); albedoLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - albedoLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.albedoBuffer, 0); + albedoLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedAlbedoBuffer, 0); if (OD.enableAlbedoGuide) inputLayers.push_back(albedoLayer); OptixImage2D normalLayer; @@ -1698,7 +1707,7 @@ void denoiseImage() { normalLayer.format = OPTIX_PIXEL_FORMAT_FLOAT4; normalLayer.pixelStrideInBytes = 4 * sizeof(float); normalLayer.rowStrideInBytes = OD.LP.frameSize.x * 4 * sizeof(float); - normalLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.normalBuffer, 0); + normalLayer.data = (CUdeviceptr) owlBufferGetPointer(OD.combinedNormalBuffer, 0); if (OD.enableNormalGuide) inputLayers.push_back(normalLayer); OptixImage2D outputLayer = colorLayer; // can I get away with this? 
@@ -1755,23 +1764,88 @@ void denoiseImage() { (CUdeviceptr) owlBufferGetPointer(OD.denoiserScratchBuffer, 0), scratchSizeInBytes )); +} - synchronizeDevices(); +inline const char* getGLErrorString( GLenum error ) +{ + switch( error ) + { + case GL_NO_ERROR: return "No error"; + case GL_INVALID_ENUM: return "Invalid enum"; + case GL_INVALID_VALUE: return "Invalid value"; + case GL_INVALID_OPERATION: return "Invalid operation"; + //case GL_STACK_OVERFLOW: return "Stack overflow"; + //case GL_STACK_UNDERFLOW: return "Stack underflow"; + case GL_OUT_OF_MEMORY: return "Out of memory"; + //case GL_TABLE_TOO_LARGE: return "Table too large"; + default: return "Unknown GL error"; + } } +#define DO_GL_CHECK +#ifdef DO_GL_CHECK +# define GL_CHECK( call ) \ + do \ + { \ + call; \ + GLenum err = glGetError(); \ + if( err != GL_NO_ERROR ) \ + { \ + std::stringstream ss; \ + ss << "GL error " << getGLErrorString( err ) << " at " \ + << __FILE__ << "(" << __LINE__ << "): " << #call \ + << std::endl; \ + std::cerr << ss.str() << std::endl; \ + throw std::runtime_error( ss.str().c_str() ); \ + } \ + } \ + while (0) + + +# define GL_CHECK_ERRORS( ) \ + do \ + { \ + GLenum err = glGetError(); \ + if( err != GL_NO_ERROR ) \ + { \ + std::stringstream ss; \ + ss << "GL error " << getGLErrorString( err ) << " at " \ + << __FILE__ << "(" << __LINE__ << ")"; \ + std::cerr << ss.str() << std::endl; \ + throw std::runtime_error( ss.str().c_str() ); \ + } \ + } \ + while (0) + +#else +# define GL_CHECK( call ) do { call; } while(0) +# define GL_CHECK_ERRORS( ) do { ; } while(0) +#endif + void drawFrameBufferToWindow() { synchronizeDevices(); glFlush(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); auto &OD = OptixData; - cudaGraphicsMapResources(1, &OD.cudaResourceTex); - const void* fbdevptr = owlBufferGetPointer(OD.frameBuffer,0); - cudaArray_t array; - cudaGraphicsSubResourceGetMappedArray(&array, OD.cudaResourceTex, 0, 0); - cudaMemcpyToArray(array, 0, 0, fbdevptr, OD.LP.frameSize.x * 
OD.LP.frameSize.y * sizeof(glm::vec4), cudaMemcpyDeviceToDevice); - cudaGraphicsUnmapResources(1, &OD.cudaResourceTex); - + const void* fbdevptr = owlBufferGetPointer(OD.combinedFrameBuffer,0); + + if (OD.resourceSharingSuccessful) { + cudaGraphicsMapResources(1, &OD.cudaResourceTex); + cudaArray_t array; + cudaGraphicsSubResourceGetMappedArray(&array, OD.cudaResourceTex, 0, 0); + cudaMemcpyToArray(array, 0, 0, fbdevptr, OD.LP.frameSize.x * OD.LP.frameSize.y * sizeof(glm::vec4), cudaMemcpyDeviceToDevice); + cudaGraphicsUnmapResources(1, &OD.cudaResourceTex); + } else { + GL_CHECK(glBindTexture(GL_TEXTURE_2D, OD.imageTexID)); + glEnable(GL_TEXTURE_2D); + GL_CHECK(glTexSubImage2D(GL_TEXTURE_2D,0, + 0, 0, + OD.LP.frameSize.x, OD.LP.frameSize.y, + GL_RGBA, GL_FLOAT, fbdevptr)); + } + // Draw pixels from optix frame buffer glEnable(GL_FRAMEBUFFER_SRGB); glViewport(0, 0, OD.LP.frameSize.x, OD.LP.frameSize.y); @@ -1784,7 +1858,6 @@ void drawFrameBufferToWindow() glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0); glDisable(GL_DEPTH_TEST); - glBindTexture(GL_TEXTURE_2D, OD.imageTexID); // Draw texture to screen via immediate mode glEnable(GL_TEXTURE_2D); @@ -1980,6 +2053,7 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi resizeOptixFrameBuffer(width, height); resetAccumulation(); updateComponents(); + int numGPUs = owlGetDeviceCount(OptixData.context); for (uint32_t i = 0; i < samplesPerPixel; ++i) { // std::cout< render(uint32_t width, uint32_t height, uint32_t samplesPerPi } updateLaunchParams(); - for (uint32_t deviceID = 0; deviceID < owlGetDeviceCount(OptixData.context); deviceID++) { - owlAsyncLaunch2DOnDevice(OptixData.rayGen, NVISII.wd.numSamples(deviceID), 1, deviceID, OptixData.launchParams); + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, 
OptixData.launchParams); + cudaEventRecord(NVISII.events[deviceID].second); + } + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaEventSynchronize(NVISII.events[deviceID].second); + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); } - owlLaunchSync(OptixData.launchParams); + updateGPUWeights(); + mergeFrameBuffers(); if (!NVISII.headlessMode) { if (OptixData.enableDenoiser) @@ -2022,17 +2104,14 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi std::cout<<"\r "<< samplesPerPixel << "/" << samplesPerPixel <<" - done!" << std::endl; } - synchronizeDevices(); - if (OptixData.enableDenoiser) { denoiseImage(); } + synchronizeDevices(); const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); - - synchronizeDevices(); }); return frameBuffer; @@ -2125,6 +2204,9 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra else if (option == std::string("heatmap")) { OptixData.LP.renderDataMode = RenderDataFlags::HEATMAP; } + else if (option == std::string("device_id")) { + OptixData.LP.renderDataMode = RenderDataFlags::DEVICE_ID; + } else { throw std::runtime_error(std::string("Error, unknown option : \"") + _option + std::string("\". 
") + std::string("See documentation for available options")); @@ -2135,6 +2217,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra OptixData.LP.renderDataBounce = bounce; OptixData.LP.seed = seed; updateComponents(); + int numGPUs = owlGetDeviceCount(OptixData.context); for (uint32_t i = startFrame; i < frameCount; ++i) { // std::cout< renderData(uint32_t width, uint32_t height, uint32_t startFra updateLaunchParams(); - for (uint32_t deviceID = 0; deviceID < owlGetDeviceCount(OptixData.context); deviceID++) { - owlAsyncLaunch2DOnDevice(OptixData.rayGen, NVISII.wd.numSamples(deviceID), 1, deviceID, OptixData.launchParams); + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); + cudaEventRecord(NVISII.events[deviceID].second); + } + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaEventSynchronize(NVISII.events[deviceID].second); + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); } - owlLaunchSync(OptixData.launchParams); + updateGPUWeights(); + mergeFrameBuffers(); // Dont run denoiser to raw data rendering // if (OptixData.enableDenoiser) @@ -2427,9 +2518,10 @@ void initializeInteractive( glfw->poll_events(); initializeOptix(/*headless = */ false); - initializeImgui(); + int numGPUs = owlGetDeviceCount(OptixData.context); + while (!stopped) { /* Poll events from the window */ @@ -2451,15 +2543,23 @@ void initializeInteractive( updateFrameBuffer(); updateComponents(); updateLaunchParams(); - for (uint32_t deviceID = 0; deviceID < owlGetDeviceCount(OptixData.context); deviceID++) { - owlAsyncLaunch2DOnDevice(OptixData.rayGen, NVISII.wd.numSamples(deviceID), 1, deviceID, OptixData.launchParams); + + for (uint32_t deviceID = 0; deviceID < 
numGPUs; deviceID++) { + cudaSetDevice(deviceID); + cudaEventRecord(NVISII.events[deviceID].first, owlParamsGetCudaStream(OptixData.launchParams, deviceID)); + owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); + cudaEventRecord(NVISII.events[deviceID].second, owlParamsGetCudaStream(OptixData.launchParams, deviceID)); } owlLaunchSync(OptixData.launchParams); - - if (OptixData.enableDenoiser) - { + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { + cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); + } + updateGPUWeights(); + mergeFrameBuffers(); + + if (OptixData.enableDenoiser) { denoiseImage(); - } + } } // glm::vec4* samplePtr = (glm::vec4*) owlBufferGetPointer(OptixData.accumBuffer,0); // glm::vec4* mvecPtr = (glm::vec4*) owlBufferGetPointer(OptixData.mvecBuffer,0); @@ -2485,7 +2585,10 @@ void initializeInteractive( OPTIX_CHECK(optixDenoiserDestroy(OptixData.denoiser)); if (OptixData.imageTexID != -1) { - cudaGraphicsUnregisterResource(OptixData.cudaResourceTex); + if (OptixData.cudaResourceTex) { + cudaGraphicsUnregisterResource(OptixData.cudaResourceTex); + OptixData.cudaResourceTex = 0; + } glDeleteTextures(1, &OptixData.imageTexID); } @@ -2647,12 +2750,12 @@ void updateSceneAabb(Entity* entity) void enableUpdates() { - enqueueCommand([] () { lazyUpdatesEnabled = false; }); + enqueueCommandAndWait([] () { lazyUpdatesEnabled = false; }); } void disableUpdates() { - enqueueCommand([] () { lazyUpdatesEnabled = true; }); + enqueueCommandAndWait([] () { lazyUpdatesEnabled = true; }); } bool areUpdatesEnabled() @@ -2819,6 +2922,9 @@ void __test__(std::vector args) { else if (option == std::string("heatmap")) { OptixData.LP.renderDataMode = RenderDataFlags::HEATMAP; } + else if (option == std::string("device_id")) { + OptixData.LP.renderDataMode = RenderDataFlags::DEVICE_ID; + } else { throw 
std::runtime_error(std::string("Error, unknown option : \"") + option + std::string("\". ") + std::string("See documentation for available options")); diff --git a/src/nvisii/nvisii.cu b/src/nvisii/nvisii.cu index 0d81d5d2..15f41939 100644 --- a/src/nvisii/nvisii.cu +++ b/src/nvisii/nvisii.cu @@ -54,38 +54,3 @@ void reproject(glm::vec4 *sampleBuffer, glm::vec4 *t0AlbedoBuffer, glm::vec4 *t1 _reproject<<>>(sampleBuffer, t0AlbedoBuffer, t1AlbedoBuffer, mvecBuffer, scratchBuffer, imageBuffer, true, width, height); _reproject<<>>(sampleBuffer, t0AlbedoBuffer, t1AlbedoBuffer, mvecBuffer, scratchBuffer, imageBuffer, false, width, height); } - -#include "work_distribution.h" - -extern "C" __global__ void fillSamples( - int gpu_idx, - int num_gpus, - int width, - int height, - int2* sample_indices ) -{ - StaticWorkDistribution wd; - wd.setRasterSize( width, height ); - wd.setNumGPUs( num_gpus ); - - const int sample_idx = blockIdx.x; - sample_indices[sample_idx] = wd.getSamplePixel( gpu_idx, sample_idx ); -} - - -extern "C" __host__ void fillSamplesCUDA( - int num_samples, - cudaStream_t stream, - int gpu_idx, - int num_gpus, - int width, - int height, - int2* sample_indices ) -{ - fillSamples<<>>( - gpu_idx, - num_gpus, - width, - height, - sample_indices ); -} From 8a386fc091d744c75a2786e6e439f443eeb9ae17 Mon Sep 17 00:00:00 2001 From: n8vm Date: Fri, 2 Apr 2021 18:44:05 -0600 Subject: [PATCH 07/55] adding some sync points --- src/nvisii/nvisii.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 3f415090..7cd2ad85 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -1948,6 +1948,9 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi if ((width < 1) || (height < 1)) throw std::runtime_error("Error, invalid width/height"); std::vector frameBuffer(width * height * 4); + // flush command queue + enqueueCommandAndWait([](){}); + enqueueCommandAndWait([&frameBuffer, width, height, 
samplesPerPixel, seed] () { if (!NVISII.headlessMode) { if ((width != WindowData.currentSize.x) || (height != WindowData.currentSize.y)) @@ -2033,6 +2036,8 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra { std::vector frameBuffer(width * height * 4); + enqueueCommandAndWait([](){}); + enqueueCommandAndWait([&frameBuffer, width, height, startFrame, frameCount, bounce, _option, seed] () { if (!NVISII.headlessMode) { if ((width != WindowData.currentSize.x) || (height != WindowData.currentSize.y)) From bc5d1af6c73ba2ed5237c21b12611d43ca969bf2 Mon Sep 17 00:00:00 2001 From: n8vm Date: Fri, 2 Apr 2021 22:00:37 -0600 Subject: [PATCH 08/55] Adding support for constant material parameters with import_scene function --- include/nvisii/nvisii.h | 6 +-- src/nvisii/light.cpp | 6 +-- src/nvisii/nvisii_import_scene.cpp | 65 ++++++++++++++++++++++++++++-- 3 files changed, 68 insertions(+), 9 deletions(-) diff --git a/include/nvisii/nvisii.h b/include/nvisii/nvisii.h index d15f6220..e14a2a97 100644 --- a/include/nvisii/nvisii.h +++ b/include/nvisii/nvisii.h @@ -378,11 +378,11 @@ struct Scene { * * @param filepath The path for the file to load * @param position A change in position to apply to all entities generated by this function - * @param position A change in scale to apply to all entities generated by this function - * @param position A change in rotation to apply to all entities generated by this function + * @param scale A change in scale to apply to all entities generated by this function + * @param rotation A change in rotation to apply to all entities generated by this function * @param args A list of optional arguments that can effect the importer. * Possible options include: - * "verbose" - print out information related to loading the scene. + * "verbose" - print out information related to loading the scene. Useful for debugging! 
*/ Scene importScene( std::string file_path, diff --git a/src/nvisii/light.cpp b/src/nvisii/light.cpp index 01e3ab02..842981b8 100644 --- a/src/nvisii/light.cpp +++ b/src/nvisii/light.cpp @@ -46,9 +46,9 @@ LightStruct &Light::getStruct() { void Light::setColor(glm::vec3 color) { auto &light = getStruct(); - light.r = max(0.f, min(color.r, 1.f)); - light.g = max(0.f, min(color.g, 1.f)); - light.b = max(0.f, min(color.b, 1.f)); + light.r = max(0.f, color.r); + light.g = max(0.f, color.g); + light.b = max(0.f, color.b); markDirty(); } diff --git a/src/nvisii/nvisii_import_scene.cpp b/src/nvisii/nvisii_import_scene.cpp index 9b557d69..60bbd3a5 100644 --- a/src/nvisii/nvisii_import_scene.cpp +++ b/src/nvisii/nvisii_import_scene.cpp @@ -101,7 +101,7 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu nvisiiScene.materials.push_back(mat); material_light_map[mat] = nullptr; aiString Path; - + // Diffuse/specular workflow if (material->GetTextureCount(aiTextureType_DIFFUSE) > 0) { if (material->GetTexture(aiTextureType_DIFFUSE, 0, &Path, NULL, NULL, NULL, NULL, NULL) == AI_SUCCESS) { @@ -204,6 +204,62 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu auto name = std::string(material->GetName().C_Str()); auto mat = nvisiiScene.materials[materialIdx]; aiString Path; + + if (verbose) std::cout<<"Creating material : " << name << std::endl; + + aiColor3D color (0.f,0.f,0.f); + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_DIFFUSE, color)) { + mat->setBaseColor(glm::vec3(color.r, color.g, color.b)); + if (verbose) std::cout<<"Assigning base color : " << color.r << " " << color.g << " " << color.b << std::endl; + } + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_EMISSIVE, color)) { + if (color.r > 0.f || color.g > 0.f || color.b > 0.f) { + if (verbose) std::cout<<"Assigning base color : " << color.r << " " << color.g << " " << color.b << std::endl; + if (Light::get(mat->getName()) == nullptr) { + 
Light::create(mat->getName()); + nvisiiScene.lights.push_back(material_light_map[mat]); + } + material_light_map[mat] = Light::get(mat->getName()); + material_light_map[mat]->setColor(glm::vec3(color.r, color.g, color.b)); + } + } + if(AI_SUCCESS == material->Get(AI_MATKEY_COLOR_SPECULAR, color)) { + if (color.r == color.b && color.r == color.g) { + if (verbose) std::cout<<"Setting constant specular: " << color.r << std::endl; + mat->setSpecular(color.r); + } + else if (verbose) { + std::cout<<"Error, colored specular found (not supported)" << std::endl; + } + } + + float scalar; + if(AI_SUCCESS == material->Get(AI_MATKEY_SHININESS, scalar)) { + if (scalar != 0.f) { + if (verbose) std::cout<<"Interpreting shininess as 2/roughness^4 - 2: " << powf(2.f / (scalar + 2.f), 1.f/4.f) << std::endl; + mat->setRoughness(powf(2.f / (scalar + 2.f), 1.f/4.f)); + } + } + float ior = 1.f; + if(AI_SUCCESS == material->Get(AI_MATKEY_REFRACTI, ior)) { + if (verbose) std::cout<<"Assigning index of refraction " << ior << std::endl; + mat->setIor(ior); + } + + if(AI_SUCCESS == material->Get(AI_MATKEY_OPACITY, scalar)) { + if (scalar != 1.f) { + if (ior == 1) { + if (verbose) std::cout<<"Assigning opacity " << scalar << std::endl; + mat->setAlpha(scalar); + } + else { + if (verbose) std::cout<<"IOR != 1.0, interpreting dissolve as transmission " << scalar << std::endl; + mat->setTransmission(scalar); + } + } + } + + // todo, add texture paths to map above, load later and connect if (material->GetTextureCount(aiTextureType_DIFFUSE) > 0) { @@ -236,9 +292,12 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu std::string path = directory + "/" + std::string(Path.C_Str()); std::replace(path.begin(), path.end(), '\\', '/'); if (texture_map[path]) { - material_light_map[mat] = Light::create(mat->getName()); + if (Light::get(mat->getName()) == nullptr) { + Light::create(mat->getName()); + nvisiiScene.lights.push_back(material_light_map[mat]); + } + 
material_light_map[mat] = Light::get(mat->getName()); material_light_map[mat]->setColorTexture(texture_map[path]); - nvisiiScene.lights.push_back(material_light_map[mat]); } } } From 1991793a890fe5374913bc17e6b035ce226233f0 Mon Sep 17 00:00:00 2001 From: n8vm Date: Fri, 2 Apr 2021 23:59:19 -0600 Subject: [PATCH 09/55] updating owl submodule --- externals/owl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/owl b/externals/owl index 97d3acd6..f1d01daa 160000 --- a/externals/owl +++ b/externals/owl @@ -1 +1 @@ -Subproject commit 97d3acd6ad4d0e356f90959d34c8a94024a4fa70 +Subproject commit f1d01daa451151e6ac64c7f9d6662b354b467384 From 74ccee12d2fb92e788017c9d49f082db6694d8b7 Mon Sep 17 00:00:00 2001 From: n8vm Date: Sat, 3 Apr 2021 00:45:59 -0600 Subject: [PATCH 10/55] adding more visibility flags. --- include/nvisii/entity.h | 16 ++++++++++++-- include/nvisii/entity_struct.h | 5 +++++ src/nvisii/entity.cpp | 39 +++++++++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 3 deletions(-) diff --git a/include/nvisii/entity.h b/include/nvisii/entity.h index 5120184a..9fadab2a 100644 --- a/include/nvisii/entity.h +++ b/include/nvisii/entity.h @@ -211,9 +211,21 @@ class Entity : public StaticFactory { /** * Objects can be set to be invisible to particular ray types: - * @param camera Makes the object visible to camera rays + * @param camera Makes the object visible to camera rays (the first rays to be traced from the camera). + * @param diffuse (todo...) Makes the object visible to diffuse rays (eg for diffuse GI) + * @param glossy (todo...) Makes the object visible to glossy rays (eg in reflections) + * @param transmission (todo...) Makes the object visible to transmission rays (eg from inside glass) + * @param volume_scatter (todo...) Makes the object visible to volume scatter rays (eg from light simulation inside a volume) + * @param shadow Enables the object to cast shadows. 
*/ - void setVisibility(bool camera = true); + void setVisibility( + bool camera = true, + bool diffuse = true, + bool glossy = true, + bool transmission = true, + bool volume_scatter = true, + bool shadow = true + ); /** @returns the minimum axis aligned bounding box position. Requires a transform and mesh component to be attached. */ glm::vec3 getMinAabbCorner(); diff --git a/include/nvisii/entity_struct.h b/include/nvisii/entity_struct.h index 41658fa4..9e77c832 100644 --- a/include/nvisii/entity_struct.h +++ b/include/nvisii/entity_struct.h @@ -7,6 +7,11 @@ #ifndef ENTITY_VISIBILITY_FLAGS #define ENTITY_VISIBILITY_FLAGS #define ENTITY_VISIBILITY_CAMERA_RAYS (1<<0) +#define ENTITY_VISIBILITY_DIFFUSE_RAYS (1<<1) +#define ENTITY_VISIBILITY_GLOSSY_RAYS (1<<2) +#define ENTITY_VISIBILITY_TRANSMISSION_RAYS (1<<3) +#define ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS (1<<4) +#define ENTITY_VISIBILITY_SHADOW_RAYS (1<<5) #endif struct EntityStruct { diff --git a/src/nvisii/entity.cpp b/src/nvisii/entity.cpp index 52987292..aac5d55c 100644 --- a/src/nvisii/entity.cpp +++ b/src/nvisii/entity.cpp @@ -253,7 +253,14 @@ Mesh* Entity::getMesh() return &mesh; } -void Entity::setVisibility(bool camera) +void Entity::setVisibility( + bool camera, + bool diffuse, + bool glossy, + bool transmission, + bool volume_scatter, + bool shadow +) { std::lock_guard lock(*Entity::getEditMutex().get()); @@ -263,6 +270,36 @@ void Entity::setVisibility(bool camera) } else { entity.flags &= (~ENTITY_VISIBILITY_CAMERA_RAYS); } + + if (diffuse) { + entity.flags |= ENTITY_VISIBILITY_DIFFUSE_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_DIFFUSE_RAYS); + } + + if (glossy) { + entity.flags |= ENTITY_VISIBILITY_GLOSSY_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_GLOSSY_RAYS); + } + + if (transmission) { + entity.flags |= ENTITY_VISIBILITY_TRANSMISSION_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_TRANSMISSION_RAYS); + } + + if (volume_scatter) { + entity.flags |= 
ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS); + } + + if (shadow) { + entity.flags |= ENTITY_VISIBILITY_SHADOW_RAYS; + } else { + entity.flags &= (~ENTITY_VISIBILITY_SHADOW_RAYS); + } markDirty(); } From 5a68fdf48f0daaa87aba11b7d719cc7c8cfdf8e5 Mon Sep 17 00:00:00 2001 From: n8vm Date: Sat, 3 Apr 2021 00:54:38 -0600 Subject: [PATCH 11/55] assigning visibility masks to entities --- src/nvisii/nvisii.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index fa576ccf..2e0bfb1f 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -1290,12 +1290,11 @@ void updateComponents() OD.volumeHandles[v->getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint8_t), gridHdlPtr.get()->size(), nullptr); owlBufferUpload(OD.volumeHandles[v->getAddress()], gridHdlPtr.get()->data()); - printf("%hhx\n",gridHdlPtr.get()->data()[0]); + // printf("%hhx\n",gridHdlPtr.get()->data()[0]); const void* d_gridData = owlBufferGetPointer(OD.volumeHandles[v->getAddress()], 0); uint8_t first_byte; cudaMemcpy((void*)&first_byte, d_gridData, 1, cudaMemcpyDeviceToHost); - printf("%hhx\n",first_byte); - + // printf("%hhx\n",first_byte); // Create geometry and build BLAS uint32_t volumeID = v->getAddress(); @@ -1321,12 +1320,14 @@ void updateComponents() std::vector surfaceInstances; std::vector t0SurfaceTransforms; std::vector t1SurfaceTransforms; + std::vector surfaceMasks; std::vector surfaceInstanceToEntity; // Volume instances std::vector volumeInstances; std::vector t0VolumeTransforms; std::vector t1VolumeTransforms; + std::vector volumeMasks; std::vector volumeInstanceToEntity; // Todo: curves... 
@@ -1362,6 +1363,7 @@ void updateComponents() surfaceInstanceToEntity.push_back(eid); t0SurfaceTransforms.push_back(prevLocalToWorld); t1SurfaceTransforms.push_back(localToWorld); + surfaceMasks.push_back(entities[eid].getStruct().flags); } // Add any instanced volume geometry to the list @@ -1376,11 +1378,14 @@ void updateComponents() volumeInstanceToEntity.push_back(eid); t0VolumeTransforms.push_back(prevLocalToWorld); t1VolumeTransforms.push_back(localToWorld); + volumeMasks.push_back(entities[eid].getStruct().flags); } } + std::vector owlSurfaceVisibilityMasks; std::vector t0OwlSurfaceTransforms; std::vector t1OwlSurfaceTransforms; + std::vector owlVolumeVisibilityMasks; std::vector t0OwlVolumeTransforms; std::vector t1OwlVolumeTransforms; auto oldSurfaceIAS = OD.surfacesIAS; @@ -1409,9 +1414,11 @@ void updateComponents() instanceGroupSetChild(OD.surfacesIAS, iid, surfaceInstances[iid]); t0OwlSurfaceTransforms.push_back(glmToOWL(t0SurfaceTransforms[iid])); t1OwlSurfaceTransforms.push_back(glmToOWL(t1SurfaceTransforms[iid])); + owlSurfaceVisibilityMasks.push_back(surfaceMasks[iid]); } owlInstanceGroupSetTransforms(OD.surfacesIAS,0,(const float*)t0OwlSurfaceTransforms.data()); owlInstanceGroupSetTransforms(OD.surfacesIAS,1,(const float*)t1OwlSurfaceTransforms.data()); + owlInstanceGroupSetVisibilityMasks(OD.surfacesIAS, owlSurfaceVisibilityMasks.data()); owlBufferResize(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.size()); owlBufferUpload(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.data()); } @@ -1423,9 +1430,11 @@ void updateComponents() instanceGroupSetChild(OD.volumesIAS, iid, volumeInstances[iid]); t0OwlVolumeTransforms.push_back(glmToOWL(t0VolumeTransforms[iid])); t1OwlVolumeTransforms.push_back(glmToOWL(t1VolumeTransforms[iid])); + owlVolumeVisibilityMasks.push_back(volumeMasks[iid]); } owlInstanceGroupSetTransforms(OD.volumesIAS,0,(const float*)t0OwlVolumeTransforms.data()); owlInstanceGroupSetTransforms(OD.volumesIAS,1,(const 
float*)t1OwlVolumeTransforms.data()); + owlInstanceGroupSetVisibilityMasks(OD.volumesIAS, owlVolumeVisibilityMasks.data()); owlBufferResize(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.size()); owlBufferUpload(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.data()); } From 8db142e5b8b39741b2240c8853073098f23ebf53 Mon Sep 17 00:00:00 2001 From: n8vm Date: Tue, 6 Apr 2021 13:02:02 -0600 Subject: [PATCH 12/55] forcing denoiser configure to wait --- src/nvisii/nvisii.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 2e0bfb1f..012c8dd8 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -1946,7 +1946,7 @@ void configureDenoiser(bool useAlbedoGuide, bool useNormalGuide, bool useKernelP "If normal guide is enabled, albedo guide must also be enabled."); } - enqueueCommand([useAlbedoGuide, useNormalGuide, useKernelPrediction](){ + enqueueCommandAndWait([useAlbedoGuide, useNormalGuide, useKernelPrediction](){ OptixData.enableAlbedoGuide = useAlbedoGuide; OptixData.enableNormalGuide = useNormalGuide; #ifdef USE_OPTIX70 From 96c0aeee3d3314bb79f51b002d8d3ebe2b722f69 Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 14 Apr 2021 10:37:53 -0600 Subject: [PATCH 13/55] fixing bug where renderToImage wasnt saving the correct buffer --- src/nvisii/nvisii.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 012c8dd8..8a7da290 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -2029,7 +2029,7 @@ std::vector readFrameBuffer() { int num_devices = getDeviceCount(); synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*)owlBufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*)owlBufferGetPointer(OptixData.combinedFrameBuffer,0); for (uint32_t test = 0; test < frameBuffer.size(); test += 4) { frameBuffer[test + 0] = fb[test / 4].r; frameBuffer[test + 1] = 
fb[test / 4].g; @@ -2046,9 +2046,6 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi if ((width < 1) || (height < 1)) throw std::runtime_error("Error, invalid width/height"); std::vector frameBuffer(width * height * 4); - // flush command queue - enqueueCommandAndWait([](){}); - enqueueCommandAndWait([&frameBuffer, width, height, samplesPerPixel, seed] () { if (!NVISII.headlessMode) { if ((width != WindowData.currentSize.x) || (height != WindowData.currentSize.y)) @@ -2122,7 +2119,7 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi } synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.combinedFrameBuffer,0); cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); }); @@ -2271,7 +2268,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra synchronizeDevices(); - const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); + const glm::vec4 *fb = (const glm::vec4*) owlBufferGetPointer(OptixData.combinedFrameBuffer,0); cudaMemcpyAsync(frameBuffer.data(), fb, width * height * sizeof(glm::vec4), cudaMemcpyDeviceToHost); OptixData.LP.renderDataMode = 0; From 8ccbf36bbd239771473f12819c404a89be3d06ff Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 14 Apr 2021 10:59:30 -0600 Subject: [PATCH 14/55] working on a volume sample --- examples/download_content.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/download_content.sh b/examples/download_content.sh index 02b0fe46..92f416b3 100644 --- a/examples/download_content.sh +++ b/examples/download_content.sh @@ -23,4 +23,7 @@ wget https://www.dropbox.com/s/22bug1he354oqpt/bmw.zip unzip bmw.zip -d bmw/ rm bmw.zip -wget https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl \ No newline at end of file +wget 
https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl + +wget https://www.dropbox.com/s/runlp60bjjf3dpu/bunny_cloud.zip +unzip bunny_cloud.zip From 5746cb6fb8c1c512988abbb2cedfb68c237d2bfc Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 14 Apr 2021 11:25:55 -0600 Subject: [PATCH 15/55] simplifying download_content script a bit --- examples/content.txt | 11 +++++++++++ examples/download_content.sh | 22 ++++------------------ 2 files changed, 15 insertions(+), 18 deletions(-) create mode 100644 examples/content.txt diff --git a/examples/content.txt b/examples/content.txt new file mode 100644 index 00000000..1b7882e6 --- /dev/null +++ b/examples/content.txt @@ -0,0 +1,11 @@ +https://www.dropbox.com/s/jve877nanizw2vf/dragon.zip +https://www.dropbox.com/s/jh3o6wtqdrq4bi2/photos_2020_5_11_fst_gray-wall-grunge.jpg +https://www.dropbox.com/s/gb67d0cv1lgrgdp/kiara_4_mid-morning_4k.hdr +https://www.dropbox.com/s/8nj82vxvxwvnttt/salle_de_bain_separated.zip +https://www.dropbox.com/s/p2xius4kd4olqg3/gradient.png +https://www.dropbox.com/s/bxbkzmuy2mviyzb/Bricks051_2K-JPG.zip +https://www.dropbox.com/s/na3vo8rca7feoiq/teatro_massimo_2k.hdr +https://www.dropbox.com/s/22bug1he354oqpt/bmw.zip +https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl +https://www.dropbox.com/s/runlp60bjjf3dpu/bunny_cloud.zip +https://www.dropbox.com/s/nim7jsjiumei4f9/boston_teapot_256x256x178_uint8.zip diff --git a/examples/download_content.sh b/examples/download_content.sh index 92f416b3..c6ca856f 100644 --- a/examples/download_content.sh +++ b/examples/download_content.sh @@ -1,29 +1,15 @@ mkdir content cd content -wget https://www.dropbox.com/s/jve877nanizw2vf/dragon.zip -unzip dragon.zip -d dragon/ -rm dragon.zip - -wget https://www.dropbox.com/s/jh3o6wtqdrq4bi2/photos_2020_5_11_fst_gray-wall-grunge.jpg -wget https://www.dropbox.com/s/gb67d0cv1lgrgdp/kiara_4_mid-morning_4k.hdr +wget -i ../content.txt -wget https://www.dropbox.com/s/8nj82vxvxwvnttt/salle_de_bain_separated.zip +unzip dragon.zip -d 
dragon/ +rm dragon.zip unzip salle_de_bain_separated.zip rm salle_de_bain_separated.zip - -wget https://www.dropbox.com/s/p2xius4kd4olqg3/gradient.png -wget https://www.dropbox.com/s/bxbkzmuy2mviyzb/Bricks051_2K-JPG.zip unzip Bricks051_2K-JPG.zip - -wget https://www.dropbox.com/s/na3vo8rca7feoiq/teatro_massimo_2k.hdr - mkdir bmw -wget https://www.dropbox.com/s/22bug1he354oqpt/bmw.zip unzip bmw.zip -d bmw/ rm bmw.zip - -wget https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl - -wget https://www.dropbox.com/s/runlp60bjjf3dpu/bunny_cloud.zip unzip bunny_cloud.zip +unzip boston_teapot_256x256x178_uint8.zip From 04f2bb25c3b016abae1336af1ad1b85b53d94910 Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 14 Apr 2021 12:42:12 -0600 Subject: [PATCH 16/55] adding a volumes example. Some bugs need to be worked out --- examples/22.volumes.py | 146 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 examples/22.volumes.py diff --git a/examples/22.volumes.py b/examples/22.volumes.py new file mode 100644 index 00000000..9a74eb14 --- /dev/null +++ b/examples/22.volumes.py @@ -0,0 +1,146 @@ +#%% + +# 22.volumes.py +# +# This shows an example of two volumes. One volume uses the NanoVDB format, +# and the other is a raw volume. + +import nvisii +import numpy as np +opt = lambda: None +opt.spp = 50 +opt.width = 512 +opt.height = 512 +opt.out = '22_volumes.png' + +# headless - no window +# verbose - output number of frames rendered, etc.. +nvisii.initialize(headless = False, verbose = True, window_on_top = True) + +#%% +# Use a neural network to denoise ray traced +nvisii.enable_denoiser() + +# First, lets create an entity that will serve as our camera. +camera = nvisii.entity.create(name = "camera") + +# To place the camera into our scene, we'll add a "transform" component. +# (All nvisii objects have a "name" that can be used for easy lookup later.) 
+camera.set_transform(nvisii.transform.create(name = "camera_transform")) + +# To make our camera entity act like a "camera", we'll add a camera component +camera.set_camera( + nvisii.camera.create_from_fov( + name = "camera_camera", + field_of_view = 0.785398, # note, this is in radians + aspect = opt.width / float(opt.height) + ) +) + +# Finally, we'll select this entity to be the current camera entity. +# (nvisii can only use one camera at the time) +nvisii.set_camera_entity(camera) + +# Lets set the camera to look at an object. +# We'll do this by editing the transform component. +camera.get_transform().look_at(at = (0, 0, .9), up = (0, 0, 1), eye = (0, 5, 1)) + +# Next, lets at an object (a floor). +floor = nvisii.entity.create( + name = "floor", + mesh = nvisii.mesh.create_plane("mesh_floor"), + transform = nvisii.transform.create("transform_floor"), + material = nvisii.material.create("material_floor") +) + +# Lets make our floor act as a mirror +mat = floor.get_material() +# mat = nvisii.material.get("material_floor") # <- this also works +#%% +# Mirrors are smooth and "metallic". 
+mat.set_base_color((1.,1.,1.)) +mat.set_metallic(0) +mat.set_roughness(1) + +# Make the floor large by scaling it +trans = floor.get_transform() +trans.set_scale((5,5,1)) + +#%% +# Let's also add a sphere +torus = nvisii.entity.create( + name="torus", + volume = nvisii.volume.create_torus("torus"), + transform = nvisii.transform.create("torus"), + material = nvisii.material.create("torus") +) +#%% +torus.get_transform().set_position((.5,2,0.35)) +torus.get_transform().set_scale((0.003, 0.003, 0.003)) +torus.get_transform().set_angle_axis(3.14 * .25, (1,0,0)) +torus.get_material().set_base_color((.0,0.0,1)) +torus.get_material().set_roughness(0.0) +torus.get_material().set_transmission(0.0) +torus.get_volume().set_gradient_factor(10) +torus.get_volume().set_absorption(0) +torus.get_volume().set_scattering(1) +torus.get_volume().set_scale(50) + +#%% +# Let's also add a bunny +bunny = nvisii.entity.create( + name="bunny", + volume = nvisii.volume.create_from_file("bunny", "./content/bunny_cloud.nvdb"), + transform = nvisii.transform.create("bunny"), + material = nvisii.material.create("bunny") +) +#%% +bunny.get_transform().set_position((-1,0,0.75)) +bunny.get_transform().set_scale((0.003, 0.003, 0.003)) +bunny.get_material().set_base_color((0.1,0.9,0.08)) +bunny.get_material().set_roughness(0.7) +bunny.get_volume().set_gradient_factor(10) +bunny.get_volume().set_absorption(0) +bunny.get_volume().set_scattering(1) +bunny.get_volume().set_scale(10) +bunny.get_transform().set_angle_axis(nvisii.pi() * .5, (1,0,0)) +bunny.get_transform().add_angle_axis(nvisii.pi(), (0,1,0)) + +#%% +voxels = np.fromfile("./content/boston_teapot_256x256x178_uint8.raw", dtype=np.uint8).astype(np.float32) + + + +#%% +# Let's also add a teapot +teapot = nvisii.entity.create( + name="teapot", + volume = nvisii.volume.create_from_data("teapot", width = 256, height = 256, depth = 178, data = voxels, background = 0.0), + transform = nvisii.transform.create("teapot"), + material = 
nvisii.material.create("teapot") +) +#%% +teapot.get_transform().set_position((1,0,0.7)) +teapot.get_transform().set_scale((0.005, 0.005, 0.005)) +teapot.get_material().set_base_color((1.0,0.0,0.0)) +teapot.get_material().set_roughness(0.0) +teapot.get_material().set_metallic(1.0) +teapot.get_volume().set_gradient_factor(100) +teapot.get_volume().set_absorption(1) +teapot.get_volume().set_scattering(0) +teapot.get_volume().set_scale(250) +teapot.get_transform().set_angle_axis(-nvisii.pi() * .5, (1,0,0)) +teapot.get_transform().add_angle_axis(nvisii.pi() * 1.1, (0,1,0)) + +#%% +#%% +# Now that we have a simple scene, let's render it +print("rendering to", "01_simple_scene.png") +nvisii.render_to_file( + width = opt.width, + height = opt.height, + samples_per_pixel = opt.spp, + file_path = "01_simple_scene.png" +) + +nvisii.deinitialize() From 53be1aabd8088d0767f59c1a6d3bd2fa036d81bc Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 14 Apr 2021 16:25:47 -0600 Subject: [PATCH 17/55] working on refactoring volume codepath to improve efficiency --- src/nvisii/devicecode/launch_params.h | 6 +- src/nvisii/devicecode/path_tracer.cu | 32 ++-- src/nvisii/nvisii.cpp | 242 +++++++------------------- 3 files changed, 78 insertions(+), 202 deletions(-) diff --git a/src/nvisii/devicecode/launch_params.h b/src/nvisii/devicecode/launch_params.h index 717b2cf3..e6142f07 100644 --- a/src/nvisii/devicecode/launch_params.h +++ b/src/nvisii/devicecode/launch_params.h @@ -29,8 +29,7 @@ struct LaunchParams { glm::vec4 *scratchBuffer; glm::vec4 *mvecBuffer; glm::vec4 *accumPtr; - OptixTraversableHandle surfacesIAS; - OptixTraversableHandle volumesIAS; + OptixTraversableHandle IAS; float domeLightIntensity = 1.f; float domeLightExposure = 0.f; glm::vec3 domeLightColor = glm::vec3(-1.f); @@ -60,8 +59,7 @@ struct LaunchParams { Buffer textures; Buffer volumes; Buffer lightEntities; - Buffer surfaceInstanceToEntity; - Buffer volumeInstanceToEntity; + Buffer instanceToEntity; uint32_t 
numLightEntities = 0; Buffer> vertexLists; diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index bd438288..e292428b 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -967,7 +967,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() RayPayload surfPayload; surfPayload.tHit = -1.f; surfRay.time = time; - owl::traceRay( /*accel to trace against*/ LP.surfacesIAS, + owl::traceRay( /*accel to trace against*/ LP.IAS, /*the ray to trace*/ surfRay, /*prd*/ surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); @@ -980,7 +980,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() volPayload.t0 = volRay.tmin; volPayload.t1 = volRay.tmax; volPayload.primitiveID = (debug) ? -2 : -1; - owl::traceRay( /*accel to trace against*/ LP.volumesIAS, + owl::traceRay( /*accel to trace against*/ LP.IAS, /*the ray to trace*/ volRay, /*prd*/ volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); @@ -1020,8 +1020,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // Load the object we hit. int entityID; - if (isVolume) { GET(entityID, int, LP.volumeInstanceToEntity, volPayload.instanceID); } - else { GET(entityID, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); } + if (isVolume) { GET(entityID, int, LP.instanceToEntity, volPayload.instanceID); } + else { GET(entityID, int, LP.instanceToEntity, surfPayload.instanceID); } GET(EntityStruct entity, EntityStruct, LP.entities, entityID); GET(TransformStruct transform, TransformStruct, LP.transforms, entity.transform_id); @@ -1035,7 +1035,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); surfPayload.tHit = -1.f; surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + owl::traceRay( LP.IAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); volRay = surfRay; volRay.tmax = (surfPayload.tHit == -1.f) ? 
volRay.tmax : surfPayload.tHit; @@ -1044,7 +1044,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() volPayload.t0 = volRay.tmin; volPayload.t1 = volRay.tmax; volPayload.primitiveID = (debug) ? -3 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + owl::traceRay( LP.IAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); transparencyDepth++; if (transparencyDepth > LP.maxTransparencyDepth) break; continue; @@ -1190,7 +1190,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); surfPayload.tHit = -1.f; surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + owl::traceRay( LP.IAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); volRay = surfRay; volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; @@ -1199,7 +1199,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() volPayload.t0 = volRay.tmin; volPayload.t1 = volRay.tmax; volPayload.primitiveID = (debug) ? -4 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + owl::traceRay( LP.IAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); ++depth; transparencyDepth++; @@ -1289,7 +1289,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); surfPayload.tHit = -1.f; surfRay.time = time; - owl::traceRay( LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + owl::traceRay( LP.IAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); volRay = surfRay; volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; @@ -1298,7 +1298,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() volPayload.t0 = volRay.tmin; volPayload.t1 = volRay.tmax; volPayload.primitiveID = (debug) ? 
-4 : -1; - owl::traceRay( LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + owl::traceRay( LP.IAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Count this as a "transparent" bounce. ++depth; @@ -1432,13 +1432,13 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() ray.tmin = EPSILON * 10.f; ray.tmax = lightDistance + EPSILON; // needs to be distance to light, else anyhit logic breaks. ray.origin = hit_p; ray.direction = lightDir; ray.time = time; - owl::traceRay( LP.surfacesIAS, ray, surfPayload, occlusion_flags); + owl::traceRay( LP.IAS, ray, surfPayload, occlusion_flags); ray.tmax = (surfPayload.instanceID == -2) ? ray.tmax : surfPayload.tHit; volPayload.rng = rng; volPayload.t0 = volRay.tmin; volPayload.t1 = volRay.tmax; volPayload.primitiveID = (debug) ? -5 : -1; - owl::traceRay( LP.volumesIAS, ray, volPayload, occlusion_flags); + owl::traceRay( LP.IAS, ray, volPayload, occlusion_flags); bool visible; if (randomID == numLights) { // If we sampled the dome light, just check to see if we hit anything @@ -1447,7 +1447,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // If we sampled a light source, then check to see if we hit something other than the light int surfEntity; if (surfPayload.instanceID == -2) surfEntity = -1; - else { GET(surfEntity, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); } + else { GET(surfEntity, int, LP.instanceToEntity, surfPayload.instanceID); } visible = (volPayload.instanceID == -2) && (surfPayload.instanceID == -2 || surfEntity == sampledLightID); } if (visible) { @@ -1475,7 +1475,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() surfPayload.instanceID = -1; surfPayload.tHit = -1.f; surfRay.time = sampleTime(lcg_randomf(rng)); - owl::traceRay(LP.surfacesIAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + owl::traceRay(LP.IAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); volRay = surfRay; volRay.tmax = (surfPayload.tHit == -1.f) ? 
volRay.tmax : surfPayload.tHit; @@ -1483,7 +1483,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() volPayload.t0 = volRay.tmin; volPayload.t1 = volRay.tmax; volPayload.primitiveID = (debug) ? -6 : -1; - owl::traceRay(LP.volumesIAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + owl::traceRay(LP.IAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Check if we hit any of the previously sampled lights bool hitLight = false; @@ -1506,7 +1506,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // else if by sampling the brdf we also hit an area light // TODO: consider hitting emissive voxels? else if (surfPayload.instanceID != -1 && volPayload.instanceID == -1) { - GET(int entityID, int, LP.surfaceInstanceToEntity, surfPayload.instanceID); + GET(int entityID, int, LP.instanceToEntity, surfPayload.instanceID); bool visible = (entityID == sampledLightID); // We hit the light we sampled previously if (visible) { diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 8a7da290..5aea8f48 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -112,8 +112,7 @@ static struct OptixData { OWLBuffer textureBuffer; OWLBuffer volumeBuffer; OWLBuffer lightEntitiesBuffer; - OWLBuffer surfaceInstanceToEntityBuffer; - OWLBuffer volumeInstanceToEntityBuffer; + OWLBuffer instanceToEntityBuffer; OWLBuffer vertexListsBuffer; OWLBuffer normalListsBuffer; OWLBuffer tangentListsBuffer; @@ -144,8 +143,7 @@ static struct OptixData { std::vector volumeGeomList; std::vector volumeBlasList; - OWLGroup surfacesIAS = nullptr; - OWLGroup volumesIAS = nullptr; + OWLGroup IAS = nullptr; std::vector lightEntities; @@ -521,8 +519,7 @@ void initializeOptix(bool headless) { "scratchBuffer", OWL_BUFPTR, OWL_OFFSETOF(LaunchParams, scratchBuffer)}, { "mvecBuffer", OWL_BUFPTR, OWL_OFFSETOF(LaunchParams, mvecBuffer)}, { "accumPtr", OWL_BUFPTR, OWL_OFFSETOF(LaunchParams, accumPtr)}, - { "surfacesIAS", OWL_GROUP, OWL_OFFSETOF(LaunchParams, surfacesIAS)}, - { "volumesIAS", OWL_GROUP, OWL_OFFSETOF(LaunchParams, 
volumesIAS)}, + { "IAS", OWL_GROUP, OWL_OFFSETOF(LaunchParams, IAS)}, { "cameraEntity", OWL_USER_TYPE(EntityStruct), OWL_OFFSETOF(LaunchParams, cameraEntity)}, { "entities", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, entities)}, { "transforms", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, transforms)}, @@ -539,8 +536,7 @@ void initializeOptix(bool headless) { "texCoordLists", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, texCoordLists)}, { "indexLists", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, indexLists)}, { "numLightEntities", OWL_USER_TYPE(uint32_t), OWL_OFFSETOF(LaunchParams, numLightEntities)}, - { "surfaceInstanceToEntity", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, surfaceInstanceToEntity)}, - { "volumeInstanceToEntity", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, volumeInstanceToEntity)}, + { "instanceToEntity", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, instanceToEntity)}, { "domeLightIntensity", OWL_USER_TYPE(float), OWL_OFFSETOF(LaunchParams, domeLightIntensity)}, { "domeLightExposure", OWL_USER_TYPE(float), OWL_OFFSETOF(LaunchParams, domeLightExposure)}, { "domeLightColor", OWL_USER_TYPE(glm::vec3), OWL_OFFSETOF(LaunchParams, domeLightColor)}, @@ -632,8 +628,7 @@ void initializeOptix(bool headless) OD.volumeBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(VolumeStruct), Volume::getCount(), nullptr); OD.volumeHandlesBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Volume::getCount(), nullptr); OD.lightEntitiesBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.surfaceInstanceToEntityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); - OD.volumeInstanceToEntityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); + OD.instanceToEntityBuffer = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint32_t), 1, nullptr); OD.vertexListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); OD.normalListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, 
Mesh::getCount(), nullptr); OD.tangentListsBuffer = owlDeviceBufferCreate(OD.context, OWL_BUFFER, Mesh::getCount(), nullptr); @@ -650,15 +645,14 @@ void initializeOptix(bool headless) owlParamsSetBuffer(OD.launchParams, "textures", OD.textureBuffer); owlParamsSetBuffer(OD.launchParams, "volumes", OD.volumeBuffer); owlParamsSetBuffer(OD.launchParams, "lightEntities", OD.lightEntitiesBuffer); - owlParamsSetBuffer(OD.launchParams, "surfaceInstanceToEntity", OD.surfaceInstanceToEntityBuffer); - owlParamsSetBuffer(OD.launchParams, "volumeInstanceToEntity", OD.volumeInstanceToEntityBuffer); + owlParamsSetBuffer(OD.launchParams, "instanceToEntity", OD.instanceToEntityBuffer); owlParamsSetBuffer(OD.launchParams, "vertexLists", OD.vertexListsBuffer); owlParamsSetBuffer(OD.launchParams, "normalLists", OD.normalListsBuffer); - owlParamsSetBuffer(OD.launchParams, "tangentLists", OD.tangentListsBuffer); + owlParamsSetBuffer(OD.launchParams, "tangentLists", OD.tangentListsBuffer); owlParamsSetBuffer(OD.launchParams, "texCoordLists", OD.texCoordListsBuffer); owlParamsSetBuffer(OD.launchParams, "indexLists", OD.indexListsBuffer); owlParamsSetBuffer(OD.launchParams, "textureObjects", OD.textureObjectsBuffer); - owlParamsSetBuffer(OD.launchParams, "volumeHandles", OD.volumeHandlesBuffer); + owlParamsSetBuffer(OD.launchParams, "volumeHandles", OD.volumeHandlesBuffer); uint32_t meshCount = Mesh::getCount(); OD.vertexLists.resize(meshCount); @@ -689,24 +683,7 @@ void initializeOptix(bool headless) owlParamsSetBuffer(OD.launchParams, "environmentMapCols", OD.environmentMapColsBuffer); owlParamsSetRaw(OD.launchParams, "environmentMapWidth", &OD.LP.environmentMapWidth); owlParamsSetRaw(OD.launchParams, "environmentMapHeight", &OD.LP.environmentMapHeight); - - // OWLTexture GGX_E_AVG_LOOKUP = owlTexture2DCreate(OD.context, - // OWL_TEXEL_FORMAT_R32F, - // GGX_E_avg_size,1, - // GGX_E_avg, - // OWL_TEXTURE_LINEAR, - // OWL_COLOR_SPACE_LINEAR, - // OWL_TEXTURE_CLAMP); - // OWLTexture 
GGX_E_LOOKUP = owlTexture2DCreate(OD.context, - // OWL_TEXEL_FORMAT_R32F, - // GGX_E_size[0],GGX_E_size[1], - // GGX_E, - // OWL_TEXTURE_LINEAR, - // OWL_TEXTURE_CLAMP, - // OWL_COLOR_SPACE_LINEAR); - // launchParamsSetTexture(OD.launchParams, "GGX_E_AVG_LOOKUP", GGX_E_AVG_LOOKUP); - // launchParamsSetTexture(OD.launchParams, "GGX_E_LOOKUP", GGX_E_LOOKUP); - + OD.LP.numLightEntities = uint32_t(OD.lightEntities.size()); owlParamsSetRaw(OD.launchParams, "numLightEntities", &OD.LP.numLightEntities); owlParamsSetRaw(OD.launchParams, "domeLightIntensity", &OD.LP.domeLightIntensity); @@ -770,10 +747,10 @@ void initializeOptix(bool headless) groupBuildAccel(OD.placeholderGroup); // build IAS - OWLGroup surfacesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(surfacesIAS, 0, OD.placeholderGroup); - groupBuildAccel(surfacesIAS); - owlParamsSetGroup(OD.launchParams, "surfacesIAS", surfacesIAS); + OWLGroup IAS = instanceGroupCreate(OD.context, 1); + instanceGroupSetChild(IAS, 0, OD.placeholderGroup); + groupBuildAccel(IAS); + owlParamsSetGroup(OD.launchParams, "IAS", IAS); OWLGeom userGeom = owlGeomCreate(OD.context, OD.volumeGeomType); owlGeomSetPrimCount(userGeom, 1); @@ -783,11 +760,6 @@ void initializeOptix(bool headless) OD.placeholderUserGroup = owlUserGeomGroupCreate(OD.context, 1, &userGeom); groupBuildAccel(OD.placeholderUserGroup); - OWLGroup volumesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(volumesIAS, 0, OD.placeholderUserGroup); - groupBuildAccel(volumesIAS); - owlParamsSetGroup(OD.launchParams, "volumesIAS", volumesIAS); - // Build *SBT* required to trace the groups owlBuildPipeline(OD.context); owlBuildSBT(OD.context); @@ -1012,36 +984,6 @@ void setDomeLightSky(vec3 sunPos, vec3 skyTint, float atmosphereThickness, float OptixData.proceduralSkyTexture = owlTexture2DCreate(OptixData.context, OWL_TEXEL_FORMAT_RGBA32F, width, height, texels.data()); owlParamsSetTexture(OptixData.launchParams, "proceduralSkyTexture", 
OptixData.proceduralSkyTexture); - // float invWidth = 1.f / float(width); - // float invHeight = 1.f / float(height); - // float invjacobian = width * height / float(4 * M_PI); - - // auto rows = std::vector(height); - // auto cols = std::vector(width * height); - // for (int y = 0, i = 0; y < height; y++) { - // for (int x = 0; x < width; x++, i++) { - // cols[i] = std::max(texels[i].r, std::max(texels[i].g, texels[i].b)) + ((x > 0) ? cols[i - 1] : 0.f); - // } - // rows[y] = cols[i - 1] + ((y > 0) ? rows[y - 1] : 0.0f); - // // normalize the pdf for this scanline (if it was non-zero) - // if (cols[i - 1] > 0) { - // for (int x = 0; x < width; x++) { - // cols[i - width + x] /= cols[i - 1]; - // } - // } - // } - - // // normalize the pdf across all scanlines - // for (int y = 0; y < height; y++) - // rows[y] /= rows[height - 1]; - - // if (OptixData.environmentMapRowsBuffer) owlBufferRelease(OptixData.environmentMapRowsBuffer); - // if (OptixData.environmentMapColsBuffer) owlBufferRelease(OptixData.environmentMapColsBuffer); - // OptixData.environmentMapRowsBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), height, rows.data()); - // OptixData.environmentMapColsBuffer = owlDeviceBufferCreate(OptixData.context, OWL_USER_TYPE(float), width * height, cols.data()); - // OptixData.LP.environmentMapWidth = width; - // OptixData.LP.environmentMapHeight = height; - OptixData.LP.environmentMapWidth = 0; OptixData.LP.environmentMapHeight = 0; resetAccumulation(); @@ -1316,21 +1258,11 @@ void updateComponents() // Manage Entities: Build / Rebuild TLAS auto dirtyEntities = Entity::getDirtyEntities(); if (dirtyEntities.size() > 0) { - // Surface instances - std::vector surfaceInstances; - std::vector t0SurfaceTransforms; - std::vector t1SurfaceTransforms; - std::vector surfaceMasks; - std::vector surfaceInstanceToEntity; - - // Volume instances - std::vector volumeInstances; - std::vector t0VolumeTransforms; - std::vector t1VolumeTransforms; - 
std::vector volumeMasks; - std::vector volumeInstanceToEntity; - - // Todo: curves... + std::vector instances; + std::vector t0Transforms; + std::vector t1Transforms; + std::vector masks; + std::vector instanceToEntity; // Aggregate instanced geometry and transformations Entity* entities = Entity::getFront(); @@ -1349,6 +1281,14 @@ void updateComponents() // Get instance transformation glm::mat4 prevLocalToWorld = entities[eid].getTransform()->getLocalToWorldMatrix(/*previous = */true); glm::mat4 localToWorld = entities[eid].getTransform()->getLocalToWorldMatrix(/*previous = */false); + t0Transforms.push_back(prevLocalToWorld); + t1Transforms.push_back(localToWorld); + + // Get instance mask + masks.push_back(entities[eid].getStruct().flags); + + // Indirection from instance back to entity ID + instanceToEntity.push_back(eid); // Add any instanced mesh geometry to the list if (entities[eid].getMesh()) { @@ -1359,98 +1299,63 @@ void updateComponents() // Mark it as dirty. It should be available in a subsequent frame entities[eid].getMesh()->markDirty(); return; } - surfaceInstances.push_back(blas); - surfaceInstanceToEntity.push_back(eid); - t0SurfaceTransforms.push_back(prevLocalToWorld); - t1SurfaceTransforms.push_back(localToWorld); - surfaceMasks.push_back(entities[eid].getStruct().flags); + instances.push_back(blas); } // Add any instanced volume geometry to the list - if (entities[eid].getVolume()) { + else if (entities[eid].getVolume()) { uint32_t address = entities[eid].getVolume()->getAddress(); OWLGroup blas = OD.volumeBlasList[address]; if (!blas) { // Same as meshes, if BLAS doesn't exist, force BLAS build and try again. 
entities[eid].getMesh()->markDirty(); return; } - volumeInstances.push_back(blas); - volumeInstanceToEntity.push_back(eid); - t0VolumeTransforms.push_back(prevLocalToWorld); - t1VolumeTransforms.push_back(localToWorld); - volumeMasks.push_back(entities[eid].getStruct().flags); - } + instances.push_back(blas); + } + + else { + throw std::runtime_error("Internal Error, renderable entity has no mesh or volume components!?"); + } } - std::vector owlSurfaceVisibilityMasks; - std::vector t0OwlSurfaceTransforms; - std::vector t1OwlSurfaceTransforms; - std::vector owlVolumeVisibilityMasks; - std::vector t0OwlVolumeTransforms; - std::vector t1OwlVolumeTransforms; - auto oldSurfaceIAS = OD.surfacesIAS; - auto oldVolumeIAS = OD.volumesIAS; + std::vector owlVisibilityMasks; + std::vector t0OwlTransforms; + std::vector t1OwlTransforms; + auto oldIAS = OD.IAS; - // If no surfaces instanced, insert an unhittable placeholder. + // If no objects are instanced, insert an unhittable placeholder. // (required for certain older driver versions) - if (surfaceInstances.size() == 0) { - OD.surfacesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(OD.surfacesIAS, 0, OD.placeholderGroup); - groupBuildAccel(OD.surfacesIAS); + if (instances.size() == 0) { + OD.IAS = instanceGroupCreate(OD.context, 1); + instanceGroupSetChild(OD.IAS, 0, OD.placeholderGroup); + groupBuildAccel(OD.IAS); } - // If no volumes instanced, insert an unhittable placeholder. 
- // (required for certain older driver versions) - if (volumeInstances.size() == 0) { - OD.volumesIAS = instanceGroupCreate(OD.context, 1); - instanceGroupSetChild(OD.volumesIAS, 0, OD.placeholderUserGroup); - groupBuildAccel(OD.volumesIAS); - } - - // Set surface transforms to IAS, upload surface instance to entity map - if (surfaceInstances.size() > 0) { - OD.surfacesIAS = instanceGroupCreate(OD.context, surfaceInstances.size()); - for (uint32_t iid = 0; iid < surfaceInstances.size(); ++iid) { - instanceGroupSetChild(OD.surfacesIAS, iid, surfaceInstances[iid]); - t0OwlSurfaceTransforms.push_back(glmToOWL(t0SurfaceTransforms[iid])); - t1OwlSurfaceTransforms.push_back(glmToOWL(t1SurfaceTransforms[iid])); - owlSurfaceVisibilityMasks.push_back(surfaceMasks[iid]); + // Set instance transforms and masks, upload instance to entity map + if (instances.size() > 0) { + OD.IAS = instanceGroupCreate(OD.context, instances.size()); + for (uint32_t iid = 0; iid < instances.size(); ++iid) { + instanceGroupSetChild(OD.IAS, iid, instances[iid]); + t0OwlTransforms.push_back(glmToOWL(t0Transforms[iid])); + t1OwlTransforms.push_back(glmToOWL(t1Transforms[iid])); + owlVisibilityMasks.push_back(masks[iid]); } - owlInstanceGroupSetTransforms(OD.surfacesIAS,0,(const float*)t0OwlSurfaceTransforms.data()); - owlInstanceGroupSetTransforms(OD.surfacesIAS,1,(const float*)t1OwlSurfaceTransforms.data()); - owlInstanceGroupSetVisibilityMasks(OD.surfacesIAS, owlSurfaceVisibilityMasks.data()); - owlBufferResize(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.size()); - owlBufferUpload(OD.surfaceInstanceToEntityBuffer, surfaceInstanceToEntity.data()); + owlInstanceGroupSetTransforms(OD.IAS,0,(const float*)t0OwlTransforms.data()); + owlInstanceGroupSetTransforms(OD.IAS,1,(const float*)t1OwlTransforms.data()); + owlInstanceGroupSetVisibilityMasks(OD.IAS, owlVisibilityMasks.data()); + owlBufferResize(OD.instanceToEntityBuffer, instanceToEntity.size()); + 
owlBufferUpload(OD.instanceToEntityBuffer, instanceToEntity.data()); } - // Set volume transforms to IAS, upload volume instance to entity map - if (volumeInstances.size() > 0) { - OD.volumesIAS = instanceGroupCreate(OD.context, volumeInstances.size()); - for (uint32_t iid = 0; iid < volumeInstances.size(); ++iid) { - instanceGroupSetChild(OD.volumesIAS, iid, volumeInstances[iid]); - t0OwlVolumeTransforms.push_back(glmToOWL(t0VolumeTransforms[iid])); - t1OwlVolumeTransforms.push_back(glmToOWL(t1VolumeTransforms[iid])); - owlVolumeVisibilityMasks.push_back(volumeMasks[iid]); - } - owlInstanceGroupSetTransforms(OD.volumesIAS,0,(const float*)t0OwlVolumeTransforms.data()); - owlInstanceGroupSetTransforms(OD.volumesIAS,1,(const float*)t1OwlVolumeTransforms.data()); - owlInstanceGroupSetVisibilityMasks(OD.volumesIAS, owlVolumeVisibilityMasks.data()); - owlBufferResize(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.size()); - owlBufferUpload(OD.volumeInstanceToEntityBuffer, volumeInstanceToEntity.data()); - } - // Build IAS - groupBuildAccel(OD.volumesIAS); - owlParamsSetGroup(OD.launchParams, "volumesIAS", OD.volumesIAS); - groupBuildAccel(OD.surfacesIAS); - owlParamsSetGroup(OD.launchParams, "surfacesIAS", OD.surfacesIAS); - + groupBuildAccel(OD.IAS); + owlParamsSetGroup(OD.launchParams, "IAS", OD.IAS); + // Now that IAS have changed, we need to rebuild SBT owlBuildSBT(OD.context); // Release any old IAS (TODO, don't rebuild if entity edit doesn't effect IAS...) - if (oldSurfaceIAS) {owlGroupRelease(oldSurfaceIAS);} - if (oldVolumeIAS) {owlGroupRelease(oldVolumeIAS);} + if (oldIAS) {owlGroupRelease(oldIAS);} // Aggregate entities that are light sources (todo: consider emissive volumes...) 
OD.lightEntities.resize(0); @@ -1608,19 +1513,6 @@ void updateComponents() auto dirtyTransforms = Transform::getDirtyTransforms(); if (dirtyTransforms.size() > 0) { Transform::updateComponents(); - - // // for each device - // for (uint32_t id = 0; id < owlGetDeviceCount(OptixData.context); ++id) - // { - // cudaSetDevice(id); - - // TransformStruct* devTransforms = (TransformStruct*)owlBufferGetPointer(OptixData.transformBuffer, id); - // TransformStruct* transformStructs = Transform::getFrontStruct(); - // for (auto &t : dirtyTransforms) { - // if (!t->isInitialized()) continue; - // CUDA_CHECK(cudaMemcpy(&devTransforms[t->getAddress()], &transformStructs[t->getAddress()], sizeof(TransformStruct), cudaMemcpyHostToDevice)); - // } - // } // cudaSetDevice(0); owlBufferUpload(OptixData.transformBuffer, Transform::getFrontStruct()); @@ -2450,20 +2342,6 @@ void renderToFile(uint32_t width, uint32_t height, uint32_t samplesPerPixel, std } } -// void renderDataToPNG(uint32_t width, uint32_t height, uint32_t startFrame, uint32_t frameCount, uint32_t bounce, std::string field, std::string imagePath) -// { -// std::vector fb = renderData(width, height, startFrame, frameCount, bounce, field); -// std::vector colors(4 * width * height); -// for (size_t i = 0; i < (width * height); ++i) { -// colors[i * 4 + 0] = uint8_t(glm::clamp(fb[i * 4 + 0] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 1] = uint8_t(glm::clamp(fb[i * 4 + 1] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 2] = uint8_t(glm::clamp(fb[i * 4 + 2] * 255.f, 0.f, 255.f)); -// colors[i * 4 + 3] = uint8_t(glm::clamp(fb[i * 4 + 3] * 255.f, 0.f, 255.f)); -// } -// stbi_flip_vertically_on_write(true); -// stbi_write_png(imagePath.c_str(), width, height, /* num channels*/ 4, colors.data(), /* stride in bytes */ width * 4); -// } - void initializeComponentFactories( uint32_t maxEntities, uint32_t maxCameras, From 94316a6320ee8c92532349b8cdc69a97e387025d Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 14 Apr 2021 21:35:12 -0600 
Subject: [PATCH 18/55] refacting volume rendering code. Now much cheaper to render surfaces without any volumes present --- externals/owl | 2 +- include/nvisii/entity_struct.h | 14 +- src/nvisii/devicecode/path_tracer.cu | 614 +++++++++++++-------------- src/nvisii/entity.cpp | 1 + src/nvisii/nvisii.cpp | 3 - 5 files changed, 313 insertions(+), 321 deletions(-) diff --git a/externals/owl b/externals/owl index f1d01daa..0f82536c 160000 --- a/externals/owl +++ b/externals/owl @@ -1 +1 @@ -Subproject commit f1d01daa451151e6ac64c7f9d6662b354b467384 +Subproject commit 0f82536cd56668ca786d4f5e8eb796e6b03752c9 diff --git a/include/nvisii/entity_struct.h b/include/nvisii/entity_struct.h index 9e77c832..86b0d396 100644 --- a/include/nvisii/entity_struct.h +++ b/include/nvisii/entity_struct.h @@ -6,12 +6,12 @@ #ifndef ENTITY_VISIBILITY_FLAGS #define ENTITY_VISIBILITY_FLAGS -#define ENTITY_VISIBILITY_CAMERA_RAYS (1<<0) -#define ENTITY_VISIBILITY_DIFFUSE_RAYS (1<<1) -#define ENTITY_VISIBILITY_GLOSSY_RAYS (1<<2) -#define ENTITY_VISIBILITY_TRANSMISSION_RAYS (1<<3) -#define ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS (1<<4) -#define ENTITY_VISIBILITY_SHADOW_RAYS (1<<5) +#define ENTITY_VISIBILITY_CAMERA_RAYS (1<<0) // object is visible to direct camera rays +#define ENTITY_VISIBILITY_DIFFUSE_RAYS (1<<1) // object is visible to diffuse rays +#define ENTITY_VISIBILITY_GLOSSY_RAYS (1<<2) // object is visible to glossy rays +#define ENTITY_VISIBILITY_TRANSMISSION_RAYS (1<<3) // object is visible to transmission rays +#define ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS (1<<4) // object is visible to multiple-scattering volume rays +#define ENTITY_VISIBILITY_SHADOW_RAYS (1<<5) // object is visible to shadow rays (ie, casts shadows) #endif struct EntityStruct { @@ -22,7 +22,7 @@ struct EntityStruct { int32_t light_id = -1; int32_t mesh_id = -1; int32_t volume_id = -1; - int32_t flags = 1; + uint32_t flags = (uint32_t)-1; glm::vec4 bbmin = glm::vec4(0.f); glm::vec4 bbmax = glm::vec4(0.f); }; \ No 
newline at end of file diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index e292428b..1def657d 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -263,190 +263,173 @@ OPTIX_CLOSEST_HIT_PROGRAM(VolumeMesh)() { auto &LP = optixLaunchParams; RayPayload &prd = owl::getPRD(); - const auto &self = owl::getProgramData(); - LCGRand rng = prd.rng; - - // Load the volume we hit - GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); - uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; - const auto grid = reinterpret_cast(hdl); - const auto& tree = grid->tree(); - auto acc = tree.getAccessor(); - - auto bbox = acc.root().bbox(); - auto mx = bbox.max(); - auto mn = bbox.min(); - glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + - (glm::vec3(mx[0], mx[1], mx[2]) - - glm::vec3(mn[0], mn[1], mn[2])) * .5f; - - float majorant_extinction = acc.root().valueMax(); - float gradient_factor = volume.gradient_factor; - float linear_attenuation_unit = volume.scale; - float absorption = volume.absorption; - float scattering = volume.scattering; - - vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; - vec3 w = make_vec3(prd.objectSpaceRayDirection); - - linear_attenuation_unit /= length(w); - - // Move ray to volume boundary - float t0 = prd.t0, t1 = prd.t1; - x = x + t0 * w; - t1 = t1 - t0; - t0 = 0.f; - - // Sample the free path distance to see if our ray makes it to the boundary - float t; - int event; - bool hitVolume = false; - #define MAX_NULL_COLLISIONS 10000 - for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { - SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, - absorption, scattering, x, w, t1, t, event); - x = x + t * w; - - // The boundary was hit - if (event == 0) { - break; - } - - // An absorption / emission event occurred - if (event == 1) { - hitVolume = true; - break; - } + // const auto &self = 
owl::getProgramData(); + // LCGRand rng = prd.rng; + + // // Load the volume we hit + // GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); + // uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; + // const auto grid = reinterpret_cast(hdl); + // const auto& tree = grid->tree(); + // auto acc = tree.getAccessor(); + + // auto bbox = acc.root().bbox(); + // auto mx = bbox.max(); + // auto mn = bbox.min(); + // glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + + // (glm::vec3(mx[0], mx[1], mx[2]) - + // glm::vec3(mn[0], mn[1], mn[2])) * .5f; + + // float majorant_extinction = acc.root().valueMax(); + // float gradient_factor = volume.gradient_factor; + // float linear_attenuation_unit = volume.scale; + // float absorption = volume.absorption; + // float scattering = volume.scattering; + + // vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; + // vec3 w = make_vec3(prd.objectSpaceRayDirection); + + // linear_attenuation_unit /= length(w); + + // // Move ray to volume boundary + // float t0 = prd.t0, t1 = prd.t1; + // x = x + t0 * w; + // t1 = t1 - t0; + // t0 = 0.f; + + // // Sample the free path distance to see if our ray makes it to the boundary + // float t; + // int event; + // bool hitVolume = false; + // #define MAX_NULL_COLLISIONS 10000 + // for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { + // SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, + // absorption, scattering, x, w, t1, t, event); + // x = x + t * w; + + // // The boundary was hit + // if (event == 0) { + // break; + // } + + // // An absorption / emission event occurred + // if (event == 1) { + // hitVolume = true; + // break; + // } + + // // A scattering event occurred + // if (event == 2) { + // hitVolume = true; + // break; + // } + + // // A null collision occurred. + // if (event == 3) { + // // update boundary in relation to the new collision x, w does not change. 
+ // t1 = t1 - t; + // } + // } - // A scattering event occurred - if (event == 2) { - hitVolume = true; - break; - } + optixGetObjectToWorldTransformMatrix(prd.localToWorld); - // A null collision occurred. - if (event == 3) { - // update boundary in relation to the new collision x, w does not change. - t1 = t1 - t; - } - } + // If we don't need motion vectors, (or in the future if an object + // doesn't have motion blur) then return. + if (LP.renderDataMode == RenderDataFlags::NONE) return; - if (!hitVolume) { - prd.tHit = -1.f; - } - else { - prd.instanceID = optixGetInstanceIndex(); - prd.eventID = event; - prd.tHit = t; - - auto sampler = nanovdb::SampleFromVoxels, /*Interpolation Degree*/1, /*UseCache*/false>(acc); - auto coord_pos = nanovdb::Coord::Floor( nanovdb::Vec3f(x.x, x.y, x.z) ); - float densityValue = acc.getValue(coord_pos); - auto g = sampler.gradient(nanovdb::Vec3f(x.x, x.y, x.z)); - - prd.mp = make_float3(x - offset); // not super confident about this offset... - prd.gradient = make_float3(g[0], g[1], g[2]);// TEMPORARY FOR BUNNY - prd.density = densityValue; - optixGetObjectToWorldTransformMatrix(prd.localToWorld); - - // If we don't need motion vectors, (or in the future if an object - // doesn't have motion blur) then return. 
- if (LP.renderDataMode == RenderDataFlags::NONE) return; + OptixTraversableHandle handle = optixGetTransformListHandle(prd.instanceID); + float4 trf00, trf01, trf02; + float4 trf10, trf11, trf12; - OptixTraversableHandle handle = optixGetTransformListHandle(prd.instanceID); - float4 trf00, trf01, trf02; - float4 trf10, trf11, trf12; - - optix_impl::optixGetInterpolatedTransformationFromHandle( trf00, trf01, trf02, handle, /* time */ 0.f, true ); - optix_impl::optixGetInterpolatedTransformationFromHandle( trf10, trf11, trf12, handle, /* time */ 1.f, true ); - memcpy(&prd.localToWorldT0[0], &trf00, sizeof(trf00)); - memcpy(&prd.localToWorldT0[4], &trf01, sizeof(trf01)); - memcpy(&prd.localToWorldT0[8], &trf02, sizeof(trf02)); - memcpy(&prd.localToWorldT1[0], &trf10, sizeof(trf10)); - memcpy(&prd.localToWorldT1[4], &trf11, sizeof(trf11)); - memcpy(&prd.localToWorldT1[8], &trf12, sizeof(trf12)); - } + optix_impl::optixGetInterpolatedTransformationFromHandle( trf00, trf01, trf02, handle, /* time */ 0.f, true ); + optix_impl::optixGetInterpolatedTransformationFromHandle( trf10, trf11, trf12, handle, /* time */ 1.f, true ); + memcpy(&prd.localToWorldT0[0], &trf00, sizeof(trf00)); + memcpy(&prd.localToWorldT0[4], &trf01, sizeof(trf01)); + memcpy(&prd.localToWorldT0[8], &trf02, sizeof(trf02)); + memcpy(&prd.localToWorldT1[0], &trf10, sizeof(trf10)); + memcpy(&prd.localToWorldT1[4], &trf11, sizeof(trf11)); + memcpy(&prd.localToWorldT1[8], &trf12, sizeof(trf12)); } OPTIX_CLOSEST_HIT_PROGRAM(VolumeShadowRay)() { - auto &LP = optixLaunchParams; - const auto &self = owl::getProgramData(); - RayPayload &prd = owl::getPRD(); - LCGRand rng = prd.rng; - - GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); - uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; - const auto grid = reinterpret_cast(hdl); - const auto& tree = grid->tree(); - auto acc = tree.getAccessor(); - - auto bbox = acc.root().bbox(); - auto mx = bbox.max(); - auto mn = 
bbox.min(); - glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + - (glm::vec3(mx[0], mx[1], mx[2]) - - glm::vec3(mn[0], mn[1], mn[2])) * .5f; - - float majorant_extinction = acc.root().valueMax(); - float gradient_factor = volume.gradient_factor; - float linear_attenuation_unit = volume.scale; - float absorption = volume.absorption; - float scattering = volume.scattering; - - vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; - vec3 w = make_vec3(prd.objectSpaceRayDirection); - - linear_attenuation_unit /= length(w); - - // Move ray to volume boundary - float t0 = prd.t0, t1 = prd.t1; - x = x + t0 * w; - t1 = t1 - t0; - t0 = 0.f; - - // Sample the free path distance to see if our ray makes it to the boundary - float t; - int event; - bool hitVolume = false; - #define MAX_NULL_COLLISIONS 10000 - for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { - SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, - absorption, scattering, x, w, t1, t, event); - x = x + t * w; - - // The boundary was hit - if (event == 0) { - break; - } - - // An absorption / emission event occurred - if (event == 1) { - hitVolume = true; - break; - } - - // A scattering event occurred - if (event == 2) { - hitVolume = true; - break; - } - - // A null collision occurred. - if (event == 3) { - // update boundary in relation to the new collision x, w does not change. 
- t1 = t1 - t; - } - } + // auto &LP = optixLaunchParams; + // const auto &self = owl::getProgramData(); + // RayPayload &prd = owl::getPRD(); + // LCGRand rng = prd.rng; + + // GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); + // uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; + // const auto grid = reinterpret_cast(hdl); + // const auto& tree = grid->tree(); + // auto acc = tree.getAccessor(); + + // auto bbox = acc.root().bbox(); + // auto mx = bbox.max(); + // auto mn = bbox.min(); + // glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + + // (glm::vec3(mx[0], mx[1], mx[2]) - + // glm::vec3(mn[0], mn[1], mn[2])) * .5f; + + // float majorant_extinction = acc.root().valueMax(); + // float gradient_factor = volume.gradient_factor; + // float linear_attenuation_unit = volume.scale; + // float absorption = volume.absorption; + // float scattering = volume.scattering; + + // vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; + // vec3 w = make_vec3(prd.objectSpaceRayDirection); + + // linear_attenuation_unit /= length(w); + + // // Move ray to volume boundary + // float t0 = prd.t0, t1 = prd.t1; + // x = x + t0 * w; + // t1 = t1 - t0; + // t0 = 0.f; + + // // Sample the free path distance to see if our ray makes it to the boundary + // float t; + // int event; + // bool hitVolume = false; + // #define MAX_NULL_COLLISIONS 10000 + // for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { + // SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, + // absorption, scattering, x, w, t1, t, event); + // x = x + t * w; + + // // The boundary was hit + // if (event == 0) { + // break; + // } + + // // An absorption / emission event occurred + // if (event == 1) { + // hitVolume = true; + // break; + // } + + // // A scattering event occurred + // if (event == 2) { + // hitVolume = true; + // break; + // } + + // // A null collision occurred. 
+ // if (event == 3) { + // // update boundary in relation to the new collision x, w does not change. + // t1 = t1 - t; + // } + // } - if (!hitVolume) { - prd.tHit = -1.f; - } - else { - prd.instanceID = optixGetInstanceIndex(); - prd.eventID = event; - prd.tHit = t; - } + // if (!hitVolume) { + // prd.tHit = -1.f; + // } + // else { + // prd.instanceID = optixGetInstanceIndex(); + // prd.eventID = event; + // prd.tHit = t; + // } } OPTIX_INTERSECT_PROGRAM(VolumeIntersection)() @@ -454,7 +437,9 @@ OPTIX_INTERSECT_PROGRAM(VolumeIntersection)() // float old_tmax = optixGetRayTmax(); // const int primID = optixGetPrimitiveIndex(); + auto &LP = optixLaunchParams; const auto &self = owl::getProgramData(); + RayPayload &prd = owl::getPRD(); float3 origin = optixGetObjectRayOrigin(); // note, this is _not_ normalized. Useful for computing world space tmin/mmax @@ -492,12 +477,78 @@ OPTIX_INTERSECT_PROGRAM(VolumeIntersection)() // clip hit to near position thit0 = max(thit0, optixGetRayTmin()); - RayPayload &prd = owl::getPRD(); - if (optixReportIntersection(thit0, /* hit kind */ 0)) { - prd.objectSpaceRayOrigin = origin; - prd.objectSpaceRayDirection = direction; - prd.t0 = max(prd.t0, thit0); - prd.t1 = min(prd.t1, thit1); + // Load the volume we hit + GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); + uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; + const auto grid = reinterpret_cast(hdl); + const auto& tree = grid->tree(); + auto acc = tree.getAccessor(); + auto nvdbSampler = nanovdb::SampleFromVoxels, + /*Interpolation Degree*/1, /*UseCache*/false>(acc); + + float majorant_extinction = acc.root().valueMax(); + float gradient_factor = volume.gradient_factor; + float linear_attenuation_unit = volume.scale; + float absorption = volume.absorption; + float scattering = volume.scattering; + + auto bbox = acc.root().bbox(); + auto mx = bbox.max(); + auto mn = bbox.min(); + float3 offset = make_float3(glm::vec3(mn[0], mn[1], 
mn[2]) + + (glm::vec3(mx[0], mx[1], mx[2]) - + glm::vec3(mn[0], mn[1], mn[2])) * .5f); + + // Sample the free path distance to see if our ray makes it to the boundary + float t = thit0; + int event; + bool hitVolume = false; + float unit = volume.scale / length(direction); + #define MAX_NULL_COLLISIONS 1000 + for (int i = 0; i < MAX_NULL_COLLISIONS; ++i) { + // Sample a distance + t = t - (log(1.0f - lcg_randomf(prd.rng)) / majorant_extinction) * unit; + + // A boundary has been hit, no intersection + if (t >= thit1) return; + + // Update current position + float3 x = offset + origin + t * direction; + + // Sample heterogeneous media + float densityValue = nvdbSampler(nanovdb::Vec3f(x.x, x.y, x.z)); + + float a = densityValue * absorption; + float s = densityValue * scattering; + float e = a + s; + float n = majorant_extinction - e; + + a = a / majorant_extinction; + s = s / majorant_extinction; + n = n / majorant_extinction; + + float event = lcg_randomf(prd.rng); + // An absorption/emission collision occured + if (event < (a + s)) { + if (optixReportIntersection(t, /* hit kind */ 0)) { + auto g = nvdbSampler.gradient(nanovdb::Vec3f(x.x, x.y, x.z)); + prd.objectSpaceRayOrigin = origin; + prd.objectSpaceRayDirection = direction; + prd.eventID = (event < a) ? 1 : 2; + prd.instanceID = optixGetInstanceIndex(); + prd.tHit = t; + prd.mp = x - offset; // not super confident about this offset... + prd.gradient = make_float3(g[0], g[1], g[2]);// TEMPORARY FOR BUNNY + prd.density = densityValue; + } + return; + } + + // A null collision occurred + else { + event = 3; + continue; + } } } @@ -930,7 +981,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float time = sampleTime(lcg_randomf(rng)); // If no camera is in use, just display some random noise... 
- owl::Ray surfRay; + owl::Ray ray; EntityStruct camera_entity; TransformStruct camera_transform; CameraStruct camera; @@ -941,8 +992,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // Trace an initial ray through the scene - surfRay = generateRay(camera, camera_transform, pixelID, make_float2(LP.frameSize), rng, time); - surfRay.tmax = tmax; + ray = generateRay(camera, camera_transform, pixelID, make_float2(LP.frameSize), rng, time); + ray.tmax = tmax; float3 accum_illum = make_float3(0.f); float3 pathThroughput = make_float3(1.f); @@ -964,25 +1015,13 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float3 directIllum = make_float3(0.f); float3 illum = make_float3(0.f); - RayPayload surfPayload; - surfPayload.tHit = -1.f; - surfRay.time = time; + RayPayload payload; + payload.tHit = -1.f; + ray.time = time; + ray.visibilityMask = ENTITY_VISIBILITY_CAMERA_RAYS; owl::traceRay( /*accel to trace against*/ LP.IAS, - /*the ray to trace*/ surfRay, - /*prd*/ surfPayload, - OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - owl::Ray volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - RayPayload volPayload; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? 
-2 : -1; - owl::traceRay( /*accel to trace against*/ LP.IAS, - /*the ray to trace*/ volRay, - /*prd*/ volPayload, + /*the ray to trace*/ ray, + /*prd*/ payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Shade each hit point on a path using NEE with MIS @@ -990,22 +1029,22 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float alpha = 0.f; // If ray misses, terminate the ray - if ((surfPayload.tHit <= 0.f) && (volPayload.tHit <= 0.f)) { + if (payload.tHit <= 0.f) { // Compute lighting from environment if (depth == 0) { - float3 col = missColor(surfRay, envTex); + float3 col = missColor(ray, envTex); illum = illum + pathThroughput * (col * LP.domeLightIntensity); directIllum = illum; primaryAlbedo = col; } else if (enableDomeSampling) - illum = illum + pathThroughput * (missColor(surfRay, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); + illum = illum + pathThroughput * (missColor(ray, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); const float envDist = 10000.0f; // large value /* Compute miss motion vector */ float3 mvec; // Point far away - float3 pFar = surfRay.origin + surfRay.direction * envDist; + float3 pFar = ray.origin + ray.direction * envDist; // TODO: account for motion from rotating dome light vec4 tmp1 = LP.proj * LP.viewT0 * /*xfmt0 **/ make_vec4(pFar, 1.0f); float3 pt0 = make_float3(tmp1 / tmp1.w) * .5f; @@ -1016,15 +1055,13 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() break; } - bool isVolume = (volPayload.tHit >= 0.f); // Load the object we hit. 
- int entityID; - if (isVolume) { GET(entityID, int, LP.instanceToEntity, volPayload.instanceID); } - else { GET(entityID, int, LP.instanceToEntity, surfPayload.instanceID); } - + GET(int entityID, int, LP.instanceToEntity, payload.instanceID); GET(EntityStruct entity, EntityStruct, LP.entities, entityID); GET(TransformStruct transform, TransformStruct, LP.transforms, entity.transform_id); + + bool isVolume = (entity.volume_id != -1); MeshStruct mesh; VolumeStruct volume; if (!isVolume) { GET(mesh, MeshStruct, LP.meshes, entity.mesh_id); } @@ -1032,48 +1069,37 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // Skip forward if the hit object is invisible for this ray type, skip it. if (((entity.flags & ENTITY_VISIBILITY_CAMERA_RAYS) == 0)) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.IAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -3 : -1; - owl::traceRay( LP.IAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); + payload.tHit = -1.f; + ray.time = time; + owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); transparencyDepth++; if (transparencyDepth > LP.maxTransparencyDepth) break; continue; } // Set new outgoing light direction and hit position. 
- const float3 w_o = -surfRay.direction; - float3 hit_p; - if (volPayload.tHit >= 0.f) hit_p = volRay.origin + volPayload.tHit * volRay.direction; - else hit_p = surfRay.origin + surfPayload.tHit * surfRay.direction; + const float3 w_o = -ray.direction; + float3 hit_p = ray.origin + payload.tHit * ray.direction; // Load geometry data for the hit object float3 mp, p, v_x, v_y, v_z, v_gz, v_bz; float2 uv; float3 diffuseMotion; - if (volPayload.tHit >= 0.f) { + if (isVolume) { v_x = v_y = make_float3(0.f); // Perhaps I could use divergence / curl here? - v_z = v_gz = normalize(volPayload.gradient); + v_z = v_gz = normalize(payload.gradient); if (any(isnan(make_vec3(v_z)))) v_z = v_gz = make_float3(0.f); - mp = volPayload.mp; - uv = make_float2(volPayload.density, length(volPayload.gradient)); + mp = payload.mp; + uv = make_float2(payload.density, length(payload.gradient)); } else { int3 indices; - loadMeshTriIndices(entity.mesh_id, mesh.numTris, surfPayload.primitiveID, indices); - loadMeshVertexData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, mp, v_gz); - loadMeshUVData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, uv); - loadMeshNormalData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, v_z); - loadMeshTangentData(entity.mesh_id, mesh.numVerts, indices, surfPayload.barycentrics, v_x); + loadMeshTriIndices(entity.mesh_id, mesh.numTris, payload.primitiveID, indices); + loadMeshVertexData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, mp, v_gz); + loadMeshUVData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, uv); + loadMeshNormalData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, v_z); + loadMeshTangentData(entity.mesh_id, mesh.numVerts, indices, payload.barycentrics, v_x); } // Load material data for the hit object @@ -1085,7 +1111,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // Transform geometry data into world space { - glm::mat4 xfm = to_mat4((volPayload.tHit >= 0.f) ? 
volPayload.localToWorld : surfPayload.localToWorld); + glm::mat4 xfm = to_mat4(payload.localToWorld); p = make_float3(xfm * make_vec4(mp, 1.0f)); hit_p = p; glm::mat3 nxfm = transpose(glm::inverse(glm::mat3(xfm))); @@ -1096,8 +1122,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() v_x = cross(v_y, v_z); if (LP.renderDataMode != RenderDataFlags::NONE) { - glm::mat4 xfmt0 = to_mat4((volPayload.tHit >= 0.f) ? volPayload.localToWorldT0 : surfPayload.localToWorldT0); - glm::mat4 xfmt1 = to_mat4((volPayload.tHit >= 0.f) ? volPayload.localToWorldT1 : surfPayload.localToWorldT1); + glm::mat4 xfmt0 = to_mat4(payload.localToWorldT0); + glm::mat4 xfmt1 = to_mat4(payload.localToWorldT1); vec4 tmp1 = LP.proj * LP.viewT0 * xfmt0 * make_vec4(mp, 1.0f); vec4 tmp2 = LP.proj * LP.viewT1 * xfmt1 * make_vec4(mp, 1.0f); float3 pt0 = make_float3(tmp1 / tmp1.w) * .5f; @@ -1176,7 +1202,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // For segmentations, save geometric metadata - saveGeometricRenderData(renderData, depth, surfPayload.tHit, hit_p, v_z, w_o, uv, entityID, diffuseMotion, time, mat); + saveGeometricRenderData(renderData, depth, payload.tHit, hit_p, v_z, w_o, uv, entityID, diffuseMotion, time, mat); if (depth == 0) { primaryAlbedo = mat.base_color; primaryNormal = v_z; @@ -1187,20 +1213,10 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float alpha_rnd = lcg_randomf(rng); if (alpha_rnd > mat.alpha) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.IAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? 
-4 : -1; - owl::traceRay( LP.IAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - + ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); + payload.tHit = -1.f; + ray.time = time; + owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); ++depth; transparencyDepth++; continue; @@ -1211,14 +1227,14 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // Note that NEE/MIS will also potentially terminate the path, preventing double-counting. // todo: account for volumetric emission here... if (entity.light_id >= 0 && entity.light_id < LP.lights.count) { - float dotNWi = max(dot(surfRay.direction, v_z), 0.f); + float dotNWi = max(dot(ray.direction, v_z), 0.f); if ((dotNWi > EPSILON) && (depth != 0)) break; GET(LightStruct entityLight, LightStruct, LP.lights, entity.light_id); float3 lightEmission; if (entityLight.color_texture_id == -1) lightEmission = make_float3(entityLight.r, entityLight.g, entityLight.b); else lightEmission = sampleTexture(entityLight.color_texture_id, uv, make_float3(0.f, 0.f, 0.f)); - float dist = surfPayload.tHit; + float dist = payload.tHit; lightEmission = (lightEmission * entityLight.intensity); if (depth != 0) lightEmission = (lightEmission * pow(2.f, entityLight.exposure)) / max((dist * dist), 1.f); float3 contribution = pathThroughput * lightEmission; @@ -1233,7 +1249,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float3 irradiance = make_float3(0.f); // If we hit a volume, use hybrid scattering to determine whether or not to use a BRDF or a phase function. - if (volPayload.tHit >= 0.f) { + if (isVolume) { float opacity = mat.alpha; // would otherwise be sampled from a transfer function float grad_len = uv.y; float p_brdf = opacity * (1.f - exp(-25.f * pow(volume.gradient_factor, 3.f) * grad_len)); @@ -1257,7 +1273,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() w_i, bsdfPDF, sampledBsdf, bsdf); // outputs } else { /* a scatter event occurred */ - if (volPayload.eventID == 2) { + if (payload.eventID == 2) { // currently isotropic. 
Todo: implement henyey greenstien... float rand1 = lcg_randomf(rng); float rand2 = lcg_randomf(rng); @@ -1273,7 +1289,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } /* An absorption / emission event occurred */ - if (volPayload.eventID == 1) { + if (payload.eventID == 1) { bsdfPDF = 1.f / (4.0 * M_PI); bsdf = make_float3(1.f / (4.0 * M_PI)); w_i = -w_o; @@ -1286,19 +1302,10 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // At this point, if we are refracting and we ran out of transmission bounces, skip forward. // This avoids creating black regions on glass objects due to bounce limits if (sampledBsdf == DISNEY_TRANSMISSION_BRDF && transmissionDepth >= LP.maxTransmissionDepth) { - surfRay.origin = surfRay.origin + surfRay.direction * (surfPayload.tHit + EPSILON); - surfPayload.tHit = -1.f; - surfRay.time = time; - owl::traceRay( LP.IAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - volPayload.tHit = -1.f; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -4 : -1; - owl::traceRay( LP.IAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); + payload.tHit = -1.f; + ray.time = time; + owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Count this as a "transparent" bounce. ++depth; @@ -1426,32 +1433,27 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } lightPDF *= (1.f / float(numLights + 1.f)) * (1.f / float(numTris)); if ((lightPDF > 0.0) && (dotNWi > EPSILON)) { - RayPayload surfPayload; surfPayload.instanceID = -2; - RayPayload volPayload = surfPayload; + RayPayload payload; payload.instanceID = -2; + RayPayload volPayload = payload; owl::RayT ray; // shadow ray ray.tmin = EPSILON * 10.f; ray.tmax = lightDistance + EPSILON; // needs to be distance to light, else anyhit logic breaks. 
ray.origin = hit_p; ray.direction = lightDir; ray.time = time; - owl::traceRay( LP.IAS, ray, surfPayload, occlusion_flags); - ray.tmax = (surfPayload.instanceID == -2) ? ray.tmax : surfPayload.tHit; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -5 : -1; - owl::traceRay( LP.IAS, ray, volPayload, occlusion_flags); + owl::traceRay( LP.IAS, ray, payload, occlusion_flags); + ray.tmax = (payload.instanceID == -2) ? ray.tmax : payload.tHit; bool visible; if (randomID == numLights) { // If we sampled the dome light, just check to see if we hit anything - visible = (surfPayload.instanceID == -2) && (volPayload.instanceID == -2); + visible = (payload.instanceID == -2); } else { // If we sampled a light source, then check to see if we hit something other than the light int surfEntity; - if (surfPayload.instanceID == -2) surfEntity = -1; - else { GET(surfEntity, int, LP.instanceToEntity, surfPayload.instanceID); } - visible = (volPayload.instanceID == -2) && (surfPayload.instanceID == -2 || surfEntity == sampledLightID); + if (payload.instanceID == -2) surfEntity = -1; + else { GET(surfEntity, int, LP.instanceToEntity, payload.instanceID); } + visible = (payload.instanceID == -2 || surfEntity == sampledLightID); } if (visible) { - if (randomID != numLights) lightEmission = lightEmission / max(pow(surfPayload.tHit, falloff),1.f); + if (randomID != numLights) lightEmission = lightEmission / max(pow(payload.tHit, falloff),1.f); float w = power_heuristic(1.f, lightPDF, 1.f, bsdfPDF); float3 Li = (lightEmission * w) / lightPDF; irradiance = irradiance + (l_bsdf * Li); @@ -1469,33 +1471,25 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // Next, sample a light source using the importance sampled BDRF direction. 
- surfRay.origin = hit_p; - surfRay.direction = w_i; - surfRay.tmin = EPSILON;//* 100.f; - surfPayload.instanceID = -1; - surfPayload.tHit = -1.f; - surfRay.time = sampleTime(lcg_randomf(rng)); - owl::traceRay(LP.IAS, surfRay, surfPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - - volRay = surfRay; - volRay.tmax = (surfPayload.tHit == -1.f) ? volRay.tmax : surfPayload.tHit; - volPayload.rng = rng; - volPayload.t0 = volRay.tmin; - volPayload.t1 = volRay.tmax; - volPayload.primitiveID = (debug) ? -6 : -1; - owl::traceRay(LP.IAS, volRay, volPayload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); + ray.origin = hit_p; + ray.direction = w_i; + ray.tmin = EPSILON;//* 100.f; + payload.instanceID = -1; + payload.tHit = -1.f; + ray.time = sampleTime(lcg_randomf(rng)); + owl::traceRay(LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Check if we hit any of the previously sampled lights bool hitLight = false; if (lightPDF > EPSILON) { - float dotNWi = (useBRDF) ? max(dot(surfRay.direction, v_gz), 0.f) : 1.f; // geometry term + float dotNWi = (useBRDF) ? max(dot(ray.direction, v_gz), 0.f) : 1.f; // geometry term // if by sampling the brdf we also hit the dome light... - if ((surfPayload.instanceID == -1) && (volPayload.instanceID == -1) && (sampledLightID == -1) && enableDomeSampling) { + if ((payload.instanceID == -1) && (sampledLightID == -1) && enableDomeSampling) { // Case where we hit the background, and also previously sampled the background float w = power_heuristic(1.f, bsdfPDF, 1.f, lightPDF); - float3 lightEmission = missColor(surfRay, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure); + float3 lightEmission = missColor(ray, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure); float3 Li = (lightEmission * w) / bsdfPDF; if (dotNWi > 0.f) { @@ -1505,8 +1499,8 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // else if by sampling the brdf we also hit an area light // TODO: consider hitting emissive voxels? 
- else if (surfPayload.instanceID != -1 && volPayload.instanceID == -1) { - GET(int entityID, int, LP.instanceToEntity, surfPayload.instanceID); + else if (payload.instanceID != -1) { + GET(int entityID, int, LP.instanceToEntity, payload.instanceID); bool visible = (entityID == sampledLightID); // We hit the light we sampled previously if (visible) { @@ -1514,10 +1508,10 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() GET(EntityStruct light_entity, EntityStruct, LP.entities, sampledLightID); GET(MeshStruct light_mesh, MeshStruct, LP.meshes, light_entity.mesh_id); GET(LightStruct light_light, LightStruct, LP.lights, light_entity.light_id); - loadMeshTriIndices(light_entity.mesh_id, light_mesh.numTris, surfPayload.primitiveID, indices); - loadMeshUVData(light_entity.mesh_id, light_mesh.numVerts, indices, surfPayload.barycentrics, uv); + loadMeshTriIndices(light_entity.mesh_id, light_mesh.numTris, payload.primitiveID, indices); + loadMeshUVData(light_entity.mesh_id, light_mesh.numVerts, indices, payload.barycentrics, uv); - float dist = surfPayload.tHit; + float dist = payload.tHit; float3 lightEmission; if (light_light.color_texture_id == -1) lightEmission = make_float3(light_light.r, light_light.g, light_light.b) * (light_light.intensity * pow(2.f, light_light.exposure)); diff --git a/src/nvisii/entity.cpp b/src/nvisii/entity.cpp index aac5d55c..5bc7df95 100644 --- a/src/nvisii/entity.cpp +++ b/src/nvisii/entity.cpp @@ -32,6 +32,7 @@ Entity::Entity(std::string name, uint32_t id) { entity.material_id = -1; entity.light_id = -1; entity.mesh_id = -1; + entity.flags = (uint32_t)-1; } std::string Entity::toString() diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 5aea8f48..7f28ef7d 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -1220,15 +1220,12 @@ void updateComponents() // Next, allocate resources for the new volume. 
auto gridHdlPtr = v->getNanoVDBGridHandle(); const nanovdb::FloatGrid* grid = reinterpret_cast(gridHdlPtr.get()->data()); - std::cout<checksum()<tree().getAccessor(); // auto bbox = tree.root().bbox(); auto bbox = grid->tree().bbox().asReal(); // int nodecount = grid->tree().nodeCount(3); - // std::cout<getAddress()] = owlDeviceBufferCreate(OD.context, OWL_USER_TYPE(uint8_t), gridHdlPtr.get()->size(), nullptr); owlBufferUpload(OD.volumeHandles[v->getAddress()], gridHdlPtr.get()->data()); From cf6e014dd725c5b6d43cb465e3ef4728de7f4c39 Mon Sep 17 00:00:00 2001 From: n8vm Date: Thu, 15 Apr 2021 12:31:35 -0600 Subject: [PATCH 19/55] upgrades to volume example --- examples/22.volumes.py | 178 +++++++++++++++++++++++++--------------- include/nvisii/volume.h | 9 +- src/nvisii/volume.cpp | 7 +- 3 files changed, 124 insertions(+), 70 deletions(-) diff --git a/examples/22.volumes.py b/examples/22.volumes.py index 9a74eb14..3b1878df 100644 --- a/examples/22.volumes.py +++ b/examples/22.volumes.py @@ -2,33 +2,31 @@ # 22.volumes.py # -# This shows an example of two volumes. One volume uses the NanoVDB format, -# and the other is a raw volume. +# This shows an example of several volumes. Some volume uses the NanoVDB format, +# others use a raw volume, and then some are generated procedurally. +# This scene tests how volumes can be lit up with light sources, and how they can +# overlap. + +# Note, the API here is subject to change with future versions... import nvisii import numpy as np opt = lambda: None -opt.spp = 50 -opt.width = 512 -opt.height = 512 +opt.spp = 512 +opt.width = 1024 +opt.height = 1024 opt.out = '22_volumes.png' -# headless - no window -# verbose - output number of frames rendered, etc.. nvisii.initialize(headless = False, verbose = True, window_on_top = True) - -#%% -# Use a neural network to denoise ray traced nvisii.enable_denoiser() -# First, lets create an entity that will serve as our camera. 
-camera = nvisii.entity.create(name = "camera") +# Configuring the denoiser here to not use albedo and normal guides, which are +# noisy for volumes +nvisii.configure_denoiser(False, False, True) -# To place the camera into our scene, we'll add a "transform" component. -# (All nvisii objects have a "name" that can be used for easy lookup later.) +# Make a camera... +camera = nvisii.entity.create(name = "camera") camera.set_transform(nvisii.transform.create(name = "camera_transform")) - -# To make our camera entity act like a "camera", we'll add a camera component camera.set_camera( nvisii.camera.create_from_fov( name = "camera_camera", @@ -36,93 +34,131 @@ aspect = opt.width / float(opt.height) ) ) - -# Finally, we'll select this entity to be the current camera entity. -# (nvisii can only use one camera at the time) nvisii.set_camera_entity(camera) +camera.get_transform().look_at(at = (0, 0, .5), up = (0, 0, 1), eye = (0, 5, 2)) + +# Make a dome light +env_tex = nvisii.texture.create_from_file("env_tex", "./content/kiara_4_mid-morning_4k.hdr") +nvisii.enable_dome_light_sampling() +nvisii.set_dome_light_texture(env_tex, enable_cdf=True) +nvisii.set_dome_light_exposure(-2.0) -# Lets set the camera to look at an object. -# We'll do this by editing the transform component. -camera.get_transform().look_at(at = (0, 0, .9), up = (0, 0, 1), eye = (0, 5, 1)) -# Next, lets at an object (a floor). +# Make a textured floor floor = nvisii.entity.create( name = "floor", mesh = nvisii.mesh.create_plane("mesh_floor"), transform = nvisii.transform.create("transform_floor"), material = nvisii.material.create("material_floor") ) - -# Lets make our floor act as a mirror mat = floor.get_material() -# mat = nvisii.material.get("material_floor") # <- this also works -#%% -# Mirrors are smooth and "metallic". 
-mat.set_base_color((1.,1.,1.))
-mat.set_metallic(0)
-mat.set_roughness(1)
-
-# Make the floor large by scaling it
+floor_tex = nvisii.texture.create_from_file("floor_tex", "./content/salle_de_bain_separated/textures/WoodFloor_BaseColor.jpg")
+mat.set_base_color_texture(floor_tex)
 trans = floor.get_transform()
 trans.set_scale((5,5,1))
 
-#%%
-# Let's also add a sphere
+# Make a procedural torus volume
 torus = nvisii.entity.create(
     name="torus",
     volume = nvisii.volume.create_torus("torus"),
     transform = nvisii.transform.create("torus"),
     material = nvisii.material.create("torus")
 )
-#%%
-torus.get_transform().set_position((.5,2,0.35))
+torus.get_transform().set_position((0.8,2,.2))
 torus.get_transform().set_scale((0.003, 0.003, 0.003))
-torus.get_transform().set_angle_axis(3.14 * .25, (1,0,0))
-torus.get_material().set_base_color((.0,0.0,1))
-torus.get_material().set_roughness(0.0)
-torus.get_material().set_transmission(0.0)
-torus.get_volume().set_gradient_factor(10)
-torus.get_volume().set_absorption(0)
-torus.get_volume().set_scattering(1)
-torus.get_volume().set_scale(50)
-
-#%%
-# Let's also add a bunny
+torus.get_transform().set_angle_axis(nvisii.pi() * .5, (1,0,0))
+torus.get_material().set_base_color((1.,1.,1.0))
+# The gradient factor here controls how "surface like" the volume is.
+# Higher values mean "more surface like" in areas where there is a strong
+# gradient in the scalar field of the volume (which occurs near surfaces defined
+# by high density regions)
+torus.get_volume().set_gradient_factor(10)
+
+# Absorption controls the probability of light being absorbed by the volume
+torus.get_volume().set_absorption(1.)
+# Scattering controls the probability of light bouncing off one of the particles in the volume
+torus.get_volume().set_scattering(.0)
+# The scale here controls how "big" a voxel is, where "1" means a voxel is 1cm wide.
+# Larger scales result in particles being distributed over longer distances, +# causing the volume to appear less dense +torus.get_volume().set_scale(100) + +# Create a procedural octahedron +octahedron = nvisii.entity.create( + name="octahedron", + volume = nvisii.volume.create_octahedron("octahedron"), + transform = nvisii.transform.create("octahedron"), + material = nvisii.material.create("octahedron") +) +octahedron.get_transform().set_position((.80,2.0,0.2)) # Note that this octahedron is inside the torus +octahedron.get_transform().set_scale((0.01, 0.01, 0.01)) +octahedron.get_transform().set_angle_axis(nvisii.pi() * .25, (0,0,1)) +octahedron.get_material().set_base_color((1.0,0.0,0)) +octahedron.get_volume().set_gradient_factor(10) +octahedron.get_volume().set_absorption(0) +octahedron.get_volume().set_scattering(1) +octahedron.get_volume().set_scale(15) + +# Create a procedural sphere +sphere = nvisii.entity.create( + name="sphere", + volume = nvisii.volume.create_sphere("sphere"), + transform = nvisii.transform.create("sphere"), + material = nvisii.material.create("sphere") +) +sphere.get_transform().set_position((-1.0,2,0.25)) +sphere.get_transform().set_scale((0.0025, 0.0025, 0.0025)) +sphere.get_material().set_base_color((0.2,0.2,1.0)) +sphere.get_volume().set_gradient_factor(10) +sphere.get_volume().set_absorption(0) +sphere.get_volume().set_scattering(1) +sphere.get_volume().set_scale(100) + +# Create a procedural box +box = nvisii.entity.create( + name="box", + volume = nvisii.volume.create_box("box"), + transform = nvisii.transform.create("box"), + material = nvisii.material.create("box") +) +box.get_transform().set_position((-1.0,2,0.25)) +box.get_transform().set_scale((0.005, 0.005, 0.005)) +box.get_transform().set_angle_axis(.3, (0,0,1)) +box.get_material().set_base_color((1.0,1.0,1.0)) +box.get_volume().set_gradient_factor(10) +box.get_volume().set_absorption(0) +box.get_volume().set_scattering(1) +box.get_volume().set_scale(100) + +# Create a 
cloudy bunny using a nanovdb file bunny = nvisii.entity.create( name="bunny", volume = nvisii.volume.create_from_file("bunny", "./content/bunny_cloud.nvdb"), transform = nvisii.transform.create("bunny"), material = nvisii.material.create("bunny") ) -#%% -bunny.get_transform().set_position((-1,0,0.75)) +bunny.get_transform().set_position((-.8,.5,0.75)) bunny.get_transform().set_scale((0.003, 0.003, 0.003)) bunny.get_material().set_base_color((0.1,0.9,0.08)) bunny.get_material().set_roughness(0.7) bunny.get_volume().set_gradient_factor(10) -bunny.get_volume().set_absorption(0) -bunny.get_volume().set_scattering(1) -bunny.get_volume().set_scale(10) +bunny.get_volume().set_absorption(1) +bunny.get_volume().set_scattering(0) +bunny.get_volume().set_scale(4) bunny.get_transform().set_angle_axis(nvisii.pi() * .5, (1,0,0)) bunny.get_transform().add_angle_axis(nvisii.pi(), (0,1,0)) -#%% +# Create a boston teapot using a raw CT scanned volume voxels = np.fromfile("./content/boston_teapot_256x256x178_uint8.raw", dtype=np.uint8).astype(np.float32) - - - -#%% -# Let's also add a teapot teapot = nvisii.entity.create( name="teapot", volume = nvisii.volume.create_from_data("teapot", width = 256, height = 256, depth = 178, data = voxels, background = 0.0), transform = nvisii.transform.create("teapot"), material = nvisii.material.create("teapot") ) -#%% teapot.get_transform().set_position((1,0,0.7)) teapot.get_transform().set_scale((0.005, 0.005, 0.005)) -teapot.get_material().set_base_color((1.0,0.0,0.0)) +teapot.get_material().set_base_color((1.0,1.0,1.0)) teapot.get_material().set_roughness(0.0) teapot.get_material().set_metallic(1.0) teapot.get_volume().set_gradient_factor(100) @@ -132,15 +168,25 @@ teapot.get_transform().set_angle_axis(-nvisii.pi() * .5, (1,0,0)) teapot.get_transform().add_angle_axis(nvisii.pi() * 1.1, (0,1,0)) -#%% -#%% -# Now that we have a simple scene, let's render it -print("rendering to", "01_simple_scene.png") +# Volumes can be lit up using light sources 
+light = nvisii.entity.create( + name="light", + mesh = nvisii.mesh.create_sphere("light"), + transform = nvisii.transform.create("light"), + light = nvisii.light.create("light") +) +light.get_transform().set_position((0,1,2.5)) +light.get_transform().set_scale((.2,.2,.2)) +light.get_light().set_temperature(4000) +light.get_light().set_intensity(20) + +# Render out the image +print("rendering to", "22_volumes.png") nvisii.render_to_file( width = opt.width, height = opt.height, samples_per_pixel = opt.spp, - file_path = "01_simple_scene.png" + file_path = "22_volumes.png" ) nvisii.deinitialize() diff --git a/include/nvisii/volume.h b/include/nvisii/volume.h index d71236d3..7ceddeb8 100644 --- a/include/nvisii/volume.h +++ b/include/nvisii/volume.h @@ -78,8 +78,15 @@ class Volume : public StaticFactory * is 0 and inactive, the interior is active with values varying * smoothly from 0 at the surface of the box to 1 at the half width * and interior of the box. + * @param name The name of the volume to create. + * @param size The width, height, and depth of the box in local units. 
+ * @param center The center of the box in local units + * @param half_width The half-width of the narrow band in voxel units */ - static Volume *createBox(std::string name); + static Volume *createBox(std::string name, + glm::vec3 size = glm::vec3(100.f), + glm::vec3 center = glm::vec3(0.f), + float half_width = 3.f); /** * Creates a sparse fog volume of an octahedron such that the exterior diff --git a/src/nvisii/volume.cpp b/src/nvisii/volume.cpp index 838b7c51..8654e7f6 100644 --- a/src/nvisii/volume.cpp +++ b/src/nvisii/volume.cpp @@ -188,10 +188,11 @@ Volume *Volume::createTorus(std::string name) } } -Volume *Volume::createBox(std::string name) +Volume *Volume::createBox(std::string name, + glm::vec3 size, glm::vec3 center, float halfWidth) { - auto create = [] (Volume* v) { - nanovdb::GridHandle<> gridHdl = nanovdb::createFogVolumeBox(); + auto create = [size, center, halfWidth] (Volume* v) { + nanovdb::GridHandle<> gridHdl = nanovdb::createFogVolumeBox(size.x, size.y, size.z, ((nanovdb::Vec3R)(0)), 1.0f, halfWidth); v->gridHdlPtr = std::make_shared>(std::move(gridHdl)); v->markDirty(); }; From 86a0503ea5d2f9a65c167edba7e9b672eeae0594 Mon Sep 17 00:00:00 2001 From: n8vm Date: Thu, 15 Apr 2021 16:05:53 -0600 Subject: [PATCH 20/55] ray visibility flags now working. 
Added an example demonstrating feature --- examples/23.ray_visibility.py | 282 +++++++++++++++++++++++++++ include/nvisii/entity.h | 8 +- include/nvisii/nvisii.h | 2 +- src/nvisii/devicecode/path_tracer.cu | 19 +- 4 files changed, 295 insertions(+), 16 deletions(-) create mode 100644 examples/23.ray_visibility.py diff --git a/examples/23.ray_visibility.py b/examples/23.ray_visibility.py new file mode 100644 index 00000000..5311621f --- /dev/null +++ b/examples/23.ray_visibility.py @@ -0,0 +1,282 @@ +import nvisii +import math +import PySide2 +import colorsys +from PySide2.QtCore import * +from PySide2.QtWidgets import * + +nvisii.initialize() +nvisii.resize_window(1000,1000) +nvisii.enable_denoiser() +# nvisii.configure_denoiser(False, False, True) +nvisii.set_max_bounce_depth(diffuse_depth=2, glossy_depth = 8, transparency_depth = 8, transmission_depth = 12, volume_depth = 2) + +# Set the sky +nvisii.disable_dome_light_sampling() +nvisii.set_dome_light_color((0,0,0)) + +# Set camera +camera = nvisii.entity.create( + name = "camera", + transform = nvisii.transform.create(name = "camera_transform"), + camera = nvisii.camera.create( + name = "camera_camera", + aspect = 1.0 + ) +) +camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (0, 5, 2) # eye position +) +nvisii.set_camera_entity(camera) + +# Floor +floor = nvisii.entity.create( + name = "floor", + mesh = nvisii.mesh.create_plane("mesh_floor"), + transform = nvisii.transform.create("transform_floor"), + material = nvisii.material.create("material_floor") +) +floor.get_material().set_base_color((0.19,0.16,0.19)) +floor.get_material().set_metallic(0) +floor.get_material().set_roughness(1) +floor.get_transform().set_scale((5,5,1)) + +# Mirror 1 +mirror1 = nvisii.entity.create( + name = "mirror1", + mesh = nvisii.mesh.create_box("mesh_mirror1"), + transform = nvisii.transform.create("transform_mirror1"), + material = nvisii.material.create("material_mirror1") +) 
+mirror1.get_transform().look_at(eye = (-1.5, -1.5, .5), at = (0,0,.7), up = (0,0,1)) +mirror1.get_material().set_base_color((1.,1.,1.)) +mirror1.get_material().set_metallic(1) +mirror1.get_material().set_roughness(0) +mirror1.get_transform().set_scale((.7,.7,.1)) + +# Glass 1 +glass1 = nvisii.entity.create( + name = "glass1", + mesh = nvisii.mesh.create_box("mesh_glass1"), + transform = nvisii.transform.create("transform_glass1"), + material = nvisii.material.create("material_glass1") +) +glass1.get_transform().look_at(eye = (1.5, 1.5, .5), at = (0,0,.7), up = (0,0,1)) +glass1.get_material().set_base_color((1.,1.,1.)) +glass1.get_material().set_transmission(1) +glass1.get_material().set_roughness(0) +glass1.get_transform().set_scale((.7,.7,.1)) + +# Mirror 2 +mirror2 = nvisii.entity.create( + name = "mirror2", + mesh = nvisii.mesh.create_box("mesh_mirror2"), + transform = nvisii.transform.create("transform_mirror2"), + material = nvisii.material.create("material_mirror2") +) +mirror2.get_transform().look_at(eye = (1.5, -1.5, .5), at = (0,0,.7), up = (0,0,1)) +mirror2.get_material().set_base_color((1.,1.,1.)) +mirror2.get_material().set_metallic(1) +mirror2.get_material().set_roughness(0) +mirror2.get_transform().set_scale((.7,.7,.1)) + +# Glass 2 +glass2 = nvisii.entity.create( + name = "glass2", + mesh = nvisii.mesh.create_box("mesh_glass2"), + transform = nvisii.transform.create("transform_glass2"), + material = nvisii.material.create("material_glass2") +) +glass2.get_transform().look_at(eye = (-1.5, 1.5, .5), at = (0,0,.7), up = (0,0,1)) +glass2.get_material().set_base_color((1.,1.,1.)) +glass2.get_material().set_transmission(1) +glass2.get_material().set_roughness(0) +glass2.get_transform().set_scale((.7,.7,.1)) + +# Fog +fog = nvisii.entity.create( + name = "fog", + volume = nvisii.volume.create_box("mesh_fog"), + transform = nvisii.transform.create("transform_fog"), + material = nvisii.material.create("material_fog") +) 
+fog.get_material().set_base_color((1.,1.,1.)) +fog.get_material().set_transmission(1) +fog.get_material().set_roughness(0) +fog.get_volume().set_scale(100) + +# Light +light = nvisii.entity.create( + name = "light", + light = nvisii.light.create("light"), + transform = nvisii.transform.create("light"), + mesh = nvisii.mesh.create_sphere("light") +) +light.get_transform().set_position((0,0,5)) +light.get_transform().set_scale((.1,.1,.1)) +light.get_light().set_exposure(7) + +# Light blocker +blocker = nvisii.entity.create( + name = "blocker", + mesh = nvisii.mesh.create_capped_tube("blocker", innerRadius = .04), + transform = nvisii.transform.create("blocker"), + material = nvisii.material.create("blocker") +) +blocker.get_transform().set_scale((10,10,.01)) +blocker.get_transform().set_position((0,0,3.0)) + +# Teapot +teapotahedron = nvisii.entity.create( + name="teapotahedron", + mesh = nvisii.mesh.create_teapotahedron("teapotahedron", segments = 32), + transform = nvisii.transform.create("teapotahedron"), + material = nvisii.material.create("teapotahedron") +) +teapotahedron.get_transform().set_rotation(nvisii.angleAxis(nvisii.pi() / 4.0, (0,0,1))) +teapotahedron.get_transform().set_position((0,0,0)) +teapotahedron.get_transform().set_scale((0.4, 0.4, 0.4)) +teapotahedron.get_material().set_base_color((255.0 / 255.0, 100.0 / 255.0, 2.0 / 256.0)) +teapotahedron.get_material().set_roughness(0.0) +teapotahedron.get_material().set_specular(1.0) +teapotahedron.get_material().set_metallic(1.0) + +# Make a QT window to demonstrate the difference between alpha transparency and transmission +app = QApplication([]) # Start an application. +window = QWidget() # Create a window. +layout = QVBoxLayout() # Create a layout. 
+ +def rotateCamera(value): + value = value / 100.0 + cam_pos = camera.get_transform().get_position() + + camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (5 * math.cos(value * 2 * nvisii.pi()), 5 * math.sin(value * 2 * nvisii.pi()), cam_pos[2]) # eye position + ) +rotateCamera(0) +dial = QDial() +dial.setWrapping(True) +dial.valueChanged[int].connect(rotateCamera) +layout.addWidget(QLabel('Camera rotation')) +layout.addWidget(dial) + +def rotateCameraElevation(value): + # print(value) + value = value / 100 + cam_pos = camera.get_transform().get_position() + camera.get_transform().look_at( + at = (0, 0, 0.5), # at position + up = (0, 0, 1), # up vector + eye = (cam_pos[0], cam_pos[1], 0.1 + 2.5*value) # eye position + ) + # print(value, 2 * math.cos(value * 2 * nvisii.pi())) + +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(rotateCameraElevation) +slider.setValue(40) +layout.addWidget(QLabel('Camera Elevation')) +layout.addWidget(slider) + +# Add some toggles to demonstrate how the set_visibility function works + +camera_visibility = True +diffuse_visibility = True +glossy_visibility = True +transmission_visibility = True +scatter_visibility = True +shadow_visibility = True +def updateVisibility(): + global camera_visibility + global diffuse_visibility + global glossy_visibility + global transmission_visibility + global scatter_visibility + global shadow_visibility + + teapotahedron.set_visibility( + camera = camera_visibility, + diffuse = diffuse_visibility, + glossy = glossy_visibility, + transmission = transmission_visibility, + volume_scatter = scatter_visibility, + shadow = shadow_visibility) + +def toggleCamera(): + global camera_visibility + camera_visibility = not camera_visibility + updateVisibility() +button = QPushButton("toggleCamera") +button.clicked.connect(toggleCamera) +layout.addWidget(button) + +def toggleDiffuse(): + global diffuse_visibility + diffuse_visibility = not 
diffuse_visibility + updateVisibility() +button = QPushButton("toggleDiffuse") +button.clicked.connect(toggleDiffuse) +layout.addWidget(button) + +def toggleGlossy(): + global glossy_visibility + glossy_visibility = not glossy_visibility + updateVisibility() +button = QPushButton("toggleGlossy") +button.clicked.connect(toggleGlossy) +layout.addWidget(button) + +def toggleTransmission(): + global transmission_visibility + transmission_visibility = not transmission_visibility + updateVisibility() +button = QPushButton("toggleTransmission") +button.clicked.connect(toggleTransmission) +layout.addWidget(button) + +def toggleScattering(): + global scatter_visibility + scatter_visibility = not scatter_visibility + updateVisibility() +button = QPushButton("toggleScattering") +button.clicked.connect(toggleScattering) +layout.addWidget(button) + +def toggleShadows(): + global shadow_visibility + shadow_visibility = not shadow_visibility + updateVisibility() +button = QPushButton("toggleShadows") +button.clicked.connect(toggleShadows) +layout.addWidget(button) + +def setFogStrength(value): + value = (100 - value) * 2 + 10 + fog.get_volume().set_scale(value) +setFogStrength(100) +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(setFogStrength) +slider.setValue(100) +layout.addWidget(QLabel('Fog Strength')) +layout.addWidget(slider) + + +def setLightHeight(value): + value = value / 100.0 + light.get_transform().set_position((0,0,3 + value * 2)) +setLightHeight(50) +slider = QSlider(Qt.Horizontal) +slider.valueChanged[int].connect(setLightHeight) +slider.setValue(50) +layout.addWidget(QLabel('Light Height')) +layout.addWidget(slider) + + +window.setLayout(layout) +window.show() +app.exec_() + +nvisii.deinitialize() \ No newline at end of file diff --git a/include/nvisii/entity.h b/include/nvisii/entity.h index 9fadab2a..5abcf28a 100644 --- a/include/nvisii/entity.h +++ b/include/nvisii/entity.h @@ -212,10 +212,10 @@ class Entity : public StaticFactory { /** * 
Objects can be set to be invisible to particular ray types: * @param camera Makes the object visible to camera rays (the first rays to be traced from the camera). - * @param diffuse (todo...) Makes the object visible to diffuse rays (eg for diffuse GI) - * @param glossy (todo...) Makes the object visible to glossy rays (eg in reflections) - * @param transmission (todo...) Makes the object visible to transmission rays (eg from inside glass) - * @param volume_scatter (todo...) Makes the object visible to volume scatter rays (eg from light simulation inside a volume) + * @param diffuse Makes the object visible to diffuse rays (eg for diffuse GI) + * @param glossy Makes the object visible to glossy rays (eg in reflections) + * @param transmission Makes the object visible to transmission rays (eg from inside glass) + * @param volume_scatter Makes the object visible to volume scatter rays (eg from light simulation inside a volume) * @param shadow Enables the object to cast shadows. */ void setVisibility( diff --git a/include/nvisii/nvisii.h b/include/nvisii/nvisii.h index e14a2a97..dd770fe7 100644 --- a/include/nvisii/nvisii.h +++ b/include/nvisii/nvisii.h @@ -207,7 +207,7 @@ void setDirectLightingClamp(float clamp); */ void setMaxBounceDepth( uint32_t diffuse_depth = 2, - uint32_t glossy_depth = 2, + uint32_t glossy_depth = 8, uint32_t transparency_depth = 8, uint32_t transmission_depth = 12, uint32_t volume_depth = 2 diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index 1def657d..c0fbe130 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -1067,17 +1067,6 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() if (!isVolume) { GET(mesh, MeshStruct, LP.meshes, entity.mesh_id); } else { GET(volume, VolumeStruct, LP.volumes, entity.volume_id); } - // Skip forward if the hit object is invisible for this ray type, skip it. 
- if (((entity.flags & ENTITY_VISIBILITY_CAMERA_RAYS) == 0)) { - ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); - payload.tHit = -1.f; - ray.time = time; - owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); - transparencyDepth++; - if (transparencyDepth > LP.maxTransparencyDepth) break; - continue; - } - // Set new outgoing light direction and hit position. const float3 w_o = -ray.direction; float3 hit_p = ray.origin + payload.tHit * ray.direction; @@ -1216,6 +1205,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); payload.tHit = -1.f; ray.time = time; + // ray.visibilityMask reuses the last visibility mask here owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); ++depth; transparencyDepth++; @@ -1305,6 +1295,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() ray.origin = ray.origin + ray.direction * (payload.tHit + EPSILON); payload.tHit = -1.f; ray.time = time; + // ray.visibilityMask reuses the last visibility mask here owl::traceRay( LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Count this as a "transparent" bounce. @@ -1439,6 +1430,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() ray.tmin = EPSILON * 10.f; ray.tmax = lightDistance + EPSILON; // needs to be distance to light, else anyhit logic breaks. ray.origin = hit_p; ray.direction = lightDir; ray.time = time; + ray.visibilityMask = ENTITY_VISIBILITY_SHADOW_RAYS; owl::traceRay( LP.IAS, ray, payload, occlusion_flags); ray.tmax = (payload.instanceID == -2) ? 
ray.tmax : payload.tHit; bool visible; @@ -1477,6 +1469,11 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() payload.instanceID = -1; payload.tHit = -1.f; ray.time = sampleTime(lcg_randomf(rng)); + if (isVolume) ray.visibilityMask = ENTITY_VISIBILITY_VOLUME_SCATTER_RAYS; + else if (sampledBsdf == DISNEY_TRANSMISSION_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_TRANSMISSION_RAYS; + else if (sampledBsdf == DISNEY_DIFFUSE_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_DIFFUSE_RAYS; + else if (sampledBsdf == DISNEY_GLOSSY_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_GLOSSY_RAYS; + else if (sampledBsdf == DISNEY_CLEARCOAT_BRDF) ray.visibilityMask = ENTITY_VISIBILITY_GLOSSY_RAYS; owl::traceRay(LP.IAS, ray, payload, OPTIX_RAY_FLAG_DISABLE_ANYHIT); // Check if we hit any of the previously sampled lights From 1b8a87b69ed78543dc53a9b4d69d8a9b7dc24047 Mon Sep 17 00:00:00 2001 From: n8vm Date: Thu, 15 Apr 2021 16:29:11 -0600 Subject: [PATCH 21/55] fixing some tests. Normal map test is failing --- examples/14.normal_map.py | 6 +++--- examples/content.txt | 1 + examples/requirements.txt | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/examples/14.normal_map.py b/examples/14.normal_map.py index dce409ca..f0518799 100644 --- a/examples/14.normal_map.py +++ b/examples/14.normal_map.py @@ -103,9 +103,9 @@ mat.set_specular(0) # load an example brick texture -color_tex = nvisii.texture.create_from_file("color",'content/Bricks051_2K_Color.jpg') -normal_tex = nvisii.texture.create_from_file("normal",'content/Bricks051_2K_Normal.jpg', linear = True) -rough_tex = nvisii.texture.create_from_file("rough",'content/Bricks051_2K_Roughness.jpg', linear = True) +color_tex = nvisii.texture.create_from_file("color",'./content/Bricks051_2K_Color.jpg') +normal_tex = nvisii.texture.create_from_file("normal",'./content/Bricks051_2K_Normal.jpg', linear = True) +rough_tex = nvisii.texture.create_from_file("rough",'./content/Bricks051_2K_Roughness.jpg', linear = True) color_tex.set_scale((.1,.1)) 
normal_tex.set_scale((.1,.1)) diff --git a/examples/content.txt b/examples/content.txt index 1b7882e6..232b1289 100644 --- a/examples/content.txt +++ b/examples/content.txt @@ -9,3 +9,4 @@ https://www.dropbox.com/s/22bug1he354oqpt/bmw.zip https://www.dropbox.com/s/76gumyy7j0f3cyj/dragon.stl https://www.dropbox.com/s/runlp60bjjf3dpu/bunny_cloud.zip https://www.dropbox.com/s/nim7jsjiumei4f9/boston_teapot_256x256x178_uint8.zip +https://www.dropbox.com/s/yybckz6sawq5nbw/TestNormalMap.png diff --git a/examples/requirements.txt b/examples/requirements.txt index ebf9dd38..317f777d 100644 --- a/examples/requirements.txt +++ b/examples/requirements.txt @@ -4,6 +4,6 @@ noise numpy pillow scipy -pygame open3d -PySide2 \ No newline at end of file +PySide2 +opencv-python From 3d34ab5d377529e7a0acd4212c7b7563e4230a02 Mon Sep 17 00:00:00 2001 From: n8vm Date: Thu, 15 Apr 2021 16:57:26 -0600 Subject: [PATCH 22/55] fixing race condition with textures and materials --- src/nvisii/nvisii.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 7f28ef7d..81c45a12 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -1375,7 +1375,8 @@ void updateComponents() // Manage textures and materials if (Texture::areAnyDirty() || Material::areAnyDirty()) { - std::lock_guard material_lock(Material::areAnyDirty() ? 
*Material::getEditMutex().get() : dummyMutex); + std::lock_guard material_lock(*Material::getEditMutex().get()); + std::lock_guard texture_lock(*Texture::getEditMutex().get()); // Allocate cuda textures for all texture components auto dirtyTextures = Texture::getDirtyTextures(); @@ -1421,7 +1422,6 @@ void updateComponents() colorSpace ); } - } // Create additional cuda textures for material constants @@ -1935,6 +1935,8 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi if ((width < 1) || (height < 1)) throw std::runtime_error("Error, invalid width/height"); std::vector frameBuffer(width * height * 4); + enqueueCommandAndWait([](){}); + enqueueCommandAndWait([&frameBuffer, width, height, samplesPerPixel, seed] () { if (!NVISII.headlessMode) { if ((width != WindowData.currentSize.x) || (height != WindowData.currentSize.y)) From 8c3d050c0d87621ef33d884d3c6551c9d39eb9bd Mon Sep 17 00:00:00 2001 From: n8vm Date: Sat, 24 Apr 2021 11:41:39 -0600 Subject: [PATCH 23/55] adding a get_center function to entities, which behaves differently to the aabb center --- include/nvisii/entity.h | 6 ++++++ include/nvisii/mesh.h | 4 ++-- src/nvisii/entity.cpp | 7 +++++++ src/nvisii/mesh.cpp | 2 +- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/nvisii/entity.h b/include/nvisii/entity.h index 9fadab2a..0c53ce24 100644 --- a/include/nvisii/entity.h +++ b/include/nvisii/entity.h @@ -236,6 +236,12 @@ class Entity : public StaticFactory { /** @returns the center of the aligned bounding box. Requires a transform and mesh component to be attached. */ glm::vec3 getAabbCenter(); + /** + * @returns the average of the vertices of the mesh in world space, which will lay roughly in the center. Requires + * a transform and mesh component to be attached. + */ + glm::vec3 getCenter(); + /** For internal use. Returns the mutex used to lock entities for processing by the renderer. 
*/ static std::shared_ptr getEditMutex(); diff --git a/include/nvisii/mesh.h b/include/nvisii/mesh.h index 6bb73cca..ff79da0e 100644 --- a/include/nvisii/mesh.h +++ b/include/nvisii/mesh.h @@ -843,8 +843,8 @@ class Mesh : public StaticFactory // /* TODO: Explain this */ // void save_tetrahedralization(float quality_bound, float maximum_volume); - /** @returns the last computed mesh centroid. */ - glm::vec3 getCentroid(); + /** @returns the average of the vertices of the mesh, which will lay roughly in the center. */ + glm::vec3 getCenter(); /** @returns the minimum axis aligned bounding box position */ glm::vec3 getMinAabbCorner(); diff --git a/src/nvisii/entity.cpp b/src/nvisii/entity.cpp index aac5d55c..aa0f50cf 100644 --- a/src/nvisii/entity.cpp +++ b/src/nvisii/entity.cpp @@ -318,6 +318,13 @@ glm::vec3 Entity::getAabbCenter() return entityStructs[id].bbmin + (entityStructs[id].bbmax - entityStructs[id].bbmin) * .5f; } +glm::vec3 Entity::getCenter() +{ + if (!getTransform()) throw std::runtime_error("Error: no transform attached to entity"); + if (!getMesh()) throw std::runtime_error("Error: no mesh attached to entity"); + return glm::vec3(getTransform()->getLocalToWorldMatrix() * glm::vec4(getMesh()->getCenter(), 1.f)); +} + void Entity::initializeFactory(uint32_t max_components) { if (isFactoryInitialized()) return; diff --git a/src/nvisii/mesh.cpp b/src/nvisii/mesh.cpp index 26fb96dc..bf53228d 100644 --- a/src/nvisii/mesh.cpp +++ b/src/nvisii/mesh.cpp @@ -141,7 +141,7 @@ void Mesh::computeMetadata() this->meshStructs[id].numVerts = uint32_t(positions.size()); } -glm::vec3 Mesh::getCentroid() +glm::vec3 Mesh::getCenter() { return vec3(meshStructs[id].center); } From 819e1b44285cb669a71574c32adf0e73d89108db Mon Sep 17 00:00:00 2001 From: n8vm Date: Sat, 15 May 2021 23:26:14 -0700 Subject: [PATCH 24/55] changing scene importer to allow for degenerate transforms with a warning --- src/nvisii/nvisii_import_scene.cpp | 15 ++++++++++----- 1 file changed, 10 
insertions(+), 5 deletions(-) diff --git a/src/nvisii/nvisii_import_scene.cpp b/src/nvisii/nvisii_import_scene.cpp index 60bbd3a5..1cebd732 100644 --- a/src/nvisii/nvisii_import_scene.cpp +++ b/src/nvisii/nvisii_import_scene.cpp @@ -42,9 +42,6 @@ std::string dirnameOf(const std::string& fname) Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::quat rotation, std::vector args) { - bool updatesEnabled = areUpdatesEnabled(); - - disableUpdates(); std::string directory = dirnameOf(path); bool verbose = false; bool max_quality = false; @@ -462,7 +459,16 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu } if (verbose) std::cout<< std::string(level, '\t') << "Creating transform " << transformName << std::endl; auto transform = Transform::create(transformName); - transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation)); + try { + transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation)); + } catch(...) { + if (verbose) std::cout<< std::string(level, '\t') << "Warning! transform " << transformName << " Decomposition failed! Is the product of the 4x4 with the determinant of the upper left 3x3 nonzero? 
See Graphics Gems II: Decomposing a Matrix into Simple Transformations" << std::endl; + Transform::remove(transformName); + return; + + // transform->setTransform(aiMatrix4x4ToGlm(&node->mTransformation), false); + // transform->setScale({0.f, 0.f, 0.f}); + } if (parentTransform == nullptr) { transform->setScale(transform->getScale() * scale); transform->addRotation(rotation); @@ -514,7 +520,6 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu addNode(scene->mRootNode, nullptr, 0); aiReleaseImport(scene); - if (updatesEnabled) enableUpdates(); if (verbose) std::cout<<"Done!"< Date: Sat, 22 May 2021 13:52:53 -0600 Subject: [PATCH 25/55] adding support for point lights --- src/nvisii/devicecode/path_tracer.cu | 234 +++++---------------------- src/nvisii/nvisii.cpp | 5 +- 2 files changed, 44 insertions(+), 195 deletions(-) diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index c0fbe130..34abd24b 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -244,93 +244,10 @@ void SampleDeltaTracking( } } -// bool debug = (prd.primitiveID == -2); -// if (debug) { -// if (! 
((mn[0] < x[0]) && (x[0] < mx[0]) && -// (mn[1] < x[1]) && (x[1] < mx[1]) && -// (mn[2] < x[2]) && (x[2] < mx[2])) -// ) { -// printf("X"); -// } else { -// printf("O"); -// } -// } -// if (debug) { -// printf("\n"); -// } - OPTIX_CLOSEST_HIT_PROGRAM(VolumeMesh)() { auto &LP = optixLaunchParams; RayPayload &prd = owl::getPRD(); - // const auto &self = owl::getProgramData(); - // LCGRand rng = prd.rng; - - // // Load the volume we hit - // GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); - // uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; - // const auto grid = reinterpret_cast(hdl); - // const auto& tree = grid->tree(); - // auto acc = tree.getAccessor(); - - // auto bbox = acc.root().bbox(); - // auto mx = bbox.max(); - // auto mn = bbox.min(); - // glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + - // (glm::vec3(mx[0], mx[1], mx[2]) - - // glm::vec3(mn[0], mn[1], mn[2])) * .5f; - - // float majorant_extinction = acc.root().valueMax(); - // float gradient_factor = volume.gradient_factor; - // float linear_attenuation_unit = volume.scale; - // float absorption = volume.absorption; - // float scattering = volume.scattering; - - // vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; - // vec3 w = make_vec3(prd.objectSpaceRayDirection); - - // linear_attenuation_unit /= length(w); - - // // Move ray to volume boundary - // float t0 = prd.t0, t1 = prd.t1; - // x = x + t0 * w; - // t1 = t1 - t0; - // t0 = 0.f; - - // // Sample the free path distance to see if our ray makes it to the boundary - // float t; - // int event; - // bool hitVolume = false; - // #define MAX_NULL_COLLISIONS 10000 - // for (int dti = 0; dti < MAX_NULL_COLLISIONS; ++dti) { - // SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, - // absorption, scattering, x, w, t1, t, event); - // x = x + t * w; - - // // The boundary was hit - // if (event == 0) { - // break; - // } - - // // An absorption / emission event occurred - 
// if (event == 1) { - // hitVolume = true; - // break; - // } - - // // A scattering event occurred - // if (event == 2) { - // hitVolume = true; - // break; - // } - - // // A null collision occurred. - // if (event == 3) { - // // update boundary in relation to the new collision x, w does not change. - // t1 = t1 - t; - // } - // } - optixGetObjectToWorldTransformMatrix(prd.localToWorld); // If we don't need motion vectors, (or in the future if an object @@ -353,90 +270,10 @@ OPTIX_CLOSEST_HIT_PROGRAM(VolumeMesh)() OPTIX_CLOSEST_HIT_PROGRAM(VolumeShadowRay)() { - // auto &LP = optixLaunchParams; - // const auto &self = owl::getProgramData(); - // RayPayload &prd = owl::getPRD(); - // LCGRand rng = prd.rng; - - // GET(VolumeStruct volume, VolumeStruct, LP.volumes, self.volumeID); - // uint8_t *hdl = (uint8_t*)LP.volumeHandles.get(self.volumeID, __LINE__).data; - // const auto grid = reinterpret_cast(hdl); - // const auto& tree = grid->tree(); - // auto acc = tree.getAccessor(); - - // auto bbox = acc.root().bbox(); - // auto mx = bbox.max(); - // auto mn = bbox.min(); - // glm::vec3 offset = glm::vec3(mn[0], mn[1], mn[2]) + - // (glm::vec3(mx[0], mx[1], mx[2]) - - // glm::vec3(mn[0], mn[1], mn[2])) * .5f; - - // float majorant_extinction = acc.root().valueMax(); - // float gradient_factor = volume.gradient_factor; - // float linear_attenuation_unit = volume.scale; - // float absorption = volume.absorption; - // float scattering = volume.scattering; - - // vec3 x = make_vec3(prd.objectSpaceRayOrigin) + offset; - // vec3 w = make_vec3(prd.objectSpaceRayDirection); - - // linear_attenuation_unit /= length(w); - - // // Move ray to volume boundary - // float t0 = prd.t0, t1 = prd.t1; - // x = x + t0 * w; - // t1 = t1 - t0; - // t0 = 0.f; - - // // Sample the free path distance to see if our ray makes it to the boundary - // float t; - // int event; - // bool hitVolume = false; - // #define MAX_NULL_COLLISIONS 10000 - // for (int dti = 0; dti < MAX_NULL_COLLISIONS; 
++dti) { - // SampleDeltaTracking(rng, acc, majorant_extinction, linear_attenuation_unit, - // absorption, scattering, x, w, t1, t, event); - // x = x + t * w; - - // // The boundary was hit - // if (event == 0) { - // break; - // } - - // // An absorption / emission event occurred - // if (event == 1) { - // hitVolume = true; - // break; - // } - - // // A scattering event occurred - // if (event == 2) { - // hitVolume = true; - // break; - // } - - // // A null collision occurred. - // if (event == 3) { - // // update boundary in relation to the new collision x, w does not change. - // t1 = t1 - t; - // } - // } - - // if (!hitVolume) { - // prd.tHit = -1.f; - // } - // else { - // prd.instanceID = optixGetInstanceIndex(); - // prd.eventID = event; - // prd.tHit = t; - // } } OPTIX_INTERSECT_PROGRAM(VolumeIntersection)() { - // float old_tmax = optixGetRayTmax(); - - // const int primID = optixGetPrimitiveIndex(); auto &LP = optixLaunchParams; const auto &self = owl::getProgramData(); RayPayload &prd = owl::getPRD(); @@ -1361,47 +1198,58 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() // sample light sources else { + // Sample the light to compute an incident light ray to this point if (numLights == 0) continue; GET( sampledLightID, int, LP.lightEntities, randomID ); GET( EntityStruct light_entity, EntityStruct, LP.entities, sampledLightID ); GET( LightStruct light_light, LightStruct, LP.lights, light_entity.light_id ); GET( TransformStruct transform, TransformStruct, LP.transforms, light_entity.transform_id ); - GET( MeshStruct mesh, MeshStruct, LP.meshes, light_entity.mesh_id ); - uint32_t random_tri_id = uint32_t(min(lcg_randomf(rng) * mesh.numTris, float(mesh.numTris - 1))); - GET( Buffer indices, Buffer, LP.indexLists, light_entity.mesh_id ); - GET( Buffer vertices, Buffer, LP.vertexLists, light_entity.mesh_id ); - GET( Buffer normals, Buffer, LP.normalLists, light_entity.mesh_id ); - GET( Buffer texCoords, Buffer, LP.texCoordLists, light_entity.mesh_id ); - GET( int3 
triIndex, int3, indices, random_tri_id ); - - // Sample the light to compute an incident light ray to this point + auto <w = transform.localToWorld; float3 dir; float2 uv; float3 pos = hit_p; - GET(float3 n1, float3, normals, triIndex.x ); - GET(float3 n2, float3, normals, triIndex.y ); - GET(float3 n3, float3, normals, triIndex.z ); - GET(float3 v1, float3, vertices, triIndex.x ); - GET(float3 v2, float3, vertices, triIndex.y ); - GET(float3 v3, float3, vertices, triIndex.z ); - GET(float2 uv1, float2, texCoords, triIndex.x ); - GET(float2 uv2, float2, texCoords, triIndex.y ); - GET(float2 uv3, float2, texCoords, triIndex.z ); - - // Might be a bug here with normal transform... - n1 = make_float3(ltw * make_float4(n1, 0.0f)); - n2 = make_float3(ltw * make_float4(n2, 0.0f)); - n3 = make_float3(ltw * make_float4(n3, 0.0f)); - v1 = make_float3(ltw * make_float4(v1, 1.0f)); - v2 = make_float3(ltw * make_float4(v2, 1.0f)); - v3 = make_float3(ltw * make_float4(v3, 1.0f)); - sampleTriangle(pos, n1, n2, n3, v1, v2, v3, uv1, uv2, uv3, - lcg_randomf(rng), lcg_randomf(rng), dir, lightDistance, lightPDF, uv, - /*double_sided*/ false, /*use surface area*/ light_light.use_surface_area); + // The sampled light is a point light + if ((light_entity.mesh_id < 0) || (light_entity.mesh_id >= LP.meshes.count)) { + numTris = 1.f; + float3 tmp = make_float3(ltw[3]) - pos; + lightDistance = length(tmp); + dir = tmp / lightDistance; + lightPDF = PdfAtoW(1.f/(4.f * M_PI), lightDistance * lightDistance, 1.f); + uv = make_float2(0.f, 0.f); + } + // The sampled light is a mesh light + else { + GET( MeshStruct mesh, MeshStruct, LP.meshes, light_entity.mesh_id ); + uint32_t random_tri_id = uint32_t(min(lcg_randomf(rng) * mesh.numTris, float(mesh.numTris - 1))); + GET( Buffer indices, Buffer, LP.indexLists, light_entity.mesh_id ); + GET( Buffer vertices, Buffer, LP.vertexLists, light_entity.mesh_id ); + GET( Buffer normals, Buffer, LP.normalLists, light_entity.mesh_id ); + GET( Buffer texCoords, 
Buffer, LP.texCoordLists, light_entity.mesh_id ); + GET( int3 triIndex, int3, indices, random_tri_id ); + GET(float3 n1, float3, normals, triIndex.x ); + GET(float3 n2, float3, normals, triIndex.y ); + GET(float3 n3, float3, normals, triIndex.z ); + GET(float3 v1, float3, vertices, triIndex.x ); + GET(float3 v2, float3, vertices, triIndex.y ); + GET(float3 v3, float3, vertices, triIndex.z ); + GET(float2 uv1, float2, texCoords, triIndex.x ); + GET(float2 uv2, float2, texCoords, triIndex.y ); + GET(float2 uv3, float2, texCoords, triIndex.z ); + // Might be a bug here with normal transform... + n1 = make_float3(ltw * make_float4(n1, 0.0f)); + n2 = make_float3(ltw * make_float4(n2, 0.0f)); + n3 = make_float3(ltw * make_float4(n3, 0.0f)); + v1 = make_float3(ltw * make_float4(v1, 1.0f)); + v2 = make_float3(ltw * make_float4(v2, 1.0f)); + v3 = make_float3(ltw * make_float4(v3, 1.0f)); + sampleTriangle(pos, n1, n2, n3, v1, v2, v3, uv1, uv2, uv3, + lcg_randomf(rng), lcg_randomf(rng), dir, lightDistance, lightPDF, uv, + /*double_sided*/ false, /*use surface area*/ light_light.use_surface_area); + numTris = mesh.numTris; + } falloff = light_light.falloff; - numTris = mesh.numTris; lightDir = make_float3(dir.x, dir.y, dir.z); if (light_light.color_texture_id == -1) lightEmission = make_float3(light_light.r, light_light.g, light_light.b) * (light_light.intensity * pow(2.f, light_light.exposure)); else lightEmission = sampleTexture(light_light.color_texture_id, uv, make_float3(0.f, 0.f, 0.f)) * (light_light.intensity * pow(2.f, light_light.exposure)); diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 81c45a12..315ff35d 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -1148,8 +1148,8 @@ void updateComponents() anyUpdated |= Texture::areAnyDirty(); anyUpdated |= Entity::areAnyDirty(); anyUpdated |= Volume::areAnyDirty(); - if (!anyUpdated) return; + resetAccumulation(); std::recursive_mutex dummyMutex; @@ -1360,7 +1360,8 @@ void updateComponents() 
if (!entities[eid].isInitialized()) continue; if (!entities[eid].getTransform()) continue; if (!entities[eid].getLight()) continue; - if (!entities[eid].getMesh()) continue; + // Edit: adding support for "point" lights that have no meshes + // if (!entities[eid].getMesh()) continue; OD.lightEntities.push_back(eid); } owlBufferResize(OptixData.lightEntitiesBuffer, OD.lightEntities.size()); From 2a08bb66daa29d60c4f5eb1acad4a6884fb6867f Mon Sep 17 00:00:00 2001 From: n8vm Date: Sat, 22 May 2021 13:54:13 -0600 Subject: [PATCH 26/55] updating docs --- include/nvisii/light.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/nvisii/light.h b/include/nvisii/light.h index 4b957628..99dd7969 100644 --- a/include/nvisii/light.h +++ b/include/nvisii/light.h @@ -11,7 +11,9 @@ class Texture; /** * A "Light" component illuminates objects in a scene. Light components must * be added to an entity with a transform component to have a visible - * impact on the scene. + * impact on the scene. Lights attached to entities with no mesh components + * act like point lights. Otherwise, lights attached to entities with meshes + * will act like mesh lights. 
*/ class Light : public StaticFactory { friend class StaticFactory; From 7617f80f7fc17a984c310cea0853c732ead618bb Mon Sep 17 00:00:00 2001 From: n8vm Date: Sun, 23 May 2021 11:37:53 -0600 Subject: [PATCH 27/55] improvements to how import_scene handles normal maps --- include/nvisii/mesh.h | 6 ++ include/nvisii/nvisii.h | 1 + include/nvisii/texture.h | 3 + src/nvisii/devicecode/launch_params.h | 3 +- src/nvisii/devicecode/path_tracer.cu | 31 +++++-- src/nvisii/mesh.cpp | 28 ++++++- src/nvisii/nvisii.cpp | 6 ++ src/nvisii/nvisii_import_scene.cpp | 111 +++++++++++++++++++------- src/nvisii/texture.cpp | 5 ++ 9 files changed, 152 insertions(+), 42 deletions(-) diff --git a/include/nvisii/mesh.h b/include/nvisii/mesh.h index ff79da0e..91980cc4 100644 --- a/include/nvisii/mesh.h +++ b/include/nvisii/mesh.h @@ -699,6 +699,8 @@ class Mesh : public StaticFactory * @param position_dimensions The number of floats per position. Valid numbers are 3 or 4. * @param normals A list of vertex normals. If indices aren't supplied, this must be a multiple of 3. * @param normal_dimensions The number of floats per normal. Valid numbers are 3 or 4. + * @param tangents A list of vertex tangents. If indices aren't supplied, this must be a multiple of 3. + * @param tangent_dimensions The number of floats per tangent. Valid numbers are 3 or 4. * @param colors A list of per-vertex colors. If indices aren't supplied, this must be a multiple of 3. * @param color_dimensions The number of floats per color. Valid numbers are 3 or 4. * @param texcoords A list of 2D per-vertex texture coordinates. If indices aren't supplied, this must be a multiple of 3. 
@@ -712,6 +714,8 @@ class Mesh : public StaticFactory uint32_t position_dimensions = 3, std::vector normals = std::vector(), uint32_t normal_dimensions = 3, + std::vector tangents = std::vector(), + uint32_t tangent_dimensions = 3, std::vector colors = std::vector(), uint32_t color_dimensions = 4, std::vector texcoords = std::vector(), @@ -1012,6 +1016,8 @@ class Mesh : public StaticFactory uint32_t position_dimensions, std::vector &normals_, uint32_t normal_dimensions, + std::vector &tangents_, + uint32_t tangent_dimensions, std::vector &colors_, uint32_t color_dimensions, std::vector &texcoords_, diff --git a/include/nvisii/nvisii.h b/include/nvisii/nvisii.h index dd770fe7..635ad212 100644 --- a/include/nvisii/nvisii.h +++ b/include/nvisii/nvisii.h @@ -305,6 +305,7 @@ void renderToFile(uint32_t width, uint32_t height, uint32_t samples_per_pixel, s * "ray_direction" to render the direction that the ray was traced in world space, * "position" for rendering out the world space position of the path vertex, * "normal" for rendering out the world space normal of the path vertex, + * "tangent" for rendering out the world space tangent of the path vertex, * "entity_id" for rendering out the entity ID whose surface the path vertex hit, * "base_color" for rendering out the surface base color, * "texture_coordinates" for rendering out the texture coordinates of the hit surface, diff --git a/include/nvisii/texture.h b/include/nvisii/texture.h index 107cb231..6bb64faa 100644 --- a/include/nvisii/texture.h +++ b/include/nvisii/texture.h @@ -209,6 +209,9 @@ class Texture : public StaticFactory /** @returns True if the texture is represented linearly. Otherwise, the texture is in sRGB space */ bool isLinear(); + /** @param is_linear If True, texels will be interpreted as linear space. 
Otherwise, the texels will be interpreted as sRGB space */ + void setLinear(bool is_linear); + private: /* TODO */ static std::shared_ptr editMutex; diff --git a/src/nvisii/devicecode/launch_params.h b/src/nvisii/devicecode/launch_params.h index e6142f07..5da1324b 100644 --- a/src/nvisii/devicecode/launch_params.h +++ b/src/nvisii/devicecode/launch_params.h @@ -112,7 +112,8 @@ enum RenderDataFlags : uint32_t { RAY_DIRECTION = 18, HEATMAP = 19, TEXTURE_COORDINATES = 20, - DEVICE_ID = 21 + DEVICE_ID = 21, + TANGENT = 22 }; #define MAX_LIGHT_SAMPLES 10 diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index 34abd24b..0e0f154a 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -592,16 +592,19 @@ void initializeRenderData(float3 &renderData) auto &LP = optixLaunchParams; // these might change in the future... if (LP.renderDataMode == RenderDataFlags::NONE) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(0.0f); } else if (LP.renderDataMode == RenderDataFlags::DEPTH) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(-FLT_MAX); } else if (LP.renderDataMode == RenderDataFlags::POSITION) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(-FLT_MAX); } else if (LP.renderDataMode == RenderDataFlags::NORMAL) { - renderData = make_float3(FLT_MAX); + renderData = make_float3(0.0f); + } + else if (LP.renderDataMode == RenderDataFlags::TANGENT) { + renderData = make_float3(0.0f); } else if (LP.renderDataMode == RenderDataFlags::SCREEN_SPACE_NORMAL) { renderData = make_float3(0.0f); @@ -699,7 +702,7 @@ __device__ void saveGeometricRenderData( float3 &renderData, int bounce, float depth, - float3 w_p, float3 w_n, float3 w_o, float2 uv, + float3 w_p, float3 w_n, float3 w_x, float3 w_o, float2 uv, int entity_id, float3 diffuse_mvec, float time, DisneyMaterial &mat) { @@ -716,6 +719,9 @@ void saveGeometricRenderData( else if (LP.renderDataMode == 
RenderDataFlags::NORMAL) { renderData = w_n; } + else if (LP.renderDataMode == RenderDataFlags::TANGENT) { + renderData = w_x; + } else if (LP.renderDataMode == RenderDataFlags::SCREEN_SPACE_NORMAL) { glm::quat r0 = glm::quat_cast(LP.viewT0); glm::quat r1 = glm::quat_cast(LP.viewT1); @@ -945,7 +951,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() v_z = make_float3(normalize(nxfm * make_vec3(v_z))); v_x = make_float3(normalize(nxfm * make_vec3(v_x))); v_y = cross(v_z, v_x); - v_x = cross(v_y, v_z); + // v_x = cross(v_y, v_z); if (LP.renderDataMode != RenderDataFlags::NONE) { glm::mat4 xfmt0 = to_mat4(payload.localToWorldT0); @@ -1028,7 +1034,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } // For segmentations, save geometric metadata - saveGeometricRenderData(renderData, depth, payload.tHit, hit_p, v_z, w_o, uv, entityID, diffuseMotion, time, mat); + saveGeometricRenderData(renderData, depth, payload.tHit, hit_p, v_z, v_x, w_o, uv, entityID, diffuseMotion, time, mat); if (depth == 0) { primaryAlbedo = mat.base_color; primaryNormal = v_z; @@ -1470,7 +1476,18 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() else { // Override framebuffer output if user requested to render metadata accum_illum = make_float3(renderData.x, renderData.y, renderData.z); + if (isnan(renderData.x) || isnan(renderData.y) || isnan(renderData.z) || + isinf(renderData.x) || isinf(renderData.y) || isinf(renderData.z) || + isnan(prev_color.x) || isnan(prev_color.y) || isnan(prev_color.z) || + isinf(prev_color.x) || isinf(prev_color.y) || isinf(prev_color.z)) { + accum_illum = make_float3(0.f, 0.f, 0.f); + prev_color = make_float4(0.f, 0.f, 0.f, 1.f); + } accum_color = make_float4((accum_illum + float(LP.frameID) * make_float3(prev_color)) / float(LP.frameID + 1), 1.0f); + + if (debug) { + printf("output: %f %f %f\n", accum_color.x, accum_color.y, accum_color.z); + } } diff --git a/src/nvisii/mesh.cpp b/src/nvisii/mesh.cpp index bf53228d..83447ad5 100644 --- a/src/nvisii/mesh.cpp +++ b/src/nvisii/mesh.cpp @@ -211,6 +211,8 @@ void 
Mesh::loadData( uint32_t position_dimensions, std::vector &normals_, uint32_t normal_dimensions, + std::vector &tangents_, + uint32_t tangent_dimensions, std::vector &colors_, uint32_t color_dimensions, std::vector &texcoords_, @@ -219,6 +221,7 @@ void Mesh::loadData( ) { bool readingNormals = normals_.size() > 0; + bool readingTangents = tangents_.size() > 0; bool readingColors = colors_.size() > 0; bool readingTexCoords = texcoords_.size() > 0; bool readingIndices = indices_.size() > 0; @@ -228,6 +231,9 @@ void Mesh::loadData( if ((normal_dimensions != 3) && (normal_dimensions != 4)) throw std::runtime_error( std::string("Error, invalid normal dimensions. Possible normal dimensions are 3 or 4.")); + + if ((tangent_dimensions != 3) && (tangent_dimensions != 4)) + throw std::runtime_error( std::string("Error, invalid tangent dimensions. Possible tangent dimensions are 3 or 4.")); if ((color_dimensions != 3) && (color_dimensions != 4)) throw std::runtime_error( std::string("Error, invalid color dimensions. Possible color dimensions are 3 or 4.")); @@ -247,6 +253,9 @@ void Mesh::loadData( if (readingNormals && ((normals_.size() / normal_dimensions) != (positions_.size() / position_dimensions))) throw std::runtime_error( std::string("Error, length mismatch. Total normals: " + std::to_string(normals_.size() / normal_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); + if (readingTangents && ((tangents_.size() / tangent_dimensions) != (positions_.size() / position_dimensions))) + throw std::runtime_error( std::string("Error, length mismatch. Total tangents: " + std::to_string(tangents_.size() / tangent_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); + if (readingColors && ((colors_.size() / color_dimensions) != (positions_.size() / position_dimensions))) throw std::runtime_error( std::string("Error, length mismatch. 
Total colors: " + std::to_string(colors_.size() / color_dimensions) + " does not equal total positions: " + std::to_string(positions_.size() / position_dimensions))); @@ -275,6 +284,12 @@ void Mesh::loadData( vertex.normal.z = normals_[i * normal_dimensions + 2]; vertex.normal.w = (normal_dimensions == 4) ? normals_[i * normal_dimensions + 3] : 0.f; } + if (readingTangents) { + vertex.tangent.x = tangents_[i * tangent_dimensions + 0]; + vertex.tangent.y = tangents_[i * tangent_dimensions + 1]; + vertex.tangent.z = tangents_[i * tangent_dimensions + 2]; + vertex.tangent.w = (tangent_dimensions == 4) ? tangents_[i * tangent_dimensions + 3] : 0.f; + } if (readingColors) { vertex.color.x = colors_[i * color_dimensions + 0]; vertex.color.y = colors_[i * color_dimensions + 1]; @@ -316,6 +331,7 @@ void Mesh::loadData( this->positions.resize(uniqueVertices.size()); this->colors.resize(uniqueVertices.size()); this->normals.resize(uniqueVertices.size()); + this->tangents.resize(uniqueVertices.size()); this->texCoords.resize(uniqueVertices.size()); for (int i = 0; i < uniqueVertices.size(); ++i) { @@ -323,6 +339,7 @@ void Mesh::loadData( this->positions[i] = {v.point.x, v.point.y, v.point.z}; this->colors[i] = v.color; this->normals[i] = v.normal; + this->tangents[i] = v.tangent; this->texCoords[i] = v.texcoord; } @@ -330,6 +347,10 @@ void Mesh::loadData( generateSmoothNormals(); } + if (!readingTangents) { + generateSmoothTangents(); + } + computeMetadata(); } @@ -1339,19 +1360,20 @@ Mesh* Mesh::createFromData( uint32_t position_dimensions, std::vector normals_, uint32_t normal_dimensions, + std::vector tangents_, + uint32_t tangent_dimensions, std::vector colors_, uint32_t color_dimensions, std::vector texcoords_, uint32_t texcoord_dimensions, std::vector indices_ ) { - auto create = [&positions_, position_dimensions, &normals_, normal_dimensions, + auto create = [&positions_, position_dimensions, &normals_, normal_dimensions, &tangents_, tangent_dimensions, &colors_, 
color_dimensions, &texcoords_, texcoord_dimensions, &indices_] (Mesh* mesh) { - mesh->loadData(positions_, position_dimensions, normals_, normal_dimensions, + mesh->loadData(positions_, position_dimensions, normals_, normal_dimensions, tangents_, tangent_dimensions, colors_, color_dimensions, texcoords_, texcoord_dimensions, indices_); - mesh->generateSmoothTangents(); dirtyMeshes.insert(mesh); }; diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 315ff35d..287065fb 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -2062,6 +2062,9 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra else if (option == std::string("normal")) { OptixData.LP.renderDataMode = RenderDataFlags::NORMAL; } + else if (option == std::string("tangent")) { + OptixData.LP.renderDataMode = RenderDataFlags::TANGENT; + } else if (option == std::string("entity_id")) { OptixData.LP.renderDataMode = RenderDataFlags::ENTITY_ID; } @@ -2766,6 +2769,9 @@ void __test__(std::vector args) { else if (option == std::string("normal")) { OptixData.LP.renderDataMode = RenderDataFlags::NORMAL; } + else if (option == std::string("tangent")) { + OptixData.LP.renderDataMode = RenderDataFlags::TANGENT; + } else if (option == std::string("entity_id")) { OptixData.LP.renderDataMode = RenderDataFlags::ENTITY_ID; } diff --git a/src/nvisii/nvisii_import_scene.cpp b/src/nvisii/nvisii_import_scene.cpp index 1cebd732..aed71804 100644 --- a/src/nvisii/nvisii_import_scene.cpp +++ b/src/nvisii/nvisii_import_scene.cpp @@ -255,9 +255,7 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu } } } - - - + // todo, add texture paths to map above, load later and connect if (material->GetTextureCount(aiTextureType_DIFFUSE) > 0) { if (material->GetTexture(aiTextureType_DIFFUSE, 0, &Path, NULL, NULL, NULL, NULL, NULL) == AI_SUCCESS) { @@ -281,6 +279,10 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu std::string path = 
directory + "/" + std::string(Path.C_Str()); std::replace(path.begin(), path.end(), '\\', '/'); if (texture_map[path]) mat->setNormalMapTexture(texture_map[path]); + if (!texture_map[path]->isLinear()) { + if (verbose) std::cout<<"WARNING: normal map texture " << path << " not marked as linear! Forcing texture into linear mode..." << std::endl; + texture_map[path]->setLinear(true); + } } } @@ -323,11 +325,13 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu auto &aiMesh = scene->mMeshes[meshIdx]; auto &aiVertices = aiMesh->mVertices; auto &aiNormals = aiMesh->mNormals; + auto &aiTangents = aiMesh->mTangents; auto &aiFaces = aiMesh->mFaces; auto &aiTextureCoords = aiMesh->mTextureCoords; std::vector positions; std::vector normals; + std::vector tangents; std::vector texCoords; std::vector indices; @@ -347,6 +351,9 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu if (!aiMesh->HasNormals()) { if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no normals" << std::endl; } + if (!aiMesh->HasTangentsAndBitangents()) { + if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no tangents" << std::endl; + } if (!aiMesh->HasTextureCoords(0)) { if (verbose) std::cout<<"\tWARNING: mesh " << meshName << " has no texture coordinates" << std::endl; } @@ -366,6 +373,12 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu v.normal.y = normal.y; v.normal.z = normal.z; } + if (aiMesh->HasTangentsAndBitangents()) { + auto tangent = aiTangents[vid]; + v.tangent.x = tangent.x; + v.tangent.y = tangent.y; + v.tangent.z = tangent.z; + } if (aiMesh->HasTextureCoords(0)) { // just try to take the first texcoord auto texCoord = aiTextureCoords[0][vid]; @@ -378,6 +391,9 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu normals.push_back(v.normal.x); normals.push_back(v.normal.y); normals.push_back(v.normal.z); + 
tangents.push_back(v.tangent.x); + tangents.push_back(v.tangent.y); + tangents.push_back(v.tangent.z); texCoords.push_back(v.texcoord.x); texCoords.push_back(v.texcoord.y); } @@ -408,6 +424,7 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu meshName, positions, 3, normals, 3, + tangents, 3, /*colors*/{}, 3, texCoords, 2, indices @@ -419,33 +436,6 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu } } - // load lights - for (uint32_t lightIdx = 0; lightIdx < scene->mNumLights; ++lightIdx) { - auto light = scene->mLights[lightIdx]; - if (verbose) { - std::cout<<"Found light: " << std::string(light->mName.C_Str()) << std::endl; - if (light->mType == aiLightSource_DIRECTIONAL) { - std::cout<<"Directional"<mType == aiLightSource_POINT) { - std::cout<<"Point"<mType == aiLightSource_SPOT) { - std::cout<<"Spot"<mType == aiLightSource_AMBIENT) { - std::cout<<"Ambient"<mType == aiLightSource_AREA) { - std::cout<<"Area"<mNumCameras; ++cameraIdx) { - auto camera = scene->mCameras[cameraIdx]; - if (verbose) { - std::cout<<"Found camera: " << std::string(camera->mName.C_Str()) << std::endl; - } - } - std::function addNode; addNode = [&scene, &nvisiiScene, &material_light_map, &addNode, position, rotation, scale, verbose] (aiNode* node, Transform* parentTransform, int level) @@ -516,8 +506,67 @@ Scene importScene(std::string path, glm::vec3 position, glm::vec3 scale, glm::qu for (uint32_t cid = 0; cid < node->mNumChildren; ++cid) addNode(node->mChildren[cid], transform, level+1); }; - addNode(scene->mRootNode, nullptr, 0); + + + // load lights + for (uint32_t lightIdx = 0; lightIdx < scene->mNumLights; ++lightIdx) { + auto &aiLight = scene->mLights[lightIdx]; + std::string lightName = std::string(aiLight->mName.C_Str()); + // if (verbose) + { + if (verbose) std::cout<<"Found light: " << lightName << std::endl; + if (aiLight->mType == aiLightSource_DIRECTIONAL) { + if (verbose) std::cout<<"Directional"<mType == 
aiLightSource_POINT) { + if (verbose) std::cout<<"Point"<setTransform(Transform::get(lightName)); + if (verbose) std::cout<< std::string(1, '\t') << "transform: " << lightName <setLight(light); + light->setColor({aiLight->mColorDiffuse.r, aiLight->mColorDiffuse.g, aiLight->mColorDiffuse.b}); + + nvisiiScene.entities.push_back(entity); + } else if (aiLight->mType == aiLightSource_SPOT) { + std::cout<<"Spot"<mType == aiLightSource_AMBIENT) { + std::cout<<"Ambient"<mType == aiLightSource_AREA) { + std::cout<<"Area"<mNumCameras; ++cameraIdx) { + auto camera = scene->mCameras[cameraIdx]; + if (verbose) { + std::cout<<"Found camera: " << std::string(camera->mName.C_Str()) << std::endl; + } + } + aiReleaseImport(scene); if (verbose) std::cout<<"Done!"< Date: Sun, 23 May 2021 11:39:29 -0600 Subject: [PATCH 28/55] removing debug print in device code --- src/nvisii/devicecode/path_tracer.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index 0e0f154a..4b5c896a 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -1485,9 +1485,9 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } accum_color = make_float4((accum_illum + float(LP.frameID) * make_float3(prev_color)) / float(LP.frameID + 1), 1.0f); - if (debug) { - printf("output: %f %f %f\n", accum_color.x, accum_color.y, accum_color.z); - } + // if (debug) { + // printf("output: %f %f %f\n", accum_color.x, accum_color.y, accum_color.z); + // } } From ef1880aa118cb46f180a4791ee70e4471d0de644 Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 22 Sep 2021 14:53:10 -0600 Subject: [PATCH 29/55] adding support for alpha transparent backgrounds --- include/nvisii/nvisii.h | 22 +++---- src/externals/glfw_implementation/glfw.cpp | 1 + src/nvisii/devicecode/launch_params.h | 2 +- src/nvisii/devicecode/path_tracer.cu | 52 ++++++++++------ src/nvisii/nvisii.cpp | 72 +++++----------------- 5 files changed, 59 
insertions(+), 90 deletions(-) diff --git a/include/nvisii/nvisii.h b/include/nvisii/nvisii.h index 635ad212..3a58f2ed 100644 --- a/include/nvisii/nvisii.h +++ b/include/nvisii/nvisii.h @@ -124,9 +124,10 @@ void setDomeLightExposure(float exposure); /** * Sets the color which this dome light will emit. * - * @param The RGB color emitted that this dome light should emit. + * @param color The RGB color emitted that this dome light should emit. + * @param alpha The alpha transparency to use for the background if hit by primary rays. */ -void setDomeLightColor(glm::vec3 color); +void setDomeLightColor(glm::vec3 color, float alpha = 1.0f); /** * Configures the procedural sky for the dome light (aka the environment). @@ -137,18 +138,21 @@ void setDomeLightColor(glm::vec3 color); * @param atmosphere_thickness effects Rayleigh scattering. Thin atmospheres look more * like space, and thick atmospheres see more Rayleigh scattering. * @param saturation causes the sky to appear more or less "vibrant" + * @param alpha The alpha transparency to use for the background if hit by primary rays. */ void setDomeLightSky( glm::vec3 sun_position, glm::vec3 sky_tint = vec3(.5f, .5f, .5f), float atmosphere_thickness = 1.0f, - float saturation = 1.0f); + float saturation = 1.0f, + float alpha = 1.0f); /** * Sets the texture used to color the dome light (aka the environment). * Textures are sampled using a 2D to 3D latitude/longitude strategy. * - * @param texture The texture to sample for the dome light. + * @param texture The texture to sample for the dome light. Alpha channel values + * effect alpha transparency of background for primary rays. * @param enable_cdf If True, reduces noise of sampling a dome light texture, * but at the expense of frame rate. Useful for dome lights with bright lights * that should cast shadows. 
@@ -269,16 +273,6 @@ void configureDenoiser(bool use_albedo_guide = true, bool use_normal_guide = tru */ std::vector render(uint32_t width, uint32_t height, uint32_t samples_per_pixel, uint32_t seed = 0); -/** - * Deprecated. Please use renderToFile. -*/ -void renderToHDR(uint32_t width, uint32_t height, uint32_t samples_per_pixel, std::string image_path, uint32_t seed = 0); - -/** - * Deprecated. Please use renderToFile. -*/ -void renderToPNG(uint32_t width, uint32_t height, uint32_t samples_per_pixel, std::string image_path, uint32_t seed = 0); - /** * Renders the current scene, saving the resulting framebuffer to an image on disk. * diff --git a/src/externals/glfw_implementation/glfw.cpp b/src/externals/glfw_implementation/glfw.cpp index 2753c195..d2564e9f 100644 --- a/src/externals/glfw_implementation/glfw.cpp +++ b/src/externals/glfw_implementation/glfw.cpp @@ -103,6 +103,7 @@ namespace Libraries { glfwWindowHint(GLFW_DECORATED, (decorated) ? GLFW_TRUE : GLFW_FALSE); glfwWindowHint(GLFW_RESIZABLE, (resizable) ? GLFW_TRUE : GLFW_FALSE); glfwWindowHint(GLFW_FLOATING, (floating) ? 
GLFW_TRUE : GLFW_FALSE); + glfwWindowHint(GLFW_TRANSPARENT_FRAMEBUFFER, GLFW_TRUE); // glfwWindowHint( GLFW_DOUBLEBUFFER, GL_FALSE ); Window window = {}; diff --git a/src/nvisii/devicecode/launch_params.h b/src/nvisii/devicecode/launch_params.h index 5da1324b..4587d379 100644 --- a/src/nvisii/devicecode/launch_params.h +++ b/src/nvisii/devicecode/launch_params.h @@ -32,7 +32,7 @@ struct LaunchParams { OptixTraversableHandle IAS; float domeLightIntensity = 1.f; float domeLightExposure = 0.f; - glm::vec3 domeLightColor = glm::vec3(-1.f); + glm::vec4 domeLightColor = glm::vec4(-1.f); float directClamp = 100.f; float indirectClamp = 100.f; uint32_t maxDiffuseDepth = 2; diff --git a/src/nvisii/devicecode/path_tracer.cu b/src/nvisii/devicecode/path_tracer.cu index 4b5c896a..a8837429 100644 --- a/src/nvisii/devicecode/path_tracer.cu +++ b/src/nvisii/devicecode/path_tracer.cu @@ -89,25 +89,27 @@ cudaTextureObject_t getEnvironmentTexture() } inline __device__ -float3 missColor(const float3 n_dir, cudaTextureObject_t &tex) +float4 missColor(const float3 n_dir, cudaTextureObject_t &tex) { auto &LP = optixLaunchParams; vec3 rayDir = LP.environmentMapRotation * make_vec3(n_dir); if (tex) { vec2 tc = toUV(vec3(rayDir.x, rayDir.y, rayDir.z)); - float4 texColor = tex2D(tex, tc.x,tc.y); - return make_float3(texColor); + return tex2D(tex, tc.x,tc.y); } - if (glm::any(glm::greaterThanEqual(LP.domeLightColor, glm::vec3(0.f)))) return make_float3(LP.domeLightColor); + // If none of the background color channels are negative, return that dome light color. + if (glm::any(glm::greaterThanEqual(LP.domeLightColor, glm::vec4(0.f)))) return make_float4(LP.domeLightColor); + + // otherwise, we found a negative value, so revert to some default interpolated background value. 
float t = 0.5f*(rayDir.z + 1.0f); float3 c = (1.0f - t) * make_float3(pow(vec3(1.0f), vec3(2.2f))) + t * make_float3( pow(vec3(0.5f, 0.7f, 1.0f), vec3(2.2f)) ); - return c; + return make_float4(c, 1.f); } inline __device__ -float3 missColor(const owl::Ray &ray, cudaTextureObject_t &tex) +float4 missColor(const owl::Ray &ray, cudaTextureObject_t &tex) { return missColor(ray.direction, tex); } @@ -843,6 +845,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() float3 renderData = make_float3(0.f); float3 primaryAlbedo = make_float3(0.f); float3 primaryNormal = make_float3(0.f); + float primaryAlpha = 1.f; initializeRenderData(renderData); uint8_t depth = 0; @@ -875,13 +878,20 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() if (payload.tHit <= 0.f) { // Compute lighting from environment if (depth == 0) { - float3 col = missColor(ray, envTex); - illum = illum + pathThroughput * (col * LP.domeLightIntensity); + // don't account for exposure if this is a primary ray. + // (note, this is not physically correct... however, it makes directly visible lights more artist directable. ) + // (todo, account for lights visible through specular chains...) + // Also store primary albedo for denoising + float4 col = missColor(ray, envTex); + illum = illum + pathThroughput * (make_float3(col) * LP.domeLightIntensity); directIllum = illum; - primaryAlbedo = col; + primaryAlbedo = make_float3(col); + primaryAlpha = col.w; + } + else if (enableDomeSampling){ + // else account for exposure of the dome light in addition to intensity. 
+ illum = illum + pathThroughput * (make_float3(missColor(ray, envTex)) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); } - else if (enableDomeSampling) - illum = illum + pathThroughput * (missColor(ray, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); const float envDist = 10000.0f; // large value /* Compute miss motion vector */ @@ -1199,7 +1209,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() } numTris = 1.f; - lightEmission = (missColor(lightDir, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); + lightEmission = (make_float3(missColor(lightDir, envTex)) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure)); } // sample light sources else @@ -1340,7 +1350,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() if ((payload.instanceID == -1) && (sampledLightID == -1) && enableDomeSampling) { // Case where we hit the background, and also previously sampled the background float w = power_heuristic(1.f, bsdfPDF, 1.f, lightPDF); - float3 lightEmission = missColor(ray, envTex) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure); + float3 lightEmission = make_float3(missColor(ray, envTex)) * LP.domeLightIntensity * pow(2.f, LP.domeLightExposure); float3 Li = (lightEmission * w) / bsdfPDF; if (dotNWi > 0.f) { @@ -1471,7 +1481,13 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() if (LP.renderDataMode == RenderDataFlags::NONE) { - accum_color = make_float4((accum_illum + float(LP.frameID) * make_float3(prev_color)) / float(LP.frameID + 1), 1.0f); + // accum_color = (make_float4(accum_illum, primaryAlpha) + float(LP.frameID) * prev_color) / float(LP.frameID + 1); + float3 c = (accum_illum + float(LP.frameID) * make_float3(prev_color)) / float(LP.frameID + 1); + float a = (primaryAlpha + float(LP.frameID) * prev_color.w) / float(LP.frameID + 1); + accum_color.x = c.x; + accum_color.y = c.y; + accum_color.z = c.z; + accum_color.w = a; } else { // Override framebuffer output if user requested to render metadata @@ -1483,11 +1499,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() 
accum_illum = make_float3(0.f, 0.f, 0.f); prev_color = make_float4(0.f, 0.f, 0.f, 1.f); } - accum_color = make_float4((accum_illum + float(LP.frameID) * make_float3(prev_color)) / float(LP.frameID + 1), 1.0f); - - // if (debug) { - // printf("output: %f %f %f\n", accum_color.x, accum_color.y, accum_color.z); - // } + accum_color = make_float4( (accum_illum + float(LP.frameID) * make_float3(prev_color)) / float(LP.frameID + 1), 1.0f); } @@ -1496,7 +1508,7 @@ OPTIX_RAYGEN_PROGRAM(rayGen)() vec4 oldNormal = make_vec4(prev_normal); if (any(isnan(oldAlbedo))) oldAlbedo = vec4(0.f); if (any(isnan(oldNormal))) oldNormal = vec4(0.f); - vec4 newAlbedo = vec4(primaryAlbedo.x, primaryAlbedo.y, primaryAlbedo.z, 1.f); + vec4 newAlbedo = vec4(primaryAlbedo.x, primaryAlbedo.y, primaryAlbedo.z, primaryAlpha); vec4 accumAlbedo = (newAlbedo + float(LP.frameID) * oldAlbedo) / float(LP.frameID + 1); vec4 newNormal = vec4(make_vec3(primaryNormal), 1.f); if (!all(equal(make_vec3(primaryNormal), vec3(0.f, 0.f, 0.f)))) { diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 94c8ed14..2b956277 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -519,7 +519,7 @@ void initializeOptix(bool headless) { "instanceToEntity", OWL_BUFFER, OWL_OFFSETOF(LaunchParams, instanceToEntity)}, { "domeLightIntensity", OWL_USER_TYPE(float), OWL_OFFSETOF(LaunchParams, domeLightIntensity)}, { "domeLightExposure", OWL_USER_TYPE(float), OWL_OFFSETOF(LaunchParams, domeLightExposure)}, - { "domeLightColor", OWL_USER_TYPE(glm::vec3), OWL_OFFSETOF(LaunchParams, domeLightColor)}, + { "domeLightColor", OWL_USER_TYPE(glm::vec4), OWL_OFFSETOF(LaunchParams, domeLightColor)}, { "directClamp", OWL_USER_TYPE(float), OWL_OFFSETOF(LaunchParams, directClamp)}, { "indirectClamp", OWL_USER_TYPE(float), OWL_OFFSETOF(LaunchParams, indirectClamp)}, { "maxDiffuseDepth", OWL_USER_TYPE(uint32_t), OWL_OFFSETOF(LaunchParams, maxDiffuseDepth)}, @@ -892,13 +892,15 @@ void setDomeLightExposure(float exposure) 
resetAccumulation(); } -void setDomeLightColor(vec3 color) +void setDomeLightColor(vec3 color, float alpha) { clearDomeLightTexture(); - color.r = glm::max(0.f, glm::min(color.r, 1.f)); - color.g = glm::max(0.f, glm::min(color.g, 1.f)); - color.b = glm::max(0.f, glm::min(color.b, 1.f)); - OptixData.LP.domeLightColor = color; + vec4 c; + c.r = glm::max(0.f, glm::min(color.r, 1.f)); + c.g = glm::max(0.f, glm::min(color.g, 1.f)); + c.b = glm::max(0.f, glm::min(color.b, 1.f)); + c.a = glm::max(0.f, glm::min(alpha, 1.f)); + OptixData.LP.domeLightColor = c; resetAccumulation(); } @@ -938,9 +940,9 @@ vec3 toPolar(vec2 uv) return n; } -void setDomeLightSky(vec3 sunPos, vec3 skyTint, float atmosphereThickness, float saturation) +void setDomeLightSky(vec3 sunPos, vec3 skyTint, float atmosphereThickness, float saturation, float alpha) { - enqueueCommand([sunPos, skyTint, atmosphereThickness, saturation] () { + enqueueCommand([sunPos, skyTint, atmosphereThickness, saturation, alpha] () { /* Generate procedural sky */ uint32_t width = 1024/2; uint32_t height = 512/2; @@ -950,7 +952,7 @@ void setDomeLightSky(vec3 sunPos, vec3 skyTint, float atmosphereThickness, float glm::vec2 uv = glm::vec2(x / float(width), y / float(height)); glm::vec3 dir = toPolar(uv); glm::vec3 c = ProceduralSkybox(glm::vec3(dir.x, -dir.z, dir.y), glm::vec3(sunPos.x, sunPos.z, sunPos.y), skyTint, atmosphereThickness, saturation); - texels[x + y * width] = glm::vec4(c.r, c.g, c.b, 1.0f); + texels[x + y * width] = glm::vec4(c.r, c.g, c.b, alpha); } } @@ -1942,7 +1944,7 @@ std::vector render(uint32_t width, uint32_t height, uint32_t samplesPerPi auto glfw = Libraries::GLFW::Get(); glfw->poll_events(); glfw->swap_buffers("NVISII"); - glClearColor(1,1,1,1); + glClearColor(0,0,0,0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); } @@ -2111,7 +2113,7 @@ std::vector renderData(uint32_t width, uint32_t height, uint32_t startFra auto glfw = Libraries::GLFW::Get(); glfw->poll_events(); 
glfw->swap_buffers("NVISII"); - glClearColor(1,1,1,1); + glClearColor(0,0,0,0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); } @@ -2207,19 +2209,6 @@ void renderDataToFile(uint32_t width, uint32_t height, uint32_t startFrame, uint } } -static bool renderToHDRDeprecatedShown = false; -void renderToHDR(uint32_t width, uint32_t height, uint32_t samplesPerPixel, std::string imagePath, uint32_t seed) -{ - if (renderToHDRDeprecatedShown == false) { - std::cout<<"Warning, render_to_hdr is deprecated and will be removed in a subsequent release. Please switch to render_to_file." << std::endl; - renderToHDRDeprecatedShown = true; - } - - std::vector fb = render(width, height, samplesPerPixel, seed); - stbi_flip_vertically_on_write(true); - stbi_write_hdr(imagePath.c_str(), width, height, /* num channels*/ 4, fb.data()); -} - float linearToSRGB(float x) { if (x <= 0.0031308f) { return 12.92f * x; @@ -2248,36 +2237,6 @@ vec3 Uncharted2Tonemap(vec3 x) return max(vec3(0.0f), ((x*(A*x+C*B)+D*E_)/(x*(A*x+B)+D*F))-E_/F); } -static bool renderToPNGDeprecatedShown = false; -void renderToPNG(uint32_t width, uint32_t height, uint32_t samplesPerPixel, std::string imagePath, uint32_t seed) -{ - if (renderToPNGDeprecatedShown == false) { - std::cout<<"Warning, render_to_png is deprecated and will be removed in a subsequent release. Please switch to render_to_file." 
<< std::endl; - renderToPNGDeprecatedShown = true; - } - - // float exposure = 2.f; // TODO: expose as a parameter - - std::vector fb = render(width, height, samplesPerPixel, seed); - std::vector colors(4 * width * height); - for (size_t i = 0; i < (width * height); ++i) { - vec3 color = vec3(fb[i * 4 + 0], fb[i * 4 + 1], fb[i * 4 + 2]); - float alpha = fb[i * 4 + 3]; - - // color = Uncharted2Tonemap(color * exposure); - // color = color * (1.0f / Uncharted2Tonemap(vec3(11.2f))); - - color = glm::convertLinearToSRGB(color); - - colors[i * 4 + 0] = uint8_t(glm::clamp(color.r * 255.f, 0.f, 255.f)); - colors[i * 4 + 1] = uint8_t(glm::clamp(color.g * 255.f, 0.f, 255.f)); - colors[i * 4 + 2] = uint8_t(glm::clamp(color.b * 255.f, 0.f, 255.f)); - colors[i * 4 + 3] = uint8_t(glm::clamp(alpha * 255.f, 0.f, 255.f)); - } - stbi_flip_vertically_on_write(true); - stbi_write_png(imagePath.c_str(), width, height, /* num channels*/ 4, colors.data(), /* stride in bytes */ width * 4); -} - void renderToFile(uint32_t width, uint32_t height, uint32_t samplesPerPixel, std::string imagePath, uint32_t seed) { std::vector fb = render(width, height, samplesPerPixel, seed); @@ -2394,12 +2353,15 @@ void initializeInteractive( int numGPUs = owlGetDeviceCount(OptixData.context); + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + while (!stopped) { /* Poll events from the window */ glfw->poll_events(); glfw->swap_buffers("NVISII"); - glClearColor(1,1,1,1); + glClearColor(0,0,0,0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); if (NVISII.callback && NVISII.callbackMutex.try_lock()) { From 52b4655105924dd5280162f83c813b605243fae8 Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 27 Oct 2021 15:33:32 -0600 Subject: [PATCH 30/55] removing lazy_updates from initialize. Removing deprecated initialize_headless and initialize_interactive methods. 
Improving functionality of enable_updates and disable_updates --- ...15.camera_motion_car_blur.py => 15.car.py} | 28 +++------ include/nvisii/nvisii.h | 32 ---------- src/nvisii/nvisii.cpp | 63 +++++++++---------- 3 files changed, 39 insertions(+), 84 deletions(-) rename examples/{15.camera_motion_car_blur.py => 15.car.py} (90%) diff --git a/examples/15.camera_motion_car_blur.py b/examples/15.car.py similarity index 90% rename from examples/15.camera_motion_car_blur.py rename to examples/15.car.py index 4afab621..0f3a7746 100644 --- a/examples/15.camera_motion_car_blur.py +++ b/examples/15.car.py @@ -21,13 +21,9 @@ dome = nvisii.texture.create_from_file("dome", "content/teatro_massimo_2k.hdr") # we can add HDR images to act as dome -nvisii.set_dome_light_texture(dome) +nvisii.set_dome_light_texture(dome, enable_cdf=True) nvisii.set_dome_light_rotation(nvisii.angleAxis(nvisii.pi() * .5, nvisii.vec3(0, 0, 1))) -car_speed = 0 -car_speed_x = car_speed -car_speed_y = -2 * car_speed - camera_height = 80 # # # # # # # # # # # # # # # # # # # # # # # # # @@ -46,20 +42,9 @@ camera.get_transform().look_at( at = nvisii.vec3(-50,0,camera_height) , # look at (world coordinate) up = nvisii.vec3(0,0,1), # up vector - eye = nvisii.vec3(-500,500,100 + camera_height), - previous = False -) - -camera.get_transform().look_at( - at = nvisii.vec3(-50,0,camera_height) + nvisii.vec3(car_speed_x, car_speed_y, .0) , # look at (world coordinate) - up = nvisii.vec3(0,0,1), # up vector - eye = nvisii.vec3(-500,500,100 + camera_height), - previous = True + eye = nvisii.vec3(-500,500,100 + camera_height) ) -camera.get_camera().set_aperture_diameter(5000) -camera.get_camera().set_focal_distance(500) - nvisii.set_camera_entity(camera) floor = nvisii.entity.create( @@ -71,7 +56,7 @@ floor.get_transform().set_scale(nvisii.vec3(10000)) floor.get_transform().set_position(nvisii.vec3(0, 0, -5)) floor.get_material().set_base_color(nvisii.vec3(1.0)) -floor.get_material().set_roughness(1) 
+floor.get_material().set_roughness(0) floor.get_material().set_specular(0) # # # # # # # # # # # # # # # # # # # # # # # # # @@ -102,8 +87,6 @@ # print(s.get_name()) # if 'car' in s.get_name(): # print(s.get_name()) - s.get_transform().set_linear_velocity(nvisii.vec3(car_speed_x, car_speed_y, .0)) - print(s.get_name()) if "carshell" in s.get_name().lower(): s.get_material().set_clearcoat(1) @@ -137,6 +120,11 @@ s.get_material().set_transmission(1) s.get_material().set_roughness(0) s.get_material().set_metallic(0) + s.set_visibility(shadow = False) + + if "interior" in s.get_name().lower(): + s.get_material().set_base_color((1,1,1)) + # elif 'light' in s.get_name().lower(): # print(s.get_name()) # s.set_light(nvisii.light.create('light' + str(i_s))) diff --git a/include/nvisii/nvisii.h b/include/nvisii/nvisii.h index 3a58f2ed..54e1caf5 100644 --- a/include/nvisii/nvisii.h +++ b/include/nvisii/nvisii.h @@ -10,42 +10,11 @@ namespace nvisii { -/** - * Deprecated. Please use initialize() instead. -*/ -void initializeInteractive( - bool window_on_top = false, - bool verbose = false, - uint32_t max_entities = 10000, - uint32_t max_cameras = 10, - uint32_t max_transforms = 10000, - uint32_t max_meshes = 10000, - uint32_t max_materials = 10000, - uint32_t max_lights = 100, - uint32_t max_textures = 1000, - uint32_t max_volumes = 1000); - -/** - * Deprecated. Please use initialize(headless = True) instead. -*/ -void initializeHeadless( - bool verbose = false, - uint32_t max_entities = 10000, - uint32_t max_cameras = 10, - uint32_t max_transforms = 10000, - uint32_t max_meshes = 10000, - uint32_t max_materials = 10000, - uint32_t max_lights = 100, - uint32_t max_textures = 1000, - uint32_t max_volumes = 1000); - /** * Initializes various backend systems required to render scene data. * * @param headless If true, avoids using any OpenGL resources, to enable use on systems without displays. 
* @param window_on_top Keeps the window opened during an interactive session on top of any other windows. (assuming headless is False) - * @param lazy_updates If True, nvisii will only upload components to the GPU on call to - * render/render_to_png/render_data for better scene editing performance. (assuming headless is False. Always on when headless is True) * @param verbose If false, nvisii will avoid outputing any unneccessary text * @param max_entities The max number of creatable Entity components. * @param max_cameras The max number of creatable Camera components. @@ -58,7 +27,6 @@ void initializeHeadless( void initialize( bool headless = false, bool window_on_top = false, - bool lazy_updates = false, bool verbose = false, uint32_t max_entities = 10000, uint32_t max_cameras = 10, diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 2b956277..2abfc2a1 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -61,7 +61,7 @@ std::promise exitSignal; std::thread renderThread; static bool initialized = false; static bool stopped = true; -static bool lazyUpdatesEnabled = false; +static bool paused = false; static bool verbose = true; static struct WindowData { @@ -2360,20 +2360,21 @@ void initializeInteractive( { /* Poll events from the window */ glfw->poll_events(); - glfw->swap_buffers("NVISII"); - glClearColor(0,0,0,0); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + if (!paused) { + glfw->swap_buffers("NVISII"); + glClearColor(0,0,0,0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - if (NVISII.callback && NVISII.callbackMutex.try_lock()) { - NVISII.callback(); - NVISII.callbackMutex.unlock(); - } + if (NVISII.callback && NVISII.callbackMutex.try_lock()) { + NVISII.callback(); + NVISII.callbackMutex.unlock(); + } - static double start=0; - static double stop=0; - start = glfwGetTime(); + static double start=0; + static double stop=0; + start = glfwGetTime(); - if (!lazyUpdatesEnabled) { updateFrameBuffer(); 
updateComponents(); updateLaunchParams(); @@ -2394,24 +2395,25 @@ void initializeInteractive( if (OptixData.enableDenoiser) { denoiseImage(); } + // glm::vec4* samplePtr = (glm::vec4*) owlBufferGetPointer(OptixData.accumBuffer,0); + // glm::vec4* mvecPtr = (glm::vec4*) owlBufferGetPointer(OptixData.mvecBuffer,0); + // glm::vec4* t0AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.scratchBuffer,0); + // glm::vec4* t1AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.albedoBuffer,0); + // glm::vec4* fbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); + // glm::vec4* sPtr = (glm::vec4*) owlBufferGetPointer(OptixData.normalBuffer,0); + // int width = OptixData.LP.frameSize.x; + // int height = OptixData.LP.frameSize.y; + // reproject(samplePtr, t0AlbPtr, t1AlbPtr, mvecPtr, sPtr, fbPtr, width, height); + + drawFrameBufferToWindow(); + stop = glfwGetTime(); + glfwSetWindowTitle(WindowData.window, std::to_string(1.f / (stop - start)).c_str()); + drawGUI(); } - // glm::vec4* samplePtr = (glm::vec4*) owlBufferGetPointer(OptixData.accumBuffer,0); - // glm::vec4* mvecPtr = (glm::vec4*) owlBufferGetPointer(OptixData.mvecBuffer,0); - // glm::vec4* t0AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.scratchBuffer,0); - // glm::vec4* t1AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.albedoBuffer,0); - // glm::vec4* fbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); - // glm::vec4* sPtr = (glm::vec4*) owlBufferGetPointer(OptixData.normalBuffer,0); - // int width = OptixData.LP.frameSize.x; - // int height = OptixData.LP.frameSize.y; - // reproject(samplePtr, t0AlbPtr, t1AlbPtr, mvecPtr, sPtr, fbPtr, width, height); - - drawFrameBufferToWindow(); - stop = glfwGetTime(); - glfwSetWindowTitle(WindowData.window, std::to_string(1.f / (stop - start)).c_str()); - drawGUI(); processCommandQueue(); checkForErrors(); + if (stopped) break; } @@ -2496,7 +2498,6 @@ void initializeHeadless( void initialize( bool headless, bool windowOnTop, - bool 
_lazyUpdatesEnabled, bool verbose, uint32_t maxEntities, uint32_t maxCameras, @@ -2507,12 +2508,10 @@ void initialize( uint32_t maxTextures, uint32_t maxVolumes) { - lazyUpdatesEnabled = _lazyUpdatesEnabled; // prevents deprecated warning from showing initializeInteractiveDeprecatedShown = true; initializeHeadlessDeprecatedShown = true; - lazyUpdatesEnabled = _lazyUpdatesEnabled; if (headless) initializeHeadless( verbose, maxEntities, maxCameras, maxTransforms, maxMeshes, @@ -2584,17 +2583,17 @@ void updateSceneAabb(Entity* entity) void enableUpdates() { - enqueueCommandAndWait([] () { lazyUpdatesEnabled = false; }); + enqueueCommandAndWait([] () { paused = false; }); } void disableUpdates() { - enqueueCommandAndWait([] () { lazyUpdatesEnabled = true; }); + enqueueCommandAndWait([] () { paused = true; }); } bool areUpdatesEnabled() { - return lazyUpdatesEnabled == false; + return paused == false; } #ifdef __unix__ From f1762f389aa623a0134fc8822ec286a3e6f76f64 Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 27 Oct 2021 15:50:14 -0600 Subject: [PATCH 31/55] improving the performance of transform updates --- include/nvisii/transform.h | 9 ----- src/nvisii/transform.cpp | 67 +++++++------------------------------- 2 files changed, 12 insertions(+), 64 deletions(-) diff --git a/include/nvisii/transform.h b/include/nvisii/transform.h index cdf3640f..65559326 100644 --- a/include/nvisii/transform.h +++ b/include/nvisii/transform.h @@ -78,15 +78,6 @@ class Transform : public StaticFactory static std::vector transformStructs; static std::map lookupTable; - /* Updates cached rotation values */ - void updateRotation(); - - /* Updates cached position values */ - void updatePosition(); - - /* Updates cached scale values */ - void updateScale(); - /* Updates cached final local to parent matrix values */ void updateMatrix(); diff --git a/src/nvisii/transform.cpp b/src/nvisii/transform.cpp index 96950ce5..754eda1c 100644 --- a/src/nvisii/transform.cpp +++ b/src/nvisii/transform.cpp 
@@ -423,7 +423,7 @@ void Transform::setRotation(quat newRotation, bool previous) if (previous) useRelativeAngularMotionBlur = false; auto &r = (previous) ? prevRotation : rotation; r = glm::normalize(newRotation); - updateRotation(); + updateMatrix(); markDirty(); } @@ -436,7 +436,7 @@ void Transform::addRotation(quat additionalRotation, bool previous) { if (previous) useRelativeAngularMotionBlur = false; setRotation(getRotation(previous) * additionalRotation, previous); - updateRotation(); + updateMatrix(); markDirty(); } @@ -445,21 +445,6 @@ void Transform::addAngleAxis(float angle, vec3 axis, bool previous) addRotation(glm::angleAxis(angle, axis), previous); } -void Transform::updateRotation() -{ - // localToParentRotation = glm::toMat4(rotation); - // parentToLocalRotation = glm::inverse(localToParentRotation); - - // if (useRelativeMotionBlur) { - // prevLocalToParentRotation = glm::toMat4(angularVelocity * rotation); - // } else { - // prevLocalToParentRotation = glm::toMat4(prevRotation); - // } - // prevParentToLocalRotation = glm::inverse(prevLocalToParentRotation); - updateMatrix(); - markDirty(); -} - vec3 Transform::getPosition(bool previous) { if (previous) return prevPosition; @@ -513,7 +498,7 @@ void Transform::setPosition(vec3 newPosition, bool previous) if (previous) useRelativeLinearMotionBlur = false; auto &p = (previous) ? 
prevPosition : position; p = newPosition; - updatePosition(); + updateMatrix(); markDirty(); } @@ -521,7 +506,7 @@ void Transform::addPosition(vec3 additionalPosition, bool previous) { if (previous) useRelativeLinearMotionBlur = false; setPosition(getPosition(previous) + additionalPosition, previous); - updatePosition(); + updateMatrix(); markDirty(); } @@ -531,7 +516,7 @@ void Transform::setLinearVelocity(vec3 newLinearVelocity, float framesPerSecond, mix = glm::clamp(mix, 0.f, 1.f); newLinearVelocity /= framesPerSecond; linearMotion = glm::mix(newLinearVelocity, linearMotion, mix); - updatePosition(); + updateMatrix(); markDirty(); } @@ -543,7 +528,7 @@ void Transform::setAngularVelocity(quat newAngularVelocity, float framesPerSecon newAngularVelocity[1] = newAngularVelocity[1] / framesPerSecond; newAngularVelocity[2] = newAngularVelocity[2] / framesPerSecond; angularMotion = glm::lerp(newAngularVelocity, angularMotion, mix); - updateRotation(); + updateMatrix(); markDirty(); } @@ -553,7 +538,7 @@ void Transform::setScalarVelocity(vec3 newScalarVelocity, float framesPerSecond, mix = glm::clamp(mix, 0.f, 1.f); newScalarVelocity /= framesPerSecond; scalarMotion = glm::mix(newScalarVelocity, scalarMotion, mix); - updateScale(); + updateMatrix(); markDirty(); } @@ -581,21 +566,6 @@ void Transform::clearMotion() // markDirty(); // } -void Transform::updatePosition() -{ - // localToParentTranslation = glm::translate(glm::mat4(1.0), position); - // parentToLocalTranslation = glm::translate(glm::mat4(1.0), -position); - // if (useRelativeMotionBlur) { - // prevLocalToParentTranslation = glm::translate(glm::mat4(1.0), position + linearVelocity); - // prevParentToLocalTranslation = glm::translate(glm::mat4(1.0), -position + linearVelocity); - // } else { - // prevLocalToParentTranslation = glm::translate(glm::mat4(1.0), prevPosition); - // prevParentToLocalTranslation = glm::translate(glm::mat4(1.0), -prevPosition); - // } - updateMatrix(); - markDirty(); -} - vec3 
Transform::getScale(bool previous) { if (previous) return prevScale; @@ -607,14 +577,14 @@ void Transform::setScale(vec3 newScale, bool previous) if (previous) useRelativeScalarMotionBlur = false; auto &s = (previous) ? prevScale : scale; s = newScale; - updateScale(); + updateMatrix(); markDirty(); } // void Transform::setScale(float newScale) // { // scale = vec3(newScale, newScale, newScale); -// updateScale(); +// updateMatrix(); // markDirty(); // } @@ -622,7 +592,7 @@ void Transform::addScale(vec3 additionalScale, bool previous) { if (previous) useRelativeScalarMotionBlur = false; setScale(getScale(previous) + additionalScale, previous); - updateScale(); + updateMatrix(); markDirty(); } @@ -644,21 +614,6 @@ void Transform::addScale(vec3 additionalScale, bool previous) // markDirty(); // } -void Transform::updateScale() -{ - // localToParentScale = glm::scale(glm::mat4(1.0), scale); - // parentToLocalScale = glm::scale(glm::mat4(1.0), glm::vec3(1.0 / scale.x, 1.0 / scale.y, 1.0 / scale.z)); - // if (useRelativeMotionBlur) { - // prevLocalToParentScale = glm::scale(glm::mat4(1.0), scale + scalarVelocity); - // prevParentToLocalScale = glm::scale(glm::mat4(1.0), glm::vec3(1.0 / (scale.x + scalarVelocity.x), 1.0 / (scale.y + scalarVelocity.y), 1.0 / (scale.z + scalarVelocity.z))); - // } else { - // prevLocalToParentScale = glm::scale(glm::mat4(1.0), prevScale); - // prevParentToLocalScale = glm::scale(glm::mat4(1.0), glm::vec3(1.0 / prevScale.x, 1.0 / prevScale.y, 1.0 / prevScale.z)); - // } - updateMatrix(); - markDirty(); -} - void Transform::updateMatrix() { localToParentMatrix = (localToParentTransform * getLocalToParentTranslationMatrix(false) * getLocalToParentRotationMatrix(false) * getLocalToParentScaleMatrix(false)); @@ -953,6 +908,8 @@ glm::mat4 Transform::getLocalToWorldMatrix(bool previous) { void Transform::updateChildren() { + if (children.size() == 0) return; + for (auto &c : children) { auto &t = transforms[c]; t.updateChildren(); From 
ded9a621a82181613d91e8133745120d4efe02b9 Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 27 Oct 2021 15:53:00 -0600 Subject: [PATCH 32/55] adding back world matrix calculation --- src/nvisii/transform.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/nvisii/transform.cpp b/src/nvisii/transform.cpp index 754eda1c..a31e9c10 100644 --- a/src/nvisii/transform.cpp +++ b/src/nvisii/transform.cpp @@ -908,8 +908,6 @@ glm::mat4 Transform::getLocalToWorldMatrix(bool previous) { void Transform::updateChildren() { - if (children.size() == 0) return; - for (auto &c : children) { auto &t = transforms[c]; t.updateChildren(); From 6ec79acf0a9979336a29efee81a1f8d44586553b Mon Sep 17 00:00:00 2001 From: n8vm Date: Wed, 27 Oct 2021 17:51:45 -0600 Subject: [PATCH 33/55] merging the two initialize loops into one, in the future this will make it easier to catch bugs between headless mode vs interactive. --- examples/02.random_scene.py | 1 - examples/03.pybullet.py | 4 +- examples/09.meta_data_exporting.py | 2 +- examples/12.pybullet_motion_blur.py | 2 +- include/nvisii/transform.h | 8 +- src/nvisii/nvisii.cpp | 201 ++++++++-------------------- src/nvisii/transform.cpp | 96 ++++--------- 7 files changed, 89 insertions(+), 225 deletions(-) diff --git a/examples/02.random_scene.py b/examples/02.random_scene.py index 370b8e12..bd9d2bde 100644 --- a/examples/02.random_scene.py +++ b/examples/02.random_scene.py @@ -21,7 +21,6 @@ nvisii.initialize( headless = True, verbose = True, - lazy_updates = True, max_entities = opt.nb_objs + 1, max_transforms = opt.nb_objs + 1, max_materials = opt.nb_objs, diff --git a/examples/03.pybullet.py b/examples/03.pybullet.py index f9dfa71d..9caba83c 100644 --- a/examples/03.pybullet.py +++ b/examples/03.pybullet.py @@ -26,8 +26,8 @@ print(f'created folder {opt.outf}/') # # # # # # # # # # # # # # # # # # # # # # # # # -# show an interactive window, and use "lazy" updates for faster object creation time -nvisii.initialize(headless=False, 
lazy_updates=True) +# show an interactive window +nvisii.initialize(headless=False) if not opt.noise is True: nvisii.enable_denoiser() diff --git a/examples/09.meta_data_exporting.py b/examples/09.meta_data_exporting.py index bf464cbd..07777490 100644 --- a/examples/09.meta_data_exporting.py +++ b/examples/09.meta_data_exporting.py @@ -17,7 +17,7 @@ print(f'created folder {opt.outf}/') # # # # # # # # # # # # # # # # # # # # # # # # # -nvisii.initialize(headless=False, verbose=True, lazy_updates = True) +nvisii.initialize(headless=False, verbose=True) nvisii.enable_denoiser() diff --git a/examples/12.pybullet_motion_blur.py b/examples/12.pybullet_motion_blur.py index 121fcaae..3b85e5c5 100644 --- a/examples/12.pybullet_motion_blur.py +++ b/examples/12.pybullet_motion_blur.py @@ -25,7 +25,7 @@ print(f'created folder {opt.outf}/') # # # # # # # # # # # # # # # # # # # # # # # # # -nvisii.initialize(headless = False, lazy_updates=True) +nvisii.initialize(headless = False) if not opt.noise is True: nvisii.enable_denoiser() diff --git a/include/nvisii/transform.h b/include/nvisii/transform.h index 65559326..787c344d 100644 --- a/include/nvisii/transform.h +++ b/include/nvisii/transform.h @@ -78,15 +78,9 @@ class Transform : public StaticFactory static std::vector transformStructs; static std::map lookupTable; - /* Updates cached final local to parent matrix values */ + /* Updates cached final local to parent and local to world matrix values */ void updateMatrix(); - /* Updates cached final local to world matrix values */ - void updateWorldMatrix(); - - /* updates all childrens cached final local to world matrix values */ - void updateChildren(); - /* updates the struct for this transform which can be uploaded to the GPU. 
*/ void updateStruct(); diff --git a/src/nvisii/nvisii.cpp b/src/nvisii/nvisii.cpp index 2abfc2a1..bc8ab326 100644 --- a/src/nvisii/nvisii.cpp +++ b/src/nvisii/nvisii.cpp @@ -1143,6 +1143,15 @@ void updateComponents() std::lock_guard texture_lock(Texture::areAnyDirty() ? *Texture::getEditMutex().get() : dummyMutex); std::lock_guard volume_lock(Volume::areAnyDirty() ? *Volume::getEditMutex().get() : dummyMutex); + // Manage transforms + auto dirtyTransforms = Transform::getDirtyTransforms(); + if (dirtyTransforms.size() > 0) { + Transform::updateComponents(); + + // cudaSetDevice(0); + owlBufferUpload(OptixData.transformBuffer, Transform::getFrontStruct()); + } + // Manage Meshes: Build / Rebuild BLAS auto dirtyMeshes = Mesh::getDirtyMeshes(); if (dirtyMeshes.size() > 0) { @@ -1489,15 +1498,6 @@ void updateComponents() owlBufferUpload(OptixData.textureBuffer, OptixData.textureStructs.data()); } - // Manage transforms - auto dirtyTransforms = Transform::getDirtyTransforms(); - if (dirtyTransforms.size() > 0) { - Transform::updateComponents(); - - // cudaSetDevice(0); - owlBufferUpload(OptixData.transformBuffer, Transform::getFrontStruct()); - } - // Manage Cameras if (Camera::areAnyDirty()) { Camera::updateComponents(); @@ -2306,10 +2306,8 @@ void initializeComponentFactories( void reproject(glm::vec4 *samplesBuffer, glm::vec4 *t0AlbedoBuffer, glm::vec4 *t1AlbedoBuffer, glm::vec4 *mvecBuffer, glm::vec4 *scratchBuffer, glm::vec4 *imageBuffer, int width, int height); - -static bool initializeInteractiveDeprecatedShown = false; -static bool initializeHeadlessDeprecatedShown = false; -void initializeInteractive( +void initialize( + bool headless, bool windowOnTop, bool _verbose, uint32_t maxEntities, @@ -2319,13 +2317,8 @@ void initializeInteractive( uint32_t maxMaterials, uint32_t maxLights, uint32_t maxTextures, - uint32_t maxVolumes) + uint32_t maxVolumes) { - if (initializeInteractiveDeprecatedShown == false) { - std::cout<<"Warning, initialize_interactive is 
deprecated and will be removed in a subsequent release. Please switch to initialize." << std::endl; - initializeInteractiveDeprecatedShown = true; - } - // don't initialize more than once if (initialized == true) { throw std::runtime_error("Error: already initialized!"); @@ -2335,56 +2328,67 @@ void initializeInteractive( stopped = false; verbose = _verbose; NVISII.callback = nullptr; + NVISII.headlessMode = headless; initializeComponentFactories(maxEntities, maxCameras, maxTransforms, maxMeshes, maxMaterials, maxLights, maxTextures, maxVolumes); auto loop = [windowOnTop]() { NVISII.render_thread_id = std::this_thread::get_id(); - NVISII.headlessMode = false; + Libraries::GLFW *glfw = nullptr; - auto glfw = Libraries::GLFW::Get(); - WindowData.window = glfw->create_window("NVISII", 512, 512, windowOnTop, true, true); - WindowData.currentSize = WindowData.lastSize = ivec2(512, 512); - glfw->make_context_current("NVISII"); - glfw->poll_events(); + if (!NVISII.headlessMode) + { + glfw = Libraries::GLFW::Get(); + WindowData.window = glfw->create_window("NVISII", 512, 512, windowOnTop, true, true); + WindowData.currentSize = WindowData.lastSize = ivec2(512, 512); + glfw->make_context_current("NVISII"); + glfw->poll_events(); + } - initializeOptix(/*headless = */ false); - initializeImgui(); + initializeOptix(/*headless = */ NVISII.headlessMode); int numGPUs = owlGetDeviceCount(OptixData.context); - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + if (!NVISII.headlessMode) { + initializeImgui(); + glEnable(GL_BLEND); + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } while (!stopped) { - /* Poll events from the window */ - glfw->poll_events(); - if (!paused) { - glfw->swap_buffers("NVISII"); - glClearColor(0,0,0,0); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + if (!NVISII.headlessMode) { + /* Poll events from the window */ + glfw->poll_events(); + glfw->swap_buffers("NVISII"); + glClearColor(0,0,0,0); + 
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + } if (NVISII.callback && NVISII.callbackMutex.try_lock()) { NVISII.callback(); NVISII.callbackMutex.unlock(); } - + static double start=0; static double stop=0; - start = glfwGetTime(); - updateFrameBuffer(); + if (!NVISII.headlessMode) { + start = glfwGetTime(); + updateFrameBuffer(); + } + updateComponents(); updateLaunchParams(); - + for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { cudaSetDevice(deviceID); cudaEventRecord(NVISII.events[deviceID].first, owlParamsGetCudaStream(OptixData.launchParams, deviceID)); owlAsyncLaunch2DOnDevice(OptixData.rayGen, OptixData.LP.frameSize.x * OptixData.LP.frameSize.y, 1, deviceID, OptixData.launchParams); cudaEventRecord(NVISII.events[deviceID].second, owlParamsGetCudaStream(OptixData.launchParams, deviceID)); } + owlLaunchSync(OptixData.launchParams); for (uint32_t deviceID = 0; deviceID < numGPUs; deviceID++) { cudaEventElapsedTime(&NVISII.times[deviceID], NVISII.events[deviceID].first, NVISII.events[deviceID].second); @@ -2395,96 +2399,36 @@ void initializeInteractive( if (OptixData.enableDenoiser) { denoiseImage(); } - // glm::vec4* samplePtr = (glm::vec4*) owlBufferGetPointer(OptixData.accumBuffer,0); - // glm::vec4* mvecPtr = (glm::vec4*) owlBufferGetPointer(OptixData.mvecBuffer,0); - // glm::vec4* t0AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.scratchBuffer,0); - // glm::vec4* t1AlbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.albedoBuffer,0); - // glm::vec4* fbPtr = (glm::vec4*) owlBufferGetPointer(OptixData.frameBuffer,0); - // glm::vec4* sPtr = (glm::vec4*) owlBufferGetPointer(OptixData.normalBuffer,0); - // int width = OptixData.LP.frameSize.x; - // int height = OptixData.LP.frameSize.y; - // reproject(samplePtr, t0AlbPtr, t1AlbPtr, mvecPtr, sPtr, fbPtr, width, height); - drawFrameBufferToWindow(); - stop = glfwGetTime(); - glfwSetWindowTitle(WindowData.window, std::to_string(1.f / (stop - start)).c_str()); - drawGUI(); + if 
(!NVISII.headlessMode) { + drawFrameBufferToWindow(); + stop = glfwGetTime(); + glfwSetWindowTitle(WindowData.window, std::to_string(1.f / (stop - start)).c_str()); + drawGUI(); + } } processCommandQueue(); checkForErrors(); - if (stopped) break; } if (OptixData.denoiser) OPTIX_CHECK(optixDenoiserDestroy(OptixData.denoiser)); - if (OptixData.imageTexID != -1) { - if (OptixData.cudaResourceTex) { - cudaGraphicsUnregisterResource(OptixData.cudaResourceTex); - OptixData.cudaResourceTex = 0; + if (!NVISII.headlessMode) { + if (OptixData.imageTexID != -1) { + if (OptixData.cudaResourceTex) { + cudaGraphicsUnregisterResource(OptixData.cudaResourceTex); + OptixData.cudaResourceTex = 0; + } + glDeleteTextures(1, &OptixData.imageTexID); } - glDeleteTextures(1, &OptixData.imageTexID); - } - - ImGui::DestroyContext(); - if (glfw->does_window_exist("NVISII")) glfw->destroy_window("NVISII"); - - owlContextDestroy(OptixData.context); - }; - - renderThread = std::thread(loop); - // Waits for the render thread to start before returning - enqueueCommandAndWait([] () {}); -} - -void initializeHeadless( - bool _verbose, - uint32_t maxEntities, - uint32_t maxCameras, - uint32_t maxTransforms, - uint32_t maxMeshes, - uint32_t maxMaterials, - uint32_t maxLights, - uint32_t maxTextures, - uint32_t maxVolumes) -{ - if (initializeHeadlessDeprecatedShown == false) { - std::cout<<"Warning, initialize_headless is deprecated and will be removed in a subsequent release. Please switch to initialize(headless = True)." 
<< std::endl; - initializeHeadlessDeprecatedShown = true; - } - - // don't initialize more than once - if (initialized == true) { - throw std::runtime_error("Error: already initialized!"); - } - - initialized = true; - stopped = false; - verbose = _verbose; - NVISII.callback = nullptr; - - initializeComponentFactories(maxEntities, maxCameras, maxTransforms, maxMeshes, maxMaterials, maxLights, maxTextures, maxVolumes); - - auto loop = []() { - NVISII.render_thread_id = std::this_thread::get_id(); - NVISII.headlessMode = true; - - initializeOptix(/*headless = */ true); - - while (!stopped) - { - if(NVISII.callback){ - NVISII.callback(); - } - processCommandQueue(); - if (stopped) break; + ImGui::DestroyContext(); + auto glfw = Libraries::GLFW::Get(); + if (glfw->does_window_exist("NVISII")) glfw->destroy_window("NVISII"); } - - if (OptixData.denoiser) - OPTIX_CHECK(optixDenoiserDestroy(OptixData.denoiser)); owlContextDestroy(OptixData.context); }; @@ -2495,33 +2439,6 @@ void initializeHeadless( enqueueCommandAndWait([] () {}); } -void initialize( - bool headless, - bool windowOnTop, - bool verbose, - uint32_t maxEntities, - uint32_t maxCameras, - uint32_t maxTransforms, - uint32_t maxMeshes, - uint32_t maxMaterials, - uint32_t maxLights, - uint32_t maxTextures, - uint32_t maxVolumes) -{ - // prevents deprecated warning from showing - initializeInteractiveDeprecatedShown = true; - initializeHeadlessDeprecatedShown = true; - - if (headless) - initializeHeadless( - verbose, maxEntities, maxCameras, maxTransforms, maxMeshes, - maxMaterials, maxLights, maxTextures, maxVolumes); - else - initializeInteractive( - windowOnTop, verbose, maxEntities, maxCameras, maxTransforms, - maxMeshes, maxMaterials, maxLights, maxTextures, maxVolumes); -} - static bool registerPreRenderCallbackDeprecatedShown = false; void registerPreRenderCallback(std::function callback){ if (registerPreRenderCallbackDeprecatedShown == false) { diff --git a/src/nvisii/transform.cpp 
b/src/nvisii/transform.cpp index a31e9c10..0e1ec67a 100644 --- a/src/nvisii/transform.cpp +++ b/src/nvisii/transform.cpp @@ -42,6 +42,14 @@ std::set Transform::getDirtyTransforms() void Transform::updateComponents() { if (dirtyTransforms.size() == 0) return; + + // first, update transform matrices + for (auto &t : dirtyTransforms) { + if (!t->isInitialized()) continue; + t->updateMatrix(); + } + + // next, copy over the local to world matrices for (auto &t : dirtyTransforms) { if (!t->isInitialized()) continue; transformStructs[t->id].localToWorld = t->getLocalToWorldMatrix(false); @@ -299,7 +307,6 @@ void Transform::lookAt(vec3 at, vec3 up, vec3 eye, bool previous) // localToParentTranslation = glm::translate(glm::mat4(1.0), position); // parentToLocalTranslation = glm::translate(glm::mat4(1.0), -position); -// updateMatrix(); // markDirty(); // } @@ -326,7 +333,6 @@ void Transform::rotateAround(vec3 point, glm::quat rot, bool previous) // ltpt = glm::translate(glm::mat4(1.0), t); // ptlt = glm::translate(glm::mat4(1.0), -t); - updateMatrix(); markDirty(); } @@ -407,7 +413,6 @@ void Transform::setTransform(glm::mat4 transformation, bool decompose, bool prev this->localToParentTransform = transformation; // this->parentToLocalTransform = glm::inverse(transformation); } - updateMatrix(); } markDirty(); } @@ -423,7 +428,6 @@ void Transform::setRotation(quat newRotation, bool previous) if (previous) useRelativeAngularMotionBlur = false; auto &r = (previous) ? prevRotation : rotation; r = glm::normalize(newRotation); - updateMatrix(); markDirty(); } @@ -436,7 +440,6 @@ void Transform::addRotation(quat additionalRotation, bool previous) { if (previous) useRelativeAngularMotionBlur = false; setRotation(getRotation(previous) * additionalRotation, previous); - updateMatrix(); markDirty(); } @@ -498,7 +501,6 @@ void Transform::setPosition(vec3 newPosition, bool previous) if (previous) useRelativeLinearMotionBlur = false; auto &p = (previous) ? 
prevPosition : position; p = newPosition; - updateMatrix(); markDirty(); } @@ -506,7 +508,6 @@ void Transform::addPosition(vec3 additionalPosition, bool previous) { if (previous) useRelativeLinearMotionBlur = false; setPosition(getPosition(previous) + additionalPosition, previous); - updateMatrix(); markDirty(); } @@ -516,7 +517,6 @@ void Transform::setLinearVelocity(vec3 newLinearVelocity, float framesPerSecond, mix = glm::clamp(mix, 0.f, 1.f); newLinearVelocity /= framesPerSecond; linearMotion = glm::mix(newLinearVelocity, linearMotion, mix); - updateMatrix(); markDirty(); } @@ -528,7 +528,6 @@ void Transform::setAngularVelocity(quat newAngularVelocity, float framesPerSecon newAngularVelocity[1] = newAngularVelocity[1] / framesPerSecond; newAngularVelocity[2] = newAngularVelocity[2] / framesPerSecond; angularMotion = glm::lerp(newAngularVelocity, angularMotion, mix); - updateMatrix(); markDirty(); } @@ -538,7 +537,6 @@ void Transform::setScalarVelocity(vec3 newScalarVelocity, float framesPerSecond, mix = glm::clamp(mix, 0.f, 1.f); newScalarVelocity /= framesPerSecond; scalarMotion = glm::mix(newScalarVelocity, scalarMotion, mix); - updateMatrix(); markDirty(); } @@ -550,7 +548,6 @@ void Transform::clearMotion() scalarMotion = glm::vec3(0.f); angularMotion = glm::quat(1.f, 0.f, 0.f, 0.f); linearMotion = glm::vec3(0.f); - updateMatrix(); markDirty(); } @@ -577,14 +574,12 @@ void Transform::setScale(vec3 newScale, bool previous) if (previous) useRelativeScalarMotionBlur = false; auto &s = (previous) ? 
prevScale : scale; s = newScale; - updateMatrix(); markDirty(); } // void Transform::setScale(float newScale) // { // scale = vec3(newScale, newScale, newScale); -// updateMatrix(); // markDirty(); // } @@ -592,7 +587,6 @@ void Transform::addScale(vec3 additionalScale, bool previous) { if (previous) useRelativeScalarMotionBlur = false; setScale(getScale(previous) + additionalScale, previous); - updateMatrix(); markDirty(); } @@ -616,24 +610,30 @@ void Transform::addScale(vec3 additionalScale, bool previous) void Transform::updateMatrix() { + // Update the current transform matrices localToParentMatrix = (localToParentTransform * getLocalToParentTranslationMatrix(false) * getLocalToParentRotationMatrix(false) * getLocalToParentScaleMatrix(false)); parentToLocalMatrix = (getParentToLocalScaleMatrix(false) * getParentToLocalRotationMatrix(false) * getParentToLocalTranslationMatrix(false) * glm::inverse(localToParentTransform)); prevLocalToParentMatrix = (prevLocalToParentTransform * getLocalToParentTranslationMatrix(true) * getLocalToParentRotationMatrix(true) * getLocalToParentScaleMatrix(true)); prevParentToLocalMatrix = (getParentToLocalScaleMatrix(true) * getParentToLocalRotationMatrix(true) * getParentToLocalTranslationMatrix(true) * glm::inverse(prevLocalToParentTransform)); - // right = glm::vec3(localToParentMatrix[0]); - // up = glm::vec3(localToParentMatrix[1]); - // forward = glm::vec3(localToParentMatrix[2]); - // position = glm::vec3(localToParentMatrix[3]); - - // prevRight = glm::vec3(prevLocalToParentMatrix[0]); - // prevUp = glm::vec3(prevLocalToParentMatrix[1]); - // prevForward = glm::vec3(prevLocalToParentMatrix[2]); - // prevPosition = glm::vec3(prevLocalToParentMatrix[3]); + if (parent == -1) { + worldToLocalMatrix = parentToLocalMatrix; + localToWorldMatrix = localToParentMatrix; + prevWorldToLocalMatrix = prevParentToLocalMatrix; + prevLocalToWorldMatrix = prevLocalToParentMatrix; + } else { + worldToLocalMatrix = 
computeWorldToLocalMatrix(/*previous=*/false); + prevWorldToLocalMatrix = computeWorldToLocalMatrix(/*previous=*/true); + localToWorldMatrix = glm::inverse(worldToLocalMatrix); + prevLocalToWorldMatrix = glm::inverse(prevWorldToLocalMatrix); + } - updateChildren(); - markDirty(); + // If this transform has children, update those too. + for (auto &c : children) { + auto &t = transforms[c]; + t.updateMatrix(); + } } glm::mat4 Transform::computeWorldToLocalMatrix(bool previous) @@ -656,37 +656,6 @@ glm::mat4 Transform::computeWorldToLocalMatrix(bool previous) // else return getNextParentToLocalMatrix(); // } -void Transform::updateWorldMatrix() -{ - if (parent == -1) { - worldToLocalMatrix = parentToLocalMatrix; - localToWorldMatrix = localToParentMatrix; - prevWorldToLocalMatrix = prevParentToLocalMatrix; - prevLocalToWorldMatrix = prevLocalToParentMatrix; - - // worldScale = scale; - // worldTranslation = position; - // worldRotation = rotation; - // worldSkew = glm::vec3(0.f, 0.f, 0.f); - // worldPerspective = glm::vec4(1.0f, 1.0f, 1.0f, 1.0f); // not sure what this should default to... - - // prevWorldScale = prevScale; - // prevWorldTranslation = prevPosition; - // prevWorldRotation = prevRotation; - // prevWorldSkew = glm::vec3(0.f, 0.f, 0.f); - // prevWorldPerspective = glm::vec4(1.0f, 1.0f, 1.0f, 1.0f); // not sure what this should default to... 
- } else { - worldToLocalMatrix = computeWorldToLocalMatrix(/*previous=*/false); - prevWorldToLocalMatrix = computeWorldToLocalMatrix(/*previous=*/true); - localToWorldMatrix = glm::inverse(worldToLocalMatrix); - prevLocalToWorldMatrix = glm::inverse(prevWorldToLocalMatrix); - // glm::decompose(localToWorldMatrix, worldScale, worldRotation, worldTranslation, worldSkew, worldPerspective); - // glm::decompose(prevLocalToWorldMatrix, prevWorldScale, prevWorldRotation, prevWorldTranslation, prevWorldSkew, prevWorldPerspective); - // glm::decompose(nextLocalToWorldMatrix, worldScale, worldRotation, worldTranslation, worldSkew, worldPerspective); - } - markDirty(); -} - glm::mat4 Transform::getParentToLocalMatrix(bool previous) { if (previous) return prevParentToLocalMatrix; @@ -787,7 +756,6 @@ void Transform::setParent(Transform *parent) { this->parent = parent->getId(); transforms[parent->getId()].children.insert(this->id); - updateChildren(); markDirty(); } @@ -800,7 +768,6 @@ void Transform::clearParent() transforms[parent].children.erase(this->id); this->parent = -1; - updateChildren(); markDirty(); } @@ -829,7 +796,6 @@ void Transform::removeChild(Transform *object) { children.erase(object->getId()); transforms[object->getId()].parent = -1; - transforms[object->getId()].updateWorldMatrix(); transforms[object->getId()].markDirty(); } @@ -905,18 +871,6 @@ glm::mat4 Transform::getLocalToWorldMatrix(bool previous) { // return m; // } - -void Transform::updateChildren() -{ - for (auto &c : children) { - auto &t = transforms[c]; - t.updateChildren(); - } - - updateWorldMatrix(); - markDirty(); -} - TransformStruct &Transform::getStruct() { return transformStructs[id]; From 2c01f62f5a3891d2c923ba1301efe27a290dd54f Mon Sep 17 00:00:00 2001 From: jtremblay Date: Fri, 10 Jun 2022 09:08:32 -0700 Subject: [PATCH 34/55] Update README.md Forcing a CLI compile --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 6c93964a..a86081cd 100644 --- 
a/README.md +++ b/README.md @@ -13,6 +13,7 @@ For more information see our [ICLR workshop 2021 paper](https://arxiv.org/abs/21 [Documentation](https://nvisii.com). +