Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion src/windows/service/exe/HcsVirtualMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ HcsVirtualMachine::HcsVirtualMachine(_In_ const WSLCSessionSettings* Settings)

// Build HCS settings
hcs::ComputeSystem systemSettings{};
systemSettings.Owner = L"WSL";
systemSettings.Owner = std::format(L"WSLC-{}", Settings->DisplayName);
systemSettings.ShouldTerminateOnLastHandleClosed = true;

// Determine which schema version to use based on the Windows version. Windows 10 does not support
Expand Down Expand Up @@ -281,6 +281,15 @@ HcsVirtualMachine::~HcsVirtualMachine()
{
std::lock_guard lock(m_lock);

// Clear the session termination callback before destroying the VM.
// During normal shutdown, the session is already terminating — firing the
// callback would cause a redundant (and potentially crashing) COM call.
// Uses its own lock to avoid deadlock with HCS callback thread.
{
std::lock_guard callbackLock(m_sessionTerminationCallbackLock);
m_sessionTerminationCallback = nullptr;
}

// Wait up to 5 seconds for the VM to terminate gracefully.
bool forceTerminate = false;
if (!m_vmExitEvent.wait(5000))
Expand Down Expand Up @@ -581,6 +590,15 @@ try
}
CATCH_RETURN()

// Registers (or replaces) the session-side termination callback stored in
// m_sessionTerminationCallback.
//
// Callback: the callback to store; this object takes its own COM reference on it.
// Returns S_OK on success, or the HRESULT produced by CATCH_RETURN() if an
// exception is thrown.
//
// The previously registered callback (if any) is released only after
// m_sessionTerminationCallbackLock has been dropped. Releasing a COM reference
// can call into foreign code, so it should not run while holding this lock.
HRESULT HcsVirtualMachine::RegisterTerminationCallback(_In_ ITerminationCallback* Callback)
try
{
    // Holds the new callback going in; after the swap it holds the old one,
    // which is released when this local goes out of scope (outside the lock).
    wil::com_ptr<ITerminationCallback> replaced(Callback);
    {
        std::lock_guard lock(m_sessionTerminationCallbackLock);
        m_sessionTerminationCallback.swap(replaced);
    }

    return S_OK;
}
CATCH_RETURN()

void CALLBACK HcsVirtualMachine::OnVmExitCallback(HCS_EVENT* Event, void* Context)
try
{
Expand Down Expand Up @@ -630,6 +648,17 @@ void HcsVirtualMachine::OnExit(const HCS_EVENT* Event)
{
LOG_IF_FAILED(m_terminationCallback->OnTermination(reason, Event->EventData));
}

wil::com_ptr<ITerminationCallback> sessionCallback;
{
std::lock_guard lock(m_sessionTerminationCallbackLock);
sessionCallback = m_sessionTerminationCallback;
}

if (sessionCallback)
{
LOG_IF_FAILED(sessionCallback->OnTermination(reason, Event->EventData));
}
}

void HcsVirtualMachine::OnCrash(const HCS_EVENT* Event)
Expand Down
7 changes: 7 additions & 0 deletions src/windows/service/exe/HcsVirtualMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class HcsVirtualMachine
IFACEMETHOD(DetachDisk)(_In_ ULONG Lun) override;
IFACEMETHOD(AddShare)(_In_ LPCWSTR WindowsPath, _In_ BOOL ReadOnly, _Out_ GUID* ShareId) override;
IFACEMETHOD(RemoveShare)(_In_ REFGUID ShareId) override;
IFACEMETHOD(RegisterTerminationCallback)(_In_ ITerminationCallback* Callback) override;

private:
struct DiskInfo
Expand Down Expand Up @@ -97,6 +98,12 @@ class HcsVirtualMachine
bool m_crashLogCaptured = false;

wil::com_ptr<ITerminationCallback> m_terminationCallback;

// Session-side termination callback, registered via RegisterTerminationCallback().
// Guarded by m_sessionTerminationCallbackLock (separate from m_lock to avoid
// deadlock between HCS callback thread and destructor which holds m_lock).
std::mutex m_sessionTerminationCallbackLock;
wil::com_ptr<ITerminationCallback> m_sessionTerminationCallback;
};

} // namespace wsl::windows::service::wslc
4 changes: 4 additions & 0 deletions src/windows/service/inc/wslc.idl
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,10 @@ interface IWSLCVirtualMachine : IUnknown

// Removes a previously added filesystem share.
HRESULT RemoveShare([in] REFGUID ShareId);

// Registers a callback to be invoked when the VM exits.
// The callback receives the exit reason and optional details.
HRESULT RegisterTerminationCallback([in] ITerminationCallback* Callback);
}

typedef enum _WSLCSessionStorageFlags
Expand Down
104 changes: 87 additions & 17 deletions src/windows/wslcsession/WSLCSession.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,28 @@ using wsl::windows::service::wslc::UserHandle;
using wsl::windows::service::wslc::WSLCSession;
using wsl::windows::service::wslc::WSLCVirtualMachine;

// COM callback that signals a local event when the VM terminates.
// Registered with IWSLCVirtualMachine::RegisterTerminationCallback() so the
// SYSTEM service can notify us cross-process when HCS reports VM exit.
//
// The callback owns a duplicate of the event handle it is given, so it keeps
// working regardless of when the caller closes its own handle.
struct VmTerminationCallback : winrt::implements<VmTerminationCallback, ITerminationCallback>
{
    // Duplicates `event` (same access, non-inheritable) within the current process.
    // Throws if DuplicateHandle fails.
    // explicit: a raw HANDLE must never convert implicitly into a callback object.
    explicit VmTerminationCallback(HANDLE event)
    {
        HANDLE duplicate = nullptr;
        THROW_IF_WIN32_BOOL_FALSE(DuplicateHandle(GetCurrentProcess(), event, GetCurrentProcess(), &duplicate, 0, FALSE, DUPLICATE_SAME_ACCESS));
        m_event.reset(duplicate);
    }

    // Signals the owned event and reports success. The termination reason and
    // details are ignored; only the fact that the VM exited is propagated.
    // noexcept: this is a COM ABI entry point, so no exception may cross it.
    HRESULT STDMETHODCALLTYPE OnTermination(WSLCVirtualMachineTerminationReason, LPCWSTR) noexcept override
    {
        m_event.SetEvent();
        return S_OK;
    }

private:
    // Duplicate of the event handle passed to the constructor.
    wil::unique_event m_event;
};

constexpr auto c_containerdStorage = "/var/lib/docker";

namespace {
Expand Down Expand Up @@ -289,6 +311,11 @@ try

m_virtualMachine->Initialize();

// Register a COM callback with the SYSTEM service to be notified when the VM exits.
// The callback signals m_vmExitedEvent, which IORelay monitors to trigger OnVmExited().
auto vmTermCallback = winrt::make<VmTerminationCallback>(m_vmExitedEvent.get());
THROW_IF_FAILED(Vm->RegisterTerminationCallback(vmTermCallback.as<ITerminationCallback>().get()));

Comment on lines +317 to +318
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

THROW_IF_FAILED here makes session initialization fail hard if the running service/proxy is older and doesn’t support the new COM method (commonly returning RPC_S_PROCNUM_OUT_OF_RANGE/E_NOTIMPL). Consider treating “method not supported” as a non-fatal best-effort (log + continue without VM-exit detection) so version skew during upgrades doesn’t prevent sessions from starting.

Suggested change
THROW_IF_FAILED(Vm->RegisterTerminationCallback(vmTermCallback.as<ITerminationCallback>().get()));
const auto registerTerminationCallbackResult =
Vm->RegisterTerminationCallback(vmTermCallback.as<ITerminationCallback>().get());
if ((registerTerminationCallbackResult == E_NOTIMPL) ||
(registerTerminationCallbackResult == HRESULT_FROM_WIN32(RPC_S_PROCNUM_OUT_OF_RANGE)))
{
WSL_LOG(
"RegisterTerminationCallbackUnsupported",
TraceLoggingValue(registerTerminationCallbackResult, "HResult"),
TraceLoggingValue(m_id, "SessionId"));
}
else
{
THROW_IF_FAILED(registerTerminationCallbackResult);
}

Copilot uses AI. Check for mistakes.
// Configure storage.
ConfigureStorage(*Settings, tokenInfo->User.Sid);

Expand All @@ -306,6 +333,10 @@ try
// Start the event tracker.
m_eventTracker.emplace(m_dockerClient.value(), m_id, m_ioRelay);

// Monitor for unexpected VM exit.
m_ioRelay.AddHandle(
std::make_unique<windows::common::relay::EventHandle>(m_vmExitedEvent.get(), std::bind(&WSLCSession::OnVmExited, this)));

// Recover any existing containers from storage.
RecoverExistingVolumes();
RecoverExistingContainers();
Expand Down Expand Up @@ -413,6 +444,30 @@ void WSLCSession::OnDockerdExited()
}
}

void WSLCSession::OnVmExited()
{
if (m_sessionTerminatingEvent.is_signaled())
{
return; // Already shutting down (normal termination path).
}

WSL_LOG(
"UnexpectedVmExit",
TraceLoggingLevel(WINEVENT_LEVEL_WARNING),
TraceLoggingValue(m_id, "SessionId"),
TraceLoggingValue(m_displayName.c_str(), "Name"));

// N.B. This callback runs on the IORelay thread. Terminate() calls m_ioRelay.Stop()
// which joins the IORelay thread, so we must run termination on a separate thread
// to avoid deadlock. Capture a COM reference to prevent the session from being
// destroyed before the thread runs.
Microsoft::WRL::ComPtr<WSLCSession> self(this);
std::thread([self]() {
wsl::windows::common::wslutil::SetThreadDescription(L"VmExitTermination");
LOG_IF_FAILED(self->Terminate());
}).detach();
}

void WSLCSession::OnDockerdLog(const gsl::span<char>& buffer)
try
{
Expand Down Expand Up @@ -1792,40 +1847,55 @@ try
m_eventTracker.reset();
m_dockerClient.reset();

// Check if the VM has already exited (e.g., killed externally).
// If so, skip operations that require a live VM to avoid unnecessary waits.
const bool vmDead = m_vmExitedEvent.is_signaled();

// Stop dockerd.
// N.B. dockerd wait a couple seconds if there are any outstanding HTTP request sockets opened.
if (m_dockerdProcess.has_value())
{
LOG_IF_FAILED(m_dockerdProcess->Get().Signal(WSLCSignalSIGTERM));

int exitCode = -1;
try
if (!vmDead)
{
exitCode = m_dockerdProcess->Wait(30 * 1000);
}
catch (...)
{
LOG_CAUGHT_EXCEPTION();
LOG_IF_FAILED(m_dockerdProcess->Get().Signal(WSLCSignalSIGTERM));

int exitCode = -1;
try
{
m_dockerdProcess->Get().Signal(WSLCSignalSIGKILL);
exitCode = m_dockerdProcess->Wait(10 * 1000);
exitCode = m_dockerdProcess->Wait(30 * 1000);
}
CATCH_LOG();
catch (...)
{
LOG_CAUGHT_EXCEPTION();
try
{
m_dockerdProcess->Get().Signal(WSLCSignalSIGKILL);
exitCode = m_dockerdProcess->Wait(10 * 1000);
}
CATCH_LOG();
}

WSL_LOG("DockerdExit", TraceLoggingValue(exitCode, "code"));
}
else
{
WSL_LOG("SkippingDockerdShutdown_VmDead");
}

WSL_LOG("DockerdExit", TraceLoggingValue(exitCode, "code"));
m_dockerdProcess.reset();
}

if (m_virtualMachine)
{
// N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running.
try
if (!vmDead)
{
m_virtualMachine->Unmount(c_containerdStorage);
// N.B. dockerd has exited by this point, so unmounting the VHD is safe since no container can be running.
try
{
m_virtualMachine->Unmount(c_containerdStorage);
}
CATCH_LOG();
}
CATCH_LOG();

m_virtualMachine.reset();
}
Expand Down
2 changes: 2 additions & 0 deletions src/windows/wslcsession/WSLCSession.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
void OnContainerDeleted(const WSLCContainerImpl* Container);
void OnDockerdLog(const gsl::span<char>& Data);
void OnDockerdExited();
void OnVmExited();
void StartDockerd();
void ImportImageImpl(DockerHTTPClient::HTTPRequestContext& Request, const WSLCHandle ImageHandle);
void RecoverExistingContainers();
Expand All @@ -150,6 +151,7 @@ class DECLSPEC_UUID("4877FEFC-4977-4929-A958-9F36AA1892A4") WSLCSession
std::unordered_map<std::string, std::unique_ptr<WSLCVhdVolumeImpl>> m_volumes;
std::unordered_set<std::string> m_anonymousVolumes; // TODO: Implement proper anonymous volume support.
wil::unique_event m_sessionTerminatingEvent{wil::EventOptions::ManualReset};
wil::unique_event m_vmExitedEvent{wil::EventOptions::ManualReset};
wil::srwlock m_lock;
IORelay m_ioRelay;
std::optional<ServiceRunningProcess> m_dockerdProcess;
Expand Down
92 changes: 92 additions & 0 deletions test/windows/WSLCTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6740,4 +6740,96 @@ class WSLCTests

ValidateProcessOutput(initProcess, {{1, "OK\n"}});
}

// Kills the VM for a session by finding it via the "WSLC-<name>" owner in hcsdiag output.
// hcsdiag detail line format: " VM, <State>, <GUID>, WSLC-<name>"
//
// The owner must match as a whole field: a session named "foo" must not select
// a VM owned by "WSLC-foobar". Fails the test (via VERIFY) if no matching VM is
// found, if the id in front of the owner is not a GUID, or if the kill command
// returns non-zero.
static void KillSessionVm(LPCWSTR sessionName)
{
    const auto ownerTag = std::format(L"WSLC-{}", sessionName);

    wsl::windows::common::SubProcess listProc(nullptr, L"hcsdiag.exe list");
    auto listOutput = listProc.RunAndCaptureOutput(10000);

    auto& output = listOutput.Stdout;

    // Find the first occurrence of the owner tag that is followed by the end of
    // the output or by whitespace / a line break.
    // NOTE(review): assumes the owner is the last field on the detail line, per the
    // format documented above - confirm against real hcsdiag output.
    auto ownerPos = output.find(ownerTag);
    while (ownerPos != std::wstring::npos)
    {
        const auto tagEnd = ownerPos + ownerTag.size();
        if (tagEnd == output.size() || output[tagEnd] == L'\r' || output[tagEnd] == L'\n' || output[tagEnd] == L' ' || output[tagEnd] == L'\t')
        {
            break;
        }

        ownerPos = output.find(ownerTag, ownerPos + 1);
    }

    VERIFY_IS_TRUE(ownerPos != std::wstring::npos);

    // The GUID (36 chars) appears before ", WSLC-<name>" in the detail line.
    auto guidEnd = output.rfind(L", ", ownerPos);
    VERIFY_IS_TRUE(guidEnd != std::wstring::npos && guidEnd >= 36);

    auto vmId = output.substr(guidEnd - 36, 36);
    VERIFY_IS_TRUE(wsl::shared::string::ToGuid(vmId.c_str()).has_value());

    VERIFY_ARE_EQUAL(wsl::windows::common::SubProcess(nullptr, std::format(L"hcsdiag.exe kill {}", vmId).c_str()).Run(10000), 0u);
}

// Waits for a session to terminate (GetState returns terminated or RPC error).
//
// session: the session to poll.
// timeoutSeconds: maximum time to wait; the state is polled once per second.
// Returns true as soon as the session is observed terminated (or GetState fails),
// false if it is still alive after the timeout.
//
// The state is checked before the first sleep, so an already-terminated session
// returns immediately and a timeout of 0 still performs one check.
static bool WaitForSessionTermination(IWSLCSession* session, DWORD timeoutSeconds = 30)
{
    for (DWORD elapsedSeconds = 0;; elapsedSeconds++)
    {
        WSLCSessionState state{};
        const auto hr = session->GetState(&state);
        if (FAILED(hr) || state == WSLCSessionStateTerminated)
        {
            return true;
        }

        if (elapsedSeconds >= timeoutSeconds)
        {
            return false;
        }

        Sleep(1000);
    }
}

// Creates a session, kills its VM out from under it via KillSessionVm(), and
// verifies that WaitForSessionTermination() reports the session as terminated.
WSLC_TEST_METHOD(VmKillTerminatesSession)
{
// The same name is used to create the session and to locate its VM.
static constexpr auto c_sessionName = L"wslc-vm-kill-test";
auto settings = GetDefaultSessionSettings(c_sessionName);
auto session = CreateSession(settings);

KillSessionVm(c_sessionName);

// Uses WaitForSessionTermination's default timeout.
VERIFY_IS_TRUE(WaitForSessionTermination(session.get()));
}

// Launches a long-running process in a session, kills the session's VM, and
// checks that the process does not hang: either its exit event fires within
// 30 seconds, or querying its state fails.
WSLC_TEST_METHOD(VmKillFailsInFlightOperations)
{
    static constexpr auto c_sessionName = L"wslc-vm-kill-inflight-test";

    auto sessionSettings = GetDefaultSessionSettings(c_sessionName);
    auto session = CreateSession(sessionSettings);

    // Start a process that would keep running long past the end of the test.
    WSLCProcessLauncher sleepLauncher("/bin/sleep", {"/bin/sleep", "99999"});
    auto process = sleepLauncher.Launch(*session);

    KillSessionVm(c_sessionName);

    // The process should fail (not hang).
    auto processExit = process.GetExitEvent();
    if (!processExit.wait(30000))
    {
        // No exit notification arrived; the state query must at least fail.
        WSLCProcessState processState{};
        int exitCode{};
        VERIFY_IS_TRUE(FAILED(process.Get().GetState(&processState, &exitCode)));
    }
}

// Runs a trivial command in a fresh session and then terminates the session
// explicitly. Terminate() must either succeed or fail with RPC_S_CALL_FAILED;
// when it succeeds, the session must report the terminated state.
WSLC_TEST_METHOD(CleanShutdownStillWorks)
{
    auto sessionSettings = GetDefaultSessionSettings(L"wslc-clean-shutdown-test");
    auto session = CreateSession(sessionSettings);

    ExpectCommandResult(session.get(), {"/bin/echo", "hello"}, 0);

    const auto hr = session->Terminate();
    VERIFY_IS_TRUE(SUCCEEDED(hr) || hr == HRESULT_FROM_WIN32(RPC_S_CALL_FAILED));

    if (FAILED(hr))
    {
        // Terminate() did not succeed, so the state is not queried.
        return;
    }

    WSLCSessionState state{};
    VERIFY_SUCCEEDED(session->GetState(&state));
    VERIFY_ARE_EQUAL(state, WSLCSessionStateTerminated);
}
};