diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 64d0914a23a..89532c8ee77 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -121,6 +121,8 @@ void Config::ReadValues() { Settings::values.use_frame_limit = sdl2_config->GetBoolean("Renderer", "use_frame_limit", true); Settings::values.frame_limit = static_cast(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); + Settings::values.use_format_reinterpret_hack = + static_cast(sdl2_config->GetBoolean("Renderer", "use_format_reinterpret_hack", true)); Settings::values.toggle_3d = sdl2_config->GetBoolean("Renderer", "toggle_3d", false); Settings::values.factor_3d = diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index 756d3c68b8d..69a6a9fdb0d 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -127,6 +127,12 @@ use_frame_limit = # 1 - 9999: Speed limit as a percentage of target game speed. 100 (default) frame_limit = +# Advanced option: Ignores flushing surfaces from cpu memory if the surface was created by the gpu +# and has a different format. This can speed up many games, potentially break some, but is rightfully +# just a hack as a placeholder for gpu texture encoding/decoding +# 0: Off, 1: On (default) +use_format_reinterpret_hack = + # The clear color for the renderer. What shows up on the sides of the bottom screen. # Must be in range of 0.0-1.0. Defaults to 0.0 for all. bg_red = diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index 3aecc094257..b3b3c3ed9e1 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -107,6 +107,8 @@ void Config::ReadValues() { Settings::values.use_vsync = ReadSetting("use_vsync", false).toBool(); Settings::values.use_frame_limit = ReadSetting("use_frame_limit", true).toBool(); Settings::values.frame_limit = ReadSetting("frame_limit", 100).toInt(); + Settings::values.use_format_reinterpret_hack = + ReadSetting("use_format_reinterpret_hack", true).toBool(); Settings::values.bg_red = ReadSetting("bg_red", 0.0).toFloat(); Settings::values.bg_green = ReadSetting("bg_green", 0.0).toFloat(); @@ -349,6 +351,7 @@ void Config::SaveValues() { WriteSetting("use_vsync", Settings::values.use_vsync, false); WriteSetting("use_frame_limit", Settings::values.use_frame_limit, true); WriteSetting("frame_limit", Settings::values.frame_limit, 100); + WriteSetting("use_format_reinterpret_hack", Settings::values.use_format_reinterpret_hack, true); // Cast to double because Qt's written float values are not human-readable WriteSetting("bg_red", (double)Settings::values.bg_red, 0.0); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 551849b205e..bba2521f873 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -25,6 +25,7 @@ void Apply() { VideoCore::g_hw_shader_enabled = values.use_hw_shader; VideoCore::g_hw_shader_accurate_gs = values.shaders_accurate_gs; VideoCore::g_hw_shader_accurate_mul = values.shaders_accurate_mul; + VideoCore::g_use_format_reinterpret_hack = values.use_format_reinterpret_hack; if (VideoCore::g_emu_window) { auto layout = VideoCore::g_emu_window->GetFramebufferLayout(); @@ -60,6 +61,7 @@ void LogSettings() { LogSetting("Renderer_UseVsync", Settings::values.use_vsync); LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); + LogSetting("Renderer_FormatReinterpretHack", Settings::values.use_format_reinterpret_hack); LogSetting("Layout_Toggle3d", Settings::values.toggle_3d); LogSetting("Layout_Factor3d", Settings::values.factor_3d); LogSetting("Layout_LayoutOption", static_cast(Settings::values.layout_option)); diff --git a/src/core/settings.h b/src/core/settings.h index 762246c33d3..d5524923801 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -117,6 +117,7 @@ struct Values { bool use_shader_jit; u16 resolution_factor; bool use_vsync; + bool use_format_reinterpret_hack; bool use_frame_limit; u16 frame_limit; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 7ac52394dad..a6a3300d2a6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -21,7 +21,9 @@ #include "common/math_util.h" #include "common/microprofile.h" #include "common/scope_exit.h" +#include "common/telemetry.h" #include "common/vector_math.h" +#include "core/core.h" #include "core/frontend/emu_window.h" #include "core/memory.h" #include "core/settings.h" @@ -1504,6 +1506,45 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface, } } +static const char* PixelFormatAsString(SurfaceParams::PixelFormat format) { + switch (format) { + case SurfaceParams::PixelFormat::RGBA8: + return "RGBA8"; + case SurfaceParams::PixelFormat::RGB8: + return "RGB8"; + case SurfaceParams::PixelFormat::RGB5A1: + return "RGB5A1"; + case SurfaceParams::PixelFormat::RGB565: + return "RGB565"; + case SurfaceParams::PixelFormat::RGBA4: + return "RGBA4"; + case SurfaceParams::PixelFormat::IA8: + return "IA8"; + case SurfaceParams::PixelFormat::I8: + return "I8"; + case SurfaceParams::PixelFormat::A8: + return "A8"; + case SurfaceParams::PixelFormat::IA4: + return "IA4"; + case SurfaceParams::PixelFormat::I4: + return "I4"; + case SurfaceParams::PixelFormat::A4: + return "A4"; + case SurfaceParams::PixelFormat::ETC1: + return "ETC1"; + case SurfaceParams::PixelFormat::ETC1A4: + return "ETC1A4"; + case SurfaceParams::PixelFormat::D16: + return "D16"; + case SurfaceParams::PixelFormat::D24: + return "D24"; + case SurfaceParams::PixelFormat::D24S8: + return "D24S8"; + default: + return "Not a real pixel format"; + } +} + void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { if (size == 0) return; @@ -1516,9 +1557,17 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, return; } + auto validate_regions = surface->invalid_regions & validate_interval; + auto notify_validated = [&](SurfaceInterval interval) { + surface->invalid_regions.erase(interval); + validate_regions.erase(interval); + }; + + bool flushed_from_cpu = false; + std::set formats; while (true) { - const auto it = surface->invalid_regions.find(validate_interval); - if (it == surface->invalid_regions.end()) + const auto it = validate_regions.begin(); + if (it == validate_regions.end()) break; const auto interval = *it & validate_interval; @@ -1530,7 +1579,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, if (copy_surface != nullptr) { SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); CopySurface(copy_surface, surface, copy_interval); - surface->invalid_regions.erase(copy_interval); + notify_validated(copy_interval); continue; } @@ -1550,17 +1599,52 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, PAddr addr, ConvertD24S8toABGR(reinterpret_surface->texture.handle, src_rect, surface->texture.handle, dest_rect); - surface->invalid_regions.erase(convert_interval); + notify_validated(convert_interval); continue; } } + // By this point, we've checked to see if there was a valid surface that we could have + // copied from, so now we want to check if the surface was created on the gpu only. If it + // was, and since we already checked if there was a matching surface with the same format, + // this means its requesting a different texture format and we will skip it. If any part + // that we will validate is from the CPU, then we flush it all. + + // As this is a HACK, remove this when we get proper hw texture en/decoding support + if (VideoCore::g_use_format_reinterpret_hack) { + bool retry = false; + for (const auto& pair : RangeFromInterval(dirty_regions, interval)) { + // Don't actually validate the region, and instead just skip it for now + validate_regions.erase(pair.first & interval); + formats.insert(static_cast(pair.second->pixel_format)); + retry = true; + } + + if (retry) + continue; + } + // Load data from 3DS memory FlushRegion(params.addr, params.size); surface->LoadGLBuffer(params.addr, params.end); surface->UploadGLTexture(surface->GetSubRect(params), read_framebuffer.handle, draw_framebuffer.handle); - surface->invalid_regions.erase(params.GetInterval()); + notify_validated(params.GetInterval()); + flushed_from_cpu = true; + } + + if (!flushed_from_cpu && !formats.empty()) { + std::string s; + for (auto format : formats) { + s += PixelFormatAsString(static_cast(format)); + s += ", "; + } + LOG_DEBUG(Debug_GPU, + "Validating surface with pixel format {} and found surfaces created on the gpu " + "that have the following pixel formats: {}", + PixelFormatAsString(surface->pixel_format), s); + Core::Telemetry().AddField(Telemetry::FieldType::Session, "VideoCore_FormatReinterpret", + true); } } diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index d3767a6b417..843a638ce73 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -23,6 +23,7 @@ std::atomic g_hw_shader_enabled; std::atomic g_hw_shader_accurate_gs; std::atomic g_hw_shader_accurate_mul; std::atomic g_renderer_bg_color_update_requested; +std::atomic g_use_format_reinterpret_hack; /// Initialize the video core bool Init(EmuWindow* emu_window) { diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index d7a2b26b3cd..db43ef0efc4 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -26,6 +26,7 @@ extern std::atomic g_hw_shader_enabled; extern std::atomic g_hw_shader_accurate_gs; extern std::atomic g_hw_shader_accurate_mul; extern std::atomic g_renderer_bg_color_update_requested; +extern std::atomic g_use_format_reinterpret_hack; /// Start the video core void Start();