GBA Video: Improve speed of window texture generation on AMD
Vicki Pfau vi@endrift.com
Sat, 26 Sep 2020 21:34:18 -0700
2 files changed, 16 insertions(+), 16 deletions(-)
M CHANGES → CHANGES
@@ -82,6 +82,7 @@ - GB: Add support for sleep and shutdown callbacks
  - GBA: Allow pausing event loop while CPU is blocked
  - GBA BIOS: Division by zero should emit a FATAL error
  - GBA Video: Convert OpenGL VRAM texture to integer
+ - GBA Video: Improve speed of window texture generation on AMD
  - Debugger: Keep track of global cycle count
  - FFmpeg: Add looping option for GIF/APNG
  - mGUI: Show battery percentage
M src/gba/renderers/gl.c → src/gba/renderers/gl.c
@@ -497,7 +497,7 @@ "uniform ivec4 win0[160];\n"
  "uniform ivec4 win1[160];\n"
  "OUT(0) out ivec4 window;\n"
- "void crop(vec4 windowParams, int flags, inout ivec3 windowFlags) {\n"
+ "bool crop(vec4 windowParams) {\n"
  " bvec4 compare = lessThan(texCoord.xxyy, windowParams);\n"
  " compare = equal(compare, bvec4(true, false, true, false));\n"
  " if (any(compare)) {\n"
@@ -505,25 +505,23 @@ " vec2 h = windowParams.xy;\n"
  " vec2 v = windowParams.zw;\n"
  " if (v.x > v.y) {\n"
  " if (compare.z && compare.w) {\n"
- " return;\n"
+ " return false;\n"
  " }\n"
  " } else if (compare.z || compare.w) {\n"
- " return;\n"
+ " return false;\n"
  " }\n"
  " if (h.x > h.y) {\n"
  " if (compare.x && compare.y) {\n"
- " return;\n"
+ " return false;\n"
  " }\n"
  " } else if (compare.x || compare.y) {\n"
- " return;\n"
+ " return false;\n"
  " }\n"
  " }\n"
- " windowFlags.x = flags;\n"
+ " return true;\n"
  "}\n"
- "vec4 interpolate(ivec4 win[160]) {\n"
- " vec4 bottom = vec4(win[int(texCoord.y) - 1]);\n"
- " vec4 top = vec4(win[int(texCoord.y)]);\n"
+ "vec4 interpolate(vec4 top, vec4 bottom) {\n"
  " if (distance(top, bottom) > 40.) {\n"
  " return top;\n"
  " }\n"
@@ -535,14 +533,15 @@ " int dispflags = (dispcnt & 0x1F) | 0x20;\n"
  " if ((dispcnt & 0xE0) == 0) {\n"
  " window = ivec4(dispflags, blend, 0);\n"
  " } else {\n"
- " ivec3 windowFlags = ivec3(flags.z, blend);\n"
- " if ((dispcnt & 0x40) != 0) { \n"
- " crop(interpolate(win1), flags.y, windowFlags);\n"
+ " ivec4 windowFlags = ivec4(flags.z, blend, 0);\n"
+ " int top = int(texCoord.y);\n"
+ " int bottom = max(top - 1, 0);\n"
+ " if ((dispcnt & 0x20) != 0 && crop(interpolate(vec4(win0[top]), vec4(win0[bottom])))) { \n"
+ " windowFlags.x = flags.x;\n"
+ " } else if ((dispcnt & 0x40) != 0 && crop(interpolate(vec4(win1[top]), vec4(win1[bottom])))) {\n"
+ " windowFlags.x = flags.y;\n"
  " }\n"
- " if ((dispcnt & 0x20) != 0) { \n"
- " crop(interpolate(win0), flags.x, windowFlags);\n"
- " }\n"
- " window = ivec4(windowFlags, 0);\n"
+ " window = windowFlags;\n"
  " }\n"
  "}\n";