src/platform/3ds/ctr-gpu.c (view raw)
1/* Copyright (c) 2015 Yuri Kunde Schlesner
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7#include <3ds.h>
8#include <stdlib.h>
9#include <string.h>
10#include <stdio.h>
11
12#include "ctr-gpu.h"
13
14#include "uishader.h"
15#include "uishader.shbin.h"
16
17struct ctrUIVertex {
18 s16 x,y;
19 s16 u,v;
20 u32 abgr;
21};
22
// Base address used for the vertex attribute buffers; buffer and index
// locations are passed to the GPU as offsets from this address
#define VRAM_BASE 0x18000000u

// Maximum number of quads that can be queued before the batch must be flushed
#define MAX_NUM_QUADS 1024
// Number of 32-bit words in the GPU command list
#define COMMAND_LIST_LENGTH (16 * 1024)

// Each quad requires 4 vertices and 2*3 indices for the two triangles used to draw it
#define VERTICES_PER_QUAD 4
#define INDICES_PER_QUAD (2 * 3)
// Size in bytes of the single allocation shared by the vertex and index buffers
#define VERTEX_INDEX_BUFFER_SIZE \
	(MAX_NUM_QUADS * (VERTICES_PER_QUAD * sizeof(struct ctrUIVertex) + INDICES_PER_QUAD * sizeof(u16)))
29
30static struct ctrUIVertex* ctrVertexBuffer = NULL;
31static u16* ctrIndexBuffer = NULL;
32static u16 ctrNumQuads = 0;
33
34static void* gpuColorBuffer[2] = { NULL, NULL };
35static u32* gpuCommandList = NULL;
36static void* screenTexture = NULL;
37
38static shaderProgram_s gpuShader;
39static DVLB_s* passthroughShader = NULL;
40
41static int pendingEvents = 0;
42
43static const struct ctrTexture* activeTexture = NULL;
44
45static u32 _f24FromFloat(float f) {
46 u32 i;
47 memcpy(&i, &f, 4);
48
49 u32 mantissa = (i << 9) >> 9;
50 s32 exponent = (i << 1) >> 24;
51 u32 sign = (i << 0) >> 31;
52
53 // Truncate mantissa
54 mantissa >>= 7;
55
56 // Re-bias exponent
57 exponent = exponent - 127 + 63;
58 if (exponent < 0) {
59 // Underflow: flush to zero
60 return sign << 23;
61 } else if (exponent > 0x7F) {
62 // Overflow: saturate to infinity
63 return sign << 23 | 0x7F << 16;
64 }
65
66 return sign << 23 | exponent << 16 | mantissa;
67}
68
69static u32 _f31FromFloat(float f) {
70 u32 i;
71 memcpy(&i, &f, 4);
72
73 u32 mantissa = (i << 9) >> 9;
74 s32 exponent = (i << 1) >> 24;
75 u32 sign = (i << 0) >> 31;
76
77 // Re-bias exponent
78 exponent = exponent - 127 + 63;
79 if (exponent < 0) {
80 // Underflow: flush to zero
81 return sign << 30;
82 } else if (exponent > 0x7F) {
83 // Overflow: saturate to infinity
84 return sign << 30 | 0x7F << 23;
85 }
86
87 return sign << 30 | exponent << 23 | mantissa;
88}
89
90void ctrClearPending(int events) {
91 int toClear = events & pendingEvents;
92 if (toClear & (1 << GSPEVENT_PSC0)) {
93 gspWaitForPSC0();
94 }
95 if (toClear & (1 << GSPEVENT_PPF)) {
96 gspWaitForPPF();
97 }
98 pendingEvents ^= toClear;
99}
100
101// Replacements for the limiting GPU_SetViewport function in ctrulib
102static void _GPU_SetFramebuffer(intptr_t colorBuffer, intptr_t depthBuffer, u16 w, u16 h) {
103 u32 buf[4];
104
105 // Unknown
106 GPUCMD_AddWrite(GPUREG_0111, 0x00000001);
107 GPUCMD_AddWrite(GPUREG_0110, 0x00000001);
108
109 // Set depth/color buffer address and dimensions
110 buf[0] = depthBuffer >> 3;
111 buf[1] = colorBuffer >> 3;
112 buf[2] = (0x01) << 24 | ((h-1) & 0xFFF) << 12 | (w & 0xFFF) << 0;
113 GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, buf, 3);
114 GPUCMD_AddWrite(GPUREG_006E, buf[2]);
115
116 // Set depth/color buffer pixel format
117 GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 3 /* D248S */ );
118 GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0 /* RGBA8 */ << 16 | 2 /* Unknown */);
119 GPUCMD_AddWrite(GPUREG_011B, 0); // Unknown
120
121 // Enable color/depth buffers
122 buf[0] = colorBuffer != 0 ? 0xF : 0x0;
123 buf[1] = buf[0];
124 buf[2] = depthBuffer != 0 ? 0x2 : 0x0;
125 buf[3] = buf[2];
126 GPUCMD_AddIncrementalWrites(GPUREG_0112, buf, 4);
127}
128
129static void _GPU_SetViewportEx(u16 x, u16 y, u16 w, u16 h) {
130 u32 buf[4];
131
132 buf[0] = _f24FromFloat(w / 2.0f);
133 buf[1] = _f31FromFloat(2.0f / w) << 1;
134 buf[2] = _f24FromFloat(h / 2.0f);
135 buf[3] = _f31FromFloat(2.0f / h) << 1;
136 GPUCMD_AddIncrementalWrites(GPUREG_0041, buf, 4);
137
138 GPUCMD_AddWrite(GPUREG_0068, (y & 0xFFFF) << 16 | (x & 0xFFFF) << 0);
139
140 buf[0] = 0;
141 buf[1] = 0;
142 buf[2] = ((h-1) & 0xFFFF) << 16 | ((w-1) & 0xFFFF) << 0;
143 GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, buf, 3);
144}
145
146static void _setDummyTexEnv(int id) {
147 GPU_SetTexEnv(id,
148 GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
149 GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
150 GPU_TEVOPERANDS(0, 0, 0),
151 GPU_TEVOPERANDS(0, 0, 0),
152 GPU_REPLACE,
153 GPU_REPLACE,
154 0x00000000);
155}
156
157Result ctrInitGpu() {
158 Result res = -1;
159
160 // Allocate buffers
161 gpuColorBuffer[0] = vramAlloc(400 * 240 * 4);
162 gpuColorBuffer[1] = vramAlloc(320 * 240 * 4);
163 gpuCommandList = linearAlloc(COMMAND_LIST_LENGTH * sizeof(u32));
164 ctrVertexBuffer = linearAlloc(VERTEX_INDEX_BUFFER_SIZE);
165 if (gpuColorBuffer[0] == NULL || gpuColorBuffer[1] == NULL || gpuCommandList == NULL || ctrVertexBuffer == NULL) {
166 res = -1;
167 goto error_allocs;
168 }
169 // Both buffers share the same allocation, index buffer follows the vertex buffer
170 ctrIndexBuffer = (u16*)(ctrVertexBuffer + (4 * MAX_NUM_QUADS));
171
172 // Load vertex shader binary
173 passthroughShader = DVLB_ParseFile((u32*)uishader, uishader_size);
174 if (passthroughShader == NULL) {
175 res = -1;
176 goto error_dvlb;
177 }
178
179 // Create shader
180 shaderProgramInit(&gpuShader);
181 res = shaderProgramSetVsh(&gpuShader, &passthroughShader->DVLE[0]);
182 if (res < 0) {
183 goto error_shader;
184 }
185
186 // Initialize the GPU in ctrulib and assign the command buffer to accept submission of commands
187 GPU_Init(NULL);
188 GPUCMD_SetBuffer(gpuCommandList, COMMAND_LIST_LENGTH, 0);
189
190 return 0;
191
192error_shader:
193 shaderProgramFree(&gpuShader);
194
195error_dvlb:
196 if (passthroughShader != NULL) {
197 DVLB_Free(passthroughShader);
198 passthroughShader = NULL;
199 }
200
201error_allocs:
202 if (ctrVertexBuffer != NULL) {
203 linearFree(ctrVertexBuffer);
204 ctrVertexBuffer = NULL;
205 ctrIndexBuffer = NULL;
206 }
207
208 if (gpuCommandList != NULL) {
209 GPUCMD_SetBuffer(NULL, 0, 0);
210 linearFree(gpuCommandList);
211 gpuCommandList = NULL;
212 }
213
214 if (gpuColorBuffer[0] != NULL) {
215 vramFree(gpuColorBuffer[0]);
216 gpuColorBuffer[0] = NULL;
217 }
218
219 if (gpuColorBuffer[1] != NULL) {
220 vramFree(gpuColorBuffer[1]);
221 gpuColorBuffer[1] = NULL;
222 }
223 return res;
224}
225
226void ctrDeinitGpu() {
227 shaderProgramFree(&gpuShader);
228
229 DVLB_Free(passthroughShader);
230 passthroughShader = NULL;
231
232 linearFree(screenTexture);
233 screenTexture = NULL;
234
235 linearFree(ctrVertexBuffer);
236 ctrVertexBuffer = NULL;
237 ctrIndexBuffer = NULL;
238
239 GPUCMD_SetBuffer(NULL, 0, 0);
240 linearFree(gpuCommandList);
241 gpuCommandList = NULL;
242
243 vramFree(gpuColorBuffer[0]);
244 gpuColorBuffer[0] = NULL;
245
246 vramFree(gpuColorBuffer[1]);
247 gpuColorBuffer[1] = NULL;
248}
249
250void ctrGpuBeginFrame(int screen) {
251 if (screen > 1) {
252 return;
253 }
254
255 int fw;
256 if (screen == 0) {
257 fw = 400;
258 } else {
259 fw = 320;
260 }
261
262 _GPU_SetFramebuffer(osConvertVirtToPhys((u32)gpuColorBuffer[screen]), 0, 240, fw);
263}
264
265void ctrGpuBeginDrawing(void) {
266 shaderProgramUse(&gpuShader);
267
268 // Disable depth and stencil testing
269 GPU_SetDepthTestAndWriteMask(false, GPU_ALWAYS, GPU_WRITE_COLOR);
270 GPU_SetStencilTest(false, GPU_ALWAYS, 0, 0xFF, 0);
271 GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
272 GPU_DepthMap(-1.0f, 0.0f);
273
274 // Enable alpha blending
275 GPU_SetAlphaBlending(
276 GPU_BLEND_ADD, GPU_BLEND_ADD, // Operation RGB, Alpha
277 GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, // Color src, dst
278 GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); // Alpha src, dst
279 GPU_SetBlendingColor(0, 0, 0, 0);
280
281 // Disable alpha testing
282 GPU_SetAlphaTest(false, GPU_ALWAYS, 0);
283
284 // Unknown
285 GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
286 GPUCMD_AddWrite(GPUREG_0118, 0);
287
288 GPU_SetTexEnv(0,
289 GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // RGB
290 GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // Alpha
291 GPU_TEVOPERANDS(0, 0, 0), // RGB
292 GPU_TEVOPERANDS(0, 0, 0), // Alpha
293 GPU_MODULATE, GPU_MODULATE, // Operation RGB, Alpha
294 0x00000000); // Constant color
295 _setDummyTexEnv(1);
296 _setDummyTexEnv(2);
297 _setDummyTexEnv(3);
298 _setDummyTexEnv(4);
299 _setDummyTexEnv(5);
300
301 // Configure vertex attribute format
302 u32 bufferOffsets[] = { osConvertVirtToPhys((u32)ctrVertexBuffer) - VRAM_BASE };
303 u64 arrayTargetAttributes[] = { 0x210 };
304 u8 numAttributesInArray[] = { 3 };
305 GPU_SetAttributeBuffers(
306 3, // Number of attributes
307 (u32*)VRAM_BASE, // Base address
308 GPU_ATTRIBFMT(0, 2, GPU_SHORT) | // Attribute format
309 GPU_ATTRIBFMT(1, 2, GPU_SHORT) |
310 GPU_ATTRIBFMT(2, 4, GPU_UNSIGNED_BYTE),
311 0xFF8, // Non-fixed vertex inputs
312 0x210, // Vertex shader input map
313 1, // Use 1 vertex array
314 bufferOffsets, arrayTargetAttributes, numAttributesInArray);
315}
316
317void ctrGpuEndFrame(int screen, void* outputFramebuffer, int w, int h) {
318 if (screen > 1) {
319 return;
320 }
321
322 int fw;
323 if (screen == 0) {
324 fw = 400;
325 } else {
326 fw = 320;
327 }
328
329 ctrFlushBatch();
330
331 void* colorBuffer = (u8*)gpuColorBuffer[screen] + ((fw - w) * 240 * 4);
332
333 const u32 GX_CROP_INPUT_LINES = (1 << 2);
334
335 ctrClearPending(1 << GSPEVENT_PSC0);
336 ctrClearPending(1 << GSPEVENT_PPF);
337
338 GX_SetDisplayTransfer(NULL,
339 colorBuffer, GX_BUFFER_DIM(240, fw),
340 outputFramebuffer, GX_BUFFER_DIM(h, w),
341 GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) |
342 GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) |
343 GX_CROP_INPUT_LINES);
344 pendingEvents |= (1 << GSPEVENT_PPF);
345}
346
347void ctrGpuEndDrawing(void) {
348 ctrClearPending(1 << GSPEVENT_PPF);
349 gfxSwapBuffersGpu();
350 gspWaitForEvent(GSPEVENT_VBlank0, false);
351
352 void* gpuColorBuffer0End = (char*)gpuColorBuffer[0] + 240 * 400 * 4;
353 void* gpuColorBuffer1End = (char*)gpuColorBuffer[1] + 240 * 320 * 4;
354 GX_SetMemoryFill(NULL,
355 gpuColorBuffer[0], 0x00000000, gpuColorBuffer0End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER,
356 gpuColorBuffer[1], 0x00000000, gpuColorBuffer1End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER);
357 pendingEvents |= 1 << GSPEVENT_PSC0;
358}
359
360void ctrSetViewportSize(s16 w, s16 h) {
361 // Set up projection matrix mapping (0,0) to the top-left and (w,h) to the
362 // bottom-right, taking into account the 3DS' screens' portrait
363 // orientation.
364 float projectionMtx[4 * 4] = {
365 // Rows are in the order w z y x, because ctrulib
366 1.0f, 0.0f, -2.0f / h, 0.0f,
367 1.0f, 0.0f, 0.0f, -2.0f / w,
368 -0.5f, 0.0f, 0.0f, 0.0f,
369 1.0f, 0.0f, 0.0f, 0.0f,
370 };
371
372 GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_projectionMtx, (u32*)&projectionMtx, 4);
373 _GPU_SetViewportEx(0, 0, h, w);
374}
375
376void ctrActivateTexture(const struct ctrTexture* texture) {
377 if (activeTexture == texture) {
378 return;
379 }
380
381 ctrFlushBatch();
382
383 GPU_SetTextureEnable(GPU_TEXUNIT0);
384 GPU_SetTexture(
385 GPU_TEXUNIT0, (u32*)osConvertVirtToPhys((u32)texture->data),
386 texture->width, texture->height,
387 GPU_TEXTURE_MAG_FILTER(texture->filter) | GPU_TEXTURE_MIN_FILTER(texture->filter) |
388 GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER),
389 texture->format);
390 GPU_SetTextureBorderColor(GPU_TEXUNIT0, 0x00000000);
391
392 float textureMtx[2 * 4] = {
393 // Rows are in the order w z y x, because ctrulib
394 0.0f, 0.0f, 0.0f, 1.0f / texture->width,
395 0.0f, 0.0f, 1.0f / texture->height, 0.0f,
396 };
397
398 GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_textureMtx, (u32*)&textureMtx, 2);
399
400 activeTexture = texture;
401}
402
403void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh) {
404 if (ctrNumQuads == MAX_NUM_QUADS) {
405 ctrFlushBatch();
406 }
407
408 u16 index = ctrNumQuads * 4;
409 struct ctrUIVertex* vtx = &ctrVertexBuffer[index];
410 vtx->x = x; vtx->y = y;
411 vtx->u = u; vtx->v = v;
412 vtx->abgr = color;
413 vtx++;
414
415 vtx->x = x + w; vtx->y = y;
416 vtx->u = u + uw; vtx->v = v;
417 vtx->abgr = color;
418 vtx++;
419
420 vtx->x = x; vtx->y = y + h;
421 vtx->u = u; vtx->v = v + vh;
422 vtx->abgr = color;
423 vtx++;
424
425 vtx->x = x + w; vtx->y = y + h;
426 vtx->u = u + uw; vtx->v = v + vh;
427 vtx->abgr = color;
428
429 u16* i = &ctrIndexBuffer[ctrNumQuads * 6];
430 i[0] = index + 0; i[1] = index + 1; i[2] = index + 2;
431 i[3] = index + 2; i[4] = index + 1; i[5] = index + 3;
432
433 ctrNumQuads += 1;
434}
435
436void ctrAddRect(u32 color, s16 x, s16 y, s16 u, s16 v, s16 w, s16 h) {
437 ctrAddRectScaled(color,
438 x, y, w, h,
439 u, v, w, h);
440}
441
442void ctrFlushBatch(void) {
443 if (ctrNumQuads == 0) {
444 return;
445 }
446
447 ctrClearPending((1 << GSPEVENT_PSC0));
448
449 GSPGPU_FlushDataCache(NULL, (u8*)ctrVertexBuffer, VERTEX_INDEX_BUFFER_SIZE);
450 GPU_DrawElements(GPU_UNKPRIM, (u32*)(osConvertVirtToPhys((u32)ctrIndexBuffer) - VRAM_BASE), ctrNumQuads * 6);
451
452 GPU_FinishDrawing();
453 GPUCMD_Finalize();
454 GSPGPU_FlushDataCache(NULL, (u8*)gpuCommandList, COMMAND_LIST_LENGTH * sizeof(u32));
455 GPUCMD_FlushAndRun(NULL);
456
457 gspWaitForP3D();
458
459 GPUCMD_SetBufferOffset(0);
460
461 ctrNumQuads = 0;
462}