src/platform/3ds/ctr-gpu.c (view raw)
1/* Copyright (c) 2015 Yuri Kunde Schlesner
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7#include <3ds.h>
8#include <stdlib.h>
9#include <string.h>
10#include <stdio.h>
11
12#include "ctr-gpu.h"
13
14#include "uishader.h"
15#include "uishader.shbin.h"
16
17struct ctrUIVertex {
18 s16 x,y;
19 s16 u,v;
20 u32 abgr;
21};
22
// Base address handed to the attribute buffer setup; vertex and index buffer
// locations are passed to the GPU as offsets from this value
#define VRAM_BASE 0x18000000u

// Maximum number of quads held in one batch before it has to be flushed
#define MAX_NUM_QUADS 1024
// Capacity of the GPU command list, counted in u32 words
#define COMMAND_LIST_LENGTH (16 * 1024)
// Size in bytes of the shared vertex + index allocation: every quad takes
// 4 vertices, and 2 * 3 indices for the two triangles it is drawn with
#define VERTEX_INDEX_BUFFER_SIZE \
	((4 * MAX_NUM_QUADS) * sizeof(struct ctrUIVertex) + (6 * MAX_NUM_QUADS) * sizeof(u16))
29
30static struct ctrUIVertex* ctrVertexBuffer = NULL;
31static u16* ctrIndexBuffer = NULL;
32static u16 ctrNumQuads = 0;
33
34static void* gpuColorBuffer = NULL;
35static u32* gpuCommandList = NULL;
36static void* screenTexture = NULL;
37
38static shaderProgram_s gpuShader;
39static DVLB_s* passthroughShader = NULL;
40
41static const struct ctrTexture* activeTexture = NULL;
42
43static u32 _f24FromFloat(float f) {
44 u32 i;
45 memcpy(&i, &f, 4);
46
47 u32 mantissa = (i << 9) >> 9;
48 s32 exponent = (i << 1) >> 24;
49 u32 sign = (i << 0) >> 31;
50
51 // Truncate mantissa
52 mantissa >>= 7;
53
54 // Re-bias exponent
55 exponent = exponent - 127 + 63;
56 if (exponent < 0) {
57 // Underflow: flush to zero
58 return sign << 23;
59 } else if (exponent > 0x7F) {
60 // Overflow: saturate to infinity
61 return sign << 23 | 0x7F << 16;
62 }
63
64 return sign << 23 | exponent << 16 | mantissa;
65}
66
67static u32 _f31FromFloat(float f) {
68 u32 i;
69 memcpy(&i, &f, 4);
70
71 u32 mantissa = (i << 9) >> 9;
72 s32 exponent = (i << 1) >> 24;
73 u32 sign = (i << 0) >> 31;
74
75 // Re-bias exponent
76 exponent = exponent - 127 + 63;
77 if (exponent < 0) {
78 // Underflow: flush to zero
79 return sign << 30;
80 } else if (exponent > 0x7F) {
81 // Overflow: saturate to infinity
82 return sign << 30 | 0x7F << 23;
83 }
84
85 return sign << 30 | exponent << 23 | mantissa;
86}
87
88// Replacements for the limiting GPU_SetViewport function in ctrulib
89static void _GPU_SetFramebuffer(intptr_t colorBuffer, intptr_t depthBuffer, u16 w, u16 h) {
90 u32 buf[4];
91
92 // Unknown
93 GPUCMD_AddWrite(GPUREG_0111, 0x00000001);
94 GPUCMD_AddWrite(GPUREG_0110, 0x00000001);
95
96 // Set depth/color buffer address and dimensions
97 buf[0] = depthBuffer >> 3;
98 buf[1] = colorBuffer >> 3;
99 buf[2] = (0x01) << 24 | ((h-1) & 0xFFF) << 12 | (w & 0xFFF) << 0;
100 GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, buf, 3);
101 GPUCMD_AddWrite(GPUREG_006E, buf[2]);
102
103 // Set depth/color buffer pixel format
104 GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 3 /* D248S */ );
105 GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0 /* RGBA8 */ << 16 | 2 /* Unknown */);
106 GPUCMD_AddWrite(GPUREG_011B, 0); // Unknown
107
108 // Enable color/depth buffers
109 buf[0] = colorBuffer != 0 ? 0xF : 0x0;
110 buf[1] = buf[0];
111 buf[2] = depthBuffer != 0 ? 0x2 : 0x0;
112 buf[3] = buf[2];
113 GPUCMD_AddIncrementalWrites(GPUREG_0112, buf, 4);
114}
115
116static void _GPU_SetViewportEx(u16 x, u16 y, u16 w, u16 h) {
117 u32 buf[4];
118
119 buf[0] = _f24FromFloat(w / 2.0f);
120 buf[1] = _f31FromFloat(2.0f / w) << 1;
121 buf[2] = _f24FromFloat(h / 2.0f);
122 buf[3] = _f31FromFloat(2.0f / h) << 1;
123 GPUCMD_AddIncrementalWrites(GPUREG_0041, buf, 4);
124
125 GPUCMD_AddWrite(GPUREG_0068, (y & 0xFFFF) << 16 | (x & 0xFFFF) << 0);
126
127 buf[0] = 0;
128 buf[1] = 0;
129 buf[2] = ((h-1) & 0xFFFF) << 16 | ((w-1) & 0xFFFF) << 0;
130 GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, buf, 3);
131}
132
133static void _setDummyTexEnv(int id) {
134 GPU_SetTexEnv(id,
135 GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
136 GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
137 GPU_TEVOPERANDS(0, 0, 0),
138 GPU_TEVOPERANDS(0, 0, 0),
139 GPU_REPLACE,
140 GPU_REPLACE,
141 0x00000000);
142}
143
144Result ctrInitGpu() {
145 Result res = -1;
146
147 // Allocate buffers
148 gpuColorBuffer = vramAlloc(400 * 240 * 4);
149 gpuCommandList = linearAlloc(COMMAND_LIST_LENGTH * sizeof(u32));
150 ctrVertexBuffer = linearAlloc(VERTEX_INDEX_BUFFER_SIZE);
151 if (gpuColorBuffer == NULL || gpuCommandList == NULL || ctrVertexBuffer == NULL) {
152 res = -1;
153 goto error_allocs;
154 }
155 // Both buffers share the same allocation, index buffer follows the vertex buffer
156 ctrIndexBuffer = (u16*)(ctrVertexBuffer + (4 * MAX_NUM_QUADS));
157
158 // Load vertex shader binary
159 passthroughShader = DVLB_ParseFile((u32*)uishader, uishader_size);
160 if (passthroughShader == NULL) {
161 res = -1;
162 goto error_dvlb;
163 }
164
165 // Create shader
166 shaderProgramInit(&gpuShader);
167 res = shaderProgramSetVsh(&gpuShader, &passthroughShader->DVLE[0]);
168 if (res < 0) {
169 goto error_shader;
170 }
171
172 // Initialize the GPU in ctrulib and assign the command buffer to accept submission of commands
173 GPU_Init(NULL);
174 GPUCMD_SetBuffer(gpuCommandList, COMMAND_LIST_LENGTH, 0);
175
176 return 0;
177
178error_shader:
179 shaderProgramFree(&gpuShader);
180
181error_dvlb:
182 if (passthroughShader != NULL) {
183 DVLB_Free(passthroughShader);
184 passthroughShader = NULL;
185 }
186
187error_allocs:
188 if (ctrVertexBuffer != NULL) {
189 linearFree(ctrVertexBuffer);
190 ctrVertexBuffer = NULL;
191 ctrIndexBuffer = NULL;
192 }
193
194 if (gpuCommandList != NULL) {
195 GPUCMD_SetBuffer(NULL, 0, 0);
196 linearFree(gpuCommandList);
197 gpuCommandList = NULL;
198 }
199
200 if (gpuColorBuffer != NULL) {
201 vramFree(gpuColorBuffer);
202 gpuColorBuffer = NULL;
203 }
204 return res;
205}
206
207void ctrDeinitGpu() {
208 shaderProgramFree(&gpuShader);
209
210 DVLB_Free(passthroughShader);
211 passthroughShader = NULL;
212
213 linearFree(screenTexture);
214 screenTexture = NULL;
215
216 linearFree(ctrVertexBuffer);
217 ctrVertexBuffer = NULL;
218 ctrIndexBuffer = NULL;
219
220 GPUCMD_SetBuffer(NULL, 0, 0);
221 linearFree(gpuCommandList);
222 gpuCommandList = NULL;
223
224 vramFree(gpuColorBuffer);
225 gpuColorBuffer = NULL;
226}
227
228void ctrGpuBeginFrame(void) {
229 shaderProgramUse(&gpuShader);
230
231 void* gpuColorBufferEnd = (char*)gpuColorBuffer + 240 * 400 * 4;
232
233 GX_SetMemoryFill(NULL,
234 gpuColorBuffer, 0x00000000, gpuColorBufferEnd, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER,
235 NULL, 0, NULL, 0);
236 gspWaitForPSC0();
237
238 _GPU_SetFramebuffer(osConvertVirtToPhys((u32)gpuColorBuffer), 0, 240, 400);
239
240 // Disable depth and stencil testing
241 GPU_SetDepthTestAndWriteMask(false, GPU_ALWAYS, GPU_WRITE_COLOR);
242 GPU_SetStencilTest(false, GPU_ALWAYS, 0, 0xFF, 0);
243 GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
244 GPU_DepthMap(-1.0f, 0.0f);
245
246 // Enable alpha blending
247 GPU_SetAlphaBlending(
248 GPU_BLEND_ADD, GPU_BLEND_ADD, // Operation RGB, Alpha
249 GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, // Color src, dst
250 GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); // Alpha src, dst
251 GPU_SetBlendingColor(0, 0, 0, 0);
252
253 // Disable alpha testing
254 GPU_SetAlphaTest(false, GPU_ALWAYS, 0);
255
256 // Unknown
257 GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
258 GPUCMD_AddWrite(GPUREG_0118, 0);
259
260 GPU_SetTexEnv(0,
261 GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // RGB
262 GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // Alpha
263 GPU_TEVOPERANDS(0, 0, 0), // RGB
264 GPU_TEVOPERANDS(0, 0, 0), // Alpha
265 GPU_MODULATE, GPU_MODULATE, // Operation RGB, Alpha
266 0x00000000); // Constant color
267 _setDummyTexEnv(1);
268 _setDummyTexEnv(2);
269 _setDummyTexEnv(3);
270 _setDummyTexEnv(4);
271 _setDummyTexEnv(5);
272
273 // Configure vertex attribute format
274 u32 bufferOffsets[] = { osConvertVirtToPhys((u32)ctrVertexBuffer) - VRAM_BASE };
275 u64 arrayTargetAttributes[] = { 0x210 };
276 u8 numAttributesInArray[] = { 3 };
277 GPU_SetAttributeBuffers(
278 3, // Number of attributes
279 (u32*)VRAM_BASE, // Base address
280 GPU_ATTRIBFMT(0, 2, GPU_SHORT) | // Attribute format
281 GPU_ATTRIBFMT(1, 2, GPU_SHORT) |
282 GPU_ATTRIBFMT(2, 4, GPU_UNSIGNED_BYTE),
283 0xFF8, // Non-fixed vertex inputs
284 0x210, // Vertex shader input map
285 1, // Use 1 vertex array
286 bufferOffsets, arrayTargetAttributes, numAttributesInArray);
287}
288
289void ctrGpuEndFrame(void* outputFramebuffer, int w, int h) {
290 ctrFlushBatch();
291
292 void* colorBuffer = (u8*)gpuColorBuffer + ((400 - w) * 240 * 4);
293
294 const u32 GX_CROP_INPUT_LINES = (1 << 2);
295
296 GX_SetDisplayTransfer(NULL,
297 colorBuffer, GX_BUFFER_DIM(240, 400),
298 outputFramebuffer, GX_BUFFER_DIM(h, w),
299 GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) |
300 GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) |
301 GX_CROP_INPUT_LINES);
302 gspWaitForPPF();
303}
304
305void ctrSetViewportSize(s16 w, s16 h) {
306 // Set up projection matrix mapping (0,0) to the top-left and (w,h) to the
307 // bottom-right, taking into account the 3DS' screens' portrait
308 // orientation.
309 float projectionMtx[4 * 4] = {
310 // Rows are in the order w z y x, because ctrulib
311 1.0f, 0.0f, -2.0f / h, 0.0f,
312 1.0f, 0.0f, 0.0f, -2.0f / w,
313 -0.5f, 0.0f, 0.0f, 0.0f,
314 1.0f, 0.0f, 0.0f, 0.0f,
315 };
316
317 GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_projectionMtx, (u32*)&projectionMtx, 4);
318 _GPU_SetViewportEx(0, 0, h, w);
319}
320
321void ctrActivateTexture(const struct ctrTexture* texture) {
322 if (activeTexture == texture) {
323 return;
324 }
325
326 ctrFlushBatch();
327
328 GPU_SetTextureEnable(GPU_TEXUNIT0);
329 GPU_SetTexture(
330 GPU_TEXUNIT0, (u32*)osConvertVirtToPhys((u32)texture->data),
331 texture->width, texture->height,
332 GPU_TEXTURE_MAG_FILTER(texture->filter) | GPU_TEXTURE_MIN_FILTER(texture->filter) |
333 GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER),
334 texture->format);
335 GPU_SetTextureBorderColor(GPU_TEXUNIT0, 0x00000000);
336
337 float textureMtx[2 * 4] = {
338 // Rows are in the order w z y x, because ctrulib
339 0.0f, 0.0f, 0.0f, 1.0f / texture->width,
340 0.0f, 0.0f, 1.0f / texture->height, 0.0f,
341 };
342
343 GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_textureMtx, (u32*)&textureMtx, 2);
344
345 activeTexture = texture;
346}
347
348void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh) {
349 if (ctrNumQuads == MAX_NUM_QUADS) {
350 ctrFlushBatch();
351 }
352
353 u16 index = ctrNumQuads * 4;
354 struct ctrUIVertex* vtx = &ctrVertexBuffer[index];
355 vtx->x = x; vtx->y = y;
356 vtx->u = u; vtx->v = v;
357 vtx->abgr = color;
358 vtx++;
359
360 vtx->x = x + w; vtx->y = y;
361 vtx->u = u + uw; vtx->v = v;
362 vtx->abgr = color;
363 vtx++;
364
365 vtx->x = x; vtx->y = y + h;
366 vtx->u = u; vtx->v = v + vh;
367 vtx->abgr = color;
368 vtx++;
369
370 vtx->x = x + w; vtx->y = y + h;
371 vtx->u = u + uw; vtx->v = v + vh;
372 vtx->abgr = color;
373
374 u16* i = &ctrIndexBuffer[ctrNumQuads * 6];
375 i[0] = index + 0; i[1] = index + 1; i[2] = index + 2;
376 i[3] = index + 2; i[4] = index + 1; i[5] = index + 3;
377
378 ctrNumQuads += 1;
379}
380
381void ctrAddRect(u32 color, s16 x, s16 y, s16 u, s16 v, s16 w, s16 h) {
382 ctrAddRectScaled(color,
383 x, y, w, h,
384 u, v, w, h);
385}
386
387void ctrFlushBatch(void) {
388 if (ctrNumQuads == 0) {
389 return;
390 }
391
392 GSPGPU_FlushDataCache(NULL, (u8*)ctrVertexBuffer, VERTEX_INDEX_BUFFER_SIZE);
393 GPU_DrawElements(GPU_UNKPRIM, (u32*)(osConvertVirtToPhys((u32)ctrIndexBuffer) - VRAM_BASE), ctrNumQuads * 6);
394
395 GPU_FinishDrawing();
396 GPUCMD_Finalize();
397 GSPGPU_FlushDataCache(NULL, (u8*)gpuCommandList, COMMAND_LIST_LENGTH * sizeof(u32));
398 GPUCMD_FlushAndRun(NULL);
399
400 gspWaitForP3D();
401
402 GPUCMD_SetBufferOffset(0);
403
404 ctrNumQuads = 0;
405}