src/platform/3ds/ctr-gpu.c (view raw)
1/* Copyright (c) 2015 Yuri Kunde Schlesner
2 *
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7#include <3ds.h>
8#include <3ds/gpu/gpu.h>
9#include <3ds/gpu/gx.h>
10#include <stdlib.h>
11#include <string.h>
12#include <stdio.h>
13
14#include "ctr-gpu.h"
15
16#include "uishader.h"
17#include "uishader.shbin.h"
18
19struct ctrUIVertex {
20 s16 x,y;
21 s16 u,v;
22 u32 abgr;
23};
24
// Base address that vertex and index buffer locations are expressed relative to
#define VRAM_BASE 0x18000000u

// Number of quads that can be batched before the batch has to be flushed
#define MAX_NUM_QUADS 1024
// Capacity of the GPU command list, counted in u32 words
#define COMMAND_LIST_LENGTH (16 * 1024)
// Each quad requires 4 vertices and 2*3 indices for the two triangles used to draw it
#define VERTEX_INDEX_BUFFER_SIZE (MAX_NUM_QUADS * (4 * sizeof(struct ctrUIVertex) + 6 * sizeof(u16)))
31
32static struct ctrUIVertex* ctrVertexBuffer = NULL;
33static u16* ctrIndexBuffer = NULL;
34static u16 ctrNumQuads = 0;
35
36static void* gpuColorBuffer[2] = { NULL, NULL };
37static u32* gpuCommandList = NULL;
38static void* screenTexture = NULL;
39
40static shaderProgram_s gpuShader;
41static DVLB_s* passthroughShader = NULL;
42
43static int pendingEvents = 0;
44
45static const struct ctrTexture* activeTexture = NULL;
46
47void ctrClearPending(int events) {
48 int toClear = events & pendingEvents;
49 if (toClear & (1 << GSPGPU_EVENT_PSC0)) {
50 gspWaitForPSC0();
51 }
52 if (toClear & (1 << GSPGPU_EVENT_PPF)) {
53 gspWaitForPPF();
54 }
55 pendingEvents ^= toClear;
56}
57
58// Replacements for the limiting GPU_SetViewport function in ctrulib
59static void _GPU_SetFramebuffer(intptr_t colorBuffer, intptr_t depthBuffer, u16 w, u16 h) {
60 u32 buf[4];
61
62 // Unknown
63 GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 0x00000001);
64 GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 0x00000001);
65
66 // Set depth/color buffer address and dimensions
67 buf[0] = depthBuffer >> 3;
68 buf[1] = colorBuffer >> 3;
69 buf[2] = (0x01) << 24 | ((h-1) & 0xFFF) << 12 | (w & 0xFFF) << 0;
70 GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, buf, 3);
71 GPUCMD_AddWrite(GPUREG_RENDERBUF_DIM, buf[2]);
72
73 // Set depth/color buffer pixel format
74 GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 3 /* D248S */ );
75 GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0 /* RGBA8 */ << 16 | 2 /* Unknown */);
76 GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_BLOCK32, 0); // Unknown
77
78 // Enable color/depth buffers
79 buf[0] = colorBuffer != 0 ? 0xF : 0x0;
80 buf[1] = buf[0];
81 buf[2] = depthBuffer != 0 ? 0x2 : 0x0;
82 buf[3] = buf[2];
83 GPUCMD_AddIncrementalWrites(GPUREG_COLORBUFFER_READ, buf, 4);
84}
85
86static void _GPU_SetViewportEx(u16 x, u16 y, u16 w, u16 h) {
87 u32 buf[4];
88
89 buf[0] = f32tof24(w / 2.0f);
90 buf[1] = f32tof31(2.0f / w) << 1;
91 buf[2] = f32tof24(h / 2.0f);
92 buf[3] = f32tof31(2.0f / h) << 1;
93 GPUCMD_AddIncrementalWrites(GPUREG_VIEWPORT_WIDTH, buf, 4);
94
95 GPUCMD_AddWrite(GPUREG_VIEWPORT_XY, (y & 0xFFFF) << 16 | (x & 0xFFFF) << 0);
96
97 buf[0] = 0;
98 buf[1] = 0;
99 buf[2] = ((h-1) & 0xFFFF) << 16 | ((w-1) & 0xFFFF) << 0;
100 GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, buf, 3);
101}
102
103static void _setDummyTexEnv(int id) {
104 GPU_SetTexEnv(id,
105 GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
106 GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
107 GPU_TEVOPERANDS(0, 0, 0),
108 GPU_TEVOPERANDS(0, 0, 0),
109 GPU_REPLACE,
110 GPU_REPLACE,
111 0x00000000);
112}
113
114Result ctrInitGpu() {
115 Result res = -1;
116
117 // Allocate buffers
118 gpuColorBuffer[0] = vramAlloc(400 * 240 * 4);
119 gpuColorBuffer[1] = vramAlloc(320 * 240 * 4);
120 gpuCommandList = linearAlloc(COMMAND_LIST_LENGTH * sizeof(u32));
121 ctrVertexBuffer = linearAlloc(VERTEX_INDEX_BUFFER_SIZE);
122 if (gpuColorBuffer[0] == NULL || gpuColorBuffer[1] == NULL || gpuCommandList == NULL || ctrVertexBuffer == NULL) {
123 res = -1;
124 goto error_allocs;
125 }
126 // Both buffers share the same allocation, index buffer follows the vertex buffer
127 ctrIndexBuffer = (u16*)(ctrVertexBuffer + (4 * MAX_NUM_QUADS));
128
129 // Load vertex shader binary
130 passthroughShader = DVLB_ParseFile((u32*)uishader, uishader_size);
131 if (passthroughShader == NULL) {
132 res = -1;
133 goto error_dvlb;
134 }
135
136 // Create shader
137 shaderProgramInit(&gpuShader);
138 res = shaderProgramSetVsh(&gpuShader, &passthroughShader->DVLE[0]);
139 if (res < 0) {
140 goto error_shader;
141 }
142
143 // Initialize the GPU in ctrulib and assign the command buffer to accept submission of commands
144 GPU_Init(NULL);
145 GPUCMD_SetBuffer(gpuCommandList, COMMAND_LIST_LENGTH, 0);
146
147 return 0;
148
149error_shader:
150 shaderProgramFree(&gpuShader);
151
152error_dvlb:
153 if (passthroughShader != NULL) {
154 DVLB_Free(passthroughShader);
155 passthroughShader = NULL;
156 }
157
158error_allocs:
159 if (ctrVertexBuffer != NULL) {
160 linearFree(ctrVertexBuffer);
161 ctrVertexBuffer = NULL;
162 ctrIndexBuffer = NULL;
163 }
164
165 if (gpuCommandList != NULL) {
166 GPUCMD_SetBuffer(NULL, 0, 0);
167 linearFree(gpuCommandList);
168 gpuCommandList = NULL;
169 }
170
171 if (gpuColorBuffer[0] != NULL) {
172 vramFree(gpuColorBuffer[0]);
173 gpuColorBuffer[0] = NULL;
174 }
175
176 if (gpuColorBuffer[1] != NULL) {
177 vramFree(gpuColorBuffer[1]);
178 gpuColorBuffer[1] = NULL;
179 }
180 return res;
181}
182
183void ctrDeinitGpu() {
184 shaderProgramFree(&gpuShader);
185
186 DVLB_Free(passthroughShader);
187 passthroughShader = NULL;
188
189 linearFree(screenTexture);
190 screenTexture = NULL;
191
192 linearFree(ctrVertexBuffer);
193 ctrVertexBuffer = NULL;
194 ctrIndexBuffer = NULL;
195
196 GPUCMD_SetBuffer(NULL, 0, 0);
197 linearFree(gpuCommandList);
198 gpuCommandList = NULL;
199
200 vramFree(gpuColorBuffer[0]);
201 gpuColorBuffer[0] = NULL;
202
203 vramFree(gpuColorBuffer[1]);
204 gpuColorBuffer[1] = NULL;
205}
206
207void ctrGpuBeginFrame(int screen) {
208 if (screen > 1) {
209 return;
210 }
211
212 int fw;
213 if (screen == 0) {
214 fw = 400;
215 } else {
216 fw = 320;
217 }
218
219 _GPU_SetFramebuffer(osConvertVirtToPhys(gpuColorBuffer[screen]), 0, 240, fw);
220}
221
222void ctrGpuBeginDrawing(void) {
223 shaderProgramUse(&gpuShader);
224
225 // Disable depth and stencil testing
226 GPU_SetDepthTestAndWriteMask(false, GPU_ALWAYS, GPU_WRITE_COLOR);
227 GPU_SetStencilTest(false, GPU_ALWAYS, 0, 0xFF, 0);
228 GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
229 GPU_DepthMap(-1.0f, 0.0f);
230
231 // Enable alpha blending
232 GPU_SetAlphaBlending(
233 GPU_BLEND_ADD, GPU_BLEND_ADD, // Operation RGB, Alpha
234 GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, // Color src, dst
235 GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); // Alpha src, dst
236 GPU_SetBlendingColor(0, 0, 0, 0);
237
238 // Disable alpha testing
239 GPU_SetAlphaTest(false, GPU_ALWAYS, 0);
240
241 // Unknown
242 GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_TEST1, 0x1, 0);
243 GPUCMD_AddWrite(GPUREG_EARLYDEPTH_TEST2, 0);
244
245 GPU_SetTexEnv(0,
246 GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // RGB
247 GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // Alpha
248 GPU_TEVOPERANDS(0, 0, 0), // RGB
249 GPU_TEVOPERANDS(0, 0, 0), // Alpha
250 GPU_MODULATE, GPU_MODULATE, // Operation RGB, Alpha
251 0x00000000); // Constant color
252 _setDummyTexEnv(1);
253 _setDummyTexEnv(2);
254 _setDummyTexEnv(3);
255 _setDummyTexEnv(4);
256 _setDummyTexEnv(5);
257
258 // Configure vertex attribute format
259 u32 bufferOffsets[] = { osConvertVirtToPhys(ctrVertexBuffer) - VRAM_BASE };
260 u64 arrayTargetAttributes[] = { 0x210 };
261 u8 numAttributesInArray[] = { 3 };
262 GPU_SetAttributeBuffers(
263 3, // Number of attributes
264 (u32*)VRAM_BASE, // Base address
265 GPU_ATTRIBFMT(0, 2, GPU_SHORT) | // Attribute format
266 GPU_ATTRIBFMT(1, 2, GPU_SHORT) |
267 GPU_ATTRIBFMT(2, 4, GPU_UNSIGNED_BYTE),
268 0xFF8, // Non-fixed vertex inputs
269 0x210, // Vertex shader input map
270 1, // Use 1 vertex array
271 bufferOffsets, arrayTargetAttributes, numAttributesInArray);
272}
273
274void ctrGpuEndFrame(int screen, void* outputFramebuffer, int w, int h) {
275 if (screen > 1) {
276 return;
277 }
278
279 int fw;
280 if (screen == 0) {
281 fw = 400;
282 } else {
283 fw = 320;
284 }
285
286 ctrFlushBatch();
287
288 void* colorBuffer = (u8*)gpuColorBuffer[screen] + ((fw - w) * 240 * 4);
289
290 const u32 GX_CROP_INPUT_LINES = (1 << 2);
291
292 ctrClearPending(1 << GSPGPU_EVENT_PSC0);
293 ctrClearPending(1 << GSPGPU_EVENT_PPF);
294
295 GX_DisplayTransfer(
296 colorBuffer, GX_BUFFER_DIM(240, fw),
297 outputFramebuffer, GX_BUFFER_DIM(h, w),
298 GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) |
299 GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) |
300 GX_CROP_INPUT_LINES);
301 pendingEvents |= (1 << GSPGPU_EVENT_PPF);
302}
303
304void ctrGpuEndDrawing(void) {
305 ctrClearPending(1 << GSPGPU_EVENT_PPF);
306 gfxSwapBuffersGpu();
307 gspWaitForEvent(GSPGPU_EVENT_VBlank0, false);
308
309 void* gpuColorBuffer0End = (char*)gpuColorBuffer[0] + 240 * 400 * 4;
310 void* gpuColorBuffer1End = (char*)gpuColorBuffer[1] + 240 * 320 * 4;
311 GX_MemoryFill(
312 gpuColorBuffer[0], 0x00000000, gpuColorBuffer0End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER,
313 gpuColorBuffer[1], 0x00000000, gpuColorBuffer1End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER);
314 pendingEvents |= 1 << GSPGPU_EVENT_PSC0;
315}
316
317void ctrSetViewportSize(s16 w, s16 h) {
318 // Set up projection matrix mapping (0,0) to the top-left and (w,h) to the
319 // bottom-right, taking into account the 3DS' screens' portrait
320 // orientation.
321 float projectionMtx[4 * 4] = {
322 // Rows are in the order w z y x, because ctrulib
323 1.0f, 0.0f, -2.0f / h, 0.0f,
324 1.0f, 0.0f, 0.0f, -2.0f / w,
325 -0.5f, 0.0f, 0.0f, 0.0f,
326 1.0f, 0.0f, 0.0f, 0.0f,
327 };
328
329 GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_projectionMtx, (u32*)&projectionMtx, 4);
330 _GPU_SetViewportEx(0, 0, h, w);
331}
332
333void ctrActivateTexture(const struct ctrTexture* texture) {
334 if (activeTexture == texture) {
335 return;
336 }
337
338 ctrFlushBatch();
339
340 GPU_SetTextureEnable(GPU_TEXUNIT0);
341 GPU_SetTexture(
342 GPU_TEXUNIT0, (u32*)osConvertVirtToPhys(texture->data),
343 texture->width, texture->height,
344 GPU_TEXTURE_MAG_FILTER(texture->filter) | GPU_TEXTURE_MIN_FILTER(texture->filter) |
345 GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER),
346 texture->format);
347 GPU_SetTextureBorderColor(GPU_TEXUNIT0, 0x00000000);
348
349 float textureMtx[2 * 4] = {
350 // Rows are in the order w z y x, because ctrulib
351 0.0f, 0.0f, 0.0f, 1.0f / texture->width,
352 0.0f, 0.0f, 1.0f / texture->height, 0.0f,
353 };
354
355 GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_textureMtx, (u32*)&textureMtx, 2);
356
357 activeTexture = texture;
358}
359
360void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh) {
361 if (ctrNumQuads == MAX_NUM_QUADS) {
362 ctrFlushBatch();
363 }
364
365 u16 index = ctrNumQuads * 4;
366 struct ctrUIVertex* vtx = &ctrVertexBuffer[index];
367 vtx->x = x; vtx->y = y;
368 vtx->u = u; vtx->v = v;
369 vtx->abgr = color;
370 vtx++;
371
372 vtx->x = x + w; vtx->y = y;
373 vtx->u = u + uw; vtx->v = v;
374 vtx->abgr = color;
375 vtx++;
376
377 vtx->x = x; vtx->y = y + h;
378 vtx->u = u; vtx->v = v + vh;
379 vtx->abgr = color;
380 vtx++;
381
382 vtx->x = x + w; vtx->y = y + h;
383 vtx->u = u + uw; vtx->v = v + vh;
384 vtx->abgr = color;
385
386 u16* i = &ctrIndexBuffer[ctrNumQuads * 6];
387 i[0] = index + 0; i[1] = index + 1; i[2] = index + 2;
388 i[3] = index + 2; i[4] = index + 1; i[5] = index + 3;
389
390 ctrNumQuads += 1;
391}
392
393void ctrAddRect(u32 color, s16 x, s16 y, s16 u, s16 v, s16 w, s16 h) {
394 ctrAddRectScaled(color,
395 x, y, w, h,
396 u, v, w, h);
397}
398
399void ctrFlushBatch(void) {
400 if (ctrNumQuads == 0) {
401 return;
402 }
403
404 ctrClearPending((1 << GSPGPU_EVENT_PSC0));
405
406 GSPGPU_FlushDataCache(ctrVertexBuffer, VERTEX_INDEX_BUFFER_SIZE);
407 GPU_DrawElements(GPU_GEOMETRY_PRIM, (u32*)(osConvertVirtToPhys(ctrIndexBuffer) - VRAM_BASE), ctrNumQuads * 6);
408
409 GPU_FinishDrawing();
410 GPUCMD_Finalize();
411 GSPGPU_FlushDataCache((u8*)gpuCommandList, COMMAND_LIST_LENGTH * sizeof(u32));
412 GPUCMD_FlushAndRun();
413
414 gspWaitForP3D();
415
416 GPUCMD_SetBufferOffset(0);
417
418 ctrNumQuads = 0;
419}