all repos — mgba @ a7d1b82329f1d4a457fff82d4bfeb38bef0881c6

mGBA Game Boy Advance Emulator

src/platform/3ds/ctr-gpu.c (view raw)

  1/* Copyright (c) 2015 Yuri Kunde Schlesner
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6
  7#include <3ds.h>
  8#include <3ds/gpu/gpu.h>
  9#include <3ds/gpu/gx.h>
 10#include <stdlib.h>
 11#include <string.h>
 12#include <stdio.h>
 13
 14#include "ctr-gpu.h"
 15
 16#include "uishader.h"
 17#include "uishader.shbin.h"
 18
 19struct ctrUIVertex {
 20	s16 x,y;
 21	s16 u,v;
 22	u32 abgr;
 23};
 24
 25#define VRAM_BASE 0x18000000u
 26
 27#define MAX_NUM_QUADS 1024
 28#define COMMAND_LIST_LENGTH (16 * 1024)
 29// Each quad requires 4 vertices and 2*3 indices for the two triangles used to draw it
 30#define VERTEX_INDEX_BUFFER_SIZE (MAX_NUM_QUADS * (4 * sizeof(struct ctrUIVertex) + 6 * sizeof(u16)))
 31
 32static struct ctrUIVertex* ctrVertexBuffer = NULL;
 33static u16* ctrIndexBuffer = NULL;
 34static u16 ctrNumQuads = 0;
 35
 36static void* gpuColorBuffer[2] = { NULL, NULL };
 37static u32* gpuCommandList = NULL;
 38static void* screenTexture = NULL;
 39
 40static shaderProgram_s gpuShader;
 41static DVLB_s* passthroughShader = NULL;
 42
 43static int pendingEvents = 0;
 44
 45static const struct ctrTexture* activeTexture = NULL;
 46
 47void ctrClearPending(int events) {
 48	int toClear = events & pendingEvents;
 49	if (toClear & (1 << GSPGPU_EVENT_PSC0)) {
 50		gspWaitForPSC0();
 51	}
 52	if (toClear & (1 << GSPGPU_EVENT_PPF)) {
 53		gspWaitForPPF();
 54	}
 55	pendingEvents ^= toClear;
 56}
 57
 58// Replacements for the limiting GPU_SetViewport function in ctrulib
 59static void _GPU_SetFramebuffer(intptr_t colorBuffer, intptr_t depthBuffer, u16 w, u16 h) {
 60	u32 buf[4];
 61
 62	// Unknown
 63	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 0x00000001);
 64	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 0x00000001);
 65
 66	// Set depth/color buffer address and dimensions
 67	buf[0] = depthBuffer >> 3;
 68	buf[1] = colorBuffer >> 3;
 69	buf[2] = (0x01) << 24 | ((h-1) & 0xFFF) << 12 | (w & 0xFFF) << 0;
 70	GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, buf, 3);
 71	GPUCMD_AddWrite(GPUREG_RENDERBUF_DIM, buf[2]);
 72
 73	// Set depth/color buffer pixel format
 74	GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 3 /* D248S */ );
 75	GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0 /* RGBA8 */ << 16 | 2 /* Unknown */);
 76	GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_BLOCK32, 0); // Unknown
 77
 78	// Enable color/depth buffers
 79	buf[0] = colorBuffer != 0 ? 0xF : 0x0;
 80	buf[1] = buf[0];
 81	buf[2] = depthBuffer != 0 ? 0x2 : 0x0;
 82	buf[3] = buf[2];
 83	GPUCMD_AddIncrementalWrites(GPUREG_COLORBUFFER_READ, buf, 4);
 84}
 85
 86static void _GPU_SetViewportEx(u16 x, u16 y, u16 w, u16 h) {
 87	u32 buf[4];
 88
 89	buf[0] = f32tof24(w / 2.0f);
 90	buf[1] = f32tof31(2.0f / w) << 1;
 91	buf[2] = f32tof24(h / 2.0f);
 92	buf[3] = f32tof31(2.0f / h) << 1;
 93	GPUCMD_AddIncrementalWrites(GPUREG_VIEWPORT_WIDTH, buf, 4);
 94
 95	GPUCMD_AddWrite(GPUREG_VIEWPORT_XY, (y & 0xFFFF) << 16 | (x & 0xFFFF) << 0);
 96
 97	buf[0] = 0;
 98	buf[1] = 0;
 99	buf[2] = ((h-1) & 0xFFFF) << 16 | ((w-1) & 0xFFFF) << 0;
100	GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, buf, 3);
101}
102
103static void _setDummyTexEnv(int id) {
104	GPU_SetTexEnv(id,
105		GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
106		GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
107		GPU_TEVOPERANDS(0, 0, 0),
108		GPU_TEVOPERANDS(0, 0, 0),
109		GPU_REPLACE,
110		GPU_REPLACE,
111		0x00000000);
112}
113
114Result ctrInitGpu() {
115	Result res = -1;
116
117	// Allocate buffers
118	gpuColorBuffer[0] = vramAlloc(400 * 240 * 4);
119	gpuColorBuffer[1] = vramAlloc(320 * 240 * 4);
120	gpuCommandList = linearAlloc(COMMAND_LIST_LENGTH * sizeof(u32));
121	ctrVertexBuffer = linearAlloc(VERTEX_INDEX_BUFFER_SIZE);
122	if (gpuColorBuffer[0] == NULL || gpuColorBuffer[1] == NULL || gpuCommandList == NULL || ctrVertexBuffer == NULL) {
123		res = -1;
124		goto error_allocs;
125	}
126	// Both buffers share the same allocation, index buffer follows the vertex buffer
127	ctrIndexBuffer = (u16*)(ctrVertexBuffer + (4 * MAX_NUM_QUADS));
128
129	// Load vertex shader binary
130	passthroughShader = DVLB_ParseFile((u32*)uishader, uishader_size);
131	if (passthroughShader == NULL) {
132		res = -1;
133		goto error_dvlb;
134	}
135
136	// Create shader
137	shaderProgramInit(&gpuShader);
138	res = shaderProgramSetVsh(&gpuShader, &passthroughShader->DVLE[0]);
139	if (res < 0) {
140		goto error_shader;
141	}
142
143	// Initialize the GPU in ctrulib and assign the command buffer to accept submission of commands
144	GPU_Init(NULL);
145	GPUCMD_SetBuffer(gpuCommandList, COMMAND_LIST_LENGTH, 0);
146
147	return 0;
148
149error_shader:
150	shaderProgramFree(&gpuShader);
151
152error_dvlb:
153	if (passthroughShader != NULL) {
154		DVLB_Free(passthroughShader);
155		passthroughShader = NULL;
156	}
157
158error_allocs:
159	if (ctrVertexBuffer != NULL) {
160		linearFree(ctrVertexBuffer);
161		ctrVertexBuffer = NULL;
162		ctrIndexBuffer = NULL;
163	}
164
165	if (gpuCommandList != NULL) {
166		GPUCMD_SetBuffer(NULL, 0, 0);
167		linearFree(gpuCommandList);
168		gpuCommandList = NULL;
169	}
170
171	if (gpuColorBuffer[0] != NULL) {
172		vramFree(gpuColorBuffer[0]);
173		gpuColorBuffer[0] = NULL;
174	}
175
176	if (gpuColorBuffer[1] != NULL) {
177		vramFree(gpuColorBuffer[1]);
178		gpuColorBuffer[1] = NULL;
179	}
180	return res;
181}
182
183void ctrDeinitGpu() {
184	shaderProgramFree(&gpuShader);
185
186	DVLB_Free(passthroughShader);
187	passthroughShader = NULL;
188
189	linearFree(screenTexture);
190	screenTexture = NULL;
191
192	linearFree(ctrVertexBuffer);
193	ctrVertexBuffer = NULL;
194	ctrIndexBuffer = NULL;
195
196	GPUCMD_SetBuffer(NULL, 0, 0);
197	linearFree(gpuCommandList);
198	gpuCommandList = NULL;
199
200	vramFree(gpuColorBuffer[0]);
201	gpuColorBuffer[0] = NULL;
202
203	vramFree(gpuColorBuffer[1]);
204	gpuColorBuffer[1] = NULL;
205}
206
207void ctrGpuBeginFrame(int screen) {
208	if (screen > 1) {
209		return;
210	}
211
212	int fw;
213	if (screen == 0) {
214		fw = 400;
215	} else {
216		fw = 320;
217	}
218
219	_GPU_SetFramebuffer(osConvertVirtToPhys(gpuColorBuffer[screen]), 0, 240, fw);
220}
221
222void ctrGpuBeginDrawing(void) {
223	shaderProgramUse(&gpuShader);
224
225	// Disable depth and stencil testing
226	GPU_SetDepthTestAndWriteMask(false, GPU_ALWAYS, GPU_WRITE_COLOR);
227	GPU_SetStencilTest(false, GPU_ALWAYS, 0, 0xFF, 0);
228	GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
229	GPU_DepthMap(-1.0f, 0.0f);
230
231	// Enable alpha blending
232	GPU_SetAlphaBlending(
233			GPU_BLEND_ADD, GPU_BLEND_ADD, // Operation RGB, Alpha
234			GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, // Color src, dst
235			GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); // Alpha src, dst
236	GPU_SetBlendingColor(0, 0, 0, 0);
237
238	// Disable alpha testing
239	GPU_SetAlphaTest(false, GPU_ALWAYS, 0);
240
241	// Unknown
242	GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_TEST1, 0x1, 0);
243	GPUCMD_AddWrite(GPUREG_EARLYDEPTH_TEST2, 0);
244
245	GPU_SetTexEnv(0,
246			GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // RGB
247			GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // Alpha
248			GPU_TEVOPERANDS(0, 0, 0), // RGB
249			GPU_TEVOPERANDS(0, 0, 0), // Alpha
250			GPU_MODULATE, GPU_MODULATE, // Operation RGB, Alpha
251			0x00000000); // Constant color
252	_setDummyTexEnv(1);
253	_setDummyTexEnv(2);
254	_setDummyTexEnv(3);
255	_setDummyTexEnv(4);
256	_setDummyTexEnv(5);
257
258	// Configure vertex attribute format
259	u32 bufferOffsets[] = { osConvertVirtToPhys(ctrVertexBuffer) - VRAM_BASE };
260	u64 arrayTargetAttributes[] = { 0x210 };
261	u8 numAttributesInArray[] = { 3 };
262	GPU_SetAttributeBuffers(
263			3, // Number of attributes
264			(u32*)VRAM_BASE, // Base address
265			GPU_ATTRIBFMT(0, 2, GPU_SHORT) | // Attribute format
266				GPU_ATTRIBFMT(1, 2, GPU_SHORT) |
267				GPU_ATTRIBFMT(2, 4, GPU_UNSIGNED_BYTE),
268			0xFF8, // Non-fixed vertex inputs
269			0x210, // Vertex shader input map
270			1, // Use 1 vertex array
271			bufferOffsets, arrayTargetAttributes, numAttributesInArray);
272}
273
274void ctrGpuEndFrame(int screen, void* outputFramebuffer, int w, int h) {
275	if (screen > 1) {
276		return;
277	}
278
279	int fw;
280	if (screen == 0) {
281		fw = 400;
282	} else {
283		fw = 320;
284	}
285
286	ctrFlushBatch();
287
288	void* colorBuffer = (u8*)gpuColorBuffer[screen] + ((fw - w) * 240 * 4);
289
290	const u32 GX_CROP_INPUT_LINES = (1 << 2);
291
292	ctrClearPending(1 << GSPGPU_EVENT_PSC0);
293	ctrClearPending(1 << GSPGPU_EVENT_PPF);
294
295	GX_DisplayTransfer(
296			colorBuffer,       GX_BUFFER_DIM(240, fw),
297			outputFramebuffer, GX_BUFFER_DIM(h, w),
298			GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) |
299				GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) |
300				GX_CROP_INPUT_LINES);
301	pendingEvents |= (1 << GSPGPU_EVENT_PPF);
302}
303
304void ctrGpuEndDrawing(void) {
305	ctrClearPending(1 << GSPGPU_EVENT_PPF);
306	gfxSwapBuffersGpu();
307	gspWaitForEvent(GSPGPU_EVENT_VBlank0, false);
308
309	void* gpuColorBuffer0End = (char*)gpuColorBuffer[0] + 240 * 400 * 4;
310	void* gpuColorBuffer1End = (char*)gpuColorBuffer[1] + 240 * 320 * 4;
311	GX_MemoryFill(
312		gpuColorBuffer[0], 0x00000000, gpuColorBuffer0End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER,
313		gpuColorBuffer[1], 0x00000000, gpuColorBuffer1End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER);
314	pendingEvents |= 1 << GSPGPU_EVENT_PSC0;
315}
316
317void ctrSetViewportSize(s16 w, s16 h) {
318	// Set up projection matrix mapping (0,0) to the top-left and (w,h) to the
319	// bottom-right, taking into account the 3DS' screens' portrait
320	// orientation.
321	float projectionMtx[4 * 4] = {
322		// Rows are in the order w z y x, because ctrulib
323		1.0f, 0.0f, -2.0f / h, 0.0f,
324		1.0f, 0.0f, 0.0f, -2.0f / w,
325		-0.5f, 0.0f, 0.0f, 0.0f,
326		1.0f, 0.0f, 0.0f, 0.0f,
327	};
328
329	GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_projectionMtx, (u32*)&projectionMtx, 4);
330	_GPU_SetViewportEx(0, 0, h, w);
331}
332
333void ctrActivateTexture(const struct ctrTexture* texture) {
334	if (activeTexture == texture) {
335		return;
336	}
337
338	ctrFlushBatch();
339
340	GPU_SetTextureEnable(GPU_TEXUNIT0);
341	GPU_SetTexture(
342			GPU_TEXUNIT0, (u32*)osConvertVirtToPhys(texture->data),
343			texture->width, texture->height,
344			GPU_TEXTURE_MAG_FILTER(texture->filter) | GPU_TEXTURE_MIN_FILTER(texture->filter) |
345				GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER),
346			texture->format);
347	GPU_SetTextureBorderColor(GPU_TEXUNIT0, 0x00000000);
348
349	float textureMtx[2 * 4] = {
350		// Rows are in the order w z y x, because ctrulib
351		0.0f, 0.0f, 0.0f, 1.0f / texture->width,
352		0.0f, 0.0f, 1.0f / texture->height, 0.0f,
353	};
354
355	GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_textureMtx, (u32*)&textureMtx, 2);
356
357	activeTexture = texture;
358}
359
360void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh) {
361	if (ctrNumQuads == MAX_NUM_QUADS) {
362		ctrFlushBatch();
363	}
364
365	u16 index = ctrNumQuads * 4;
366	struct ctrUIVertex* vtx = &ctrVertexBuffer[index];
367	vtx->x = x; vtx->y = y;
368	vtx->u = u; vtx->v = v;
369	vtx->abgr = color;
370	vtx++;
371
372	vtx->x = x + w; vtx->y = y;
373	vtx->u = u + uw; vtx->v = v;
374	vtx->abgr = color;
375	vtx++;
376
377	vtx->x = x; vtx->y = y + h;
378	vtx->u = u; vtx->v = v + vh;
379	vtx->abgr = color;
380	vtx++;
381
382	vtx->x = x + w; vtx->y = y + h;
383	vtx->u = u + uw; vtx->v = v + vh;
384	vtx->abgr = color;
385
386	u16* i = &ctrIndexBuffer[ctrNumQuads * 6];
387	i[0] = index + 0; i[1] = index + 1; i[2] = index + 2;
388	i[3] = index + 2; i[4] = index + 1; i[5] = index + 3;
389
390	ctrNumQuads += 1;
391}
392
393void ctrAddRect(u32 color, s16 x, s16 y, s16 u, s16 v, s16 w, s16 h) {
394	ctrAddRectScaled(color,
395			x, y, w, h,
396			u, v, w, h);
397}
398
399void ctrFlushBatch(void) {
400	if (ctrNumQuads == 0) {
401		return;
402	}
403
404	ctrClearPending((1 << GSPGPU_EVENT_PSC0));
405
406	GSPGPU_FlushDataCache(ctrVertexBuffer, VERTEX_INDEX_BUFFER_SIZE);
407	GPU_DrawElements(GPU_GEOMETRY_PRIM, (u32*)(osConvertVirtToPhys(ctrIndexBuffer) - VRAM_BASE), ctrNumQuads * 6);
408
409	GPU_FinishDrawing();
410	GPUCMD_Finalize();
411	GSPGPU_FlushDataCache((u8*)gpuCommandList, COMMAND_LIST_LENGTH * sizeof(u32));
412	GPUCMD_FlushAndRun();
413
414	gspWaitForP3D();
415
416	GPUCMD_SetBufferOffset(0);
417
418	ctrNumQuads = 0;
419}