all repos — mgba @ f836a67863e66499c812c8d963d37ff56ad10638

mGBA Game Boy Advance Emulator

src/platform/3ds/ctr-gpu.c (view raw)

  1/* Copyright (c) 2015 Yuri Kunde Schlesner
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6
  7#include <3ds.h>
  8#include <stdlib.h>
  9#include <string.h>
 10#include <stdio.h>
 11
 12#include "ctr-gpu.h"
 13
 14#include "uishader.h"
 15#include "uishader.shbin.h"
 16
 17struct ctrUIVertex {
 18	s16 x,y;
 19	s16 u,v;
 20	u32 abgr;
 21};
 22
 23#define VRAM_BASE 0x18000000u
 24
 25#define MAX_NUM_QUADS 1024
 26#define COMMAND_LIST_LENGTH (16 * 1024)
 27// Each quad requires 4 vertices and 2*3 indices for the two triangles used to draw it
 28#define VERTEX_INDEX_BUFFER_SIZE (MAX_NUM_QUADS * (4 * sizeof(struct ctrUIVertex) + 6 * sizeof(u16)))
 29
 30static struct ctrUIVertex* ctrVertexBuffer = NULL;
 31static u16* ctrIndexBuffer = NULL;
 32static u16 ctrNumQuads = 0;
 33
 34static void* gpuColorBuffer[2] = { NULL, NULL };
 35static u32* gpuCommandList = NULL;
 36static void* screenTexture = NULL;
 37
 38static shaderProgram_s gpuShader;
 39static DVLB_s* passthroughShader = NULL;
 40
 41static int pendingEvents = 0;
 42
 43static const struct ctrTexture* activeTexture = NULL;
 44
 45static u32 _f24FromFloat(float f) {
 46	u32 i;
 47	memcpy(&i, &f, 4);
 48
 49	u32 mantissa = (i << 9) >>  9;
 50	s32 exponent = (i << 1) >> 24;
 51	u32 sign     = (i << 0) >> 31;
 52
 53	// Truncate mantissa
 54	mantissa >>= 7;
 55
 56	// Re-bias exponent
 57	exponent = exponent - 127 + 63;
 58	if (exponent < 0) {
 59		// Underflow: flush to zero
 60		return sign << 23;
 61	} else if (exponent > 0x7F) {
 62		// Overflow: saturate to infinity
 63		return sign << 23 | 0x7F << 16;
 64	}
 65
 66	return sign << 23 | exponent << 16 | mantissa;
 67}
 68
 69static u32 _f31FromFloat(float f) {
 70	u32 i;
 71	memcpy(&i, &f, 4);
 72
 73	u32 mantissa = (i << 9) >>  9;
 74	s32 exponent = (i << 1) >> 24;
 75	u32 sign     = (i << 0) >> 31;
 76
 77	// Re-bias exponent
 78	exponent = exponent - 127 + 63;
 79	if (exponent < 0) {
 80		// Underflow: flush to zero
 81		return sign << 30;
 82	} else if (exponent > 0x7F) {
 83		// Overflow: saturate to infinity
 84		return sign << 30 | 0x7F << 23;
 85	}
 86
 87	return sign << 30 | exponent << 23 | mantissa;
 88}
 89
 90void ctrClearPending(int events) {
 91	int toClear = events & pendingEvents;
 92	if (toClear & (1 << GSPEVENT_PSC0)) {
 93		gspWaitForPSC0();
 94	}
 95	if (toClear & (1 << GSPEVENT_PPF)) {
 96		gspWaitForPPF();
 97	}
 98	pendingEvents ^= toClear;
 99}
100
101// Replacements for the limiting GPU_SetViewport function in ctrulib
102static void _GPU_SetFramebuffer(intptr_t colorBuffer, intptr_t depthBuffer, u16 w, u16 h) {
103	u32 buf[4];
104
105	// Unknown
106	GPUCMD_AddWrite(GPUREG_0111, 0x00000001);
107	GPUCMD_AddWrite(GPUREG_0110, 0x00000001);
108
109	// Set depth/color buffer address and dimensions
110	buf[0] = depthBuffer >> 3;
111	buf[1] = colorBuffer >> 3;
112	buf[2] = (0x01) << 24 | ((h-1) & 0xFFF) << 12 | (w & 0xFFF) << 0;
113	GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, buf, 3);
114	GPUCMD_AddWrite(GPUREG_006E, buf[2]);
115
116	// Set depth/color buffer pixel format
117	GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 3 /* D248S */ );
118	GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0 /* RGBA8 */ << 16 | 2 /* Unknown */);
119	GPUCMD_AddWrite(GPUREG_011B, 0); // Unknown
120
121	// Enable color/depth buffers
122	buf[0] = colorBuffer != 0 ? 0xF : 0x0;
123	buf[1] = buf[0];
124	buf[2] = depthBuffer != 0 ? 0x2 : 0x0;
125	buf[3] = buf[2];
126	GPUCMD_AddIncrementalWrites(GPUREG_0112, buf, 4);
127}
128
129static void _GPU_SetViewportEx(u16 x, u16 y, u16 w, u16 h) {
130	u32 buf[4];
131
132	buf[0] = _f24FromFloat(w / 2.0f);
133	buf[1] = _f31FromFloat(2.0f / w) << 1;
134	buf[2] = _f24FromFloat(h / 2.0f);
135	buf[3] = _f31FromFloat(2.0f / h) << 1;
136	GPUCMD_AddIncrementalWrites(GPUREG_0041, buf, 4);
137
138	GPUCMD_AddWrite(GPUREG_0068, (y & 0xFFFF) << 16 | (x & 0xFFFF) << 0);
139
140	buf[0] = 0;
141	buf[1] = 0;
142	buf[2] = ((h-1) & 0xFFFF) << 16 | ((w-1) & 0xFFFF) << 0;
143	GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, buf, 3);
144}
145
146static void _setDummyTexEnv(int id) {
147	GPU_SetTexEnv(id,
148		GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
149		GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
150		GPU_TEVOPERANDS(0, 0, 0),
151		GPU_TEVOPERANDS(0, 0, 0),
152		GPU_REPLACE,
153		GPU_REPLACE,
154		0x00000000);
155}
156
157Result ctrInitGpu() {
158	Result res = -1;
159
160	// Allocate buffers
161	gpuColorBuffer[0] = vramAlloc(400 * 240 * 4);
162	gpuColorBuffer[1] = vramAlloc(320 * 240 * 4);
163	gpuCommandList = linearAlloc(COMMAND_LIST_LENGTH * sizeof(u32));
164	ctrVertexBuffer = linearAlloc(VERTEX_INDEX_BUFFER_SIZE);
165	if (gpuColorBuffer[0] == NULL || gpuColorBuffer[1] == NULL || gpuCommandList == NULL || ctrVertexBuffer == NULL) {
166		res = -1;
167		goto error_allocs;
168	}
169	// Both buffers share the same allocation, index buffer follows the vertex buffer
170	ctrIndexBuffer = (u16*)(ctrVertexBuffer + (4 * MAX_NUM_QUADS));
171
172	// Load vertex shader binary
173	passthroughShader = DVLB_ParseFile((u32*)uishader, uishader_size);
174	if (passthroughShader == NULL) {
175		res = -1;
176		goto error_dvlb;
177	}
178
179	// Create shader
180	shaderProgramInit(&gpuShader);
181	res = shaderProgramSetVsh(&gpuShader, &passthroughShader->DVLE[0]);
182	if (res < 0) {
183		goto error_shader;
184	}
185
186	// Initialize the GPU in ctrulib and assign the command buffer to accept submission of commands
187	GPU_Init(NULL);
188	GPUCMD_SetBuffer(gpuCommandList, COMMAND_LIST_LENGTH, 0);
189
190	return 0;
191
192error_shader:
193	shaderProgramFree(&gpuShader);
194
195error_dvlb:
196	if (passthroughShader != NULL) {
197		DVLB_Free(passthroughShader);
198		passthroughShader = NULL;
199	}
200
201error_allocs:
202	if (ctrVertexBuffer != NULL) {
203		linearFree(ctrVertexBuffer);
204		ctrVertexBuffer = NULL;
205		ctrIndexBuffer = NULL;
206	}
207
208	if (gpuCommandList != NULL) {
209		GPUCMD_SetBuffer(NULL, 0, 0);
210		linearFree(gpuCommandList);
211		gpuCommandList = NULL;
212	}
213
214	if (gpuColorBuffer[0] != NULL) {
215		vramFree(gpuColorBuffer[0]);
216		gpuColorBuffer[0] = NULL;
217	}
218
219	if (gpuColorBuffer[1] != NULL) {
220		vramFree(gpuColorBuffer[1]);
221		gpuColorBuffer[1] = NULL;
222	}
223	return res;
224}
225
226void ctrDeinitGpu() {
227	shaderProgramFree(&gpuShader);
228
229	DVLB_Free(passthroughShader);
230	passthroughShader = NULL;
231
232	linearFree(screenTexture);
233	screenTexture = NULL;
234
235	linearFree(ctrVertexBuffer);
236	ctrVertexBuffer = NULL;
237	ctrIndexBuffer = NULL;
238
239	GPUCMD_SetBuffer(NULL, 0, 0);
240	linearFree(gpuCommandList);
241	gpuCommandList = NULL;
242
243	vramFree(gpuColorBuffer[0]);
244	gpuColorBuffer[0] = NULL;
245
246	vramFree(gpuColorBuffer[1]);
247	gpuColorBuffer[1] = NULL;
248}
249
250void ctrGpuBeginFrame(int screen) {
251	if (screen > 1) {
252		return;
253	}
254
255	int fw;
256	if (screen == 0) {
257		fw = 400;
258	} else {
259		fw = 320;
260	}
261
262	_GPU_SetFramebuffer(osConvertVirtToPhys((u32)gpuColorBuffer[screen]), 0, 240, fw);
263}
264
265void ctrGpuBeginDrawing(void) {
266	shaderProgramUse(&gpuShader);
267
268	// Disable depth and stencil testing
269	GPU_SetDepthTestAndWriteMask(false, GPU_ALWAYS, GPU_WRITE_COLOR);
270	GPU_SetStencilTest(false, GPU_ALWAYS, 0, 0xFF, 0);
271	GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
272	GPU_DepthMap(-1.0f, 0.0f);
273
274	// Enable alpha blending
275	GPU_SetAlphaBlending(
276			GPU_BLEND_ADD, GPU_BLEND_ADD, // Operation RGB, Alpha
277			GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, // Color src, dst
278			GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); // Alpha src, dst
279	GPU_SetBlendingColor(0, 0, 0, 0);
280
281	// Disable alpha testing
282	GPU_SetAlphaTest(false, GPU_ALWAYS, 0);
283
284	// Unknown
285	GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
286	GPUCMD_AddWrite(GPUREG_0118, 0);
287
288	GPU_SetTexEnv(0,
289			GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // RGB
290			GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // Alpha
291			GPU_TEVOPERANDS(0, 0, 0), // RGB
292			GPU_TEVOPERANDS(0, 0, 0), // Alpha
293			GPU_MODULATE, GPU_MODULATE, // Operation RGB, Alpha
294			0x00000000); // Constant color
295	_setDummyTexEnv(1);
296	_setDummyTexEnv(2);
297	_setDummyTexEnv(3);
298	_setDummyTexEnv(4);
299	_setDummyTexEnv(5);
300
301	// Configure vertex attribute format
302	u32 bufferOffsets[] = { osConvertVirtToPhys((u32)ctrVertexBuffer) - VRAM_BASE };
303	u64 arrayTargetAttributes[] = { 0x210 };
304	u8 numAttributesInArray[] = { 3 };
305	GPU_SetAttributeBuffers(
306			3, // Number of attributes
307			(u32*)VRAM_BASE, // Base address
308			GPU_ATTRIBFMT(0, 2, GPU_SHORT) | // Attribute format
309				GPU_ATTRIBFMT(1, 2, GPU_SHORT) |
310				GPU_ATTRIBFMT(2, 4, GPU_UNSIGNED_BYTE),
311			0xFF8, // Non-fixed vertex inputs
312			0x210, // Vertex shader input map
313			1, // Use 1 vertex array
314			bufferOffsets, arrayTargetAttributes, numAttributesInArray);
315}
316
317void ctrGpuEndFrame(int screen, void* outputFramebuffer, int w, int h) {
318	if (screen > 1) {
319		return;
320	}
321
322	int fw;
323	if (screen == 0) {
324		fw = 400;
325	} else {
326		fw = 320;
327	}
328
329	ctrFlushBatch();
330
331	void* colorBuffer = (u8*)gpuColorBuffer[screen] + ((fw - w) * 240 * 4);
332
333	const u32 GX_CROP_INPUT_LINES = (1 << 2);
334
335	ctrClearPending(1 << GSPEVENT_PSC0);
336	ctrClearPending(1 << GSPEVENT_PPF);
337
338	GX_SetDisplayTransfer(NULL,
339			colorBuffer,       GX_BUFFER_DIM(240, fw),
340			outputFramebuffer, GX_BUFFER_DIM(h, w),
341			GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) |
342				GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) |
343				GX_CROP_INPUT_LINES);
344	pendingEvents |= (1 << GSPEVENT_PPF);
345}
346
347void ctrGpuEndDrawing(void) {
348	ctrClearPending(1 << GSPEVENT_PPF);
349	gfxSwapBuffersGpu();
350	gspWaitForEvent(GSPEVENT_VBlank0, false);
351
352	void* gpuColorBuffer0End = (char*)gpuColorBuffer[0] + 240 * 400 * 4;
353	void* gpuColorBuffer1End = (char*)gpuColorBuffer[1] + 240 * 320 * 4;
354	GX_SetMemoryFill(NULL,
355		gpuColorBuffer[0], 0x00000000, gpuColorBuffer0End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER,
356		gpuColorBuffer[1], 0x00000000, gpuColorBuffer1End, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER);
357	pendingEvents |= 1 << GSPEVENT_PSC0;
358}
359
360void ctrSetViewportSize(s16 w, s16 h) {
361	// Set up projection matrix mapping (0,0) to the top-left and (w,h) to the
362	// bottom-right, taking into account the 3DS' screens' portrait
363	// orientation.
364	float projectionMtx[4 * 4] = {
365		// Rows are in the order w z y x, because ctrulib
366		1.0f, 0.0f, -2.0f / h, 0.0f,
367		1.0f, 0.0f, 0.0f, -2.0f / w,
368		-0.5f, 0.0f, 0.0f, 0.0f,
369		1.0f, 0.0f, 0.0f, 0.0f,
370	};
371
372	GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_projectionMtx, (u32*)&projectionMtx, 4);
373	_GPU_SetViewportEx(0, 0, h, w);
374}
375
376void ctrActivateTexture(const struct ctrTexture* texture) {
377	if (activeTexture == texture) {
378		return;
379	}
380
381	ctrFlushBatch();
382
383	GPU_SetTextureEnable(GPU_TEXUNIT0);
384	GPU_SetTexture(
385			GPU_TEXUNIT0, (u32*)osConvertVirtToPhys((u32)texture->data),
386			texture->width, texture->height,
387			GPU_TEXTURE_MAG_FILTER(texture->filter) | GPU_TEXTURE_MIN_FILTER(texture->filter) |
388				GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER),
389			texture->format);
390	GPU_SetTextureBorderColor(GPU_TEXUNIT0, 0x00000000);
391
392	float textureMtx[2 * 4] = {
393		// Rows are in the order w z y x, because ctrulib
394		0.0f, 0.0f, 0.0f, 1.0f / texture->width,
395		0.0f, 0.0f, 1.0f / texture->height, 0.0f,
396	};
397
398	GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_textureMtx, (u32*)&textureMtx, 2);
399
400	activeTexture = texture;
401}
402
403void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh) {
404	if (ctrNumQuads == MAX_NUM_QUADS) {
405		ctrFlushBatch();
406	}
407
408	u16 index = ctrNumQuads * 4;
409	struct ctrUIVertex* vtx = &ctrVertexBuffer[index];
410	vtx->x = x; vtx->y = y;
411	vtx->u = u; vtx->v = v;
412	vtx->abgr = color;
413	vtx++;
414
415	vtx->x = x + w; vtx->y = y;
416	vtx->u = u + uw; vtx->v = v;
417	vtx->abgr = color;
418	vtx++;
419
420	vtx->x = x; vtx->y = y + h;
421	vtx->u = u; vtx->v = v + vh;
422	vtx->abgr = color;
423	vtx++;
424
425	vtx->x = x + w; vtx->y = y + h;
426	vtx->u = u + uw; vtx->v = v + vh;
427	vtx->abgr = color;
428
429	u16* i = &ctrIndexBuffer[ctrNumQuads * 6];
430	i[0] = index + 0; i[1] = index + 1; i[2] = index + 2;
431	i[3] = index + 2; i[4] = index + 1; i[5] = index + 3;
432
433	ctrNumQuads += 1;
434}
435
436void ctrAddRect(u32 color, s16 x, s16 y, s16 u, s16 v, s16 w, s16 h) {
437	ctrAddRectScaled(color,
438			x, y, w, h,
439			u, v, w, h);
440}
441
442void ctrFlushBatch(void) {
443	if (ctrNumQuads == 0) {
444		return;
445	}
446
447	ctrClearPending((1 << GSPEVENT_PSC0));
448
449	GSPGPU_FlushDataCache(NULL, (u8*)ctrVertexBuffer, VERTEX_INDEX_BUFFER_SIZE);
450	GPU_DrawElements(GPU_UNKPRIM, (u32*)(osConvertVirtToPhys((u32)ctrIndexBuffer) - VRAM_BASE), ctrNumQuads * 6);
451
452	GPU_FinishDrawing();
453	GPUCMD_Finalize();
454	GSPGPU_FlushDataCache(NULL, (u8*)gpuCommandList, COMMAND_LIST_LENGTH * sizeof(u32));
455	GPUCMD_FlushAndRun(NULL);
456
457	gspWaitForP3D();
458
459	GPUCMD_SetBufferOffset(0);
460
461	ctrNumQuads = 0;
462}