all repos — mgba @ bafcee7b1879b61e66302110aecf54f62b672325

mGBA Game Boy Advance Emulator

src/platform/3ds/ctr-gpu.c (view raw)

  1/* Copyright (c) 2015 Yuri Kunde Schlesner
  2 *
  3 * This Source Code Form is subject to the terms of the Mozilla Public
  4 * License, v. 2.0. If a copy of the MPL was not distributed with this
  5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  6
  7#include <3ds.h>
  8#include <stdlib.h>
  9#include <string.h>
 10#include <stdio.h>
 11
 12#include "ctr-gpu.h"
 13
 14#include "uishader.h"
 15#include "uishader.shbin.h"
 16
 17struct ctrUIVertex {
 18	s16 x,y;
 19	s16 u,v;
 20	u32 abgr;
 21};
 22
 23#define VRAM_BASE 0x18000000u
 24
 25#define MAX_NUM_QUADS 1024
 26#define COMMAND_LIST_LENGTH (16 * 1024)
 27// Each quad requires 4 vertices and 2*3 indices for the two triangles used to draw it
 28#define VERTEX_INDEX_BUFFER_SIZE (MAX_NUM_QUADS * (4 * sizeof(struct ctrUIVertex) + 6 * sizeof(u16)))
 29
 30static struct ctrUIVertex* ctrVertexBuffer = NULL;
 31static u16* ctrIndexBuffer = NULL;
 32static u16 ctrNumQuads = 0;
 33
 34static void* gpuColorBuffer = NULL;
 35static u32* gpuCommandList = NULL;
 36static void* screenTexture = NULL;
 37
 38static shaderProgram_s gpuShader;
 39static DVLB_s* passthroughShader = NULL;
 40
 41static const struct ctrTexture* activeTexture = NULL;
 42
 43static u32 _f24FromFloat(float f) {
 44	u32 i;
 45	memcpy(&i, &f, 4);
 46
 47	u32 mantissa = (i << 9) >>  9;
 48	s32 exponent = (i << 1) >> 24;
 49	u32 sign     = (i << 0) >> 31;
 50
 51	// Truncate mantissa
 52	mantissa >>= 7;
 53
 54	// Re-bias exponent
 55	exponent = exponent - 127 + 63;
 56	if (exponent < 0) {
 57		// Underflow: flush to zero
 58		return sign << 23;
 59	} else if (exponent > 0x7F) {
 60		// Overflow: saturate to infinity
 61		return sign << 23 | 0x7F << 16;
 62	}
 63
 64	return sign << 23 | exponent << 16 | mantissa;
 65}
 66
 67static u32 _f31FromFloat(float f) {
 68	u32 i;
 69	memcpy(&i, &f, 4);
 70
 71	u32 mantissa = (i << 9) >>  9;
 72	s32 exponent = (i << 1) >> 24;
 73	u32 sign     = (i << 0) >> 31;
 74
 75	// Re-bias exponent
 76	exponent = exponent - 127 + 63;
 77	if (exponent < 0) {
 78		// Underflow: flush to zero
 79		return sign << 30;
 80	} else if (exponent > 0x7F) {
 81		// Overflow: saturate to infinity
 82		return sign << 30 | 0x7F << 23;
 83	}
 84
 85	return sign << 30 | exponent << 23 | mantissa;
 86}
 87
 88// Replacements for the limiting GPU_SetViewport function in ctrulib
 89static void _GPU_SetFramebuffer(intptr_t colorBuffer, intptr_t depthBuffer, u16 w, u16 h) {
 90	u32 buf[4];
 91
 92	// Unknown
 93	GPUCMD_AddWrite(GPUREG_0111, 0x00000001);
 94	GPUCMD_AddWrite(GPUREG_0110, 0x00000001);
 95
 96	// Set depth/color buffer address and dimensions
 97	buf[0] = depthBuffer >> 3;
 98	buf[1] = colorBuffer >> 3;
 99	buf[2] = (0x01) << 24 | ((h-1) & 0xFFF) << 12 | (w & 0xFFF) << 0;
100	GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, buf, 3);
101	GPUCMD_AddWrite(GPUREG_006E, buf[2]);
102
103	// Set depth/color buffer pixel format
104	GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 3 /* D248S */ );
105	GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0 /* RGBA8 */ << 16 | 2 /* Unknown */);
106	GPUCMD_AddWrite(GPUREG_011B, 0); // Unknown
107
108	// Enable color/depth buffers
109	buf[0] = colorBuffer != 0 ? 0xF : 0x0;
110	buf[1] = buf[0];
111	buf[2] = depthBuffer != 0 ? 0x2 : 0x0;
112	buf[3] = buf[2];
113	GPUCMD_AddIncrementalWrites(GPUREG_0112, buf, 4);
114}
115
116static void _GPU_SetViewportEx(u16 x, u16 y, u16 w, u16 h) {
117	u32 buf[4];
118
119	buf[0] = _f24FromFloat(w / 2.0f);
120	buf[1] = _f31FromFloat(2.0f / w) << 1;
121	buf[2] = _f24FromFloat(h / 2.0f);
122	buf[3] = _f31FromFloat(2.0f / h) << 1;
123	GPUCMD_AddIncrementalWrites(GPUREG_0041, buf, 4);
124
125	GPUCMD_AddWrite(GPUREG_0068, (y & 0xFFFF) << 16 | (x & 0xFFFF) << 0);
126
127	buf[0] = 0;
128	buf[1] = 0;
129	buf[2] = ((h-1) & 0xFFFF) << 16 | ((w-1) & 0xFFFF) << 0;
130	GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, buf, 3);
131}
132
133static void _setDummyTexEnv(int id) {
134	GPU_SetTexEnv(id,
135		GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
136		GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
137		GPU_TEVOPERANDS(0, 0, 0),
138		GPU_TEVOPERANDS(0, 0, 0),
139		GPU_REPLACE,
140		GPU_REPLACE,
141		0x00000000);
142}
143
144Result ctrInitGpu() {
145	Result res = -1;
146
147	// Allocate buffers
148	gpuColorBuffer = vramAlloc(400 * 240 * 4);
149	gpuCommandList = linearAlloc(COMMAND_LIST_LENGTH * sizeof(u32));
150	ctrVertexBuffer = linearAlloc(VERTEX_INDEX_BUFFER_SIZE);
151	if (gpuColorBuffer == NULL || gpuCommandList == NULL || ctrVertexBuffer == NULL) {
152		res = -1;
153		goto error_allocs;
154	}
155	// Both buffers share the same allocation, index buffer follows the vertex buffer
156	ctrIndexBuffer = (u16*)(ctrVertexBuffer + (4 * MAX_NUM_QUADS));
157
158	// Load vertex shader binary
159	passthroughShader = DVLB_ParseFile((u32*)uishader, uishader_size);
160	if (passthroughShader == NULL) {
161		res = -1;
162		goto error_dvlb;
163	}
164
165	// Create shader
166	shaderProgramInit(&gpuShader);
167	res = shaderProgramSetVsh(&gpuShader, &passthroughShader->DVLE[0]);
168	if (res < 0) {
169		goto error_shader;
170	}
171
172	// Initialize the GPU in ctrulib and assign the command buffer to accept submission of commands
173	GPU_Init(NULL);
174	GPUCMD_SetBuffer(gpuCommandList, COMMAND_LIST_LENGTH, 0);
175
176	return 0;
177
178error_shader:
179	shaderProgramFree(&gpuShader);
180
181error_dvlb:
182	if (passthroughShader != NULL) {
183		DVLB_Free(passthroughShader);
184		passthroughShader = NULL;
185	}
186
187error_allocs:
188	if (ctrVertexBuffer != NULL) {
189		linearFree(ctrVertexBuffer);
190		ctrVertexBuffer = NULL;
191		ctrIndexBuffer = NULL;
192	}
193
194	if (gpuCommandList != NULL) {
195		GPUCMD_SetBuffer(NULL, 0, 0);
196		linearFree(gpuCommandList);
197		gpuCommandList = NULL;
198	}
199
200	if (gpuColorBuffer != NULL) {
201		vramFree(gpuColorBuffer);
202		gpuColorBuffer = NULL;
203	}
204	return res;
205}
206
207void ctrDeinitGpu() {
208	shaderProgramFree(&gpuShader);
209
210	DVLB_Free(passthroughShader);
211	passthroughShader = NULL;
212
213	linearFree(screenTexture);
214	screenTexture = NULL;
215
216	linearFree(ctrVertexBuffer);
217	ctrVertexBuffer = NULL;
218	ctrIndexBuffer = NULL;
219
220	GPUCMD_SetBuffer(NULL, 0, 0);
221	linearFree(gpuCommandList);
222	gpuCommandList = NULL;
223
224	vramFree(gpuColorBuffer);
225	gpuColorBuffer = NULL;
226}
227
228void ctrGpuBeginFrame(void) {
229	shaderProgramUse(&gpuShader);
230
231	void* gpuColorBufferEnd = (char*)gpuColorBuffer + 240 * 400 * 4;
232
233	GX_SetMemoryFill(NULL,
234			gpuColorBuffer, 0x00000000, gpuColorBufferEnd, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER,
235			NULL, 0, NULL, 0);
236	gspWaitForPSC0();
237
238	_GPU_SetFramebuffer(osConvertVirtToPhys((u32)gpuColorBuffer), 0, 240, 400);
239
240	// Disable depth and stencil testing
241	GPU_SetDepthTestAndWriteMask(false, GPU_ALWAYS, GPU_WRITE_COLOR);
242	GPU_SetStencilTest(false, GPU_ALWAYS, 0, 0xFF, 0);
243	GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP);
244	GPU_DepthMap(-1.0f, 0.0f);
245
246	// Enable alpha blending
247	GPU_SetAlphaBlending(
248			GPU_BLEND_ADD, GPU_BLEND_ADD, // Operation RGB, Alpha
249			GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, // Color src, dst
250			GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); // Alpha src, dst
251	GPU_SetBlendingColor(0, 0, 0, 0);
252
253	// Disable alpha testing
254	GPU_SetAlphaTest(false, GPU_ALWAYS, 0);
255
256	// Unknown
257	GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
258	GPUCMD_AddWrite(GPUREG_0118, 0);
259
260	GPU_SetTexEnv(0,
261			GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // RGB
262			GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // Alpha
263			GPU_TEVOPERANDS(0, 0, 0), // RGB
264			GPU_TEVOPERANDS(0, 0, 0), // Alpha
265			GPU_MODULATE, GPU_MODULATE, // Operation RGB, Alpha
266			0x00000000); // Constant color
267	_setDummyTexEnv(1);
268	_setDummyTexEnv(2);
269	_setDummyTexEnv(3);
270	_setDummyTexEnv(4);
271	_setDummyTexEnv(5);
272
273	// Configure vertex attribute format
274	u32 bufferOffsets[] = { osConvertVirtToPhys((u32)ctrVertexBuffer) - VRAM_BASE };
275	u64 arrayTargetAttributes[] = { 0x210 };
276	u8 numAttributesInArray[] = { 3 };
277	GPU_SetAttributeBuffers(
278			3, // Number of attributes
279			(u32*)VRAM_BASE, // Base address
280			GPU_ATTRIBFMT(0, 2, GPU_SHORT) | // Attribute format
281				GPU_ATTRIBFMT(1, 2, GPU_SHORT) |
282				GPU_ATTRIBFMT(2, 4, GPU_UNSIGNED_BYTE),
283			0xFF8, // Non-fixed vertex inputs
284			0x210, // Vertex shader input map
285			1, // Use 1 vertex array
286			bufferOffsets, arrayTargetAttributes, numAttributesInArray);
287}
288
289void ctrGpuEndFrame(void* outputFramebuffer, int w, int h) {
290	ctrFlushBatch();
291
292	void* colorBuffer = (u8*)gpuColorBuffer + ((400 - w) * 240 * 4);
293
294	const u32 GX_CROP_INPUT_LINES = (1 << 2);
295
296	GX_SetDisplayTransfer(NULL,
297			colorBuffer,       GX_BUFFER_DIM(240, 400),
298			outputFramebuffer, GX_BUFFER_DIM(h, w),
299			GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) |
300				GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) |
301				GX_CROP_INPUT_LINES);
302	gspWaitForPPF();
303}
304
305void ctrSetViewportSize(s16 w, s16 h) {
306	// Set up projection matrix mapping (0,0) to the top-left and (w,h) to the
307	// bottom-right, taking into account the 3DS' screens' portrait
308	// orientation.
309	float projectionMtx[4 * 4] = {
310		// Rows are in the order w z y x, because ctrulib
311		1.0f, 0.0f, -2.0f / h, 0.0f,
312		1.0f, 0.0f, 0.0f, -2.0f / w,
313		-0.5f, 0.0f, 0.0f, 0.0f,
314		1.0f, 0.0f, 0.0f, 0.0f,
315	};
316
317	GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_projectionMtx, (u32*)&projectionMtx, 4);
318	_GPU_SetViewportEx(0, 0, h, w);
319}
320
321void ctrActivateTexture(const struct ctrTexture* texture) {
322	if (activeTexture == texture) {
323		return;
324	}
325
326	ctrFlushBatch();
327
328	GPU_SetTextureEnable(GPU_TEXUNIT0);
329	GPU_SetTexture(
330			GPU_TEXUNIT0, (u32*)osConvertVirtToPhys((u32)texture->data),
331			texture->width, texture->height,
332			GPU_TEXTURE_MAG_FILTER(texture->filter) | GPU_TEXTURE_MIN_FILTER(texture->filter) |
333				GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER),
334			texture->format);
335	GPU_SetTextureBorderColor(GPU_TEXUNIT0, 0x00000000);
336
337	float textureMtx[2 * 4] = {
338		// Rows are in the order w z y x, because ctrulib
339		0.0f, 0.0f, 0.0f, 1.0f / texture->width,
340		0.0f, 0.0f, 1.0f / texture->height, 0.0f,
341	};
342
343	GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_textureMtx, (u32*)&textureMtx, 2);
344
345	activeTexture = texture;
346}
347
348void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh) {
349	if (ctrNumQuads == MAX_NUM_QUADS) {
350		ctrFlushBatch();
351	}
352
353	u16 index = ctrNumQuads * 4;
354	struct ctrUIVertex* vtx = &ctrVertexBuffer[index];
355	vtx->x = x; vtx->y = y;
356	vtx->u = u; vtx->v = v;
357	vtx->abgr = color;
358	vtx++;
359
360	vtx->x = x + w; vtx->y = y;
361	vtx->u = u + uw; vtx->v = v;
362	vtx->abgr = color;
363	vtx++;
364
365	vtx->x = x; vtx->y = y + h;
366	vtx->u = u; vtx->v = v + vh;
367	vtx->abgr = color;
368	vtx++;
369
370	vtx->x = x + w; vtx->y = y + h;
371	vtx->u = u + uw; vtx->v = v + vh;
372	vtx->abgr = color;
373
374	u16* i = &ctrIndexBuffer[ctrNumQuads * 6];
375	i[0] = index + 0; i[1] = index + 1; i[2] = index + 2;
376	i[3] = index + 2; i[4] = index + 1; i[5] = index + 3;
377
378	ctrNumQuads += 1;
379}
380
381void ctrAddRect(u32 color, s16 x, s16 y, s16 u, s16 v, s16 w, s16 h) {
382	ctrAddRectScaled(color,
383			x, y, w, h,
384			u, v, w, h);
385}
386
387void ctrFlushBatch(void) {
388	if (ctrNumQuads == 0) {
389		return;
390	}
391
392	GSPGPU_FlushDataCache(NULL, (u8*)ctrVertexBuffer, VERTEX_INDEX_BUFFER_SIZE);
393	GPU_DrawElements(GPU_UNKPRIM, (u32*)(osConvertVirtToPhys((u32)ctrIndexBuffer) - VRAM_BASE), ctrNumQuads * 6);
394
395	GPU_FinishDrawing();
396	GPUCMD_Finalize();
397	GSPGPU_FlushDataCache(NULL, (u8*)gpuCommandList, COMMAND_LIST_LENGTH * sizeof(u32));
398	GPUCMD_FlushAndRun(NULL);
399
400	gspWaitForP3D();
401
402	GPUCMD_SetBufferOffset(0);
403
404	ctrNumQuads = 0;
405}