all repos — mgba @ e6710d012261b895fece5b60be2ec2fccb403faf

mGBA Game Boy Advance Emulator

src/ds/gx.c (view raw)

   1/* Copyright (c) 2013-2017 Jeffrey Pfau
   2 *
   3 * This Source Code Form is subject to the terms of the Mozilla Public
   4 * License, v. 2.0. If a copy of the MPL was not distributed with this
   5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6#include <mgba/internal/ds/gx.h>
   7
   8#include <mgba/internal/ds/ds.h>
   9#include <mgba/internal/ds/io.h>
  10
  11mLOG_DEFINE_CATEGORY(DS_GX, "DS GX", "ds.gx");
  12
  13#define DS_GX_FIFO_SIZE 256
  14#define DS_GX_PIPE_SIZE 4
  15
  16static void DSGXDummyRendererInit(struct DSGXRenderer* renderer);
  17static void DSGXDummyRendererReset(struct DSGXRenderer* renderer);
  18static void DSGXDummyRendererDeinit(struct DSGXRenderer* renderer);
  19static void DSGXDummyRendererInvalidateTex(struct DSGXRenderer* renderer, int slot);
  20static void DSGXDummyRendererSetRAM(struct DSGXRenderer* renderer, struct DSGXVertex* verts, struct DSGXPolygon* polys, unsigned polyCount, bool wSort);
  21static void DSGXDummyRendererDrawScanline(struct DSGXRenderer* renderer, int y);
  22static void DSGXDummyRendererGetScanline(struct DSGXRenderer* renderer, int y, const color_t** output);
  23
  24static void DSGXWriteFIFO(struct DSGX* gx, struct DSGXEntry entry);
  25
  26static bool _boxTestVertex(struct DSGX* gx, struct DSGXVertex* vertex);
  27
  28enum CSCode {
  29	CS_LEFT = 1,
  30	CS_RIGHT = 2,
  31	CS_BOTTOM = 4,
  32	CS_TOP = 8,
  33	CS_NEAR = 16,
  34	CS_FAR = 32
  35};
  36
  37static const int32_t _gxCommandCycleBase[DS_GX_CMD_MAX] = {
  38	[DS_GX_CMD_NOP] = 0,
  39	[DS_GX_CMD_MTX_MODE] = 2,
  40	[DS_GX_CMD_MTX_PUSH] = 34,
  41	[DS_GX_CMD_MTX_POP] = 72,
  42	[DS_GX_CMD_MTX_STORE] = 34,
  43	[DS_GX_CMD_MTX_RESTORE] = 72,
  44	[DS_GX_CMD_MTX_IDENTITY] = 38,
  45	[DS_GX_CMD_MTX_LOAD_4x4] = 68,
  46	[DS_GX_CMD_MTX_LOAD_4x3] = 60,
  47	[DS_GX_CMD_MTX_MULT_4x4] = 70,
  48	[DS_GX_CMD_MTX_MULT_4x3] = 62,
  49	[DS_GX_CMD_MTX_MULT_3x3] = 56,
  50	[DS_GX_CMD_MTX_SCALE] = 44,
  51	[DS_GX_CMD_MTX_TRANS] = 44,
  52	[DS_GX_CMD_COLOR] = 2,
  53	[DS_GX_CMD_NORMAL] = 18,
  54	[DS_GX_CMD_TEXCOORD] = 2,
  55	[DS_GX_CMD_VTX_16] = 18,
  56	[DS_GX_CMD_VTX_10] = 16,
  57	[DS_GX_CMD_VTX_XY] = 16,
  58	[DS_GX_CMD_VTX_XZ] = 16,
  59	[DS_GX_CMD_VTX_YZ] = 16,
  60	[DS_GX_CMD_VTX_DIFF] = 16,
  61	[DS_GX_CMD_POLYGON_ATTR] = 2,
  62	[DS_GX_CMD_TEXIMAGE_PARAM] = 2,
  63	[DS_GX_CMD_PLTT_BASE] = 2,
  64	[DS_GX_CMD_DIF_AMB] = 8,
  65	[DS_GX_CMD_SPE_EMI] = 8,
  66	[DS_GX_CMD_LIGHT_VECTOR] = 12,
  67	[DS_GX_CMD_LIGHT_COLOR] = 2,
  68	[DS_GX_CMD_SHININESS] = 64,
  69	[DS_GX_CMD_BEGIN_VTXS] = 2,
  70	[DS_GX_CMD_END_VTXS] = 2,
  71	[DS_GX_CMD_SWAP_BUFFERS] = 784,
  72	[DS_GX_CMD_VIEWPORT] = 2,
  73	[DS_GX_CMD_BOX_TEST] = 206,
  74	[DS_GX_CMD_POS_TEST] = 18,
  75	[DS_GX_CMD_VEC_TEST] = 10,
  76};
  77
  78static const int32_t _gxCommandParams[DS_GX_CMD_MAX] = {
  79	[DS_GX_CMD_MTX_MODE] = 1,
  80	[DS_GX_CMD_MTX_POP] = 1,
  81	[DS_GX_CMD_MTX_STORE] = 1,
  82	[DS_GX_CMD_MTX_RESTORE] = 1,
  83	[DS_GX_CMD_MTX_LOAD_4x4] = 16,
  84	[DS_GX_CMD_MTX_LOAD_4x3] = 12,
  85	[DS_GX_CMD_MTX_MULT_4x4] = 16,
  86	[DS_GX_CMD_MTX_MULT_4x3] = 12,
  87	[DS_GX_CMD_MTX_MULT_3x3] = 9,
  88	[DS_GX_CMD_MTX_SCALE] = 3,
  89	[DS_GX_CMD_MTX_TRANS] = 3,
  90	[DS_GX_CMD_COLOR] = 1,
  91	[DS_GX_CMD_NORMAL] = 1,
  92	[DS_GX_CMD_TEXCOORD] = 1,
  93	[DS_GX_CMD_VTX_16] = 2,
  94	[DS_GX_CMD_VTX_10] = 1,
  95	[DS_GX_CMD_VTX_XY] = 1,
  96	[DS_GX_CMD_VTX_XZ] = 1,
  97	[DS_GX_CMD_VTX_YZ] = 1,
  98	[DS_GX_CMD_VTX_DIFF] = 1,
  99	[DS_GX_CMD_POLYGON_ATTR] = 1,
 100	[DS_GX_CMD_TEXIMAGE_PARAM] = 1,
 101	[DS_GX_CMD_PLTT_BASE] = 1,
 102	[DS_GX_CMD_DIF_AMB] = 1,
 103	[DS_GX_CMD_SPE_EMI] = 1,
 104	[DS_GX_CMD_LIGHT_VECTOR] = 1,
 105	[DS_GX_CMD_LIGHT_COLOR] = 1,
 106	[DS_GX_CMD_SHININESS] = 32,
 107	[DS_GX_CMD_BEGIN_VTXS] = 1,
 108	[DS_GX_CMD_SWAP_BUFFERS] = 1,
 109	[DS_GX_CMD_VIEWPORT] = 1,
 110	[DS_GX_CMD_BOX_TEST] = 3,
 111	[DS_GX_CMD_POS_TEST] = 2,
 112	[DS_GX_CMD_VEC_TEST] = 1,
 113};
 114
 115static struct DSGXRenderer dummyRenderer = {
 116	.init = DSGXDummyRendererInit,
 117	.reset = DSGXDummyRendererReset,
 118	.deinit = DSGXDummyRendererDeinit,
 119	.invalidateTex = DSGXDummyRendererInvalidateTex,
 120	.setRAM = DSGXDummyRendererSetRAM,
 121	.drawScanline = DSGXDummyRendererDrawScanline,
 122	.getScanline = DSGXDummyRendererGetScanline,
 123};
 124
 125static void _pullPipe(struct DSGX* gx) {
 126	if (CircleBufferSize(&gx->fifo) >= sizeof(struct DSGXEntry)) {
 127		struct DSGXEntry entry = { 0 };
 128		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.command);
 129		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.params[0]);
 130		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.params[1]);
 131		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.params[2]);
 132		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.params[3]);
 133		CircleBufferWrite8(&gx->pipe, entry.command);
 134		CircleBufferWrite8(&gx->pipe, entry.params[0]);
 135		CircleBufferWrite8(&gx->pipe, entry.params[1]);
 136		CircleBufferWrite8(&gx->pipe, entry.params[2]);
 137		CircleBufferWrite8(&gx->pipe, entry.params[3]);
 138	}
 139	if (CircleBufferSize(&gx->fifo) >= sizeof(struct DSGXEntry)) {
 140		struct DSGXEntry entry = { 0 };
 141		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.command);
 142		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.params[0]);
 143		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.params[1]);
 144		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.params[2]);
 145		CircleBufferRead8(&gx->fifo, (int8_t*) &entry.params[3]);
 146		CircleBufferWrite8(&gx->pipe, entry.command);
 147		CircleBufferWrite8(&gx->pipe, entry.params[0]);
 148		CircleBufferWrite8(&gx->pipe, entry.params[1]);
 149		CircleBufferWrite8(&gx->pipe, entry.params[2]);
 150		CircleBufferWrite8(&gx->pipe, entry.params[3]);
 151	}
 152}
 153
 154static void _updateClipMatrix(struct DSGX* gx) {
 155	DSGXMtxMultiply(&gx->clipMatrix, &gx->posMatrix, &gx->projMatrix);
 156	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_00 >> 1] = gx->clipMatrix.m[0];
 157	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_01 >> 1] = gx->clipMatrix.m[0] >> 16;
 158	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_02 >> 1] = gx->clipMatrix.m[1];
 159	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_03 >> 1] = gx->clipMatrix.m[1] >> 16;
 160	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_04 >> 1] = gx->clipMatrix.m[2];
 161	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_05 >> 1] = gx->clipMatrix.m[2] >> 16;
 162	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_06 >> 1] = gx->clipMatrix.m[3];
 163	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_07 >> 1] = gx->clipMatrix.m[3] >> 16;
 164	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_08 >> 1] = gx->clipMatrix.m[4];
 165	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_09 >> 1] = gx->clipMatrix.m[4] >> 16;
 166	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_0A >> 1] = gx->clipMatrix.m[5];
 167	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_0B >> 1] = gx->clipMatrix.m[5] >> 16;
 168	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_0C >> 1] = gx->clipMatrix.m[6];
 169	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_0D >> 1] = gx->clipMatrix.m[6] >> 16;
 170	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_0E >> 1] = gx->clipMatrix.m[7];
 171	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_0F >> 1] = gx->clipMatrix.m[7] >> 16;
 172	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_10 >> 1] = gx->clipMatrix.m[8];
 173	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_11 >> 1] = gx->clipMatrix.m[8] >> 16;
 174	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_12 >> 1] = gx->clipMatrix.m[9];
 175	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_13 >> 1] = gx->clipMatrix.m[9] >> 16;
 176	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_14 >> 1] = gx->clipMatrix.m[10];
 177	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_15 >> 1] = gx->clipMatrix.m[10] >> 16;
 178	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_16 >> 1] = gx->clipMatrix.m[11];
 179	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_17 >> 1] = gx->clipMatrix.m[11] >> 16;
 180	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_18 >> 1] = gx->clipMatrix.m[12];
 181	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_19 >> 1] = gx->clipMatrix.m[12] >> 16;
 182	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_1A >> 1] = gx->clipMatrix.m[13];
 183	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_1B >> 1] = gx->clipMatrix.m[13] >> 16;
 184	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_1C >> 1] = gx->clipMatrix.m[14];
 185	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_1D >> 1] = gx->clipMatrix.m[14] >> 16;
 186	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_1E >> 1] = gx->clipMatrix.m[15];
 187	gx->p->memory.io9[DS9_REG_CLIPMTX_RESULT_1F >> 1] = gx->clipMatrix.m[15] >> 16;
 188}
 189
 190static inline int32_t _lerp(int32_t x0, int32_t x1, int32_t q, int64_t r) {
 191	int64_t x = x1 - x0;
 192	x *= q;
 193	x /= r;
 194	x += x0;
 195	return x;
 196}
 197
 198static int _cohenSutherlandCode(const struct DSGX* gx, const struct DSGXVertex* v) {
 199	int code = 0;
 200	if (v->vx < -v->vw) {
 201		code |= CS_LEFT;
 202	} else if (v->vx > v->vw) {
 203		code |= CS_RIGHT;
 204	}
 205	if (v->vy < -v->vw) {
 206		code |= CS_BOTTOM;
 207	} else if (v->vy > v->vw) {
 208		code |= CS_TOP;
 209	}
 210	if (v->vz < -v->vw) {
 211		code |= CS_NEAR;
 212	} else if (v->vz > v->vw) {
 213		code |= CS_FAR;
 214	}
 215	return code;
 216}
 217
 218static bool _lerpVertex(const struct DSGXVertex* v0, const struct DSGXVertex* v1, struct DSGXVertex* out, int32_t q, int64_t r) {
 219	if (!r) {
 220		return false;
 221	}
 222	int cr0 = (v0->color) & 0x1F;
 223	int cg0 = (v0->color >> 5) & 0x1F;
 224	int cb0 = (v0->color >> 10) & 0x1F;
 225	int cr1 = (v1->color) & 0x1F;
 226	int cg1 = (v1->color >> 5) & 0x1F;
 227	int cb1 = (v1->color >> 10) & 0x1F;
 228	cr0 = _lerp(cr0, cr1, q, r) & 0x1F;
 229	cg0 = _lerp(cg0, cg1, q, r) & 0x1F;
 230	cb0 = _lerp(cb0, cb1, q, r) & 0x1F;
 231	out->color = cr0 | (cg0 << 5) | (cb0 << 10);
 232
 233	out->vx = _lerp(v0->vx, v1->vx, q, r);
 234	out->vy = _lerp(v0->vy, v1->vy, q, r);
 235	out->vz = _lerp(v0->vz, v1->vz, q, r);
 236	out->vw = _lerp(v0->vw, v1->vw, q, r);
 237
 238	out->vs = _lerp(v0->vs, v1->vs, q, r);
 239	out->vt = _lerp(v0->vt, v1->vt, q, r);
 240	return true;
 241}
 242
 243static bool _lerpVertexX(const struct DSGXVertex* v0, const struct DSGXVertex* v1, struct DSGXVertex* out, int sign) {
 244	int32_t q = v0->vw - sign * v0->vx;
 245	int64_t r = q - (v1->vw - sign * v1->vx);
 246	return _lerpVertex(v0, v1, out, q, r);
 247}
 248
 249static bool _lerpVertexY(const struct DSGXVertex* v0, const struct DSGXVertex* v1, struct DSGXVertex* out, int sign) {
 250	int32_t q = v0->vw - sign * v0->vy;
 251	int64_t r = q - (v1->vw - sign * v1->vy);
 252	return _lerpVertex(v0, v1, out, q, r);
 253}
 254
 255static bool _lerpVertexZ(const struct DSGXVertex* v0, const struct DSGXVertex* v1, struct DSGXVertex* out, int sign) {
 256	int32_t q = v0->vw - sign * v0->vz;
 257	int64_t r = q - (v1->vw - sign * v1->vz);
 258	return _lerpVertex(v0, v1, out, q, r);
 259}
 260
 261static bool _clipPolygon(struct DSGX* gx, struct DSGXPolygon* poly) {
 262	int nOffscreen = 0;
 263	int offscreenVerts[8] = { 0, 0, 0, 0 };
 264	unsigned oldVerts[4];
 265	int v;
 266
 267	if (!DSGXPolygonAttrsIsBackFace(poly->polyParams) || !DSGXPolygonAttrsIsFrontFace(poly->polyParams)) {
 268		// Calculate normal direction and camera dot product average
 269		int64_t nx = 0;
 270		int64_t ny = 0;
 271		int64_t nz = 0;
 272		int64_t dot = 0;
 273		for (v = 0; v < poly->verts; ++v) {
 274			struct DSGXVertex* v0 = &gx->pendingVertices[poly->vertIds[v]];
 275			struct DSGXVertex* v1;
 276			struct DSGXVertex* v2;
 277			if (v < poly->verts - 2) {
 278				v1 = &gx->pendingVertices[poly->vertIds[v + 1]];
 279				v2 = &gx->pendingVertices[poly->vertIds[v + 2]];
 280			} else if (v < poly->verts - 1) {
 281				v1 = &gx->pendingVertices[poly->vertIds[v + 1]];
 282				v2 = &gx->pendingVertices[poly->vertIds[v + 2 - poly->verts]];
 283			} else {
 284				v1 = &gx->pendingVertices[poly->vertIds[v + 1 - poly->verts]];
 285				v2 = &gx->pendingVertices[poly->vertIds[v + 2 - poly->verts]];
 286			}
 287			nx = ((int64_t) v0->vy * v2->vw - (int64_t) v0->vw * v2->vy) >> 24;
 288			ny = ((int64_t) v0->vw * v2->vx - (int64_t) v0->vx * v2->vw) >> 24;
 289			nz = ((int64_t) v0->vx * v2->vy - (int64_t) v0->vy * v2->vx) >> 24;
 290			dot += nx * v1->vx + ny * v1->vy + nz * v1->vw;
 291		}
 292		if (!DSGXPolygonAttrsIsBackFace(poly->polyParams) && dot < 0) {
 293			return false;
 294		}
 295		if (!DSGXPolygonAttrsIsFrontFace(poly->polyParams) && dot > 0) {
 296			return false;
 297		}
 298	}
 299
 300	// Collect offscreen vertices
 301	for (v = 0; v < poly->verts; ++v) {
 302		offscreenVerts[v] = _cohenSutherlandCode(gx, &gx->pendingVertices[poly->vertIds[v]]);
 303		oldVerts[v] = poly->vertIds[v];
 304		if (offscreenVerts[v]) {
 305			++nOffscreen;
 306		}
 307	}
 308
 309	struct DSGXVertex* vbuf = gx->vertexBuffer[gx->bufferIndex];
 310
 311	if (!nOffscreen) {
 312		for (v = 0; v < poly->verts; ++v) {
 313			if (gx->vertexIndex == DS_GX_VERTEX_BUFFER_SIZE) {
 314				return false;
 315			}
 316			int vertexId = oldVerts[v];
 317			if (gx->pendingVertexIds[vertexId] >= 0) {
 318				poly->vertIds[v] = gx->pendingVertexIds[vertexId];
 319			} else {
 320				vbuf[gx->vertexIndex] = gx->pendingVertices[vertexId];
 321				gx->pendingVertexIds[vertexId] = gx->vertexIndex;
 322				poly->vertIds[v] = gx->vertexIndex;
 323				++gx->vertexIndex;
 324			}
 325		}
 326		return true;
 327	}
 328
 329	struct DSGXVertex inList[8];
 330	struct DSGXVertex outList[8];
 331	int outOffscreenVerts[8] = { 0, 0, 0, 0 };
 332	for (v = 0; v < poly->verts; ++v) {
 333		inList[v] = gx->pendingVertices[oldVerts[v]];
 334	}
 335
 336	int newV;
 337
 338	// Clip near
 339	newV = 0;
 340	for (v = 0; v < poly->verts; ++v) {
 341		if (!(offscreenVerts[v] & CS_NEAR)) {
 342			outList[newV] = inList[v];
 343			outOffscreenVerts[newV] = offscreenVerts[v];
 344			++newV;
 345		} else {
 346			struct DSGXVertex* in = &inList[v];
 347			struct DSGXVertex* in2;
 348			struct DSGXVertex* out;
 349			int iv;
 350
 351			if (v > 0) {
 352				iv = v - 1;
 353			} else {
 354				iv = poly->verts - 1;
 355			}
 356			if (!(offscreenVerts[iv] & CS_NEAR)) {
 357				in2 = &inList[iv];
 358				out = &outList[newV];
 359				if (_lerpVertexZ(in, in2, out, -1)) {
 360					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 361					++newV;
 362				}
 363			}
 364
 365			if (v < poly->verts - 1) {
 366				iv = v + 1;
 367			} else {
 368				iv = 0;
 369			}
 370			if (!(offscreenVerts[iv] & CS_NEAR)) {
 371				in2 = &inList[iv];
 372				out = &outList[newV];
 373				if (_lerpVertexZ(in, in2, out, -1)) {
 374					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 375					++newV;
 376				}
 377			}
 378		}
 379	}
 380	poly->verts = newV;
 381	memcpy(inList, outList, newV * sizeof(*inList));
 382	memcpy(offscreenVerts, outOffscreenVerts, newV * sizeof(*offscreenVerts));
 383
 384	// Clip far
 385	newV = 0;
 386	for (v = 0; v < poly->verts; ++v) {
 387		if (!(offscreenVerts[v] & CS_FAR)) {
 388			outList[newV] = inList[v];
 389			outOffscreenVerts[newV] = offscreenVerts[v];
 390			++newV;
 391		} else {
 392			if (!(offscreenVerts[v] & CS_NEAR)) {
 393				outList[newV] = inList[v];
 394				outOffscreenVerts[newV] = offscreenVerts[v];
 395				++newV;
 396			} else {
 397				struct DSGXVertex* in = &inList[v];
 398				struct DSGXVertex* in2;
 399				struct DSGXVertex* out;
 400				int iv;
 401
 402				if (v > 0) {
 403					iv = v - 1;
 404				} else {
 405					iv = poly->verts - 1;
 406				}
 407				if (!(offscreenVerts[iv] & CS_FAR)) {
 408					in2 = &inList[iv];
 409					out = &outList[newV];
 410					if (_lerpVertexZ(in, in2, out, 1)) {
 411						outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 412						++newV;
 413					}
 414				}
 415
 416				if (v < poly->verts - 1) {
 417					iv = v + 1;
 418				} else {
 419					iv = 0;
 420				}
 421				if (!(offscreenVerts[iv] & CS_FAR)) {
 422					in2 = &inList[iv];
 423					out = &outList[newV];
 424					if (_lerpVertexZ(in, in2, out, 1)) {
 425						outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 426						++newV;
 427					}
 428				}
 429			}
 430		}
 431	}
 432	poly->verts = newV;
 433	memcpy(inList, outList, newV * sizeof(*inList));
 434	memcpy(offscreenVerts, outOffscreenVerts, newV * sizeof(*offscreenVerts));
 435
 436	// Clip left
 437	newV = 0;
 438	for (v = 0; v < poly->verts; ++v) {
 439		if (!(offscreenVerts[v] & CS_LEFT)) {
 440			outList[newV] = inList[v];
 441			outOffscreenVerts[newV] = offscreenVerts[v];
 442			++newV;
 443		} else {
 444			struct DSGXVertex* in = &inList[v];
 445			struct DSGXVertex* in2;
 446			struct DSGXVertex* out;
 447			int iv;
 448
 449			if (v > 0) {
 450				iv = v - 1;
 451			} else {
 452				iv = poly->verts - 1;
 453			}
 454			if (!(offscreenVerts[iv] & CS_LEFT)) {
 455				in2 = &inList[iv];
 456				out = &outList[newV];
 457				if (_lerpVertexX(in, in2, out, -1)) {
 458					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 459					++newV;
 460				}
 461			}
 462
 463			if (v < poly->verts - 1) {
 464				iv = v + 1;
 465			} else {
 466				iv = 0;
 467			}
 468			if (!(offscreenVerts[iv] & CS_LEFT)) {
 469				in2 = &inList[iv];
 470				out = &outList[newV];
 471				if (_lerpVertexX(in, in2, out, -1)) {
 472					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 473					++newV;
 474				}
 475			}
 476		}
 477	}
 478	poly->verts = newV;
 479	memcpy(inList, outList, newV * sizeof(*inList));
 480	memcpy(offscreenVerts, outOffscreenVerts, newV * sizeof(*offscreenVerts));
 481
 482	// Clip right
 483	newV = 0;
 484	for (v = 0; v < poly->verts; ++v) {
 485		if (!(offscreenVerts[v] & CS_RIGHT)) {
 486			outList[newV] = inList[v];
 487			outOffscreenVerts[newV] = offscreenVerts[v];
 488			++newV;
 489		} else {
 490			struct DSGXVertex* in = &inList[v];
 491			struct DSGXVertex* in2;
 492			struct DSGXVertex* out;
 493			int iv;
 494
 495			if (v > 0) {
 496				iv = v - 1;
 497			} else {
 498				iv = poly->verts - 1;
 499			}
 500			if (!(offscreenVerts[iv] & CS_RIGHT)) {
 501				in2 = &inList[iv];
 502				out = &outList[newV];
 503				if (_lerpVertexX(in, in2, out, 1)) {
 504					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 505					++newV;
 506				}
 507			}
 508
 509			if (v < poly->verts - 1) {
 510				iv = v + 1;
 511			} else {
 512				iv = 0;
 513			}
 514			if (!(offscreenVerts[iv] & CS_RIGHT)) {
 515				in2 = &inList[iv];
 516				out = &outList[newV];
 517				if (_lerpVertexX(in, in2, out, 1)) {
 518					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 519					++newV;
 520				}
 521			}
 522		}
 523	}
 524	poly->verts = newV;
 525	memcpy(inList, outList, newV * sizeof(*inList));
 526	memcpy(offscreenVerts, outOffscreenVerts, newV * sizeof(*offscreenVerts));
 527
 528	// Clip bottom
 529	newV = 0;
 530	for (v = 0; v < poly->verts; ++v) {
 531		if (!(offscreenVerts[v] & CS_BOTTOM)) {
 532			outList[newV] = inList[v];
 533			outOffscreenVerts[newV] = offscreenVerts[v];
 534			++newV;
 535		} else {
 536			struct DSGXVertex* in = &inList[v];
 537			struct DSGXVertex* in2;
 538			struct DSGXVertex* out;
 539			int iv;
 540
 541			if (v > 0) {
 542				iv = v - 1;
 543			} else {
 544				iv = poly->verts - 1;
 545			}
 546			if (!(offscreenVerts[iv] & CS_BOTTOM)) {
 547				in2 = &inList[iv];
 548				out = &outList[newV];
 549				if (_lerpVertexY(in, in2, out, -1)) {
 550					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 551					++newV;
 552				}
 553			}
 554
 555			if (v < poly->verts - 1) {
 556				iv = v + 1;
 557			} else {
 558				iv = 0;
 559			}
 560			if (!(offscreenVerts[iv] & CS_BOTTOM)) {
 561				in2 = &inList[iv];
 562				out = &outList[newV];
 563				if (_lerpVertexY(in, in2, out, -1)) {
 564					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 565					++newV;
 566				}
 567			}
 568		}
 569	}
 570	poly->verts = newV;
 571	memcpy(inList, outList, newV * sizeof(*inList));
 572	memcpy(offscreenVerts, outOffscreenVerts, newV * sizeof(*offscreenVerts));
 573
 574	// Clip top
 575	newV = 0;
 576	for (v = 0; v < poly->verts; ++v) {
 577		if (!(offscreenVerts[v] & CS_TOP)) {
 578			outList[newV] = inList[v];
 579			outOffscreenVerts[newV] = offscreenVerts[v];
 580			++newV;
 581		} else {
 582			struct DSGXVertex* in = &inList[v];
 583			struct DSGXVertex* in2;
 584			struct DSGXVertex* out;
 585			int iv;
 586
 587			if (v > 0) {
 588				iv = v - 1;
 589			} else {
 590				iv = poly->verts - 1;
 591			}
 592			if (!(offscreenVerts[iv] & CS_TOP)) {
 593				in2 = &inList[iv];
 594				out = &outList[newV];
 595				if (_lerpVertexY(in, in2, out, 1)) {
 596					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 597					++newV;
 598				}
 599			}
 600
 601			if (v < poly->verts - 1) {
 602				iv = v + 1;
 603			} else {
 604				iv = 0;
 605			}
 606			if (!(offscreenVerts[iv] & CS_TOP)) {
 607				in2 = &inList[iv];
 608				out = &outList[newV];
 609				if (_lerpVertexY(in, in2, out, 1)) {
 610					outOffscreenVerts[newV] = _cohenSutherlandCode(gx, out);
 611					++newV;
 612				}
 613			}
 614		}
 615	}
 616	poly->verts = newV;
 617	memcpy(inList, outList, newV * sizeof(*inList));
 618	memcpy(offscreenVerts, outOffscreenVerts, newV * sizeof(*offscreenVerts));
 619
 620	for (v = 0; v < poly->verts; ++v) {
 621		if (gx->vertexIndex == DS_GX_VERTEX_BUFFER_SIZE) {
 622			return false;
 623		}
 624		// TODO: merge strips
 625		vbuf[gx->vertexIndex] = inList[v];
 626		poly->vertIds[v] = gx->vertexIndex;
 627		++gx->vertexIndex;
 628	}
 629
 630	return newV > 2;
 631}
 632
 633static int32_t _dotViewport(struct DSGXVertex* vertex, int32_t* col) {
 634	int64_t a;
 635	int64_t b;
 636	int64_t sum;
 637	a = col[0];
 638	b = vertex->x;
 639	sum = a * b;
 640	a = col[4];
 641	b = vertex->y;
 642	sum += a * b;
 643	a = col[8];
 644	b = vertex->z;
 645	sum += a * b;
 646	a = col[12];
 647	b = MTX_ONE;
 648	sum += a * b;
 649	return sum >> 8LL;
 650}
 651
 652static int16_t _dotTexture(struct DSGXVertex* vertex, int mode, int32_t* col) {
 653	int64_t a;
 654	int64_t b;
 655	int64_t sum;
 656	switch (mode) {
 657	case 1:
 658		a = col[0];
 659		b = vertex->s << 8;
 660		sum = a * b;
 661		a = col[4];
 662		b = vertex->t << 8;
 663		sum += a * b;
 664		a = col[8];
 665		b = MTX_ONE >> 4;
 666		sum += a * b;
 667		a = col[12];
 668		b = MTX_ONE >> 4;
 669		sum += a * b;
 670		break;
 671	case 2:
 672		return 0;
 673	case 3:
 674		a = col[0];
 675		b = vertex->vx << 8;
 676		sum = a * b;
 677		a = col[4];
 678		b = vertex->vy << 8;
 679		sum += a * b;
 680		a = col[8];
 681		b = vertex->vz << 8;
 682		sum += a * b;
 683		a = col[12];
 684		b = MTX_ONE;
 685		sum += a * b;
 686	}
 687	return sum >> 20;
 688}
 689
 690static void _emitVertex(struct DSGX* gx, uint16_t x, uint16_t y, uint16_t z) {
 691	if (gx->vertexMode < 0 || gx->vertexIndex == DS_GX_VERTEX_BUFFER_SIZE || gx->polygonIndex == DS_GX_POLYGON_BUFFER_SIZE) {
 692		return;
 693	}
 694	gx->currentVertex.x = x;
 695	gx->currentVertex.y = y;
 696	gx->currentVertex.z = z;
 697	gx->currentVertex.vx = _dotViewport(&gx->currentVertex, &gx->clipMatrix.m[0]);
 698	gx->currentVertex.vy = _dotViewport(&gx->currentVertex, &gx->clipMatrix.m[1]);
 699	gx->currentVertex.vz = _dotViewport(&gx->currentVertex, &gx->clipMatrix.m[2]);
 700	gx->currentVertex.vw = _dotViewport(&gx->currentVertex, &gx->clipMatrix.m[3]);
 701
 702	if (DSGXTexParamsGetCoordTfMode(gx->currentPoly.texParams) > 0) {
 703		int32_t m12 = gx->texMatrix.m[12];
 704		int32_t m13 = gx->texMatrix.m[13];
 705		if (DSGXTexParamsGetCoordTfMode(gx->currentPoly.texParams) > 1) {
 706			gx->texMatrix.m[12] = gx->currentVertex.vs;
 707			gx->texMatrix.m[13] = gx->currentVertex.vt;
 708		}
 709		gx->currentVertex.vs = _dotTexture(&gx->currentVertex, DSGXTexParamsGetCoordTfMode(gx->currentPoly.texParams), &gx->texMatrix.m[0]);
 710		gx->currentVertex.vt = _dotTexture(&gx->currentVertex, DSGXTexParamsGetCoordTfMode(gx->currentPoly.texParams), &gx->texMatrix.m[1]);
 711		gx->texMatrix.m[12] = m12;
 712		gx->texMatrix.m[13] = m13;
 713	} else {
 714		gx->currentVertex.vs = gx->currentVertex.s;
 715		gx->currentVertex.vt = gx->currentVertex.t;
 716	}
 717
 718	gx->pendingVertices[gx->pendingVertexIndex] = gx->currentVertex;
 719	gx->currentPoly.vertIds[gx->currentPoly.verts] = gx->pendingVertexIndex;
 720	gx->pendingVertexIndex = (gx->pendingVertexIndex + 1) & 3;
 721
 722	++gx->currentPoly.verts;
 723	int totalVertices;
 724	switch (gx->vertexMode) {
 725	case 0:
 726	case 2:
 727		totalVertices = 3;
 728		break;
 729	case 1:
 730	case 3:
 731		totalVertices = 4;
 732		break;
 733	}
 734	if (gx->currentPoly.verts == totalVertices) {
 735		struct DSGXPolygon* pbuf = gx->polygonBuffer[gx->bufferIndex];
 736
 737		pbuf[gx->polygonIndex] = gx->currentPoly;
 738		switch (gx->vertexMode) {
 739		case 0:
 740		case 1:
 741			gx->currentPoly.verts = 0;
 742			break;
 743		case 2:
 744			// Reverse winding if needed
 745			if (gx->reverseWinding) {
 746				pbuf[gx->polygonIndex].vertIds[1] = gx->currentPoly.vertIds[2];
 747				pbuf[gx->polygonIndex].vertIds[2] = gx->currentPoly.vertIds[1];
 748			}
 749			gx->reverseWinding = !gx->reverseWinding;
 750			gx->currentPoly.vertIds[0] = gx->currentPoly.vertIds[1];
 751			gx->currentPoly.vertIds[1] = gx->currentPoly.vertIds[2];
 752			gx->currentPoly.verts = 2;
 753			break;
 754		case 3:
 755			gx->currentPoly.vertIds[0] = gx->currentPoly.vertIds[2];
 756			gx->currentPoly.vertIds[1] = gx->currentPoly.vertIds[3];
 757			// Ensure quads don't cross over
 758			pbuf[gx->polygonIndex].vertIds[2] = gx->currentPoly.vertIds[3];
 759			pbuf[gx->polygonIndex].vertIds[3] = gx->currentPoly.vertIds[2];
 760			gx->currentPoly.verts = 2;
 761			break;
 762		}
 763
 764		if (_clipPolygon(gx, &pbuf[gx->polygonIndex])) {
 765			++gx->polygonIndex;
 766		}
 767		if (gx->vertexMode < 2) {
 768			memset(gx->pendingVertexIds, -1, sizeof(gx->pendingVertexIds));
 769		} else {
 770			gx->pendingVertexIds[gx->pendingVertexIndex] = -1;
 771			gx->pendingVertexIds[(gx->pendingVertexIndex + 1) & 3] = -1;
 772		}
 773	}
 774}
 775
 776static void _flushOutstanding(struct DSGX* gx) {
 777	if (gx->p->cpuBlocked & DS_CPU_BLOCK_GX) {
 778		gx->p->cpuBlocked &= ~DS_CPU_BLOCK_GX;
 779		DSGXWriteFIFO(gx, gx->outstandingEntry);
 780		gx->outstandingEntry.command = 0;
 781	}
 782	while (gx->outstandingCommand[0] && !gx->outstandingParams[0]) {
 783		DSGXWriteFIFO(gx, (struct DSGXEntry) { 0 });
 784		if (CircleBufferSize(&gx->fifo) == (DS_GX_FIFO_SIZE * sizeof(struct DSGXEntry))) {
 785			return;
 786		}
 787	}
 788}
 789
 790static bool _boxTestVertex(struct DSGX* gx, struct DSGXVertex* vertex) {
 791	int32_t vx = _dotViewport(vertex, &gx->clipMatrix.m[0]);
 792	int32_t vy = _dotViewport(vertex, &gx->clipMatrix.m[1]);
 793	int32_t vz = _dotViewport(vertex, &gx->clipMatrix.m[2]);
 794	int32_t vw = _dotViewport(vertex, &gx->clipMatrix.m[3]);
 795
 796	vx = (vx + vw) * (int64_t) (gx->viewportWidth << 12) / (vw * 2) + (gx->viewportX1 << 12);
 797	vy = (vy + vw) * (int64_t) (gx->viewportHeight << 12) / (vw * 2) + (gx->viewportY1 << 12);
 798	vx >>= 12;
 799	vy >>= 12;
 800
 801	if (vx < gx->viewportX1) {
 802		return false;
 803	}
 804	if (vx > gx->viewportX2) {
 805		return false;
 806	}
 807	if (vy < gx->viewportY1) {
 808		return false;
 809	}
 810	if (vy > gx->viewportY2) {
 811		return false;
 812	}
 813	if (vz < -vw) {
 814		return false;
 815	}
 816	if (vz > vw) {
 817		return false;
 818	}
 819	return true;
 820}
 821
 822static bool _boxTest(struct DSGX* gx) {
 823	int16_t x = gx->activeEntries[0].params[0];
 824	x |= gx->activeEntries[0].params[1] << 8;
 825	int16_t y = gx->activeEntries[0].params[2];
 826	y |= gx->activeEntries[0].params[3] << 8;
 827	int16_t z = gx->activeEntries[1].params[0];
 828	z |= gx->activeEntries[1].params[1] << 8;
 829	int16_t w = gx->activeEntries[1].params[2];
 830	w |= gx->activeEntries[1].params[3] << 8;
 831	int16_t h = gx->activeEntries[2].params[0];
 832	h |= gx->activeEntries[2].params[1] << 8;
 833	int16_t d = gx->activeEntries[2].params[2];
 834	d |= gx->activeEntries[2].params[3] << 8;
 835
 836	struct DSGXVertex vertex = {
 837		.x = x,
 838		.y = y,
 839		.z = z
 840	};
 841	if (_boxTestVertex(gx, &vertex)) {
 842		return true;
 843	}
 844
 845	vertex.x += w;
 846	if (_boxTestVertex(gx, &vertex)) {
 847		return true;
 848	}
 849
 850	vertex.x = x;
 851	vertex.y += h;
 852	if (_boxTestVertex(gx, &vertex)) {
 853		return true;
 854	}
 855
 856	vertex.x += w;
 857	if (_boxTestVertex(gx, &vertex)) {
 858		return true;
 859	}
 860
 861	vertex.x = x;
 862	vertex.y = y;
 863	vertex.z += d;
 864	if (_boxTestVertex(gx, &vertex)) {
 865		return true;
 866	}
 867
 868	vertex.x += w;
 869	if (_boxTestVertex(gx, &vertex)) {
 870		return true;
 871	}
 872
 873	vertex.x = x;
 874	vertex.y += h;
 875	if (_boxTestVertex(gx, &vertex)) {
 876		return true;
 877	}
 878
 879	vertex.x += w;
 880	if (_boxTestVertex(gx, &vertex)) {
 881		return true;
 882	}
 883
 884	return false;
 885}
 886
 887static void _fifoRun(struct mTiming* timing, void* context, uint32_t cyclesLate) {
 888	struct DSGX* gx = context;
 889	uint32_t cycles;
 890	bool first = true;
 891	while (!gx->swapBuffers) {
 892		if (CircleBufferSize(&gx->pipe) <= 2 * sizeof(struct DSGXEntry)) {
 893			_pullPipe(gx);
 894		}
 895
 896		if (!CircleBufferSize(&gx->pipe)) {
 897			cycles = 0;
 898			break;
 899		}
 900
 901		DSRegGXSTAT gxstat = gx->p->memory.io9[DS9_REG_GXSTAT_LO >> 1];
 902		int projMatrixPointer = DSRegGXSTATGetProjMatrixStackLevel(gxstat);
 903
 904		struct DSGXEntry entry = { 0 };
 905		CircleBufferDump(&gx->pipe, (int8_t*) &entry.command, 1);
 906		cycles = _gxCommandCycleBase[entry.command];
 907
 908		if (first) {
 909			first = false;
 910		} else if (!gx->activeParams && cycles > cyclesLate) {
 911			break;
 912		}
 913		CircleBufferRead8(&gx->pipe, (int8_t*) &entry.command);
 914		CircleBufferRead8(&gx->pipe, (int8_t*) &entry.params[0]);
 915		CircleBufferRead8(&gx->pipe, (int8_t*) &entry.params[1]);
 916		CircleBufferRead8(&gx->pipe, (int8_t*) &entry.params[2]);
 917		CircleBufferRead8(&gx->pipe, (int8_t*) &entry.params[3]);
 918
 919		if (gx->activeParams) {
 920			int index = _gxCommandParams[entry.command] - gx->activeParams;
 921			gx->activeEntries[index] = entry;
 922			--gx->activeParams;
 923		} else {
 924			gx->activeParams = _gxCommandParams[entry.command];
 925			if (gx->activeParams) {
 926				--gx->activeParams;
 927			}
 928			if (gx->activeParams) {
 929				gx->activeEntries[0] = entry;
 930			}
 931		}
 932
 933		if (gx->activeParams) {
 934			continue;
 935		}
 936
 937		switch (entry.command) {
 938		case DS_GX_CMD_MTX_MODE:
 939			if (entry.params[0] < 4) {
 940				gx->mtxMode = entry.params[0];
 941			} else {
 942				mLOG(DS_GX, GAME_ERROR, "Invalid GX MTX_MODE %02X", entry.params[0]);
 943			}
 944			break;
 945		case DS_GX_CMD_MTX_PUSH:
 946			switch (gx->mtxMode) {
 947			case 0:
 948				memcpy(&gx->projMatrixStack, &gx->projMatrix, sizeof(gx->projMatrix));
 949				++projMatrixPointer;
 950				break;
 951			case 2:
 952				memcpy(&gx->vecMatrixStack[gx->pvMatrixPointer & 0x1F], &gx->vecMatrix, sizeof(gx->vecMatrix));
 953				// Fall through
 954			case 1:
 955				memcpy(&gx->posMatrixStack[gx->pvMatrixPointer & 0x1F], &gx->posMatrix, sizeof(gx->posMatrix));
 956				++gx->pvMatrixPointer;
 957				break;
 958			case 3:
 959				mLOG(DS_GX, STUB, "Unimplemented GX MTX_PUSH mode");
 960				break;
 961			}
 962			break;
 963		case DS_GX_CMD_MTX_POP: {
 964			int8_t offset = entry.params[0];
 965			offset <<= 2;
 966			offset >>= 2;
 967			switch (gx->mtxMode) {
 968			case 0:
 969				projMatrixPointer -= offset;
 970				memcpy(&gx->projMatrix, &gx->projMatrixStack, sizeof(gx->projMatrix));
 971				break;
 972			case 1:
 973				gx->pvMatrixPointer -= offset;
 974				memcpy(&gx->posMatrix, &gx->posMatrixStack[gx->pvMatrixPointer & 0x1F], sizeof(gx->posMatrix));
 975				break;
 976			case 2:
 977				gx->pvMatrixPointer -= offset;
 978				memcpy(&gx->vecMatrix, &gx->vecMatrixStack[gx->pvMatrixPointer & 0x1F], sizeof(gx->vecMatrix));
 979				memcpy(&gx->posMatrix, &gx->posMatrixStack[gx->pvMatrixPointer & 0x1F], sizeof(gx->posMatrix));
 980				break;
 981			case 3:
 982				mLOG(DS_GX, STUB, "Unimplemented GX MTX_POP mode");
 983				break;
 984			}
 985			_updateClipMatrix(gx);
 986			break;
 987		}
 988		case DS_GX_CMD_MTX_STORE: {
 989			int8_t offset = entry.params[0] & 0x1F;
 990			// TODO: overflow
 991			switch (gx->mtxMode) {
 992			case 0:
 993				memcpy(&gx->projMatrixStack, &gx->projMatrix, sizeof(gx->projMatrixStack));
 994				break;
 995			case 2:
 996				memcpy(&gx->vecMatrixStack[offset], &gx->vecMatrix, sizeof(gx->vecMatrix));
 997				// Fall through
 998			case 1:
 999				memcpy(&gx->posMatrixStack[offset], &gx->posMatrix, sizeof(gx->posMatrix));
1000				break;
1001			case 3:
1002				mLOG(DS_GX, STUB, "Unimplemented GX MTX_STORE mode");
1003				break;
1004			}
1005			break;
1006		}
1007		case DS_GX_CMD_MTX_RESTORE: {
1008			int8_t offset = entry.params[0] & 0x1F;
1009			// TODO: overflow
1010			switch (gx->mtxMode) {
1011			case 0:
1012				memcpy(&gx->projMatrix, &gx->projMatrixStack, sizeof(gx->projMatrix));
1013				break;
1014			case 2:
1015				memcpy(&gx->vecMatrix, &gx->vecMatrixStack[offset], sizeof(gx->vecMatrix));
1016				// Fall through
1017			case 1:
1018				memcpy(&gx->posMatrix, &gx->posMatrixStack[offset], sizeof(gx->posMatrix));
1019				break;
1020			case 3:
1021				mLOG(DS_GX, STUB, "Unimplemented GX MTX_RESTORE mode");
1022				break;
1023			}
1024			_updateClipMatrix(gx);
1025			break;
1026		}
1027		case DS_GX_CMD_MTX_IDENTITY:
1028			switch (gx->mtxMode) {
1029			case 0:
1030				DSGXMtxIdentity(&gx->projMatrix);
1031				break;
1032			case 2:
1033				DSGXMtxIdentity(&gx->vecMatrix);
1034				// Fall through
1035			case 1:
1036				DSGXMtxIdentity(&gx->posMatrix);
1037				break;
1038			case 3:
1039				DSGXMtxIdentity(&gx->texMatrix);
1040				break;
1041			}
1042			_updateClipMatrix(gx);
1043			break;
1044		case DS_GX_CMD_MTX_LOAD_4x4: {
1045			struct DSGXMatrix m;
1046			int i;
1047			for (i = 0; i < 16; ++i) {
1048				m.m[i] = gx->activeEntries[i].params[0];
1049				m.m[i] |= gx->activeEntries[i].params[1] << 8;
1050				m.m[i] |= gx->activeEntries[i].params[2] << 16;
1051				m.m[i] |= gx->activeEntries[i].params[3] << 24;
1052			}
1053			switch (gx->mtxMode) {
1054			case 0:
1055				memcpy(&gx->projMatrix, &m, sizeof(gx->projMatrix));
1056				break;
1057			case 2:
1058				memcpy(&gx->vecMatrix, &m, sizeof(gx->vecMatrix));
1059				// Fall through
1060			case 1:
1061				memcpy(&gx->posMatrix, &m, sizeof(gx->posMatrix));
1062				break;
1063			case 3:
1064				memcpy(&gx->texMatrix, &m, sizeof(gx->texMatrix));
1065				break;
1066			}
1067			_updateClipMatrix(gx);
1068			break;
1069		}
1070		case DS_GX_CMD_MTX_LOAD_4x3: {
1071			struct DSGXMatrix m;
1072			int i, j;
1073			for (j = 0; j < 4; ++j) {
1074				for (i = 0; i < 3; ++i) {
1075					m.m[i + j * 4] = gx->activeEntries[i + j * 3].params[0];
1076					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[1] << 8;
1077					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[2] << 16;
1078					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[3] << 24;
1079				}
1080				m.m[j * 4 + 3] = 0;
1081			}
1082			m.m[15] = MTX_ONE;
1083			switch (gx->mtxMode) {
1084			case 0:
1085				memcpy(&gx->projMatrix, &m, sizeof(gx->projMatrix));
1086				break;
1087			case 2:
1088				memcpy(&gx->vecMatrix, &m, sizeof(gx->vecMatrix));
1089				// Fall through
1090			case 1:
1091				memcpy(&gx->posMatrix, &m, sizeof(gx->posMatrix));
1092				break;
1093			case 3:
1094				memcpy(&gx->texMatrix, &m, sizeof(gx->texMatrix));
1095				break;
1096			}
1097			_updateClipMatrix(gx);
1098			break;
1099		}
1100		case DS_GX_CMD_MTX_MULT_4x4: {
1101			struct DSGXMatrix m;
1102			int i;
1103			for (i = 0; i < 16; ++i) {
1104				m.m[i] = gx->activeEntries[i].params[0];
1105				m.m[i] |= gx->activeEntries[i].params[1] << 8;
1106				m.m[i] |= gx->activeEntries[i].params[2] << 16;
1107				m.m[i] |= gx->activeEntries[i].params[3] << 24;
1108			}
1109			switch (gx->mtxMode) {
1110			case 0:
1111				DSGXMtxMultiply(&gx->projMatrix, &m, &gx->projMatrix);
1112				break;
1113			case 2:
1114				DSGXMtxMultiply(&gx->vecMatrix, &m, &gx->vecMatrix);
1115				// Fall through
1116			case 1:
1117				DSGXMtxMultiply(&gx->posMatrix, &m, &gx->posMatrix);
1118				break;
1119			case 3:
1120				DSGXMtxMultiply(&gx->texMatrix, &m, &gx->texMatrix);
1121				break;
1122			}
1123			_updateClipMatrix(gx);
1124			break;
1125		}
1126		case DS_GX_CMD_MTX_MULT_4x3: {
1127			struct DSGXMatrix m;
1128			int i, j;
1129			for (j = 0; j < 4; ++j) {
1130				for (i = 0; i < 3; ++i) {
1131					m.m[i + j * 4] = gx->activeEntries[i + j * 3].params[0];
1132					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[1] << 8;
1133					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[2] << 16;
1134					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[3] << 24;
1135				}
1136				m.m[j * 4 + 3] = 0;
1137			}
1138			m.m[15] = MTX_ONE;
1139			switch (gx->mtxMode) {
1140			case 0:
1141				DSGXMtxMultiply(&gx->projMatrix, &m, &gx->projMatrix);
1142				break;
1143			case 2:
1144				DSGXMtxMultiply(&gx->vecMatrix, &m, &gx->vecMatrix);
1145				// Fall through
1146			case 1:
1147				DSGXMtxMultiply(&gx->posMatrix, &m, &gx->posMatrix);
1148				break;
1149			case 3:
1150				DSGXMtxMultiply(&gx->texMatrix, &m, &gx->texMatrix);
1151				break;
1152			}
1153			_updateClipMatrix(gx);
1154			break;
1155		}
1156		case DS_GX_CMD_MTX_MULT_3x3: {
1157			struct DSGXMatrix m;
1158			int i, j;
1159			for (j = 0; j < 3; ++j) {
1160				for (i = 0; i < 3; ++i) {
1161					m.m[i + j * 4] = gx->activeEntries[i + j * 3].params[0];
1162					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[1] << 8;
1163					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[2] << 16;
1164					m.m[i + j * 4] |= gx->activeEntries[i + j * 3].params[3] << 24;
1165				}
1166				m.m[j * 4 + 3] = 0;
1167			}
1168			m.m[12] = 0;
1169			m.m[13] = 0;
1170			m.m[14] = 0;
1171			m.m[15] = MTX_ONE;
1172			switch (gx->mtxMode) {
1173			case 0:
1174				DSGXMtxMultiply(&gx->projMatrix, &m, &gx->projMatrix);
1175				break;
1176			case 2:
1177				DSGXMtxMultiply(&gx->vecMatrix, &m, &gx->vecMatrix);
1178				// Fall through
1179			case 1:
1180				DSGXMtxMultiply(&gx->posMatrix, &m, &gx->posMatrix);
1181				break;
1182			case 3:
1183				DSGXMtxMultiply(&gx->texMatrix, &m, &gx->texMatrix);
1184				break;
1185			}
1186			_updateClipMatrix(gx);
1187			break;
1188		}
1189		case DS_GX_CMD_MTX_TRANS: {
1190			int32_t m[3];
1191			m[0] = gx->activeEntries[0].params[0];
1192			m[0] |= gx->activeEntries[0].params[1] << 8;
1193			m[0] |= gx->activeEntries[0].params[2] << 16;
1194			m[0] |= gx->activeEntries[0].params[3] << 24;
1195			m[1] = gx->activeEntries[1].params[0];
1196			m[1] |= gx->activeEntries[1].params[1] << 8;
1197			m[1] |= gx->activeEntries[1].params[2] << 16;
1198			m[1] |= gx->activeEntries[1].params[3] << 24;
1199			m[2] = gx->activeEntries[2].params[0];
1200			m[2] |= gx->activeEntries[2].params[1] << 8;
1201			m[2] |= gx->activeEntries[2].params[2] << 16;
1202			m[2] |= gx->activeEntries[2].params[3] << 24;
1203			switch (gx->mtxMode) {
1204			case 0:
1205				DSGXMtxTranslate(&gx->projMatrix, m);
1206				break;
1207			case 2:
1208				DSGXMtxTranslate(&gx->vecMatrix, m);
1209				// Fall through
1210			case 1:
1211				DSGXMtxTranslate(&gx->posMatrix, m);
1212				break;
1213			case 3:
1214				DSGXMtxTranslate(&gx->texMatrix, m);
1215				break;
1216			}
1217			_updateClipMatrix(gx);
1218			break;
1219		}
1220		case DS_GX_CMD_MTX_SCALE: {
1221			int32_t m[3];
1222			m[0] = gx->activeEntries[0].params[0];
1223			m[0] |= gx->activeEntries[0].params[1] << 8;
1224			m[0] |= gx->activeEntries[0].params[2] << 16;
1225			m[0] |= gx->activeEntries[0].params[3] << 24;
1226			m[1] = gx->activeEntries[1].params[0];
1227			m[1] |= gx->activeEntries[1].params[1] << 8;
1228			m[1] |= gx->activeEntries[1].params[2] << 16;
1229			m[1] |= gx->activeEntries[1].params[3] << 24;
1230			m[2] = gx->activeEntries[2].params[0];
1231			m[2] |= gx->activeEntries[2].params[1] << 8;
1232			m[2] |= gx->activeEntries[2].params[2] << 16;
1233			m[2] |= gx->activeEntries[2].params[3] << 24;
1234			switch (gx->mtxMode) {
1235			case 0:
1236				DSGXMtxScale(&gx->projMatrix, m);
1237				break;
1238			case 1:
1239			case 2:
1240				DSGXMtxScale(&gx->posMatrix, m);
1241				break;
1242			case 3:
1243				DSGXMtxScale(&gx->texMatrix, m);
1244				break;
1245			}
1246			_updateClipMatrix(gx);
1247			break;
1248		}
1249		case DS_GX_CMD_COLOR:
1250			gx->currentVertex.color = entry.params[0];
1251			gx->currentVertex.color |= entry.params[1] << 8;
1252			break;
1253		case DS_GX_CMD_TEXCOORD:
1254			gx->currentVertex.s = entry.params[0];
1255			gx->currentVertex.s |= entry.params[1] << 8;
1256			gx->currentVertex.t = entry.params[2];
1257			gx->currentVertex.t |= entry.params[3] << 8;
1258			break;
1259		case DS_GX_CMD_VTX_16: {
1260			int16_t x = gx->activeEntries[0].params[0];
1261			x |= gx->activeEntries[0].params[1] << 8;
1262			int16_t y = gx->activeEntries[0].params[2];
1263			y |= gx->activeEntries[0].params[3] << 8;
1264			int16_t z = gx->activeEntries[1].params[0];
1265			z |= gx->activeEntries[1].params[1] << 8;
1266			_emitVertex(gx, x, y, z);
1267			break;
1268		}
1269		case DS_GX_CMD_VTX_10: {
1270			int32_t xyz = entry.params[0];
1271			xyz |= entry.params[1] << 8;
1272			xyz |= entry.params[2] << 16;
1273			xyz |= entry.params[3] << 24;
1274			int16_t x = (xyz << 6) & 0xFFC0;
1275			int16_t y = (xyz >> 4) & 0xFFC0;
1276			int16_t z = (xyz >> 14) & 0xFFC0;
1277			_emitVertex(gx, x, y, z);
1278			break;
1279		}
1280		case DS_GX_CMD_VTX_XY: {
1281			int16_t x = entry.params[0];
1282			x |= entry.params[1] << 8;
1283			int16_t y = entry.params[2];
1284			y |= entry.params[3] << 8;
1285			_emitVertex(gx, x, y, gx->currentVertex.z);
1286			break;
1287		}
1288		case DS_GX_CMD_VTX_XZ: {
1289			int16_t x = entry.params[0];
1290			x |= entry.params[1] << 8;
1291			int16_t z = entry.params[2];
1292			z |= entry.params[3] << 8;
1293			_emitVertex(gx, x, gx->currentVertex.y, z);
1294			break;
1295		}
1296		case DS_GX_CMD_VTX_YZ: {
1297			int16_t y = entry.params[0];
1298			y |= entry.params[1] << 8;
1299			int16_t z = entry.params[2];
1300			z |= entry.params[3] << 8;
1301			_emitVertex(gx, gx->currentVertex.x, y, z);
1302			break;
1303		}
1304		case DS_GX_CMD_VTX_DIFF: {
1305			int32_t xyz = entry.params[0];
1306			xyz |= entry.params[1] << 8;
1307			xyz |= entry.params[2] << 16;
1308			xyz |= entry.params[3] << 24;
1309			int16_t x = (xyz << 6) & 0xFFC0;
1310			int16_t y = (xyz >> 4) & 0xFFC0;
1311			int16_t z = (xyz >> 14) & 0xFFC0;
1312			_emitVertex(gx, gx->currentVertex.x + (x >> 6), gx->currentVertex.y + (y >> 6), gx->currentVertex.z + (z >> 6));
1313			break;
1314		}
1315		case DS_GX_CMD_POLYGON_ATTR:
1316			gx->nextPoly.polyParams = entry.params[0];
1317			gx->nextPoly.polyParams |= entry.params[1] << 8;
1318			gx->nextPoly.polyParams |= entry.params[2] << 16;
1319			gx->nextPoly.polyParams |= entry.params[3] << 24;
1320			break;
1321		case DS_GX_CMD_TEXIMAGE_PARAM:
1322			gx->nextPoly.texParams = entry.params[0];
1323			gx->nextPoly.texParams |= entry.params[1] << 8;
1324			gx->nextPoly.texParams |= entry.params[2] << 16;
1325			gx->nextPoly.texParams |= entry.params[3] << 24;
1326			break;
1327		case DS_GX_CMD_PLTT_BASE:
1328			gx->nextPoly.palBase = entry.params[0];
1329			gx->nextPoly.palBase |= entry.params[1] << 8;
1330			gx->nextPoly.palBase |= entry.params[2] << 16;
1331			gx->nextPoly.palBase |= entry.params[3] << 24;
1332			break;
1333		case DS_GX_CMD_BEGIN_VTXS:
1334			gx->vertexMode = entry.params[0] & 3;
1335			gx->currentPoly = gx->nextPoly;
1336			gx->reverseWinding = false;
1337			memset(gx->pendingVertexIds, -1, sizeof(gx->pendingVertexIds));
1338			break;
1339		case DS_GX_CMD_END_VTXS:
1340			gx->vertexMode = -1;
1341			break;
1342		case DS_GX_CMD_SWAP_BUFFERS:
1343			gx->swapBuffers = true;
1344			gx->wSort = entry.params[0] & 2;
1345			memset(&gx->currentVertex, 0, sizeof(gx->currentVertex));
1346			memset(&gx->nextPoly, 0, sizeof(gx-> nextPoly));
1347			gx->currentVertex.color = 0x7FFF;
1348			break;
1349		case DS_GX_CMD_VIEWPORT:
1350			gx->viewportX1 = (uint8_t) entry.params[0];
1351			gx->viewportY1 = (uint8_t) entry.params[1];
1352			gx->viewportX2 = (uint8_t) entry.params[2];
1353			gx->viewportY2 = (uint8_t) entry.params[3];
1354			gx->viewportWidth = gx->viewportX2 - gx->viewportX1 + 1;
1355			gx->viewportHeight = gx->viewportY2 - gx->viewportY1 + 1;
1356			gx->renderer->viewportX = gx->viewportX1;
1357			gx->renderer->viewportY = gx->viewportY1;
1358			gx->renderer->viewportWidth = gx->viewportWidth;
1359			gx->renderer->viewportHeight = gx->viewportHeight;
1360			break;
1361		case DS_GX_CMD_BOX_TEST:
1362			gxstat = DSRegGXSTATClearTestBusy(gxstat);
1363			gxstat = DSRegGXSTATTestFillBoxTestResult(gxstat, _boxTest(gx));
1364			break;
1365		default:
1366			mLOG(DS_GX, STUB, "Unimplemented GX command %02X:%02X %02X %02X %02X", entry.command, entry.params[0], entry.params[1], entry.params[2], entry.params[3]);
1367			break;
1368		}
1369
1370		gxstat = DSRegGXSTATSetPVMatrixStackLevel(gxstat, gx->pvMatrixPointer);
1371		gxstat = DSRegGXSTATSetProjMatrixStackLevel(gxstat, projMatrixPointer);
1372		gxstat = DSRegGXSTATTestFillMatrixStackError(gxstat, projMatrixPointer || gx->pvMatrixPointer >= 0x1F);
1373		gx->p->memory.io9[DS9_REG_GXSTAT_LO >> 1] = gxstat;
1374
1375		if (cyclesLate >= cycles) {
1376			cyclesLate -= cycles;
1377		} else {
1378			break;
1379		}
1380	}
1381	if (cycles && !gx->swapBuffers) {
1382		mTimingSchedule(timing, &gx->fifoEvent, cycles - cyclesLate);
1383	}
1384	if (CircleBufferSize(&gx->fifo) < (DS_GX_FIFO_SIZE * sizeof(struct DSGXEntry))) {
1385		_flushOutstanding(gx);
1386	}
1387	DSGXUpdateGXSTAT(gx);
1388}
1389
1390void DSGXInit(struct DSGX* gx) {
1391	gx->renderer = &dummyRenderer;
1392	CircleBufferInit(&gx->fifo, sizeof(struct DSGXEntry) * DS_GX_FIFO_SIZE);
1393	CircleBufferInit(&gx->pipe, sizeof(struct DSGXEntry) * DS_GX_PIPE_SIZE);
1394	gx->vertexBuffer[0] = malloc(sizeof(struct DSGXVertex) * DS_GX_VERTEX_BUFFER_SIZE);
1395	gx->vertexBuffer[1] = malloc(sizeof(struct DSGXVertex) * DS_GX_VERTEX_BUFFER_SIZE);
1396	gx->polygonBuffer[0] = malloc(sizeof(struct DSGXPolygon) * DS_GX_POLYGON_BUFFER_SIZE);
1397	gx->polygonBuffer[1] = malloc(sizeof(struct DSGXPolygon) * DS_GX_POLYGON_BUFFER_SIZE);
1398	gx->fifoEvent.name = "DS GX FIFO";
1399	gx->fifoEvent.priority = 0xC;
1400	gx->fifoEvent.context = gx;
1401	gx->fifoEvent.callback = _fifoRun;
1402}
1403
1404void DSGXDeinit(struct DSGX* gx) {
1405	DSGXAssociateRenderer(gx, &dummyRenderer);
1406	CircleBufferDeinit(&gx->fifo);
1407	CircleBufferDeinit(&gx->pipe);
1408	free(gx->vertexBuffer[0]);
1409	free(gx->vertexBuffer[1]);
1410	free(gx->polygonBuffer[0]);
1411	free(gx->polygonBuffer[1]);
1412}
1413
1414void DSGXReset(struct DSGX* gx) {
1415	CircleBufferClear(&gx->fifo);
1416	CircleBufferClear(&gx->pipe);
1417	DSGXMtxIdentity(&gx->projMatrix);
1418	DSGXMtxIdentity(&gx->texMatrix);
1419	DSGXMtxIdentity(&gx->posMatrix);
1420	DSGXMtxIdentity(&gx->vecMatrix);
1421
1422	DSGXMtxIdentity(&gx->clipMatrix);
1423	DSGXMtxIdentity(&gx->projMatrixStack);
1424	DSGXMtxIdentity(&gx->texMatrixStack);
1425	int i;
1426	for (i = 0; i < 32; ++i) {
1427		DSGXMtxIdentity(&gx->posMatrixStack[i]);
1428		DSGXMtxIdentity(&gx->vecMatrixStack[i]);
1429	}
1430	gx->swapBuffers = false;
1431	gx->bufferIndex = 0;
1432	gx->vertexIndex = 0;
1433	gx->polygonIndex = 0;
1434	gx->mtxMode = 0;
1435	gx->pvMatrixPointer = 0;
1436	gx->vertexMode = -1;
1437
1438	gx->viewportX1 = 0;
1439	gx->viewportY1 = 0;
1440	gx->viewportX2 = DS_VIDEO_HORIZONTAL_PIXELS - 1;
1441	gx->viewportY2 = DS_VIDEO_VERTICAL_PIXELS - 1;
1442	gx->viewportWidth = gx->viewportX2 - gx->viewportX1 + 1;
1443	gx->viewportHeight = gx->viewportY2 - gx->viewportY1 + 1;
1444
1445	memset(gx->outstandingParams, 0, sizeof(gx->outstandingParams));
1446	memset(gx->outstandingCommand, 0, sizeof(gx->outstandingCommand));
1447	memset(&gx->outstandingEntry, 0, sizeof(gx->outstandingEntry));
1448	gx->activeParams = 0;
1449	memset(&gx->currentVertex, 0, sizeof(gx->currentVertex));
1450	memset(&gx->nextPoly, 0, sizeof(gx-> nextPoly));
1451	gx->currentVertex.color = 0x7FFF;
1452	gx->dmaSource = -1;
1453}
1454
1455void DSGXAssociateRenderer(struct DSGX* gx, struct DSGXRenderer* renderer) {
1456	gx->renderer->deinit(gx->renderer);
1457	gx->renderer = renderer;
1458	memcpy(gx->renderer->tex, gx->tex, sizeof(gx->renderer->tex));
1459	memcpy(gx->renderer->texPal, gx->texPal, sizeof(gx->renderer->texPal));
1460	gx->renderer->init(gx->renderer);
1461}
1462
1463void DSGXUpdateGXSTAT(struct DSGX* gx) {
1464	uint32_t value = gx->p->memory.io9[DS9_REG_GXSTAT_HI >> 1] << 16;
1465	value = DSRegGXSTATIsDoIRQ(value);
1466
1467	size_t entries = CircleBufferSize(&gx->fifo) / sizeof(struct DSGXEntry);
1468	// XXX
1469	if (gx->swapBuffers) {
1470		entries++;
1471	}
1472	value = DSRegGXSTATSetFIFOEntries(value, entries);
1473	value = DSRegGXSTATSetFIFOLtHalf(value, entries < (DS_GX_FIFO_SIZE / 2));
1474	value = DSRegGXSTATSetFIFOEmpty(value, entries == 0);
1475
1476	if ((DSRegGXSTATGetDoIRQ(value) == 1 && entries < (DS_GX_FIFO_SIZE / 2)) ||
1477		(DSRegGXSTATGetDoIRQ(value) == 2 && entries == 0)) {
1478		DSRaiseIRQ(gx->p->ds9.cpu, gx->p->ds9.memory.io, DS_IRQ_GEOM_FIFO);
1479	}
1480
1481	value = DSRegGXSTATSetBusy(value, mTimingIsScheduled(&gx->p->ds9.timing, &gx->fifoEvent) || gx->swapBuffers);
1482
1483	gx->p->memory.io9[DS9_REG_GXSTAT_HI >> 1] = value >> 16;
1484
1485	struct GBADMA* dma = NULL;
1486	if (gx->dmaSource >= 0) {
1487		dma = &gx->p->ds9.memory.dma[gx->dmaSource];
1488		if (GBADMARegisterGetTiming9(dma->reg) != DS_DMA_TIMING_GEOM_FIFO) {
1489			gx->dmaSource = -1;
1490		} else if (GBADMARegisterIsEnable(dma->reg) && entries < (DS_GX_FIFO_SIZE / 2) && !dma->nextCount) {
1491			dma->nextCount = dma->count;
1492			if (dma->count > 112) {
1493				dma->nextCount = 112;
1494			}
1495			dma->when = mTimingCurrentTime(&gx->p->ds9.timing);
1496			DSDMAUpdate(&gx->p->ds9);
1497		}
1498	}
1499}
1500
1501static void DSGXUnpackCommand(struct DSGX* gx, uint32_t command) {
1502	gx->outstandingCommand[0] = command;
1503	gx->outstandingCommand[1] = command >> 8;
1504	gx->outstandingCommand[2] = command >> 16;
1505	gx->outstandingCommand[3] = command >> 24;
1506	if (gx->outstandingCommand[0] >= DS_GX_CMD_MAX) {
1507		gx->outstandingCommand[0] = 0;
1508	}
1509	if (gx->outstandingCommand[1] >= DS_GX_CMD_MAX) {
1510		gx->outstandingCommand[1] = 0;
1511	}
1512	if (gx->outstandingCommand[2] >= DS_GX_CMD_MAX) {
1513		gx->outstandingCommand[2] = 0;
1514	}
1515	if (gx->outstandingCommand[3] >= DS_GX_CMD_MAX) {
1516		gx->outstandingCommand[3] = 0;
1517	}
1518	gx->outstandingParams[0] = _gxCommandParams[gx->outstandingCommand[0]];
1519	gx->outstandingParams[1] = _gxCommandParams[gx->outstandingCommand[1]];
1520	gx->outstandingParams[2] = _gxCommandParams[gx->outstandingCommand[2]];
1521	gx->outstandingParams[3] = _gxCommandParams[gx->outstandingCommand[3]];
1522	_flushOutstanding(gx);
1523	DSGXUpdateGXSTAT(gx);
1524}
1525
1526static void DSGXWriteFIFO(struct DSGX* gx, struct DSGXEntry entry) {
1527	if (CircleBufferSize(&gx->fifo) == (DS_GX_FIFO_SIZE * sizeof(entry))) {
1528		mLOG(DS_GX, INFO, "FIFO full");
1529		if (gx->p->cpuBlocked & DS_CPU_BLOCK_GX) {
1530			abort();
1531		}
1532		gx->p->cpuBlocked |= DS_CPU_BLOCK_GX;
1533		gx->outstandingEntry = entry;
1534		gx->p->ds9.cpu->nextEvent = 0;
1535		return;
1536	}
1537	if (gx->outstandingCommand[0]) {
1538		entry.command = gx->outstandingCommand[0];
1539		if (gx->outstandingParams[0]) {
1540			--gx->outstandingParams[0];
1541		}
1542		if (!gx->outstandingParams[0]) {
1543			// TODO: improve this
1544			memmove(&gx->outstandingParams[0], &gx->outstandingParams[1], sizeof(gx->outstandingParams[0]) * 3);
1545			memmove(&gx->outstandingCommand[0], &gx->outstandingCommand[1], sizeof(gx->outstandingCommand[0]) * 3);
1546			gx->outstandingParams[3] = 0;
1547			gx->outstandingCommand[3] = 0;
1548		}
1549	} else {
1550		gx->outstandingParams[0] = _gxCommandParams[entry.command];
1551		if (gx->outstandingParams[0]) {
1552			--gx->outstandingParams[0];
1553		}
1554		if (gx->outstandingParams[0]) {
1555			gx->outstandingCommand[0] = entry.command;
1556		}
1557	}
1558	uint32_t cycles = _gxCommandCycleBase[entry.command];
1559	if (!cycles) {
1560		return;
1561	}
1562	if (CircleBufferSize(&gx->fifo) == 0 && CircleBufferSize(&gx->pipe) < (DS_GX_PIPE_SIZE * sizeof(entry))) {
1563		CircleBufferWrite8(&gx->pipe, entry.command);
1564		CircleBufferWrite8(&gx->pipe, entry.params[0]);
1565		CircleBufferWrite8(&gx->pipe, entry.params[1]);
1566		CircleBufferWrite8(&gx->pipe, entry.params[2]);
1567		CircleBufferWrite8(&gx->pipe, entry.params[3]);
1568	} else if (CircleBufferSize(&gx->fifo) < (DS_GX_FIFO_SIZE * sizeof(entry))) {
1569		CircleBufferWrite8(&gx->fifo, entry.command);
1570		CircleBufferWrite8(&gx->fifo, entry.params[0]);
1571		CircleBufferWrite8(&gx->fifo, entry.params[1]);
1572		CircleBufferWrite8(&gx->fifo, entry.params[2]);
1573		CircleBufferWrite8(&gx->fifo, entry.params[3]);
1574	}
1575	if (entry.command == DS_GX_CMD_BOX_TEST) {
1576		DSRegGXSTAT gxstat = gx->p->memory.io9[DS9_REG_GXSTAT_LO >> 1];
1577		gxstat = DSRegGXSTATFillTestBusy(gxstat);
1578		gxstat = DSRegGXSTATClearBoxTestResult(gxstat);
1579		gx->p->memory.io9[DS9_REG_GXSTAT_LO >> 1] = gxstat;
1580	}
1581	if (!gx->swapBuffers && !mTimingIsScheduled(&gx->p->ds9.timing, &gx->fifoEvent)) {
1582		mTimingSchedule(&gx->p->ds9.timing, &gx->fifoEvent, cycles);
1583	}
1584
1585	_flushOutstanding(gx);
1586}
1587
1588uint16_t DSGXWriteRegister(struct DSGX* gx, uint32_t address, uint16_t value) {
1589	uint16_t oldValue = gx->p->memory.io9[address >> 1];
1590	switch (address) {
1591	case DS9_REG_DISP3DCNT:
1592		mLOG(DS_GX, STUB, "Unimplemented GX write %03X:%04X", address, value);
1593		break;
1594	case DS9_REG_GXSTAT_LO:
1595		value = DSRegGXSTATIsMatrixStackError(value);
1596		if (value) {
1597			oldValue = DSRegGXSTATClearMatrixStackError(oldValue);
1598			oldValue = DSRegGXSTATClearProjMatrixStackLevel(oldValue);
1599		}
1600		value = oldValue;
1601		break;
1602	case DS9_REG_GXSTAT_HI:
1603		value = DSRegGXSTATIsDoIRQ(value << 16) >> 16;
1604		gx->p->memory.io9[address >> 1] = value;
1605		DSGXUpdateGXSTAT(gx);
1606		value = gx->p->memory.io9[address >> 1];
1607		break;
1608	default:
1609		if (address < DS9_REG_GXFIFO_00) {
1610			mLOG(DS_GX, STUB, "Unimplemented GX write %03X:%04X", address, value);
1611		} else if (address <= DS9_REG_GXFIFO_1F) {
1612			mLOG(DS_GX, STUB, "Unimplemented GX write %03X:%04X", address, value);
1613		} else if (address < DS9_REG_GXSTAT_LO) {
1614			struct DSGXEntry entry = {
1615				.command = (address & 0x1FC) >> 2,
1616				.params = {
1617					value,
1618					value >> 8,
1619				}
1620			};
1621			if (entry.command < DS_GX_CMD_MAX) {
1622				DSGXWriteFIFO(gx, entry);
1623			}
1624		} else {
1625			mLOG(DS_GX, STUB, "Unimplemented GX write %03X:%04X", address, value);
1626		}
1627		break;
1628	}
1629	return value;
1630}
1631
1632uint32_t DSGXWriteRegister32(struct DSGX* gx, uint32_t address, uint32_t value) {
1633	switch (address) {
1634	case DS9_REG_DISP3DCNT:
1635		mLOG(DS_GX, STUB, "Unimplemented GX write %03X:%08X", address, value);
1636		break;
1637	case DS9_REG_GXSTAT_LO:
1638		value = (value & 0xFFFF0000) | DSGXWriteRegister(gx, DS9_REG_GXSTAT_LO, value);
1639		value = (value & 0x0000FFFF) | (DSGXWriteRegister(gx, DS9_REG_GXSTAT_HI, value >> 16) << 16);
1640		break;
1641	default:
1642		if (address < DS9_REG_GXFIFO_00) {
1643			mLOG(DS_GX, STUB, "Unimplemented GX write %03X:%08X", address, value);
1644		} else if (address <= DS9_REG_GXFIFO_1F) {
1645			if (gx->outstandingParams[0]) {
1646				struct DSGXEntry entry = {
1647					.command = gx->outstandingCommand[0],
1648					.params = {
1649						value,
1650						value >> 8,
1651						value >> 16,
1652						value >> 24
1653					}
1654				};
1655				DSGXWriteFIFO(gx, entry);
1656			} else {
1657				DSGXUnpackCommand(gx, value);
1658			}
1659		} else if (address < DS9_REG_GXSTAT_LO) {
1660			struct DSGXEntry entry = {
1661				.command = (address & 0x1FC) >> 2,
1662				.params = {
1663					value,
1664					value >> 8,
1665					value >> 16,
1666					value >> 24
1667				}
1668			};
1669			DSGXWriteFIFO(gx, entry);
1670		} else {
1671			mLOG(DS_GX, STUB, "Unimplemented GX write %03X:%08X", address, value);
1672		}
1673		break;
1674	}
1675	return value;
1676}
1677
1678void DSGXFlush(struct DSGX* gx) {
1679	if (gx->swapBuffers) {
1680		gx->renderer->setRAM(gx->renderer, gx->vertexBuffer[gx->bufferIndex], gx->polygonBuffer[gx->bufferIndex], gx->polygonIndex, gx->wSort);
1681		gx->swapBuffers = false;
1682		gx->bufferIndex ^= 1;
1683		gx->vertexIndex = 0;
1684		gx->pendingVertexIndex = 0;
1685		gx->polygonIndex = 0;
1686		if (CircleBufferSize(&gx->fifo)) {
1687			mTimingSchedule(&gx->p->ds9.timing, &gx->fifoEvent, 0);
1688		}
1689	}
1690
1691	DSGXUpdateGXSTAT(gx);
1692}
1693
1694void DSGXScheduleDMA(struct DSCommon* dscore, int number, struct GBADMA* info) {
1695	UNUSED(info);
1696	dscore->p->gx.dmaSource = number;
1697}
1698
// Dummy renderer init hook: intentionally a no-op.
static void DSGXDummyRendererInit(struct DSGXRenderer* renderer) {
	// The dummy renderer keeps no state to set up
	UNUSED(renderer);
}
1703
// Dummy renderer reset hook: intentionally a no-op.
static void DSGXDummyRendererReset(struct DSGXRenderer* renderer) {
	// The dummy renderer keeps no state to reset
	UNUSED(renderer);
}
1708
// Dummy renderer deinit hook: intentionally a no-op.
static void DSGXDummyRendererDeinit(struct DSGXRenderer* renderer) {
	// The dummy renderer owns nothing that needs releasing
	UNUSED(renderer);
}
1713
// Dummy renderer texture invalidation hook: intentionally a no-op.
static void DSGXDummyRendererInvalidateTex(struct DSGXRenderer* renderer, int slot) {
	// Both arguments are ignored
	UNUSED(slot);
	UNUSED(renderer);
}
1719
// Dummy renderer geometry hook: the submitted vertices and polygons are ignored.
static void DSGXDummyRendererSetRAM(struct DSGXRenderer* renderer, struct DSGXVertex* verts, struct DSGXPolygon* polys, unsigned polyCount, bool wSort) {
	// Every argument is ignored
	UNUSED(wSort);
	UNUSED(polyCount);
	UNUSED(polys);
	UNUSED(verts);
	UNUSED(renderer);
}
1728
// Dummy renderer scanline hook: intentionally draws nothing.
static void DSGXDummyRendererDrawScanline(struct DSGXRenderer* renderer, int y) {
	// Both arguments are ignored
	UNUSED(y);
	UNUSED(renderer);
}
1734
1735static void DSGXDummyRendererGetScanline(struct DSGXRenderer* renderer, int y, const color_t** output) {
1736	UNUSED(renderer);
1737	UNUSED(y);
1738	*output = NULL;
1739	// Nothing to do
1740}