all repos — mgba @ 2f9bcf63b73e3edfd80a572981c3885bb30a09b9

mGBA Game Boy Advance Emulator

src/gba/renderers/video-software.c (view raw)

   1#include "video-software.h"
   2
   3#include "gba.h"
   4#include "gba-io.h"
   5
   6#include <string.h>
   7
// Renderer callbacks. GBAVideoSoftwareRendererCreate stores these in the
// function-pointer members of renderer->d.
static void GBAVideoSoftwareRendererInit(struct GBAVideoRenderer* renderer);
static void GBAVideoSoftwareRendererDeinit(struct GBAVideoRenderer* renderer);
static void GBAVideoSoftwareRendererWriteOAM(struct GBAVideoRenderer* renderer, uint32_t oam);
static void GBAVideoSoftwareRendererWritePalette(struct GBAVideoRenderer* renderer, uint32_t address, uint16_t value);
static uint16_t GBAVideoSoftwareRendererWriteVideoRegister(struct GBAVideoRenderer* renderer, uint32_t address, uint16_t value);
static void GBAVideoSoftwareRendererDrawScanline(struct GBAVideoRenderer* renderer, int y);
static void GBAVideoSoftwareRendererFinishFrame(struct GBAVideoRenderer* renderer);

// Per-register helpers called from GBAVideoSoftwareRendererWriteVideoRegister.
static void GBAVideoSoftwareRendererUpdateDISPCNT(struct GBAVideoSoftwareRenderer* renderer);
static void GBAVideoSoftwareRendererWriteBGCNT(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBGPA(struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBGPB(struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBGPC(struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBGPD(struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBGX_LO(struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBGX_HI(struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBGY_LO(struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBGY_HI(struct GBAVideoSoftwareBackground* bg, uint16_t value);
static void GBAVideoSoftwareRendererWriteBLDCNT(struct GBAVideoSoftwareRenderer* renderer, uint16_t value);

// Scanline drawing: one routine per background mode, plus the sprite passes.
static void _drawScanline(struct GBAVideoSoftwareRenderer* renderer, int y);
static void _drawBackgroundMode0(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int y);
static void _drawBackgroundMode2(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int y);
static void _drawBackgroundMode3(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int y);
static void _drawBackgroundMode4(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int y);
static void _drawBackgroundMode5(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int y);
static void _preprocessTransformedSprite(struct GBAVideoSoftwareRenderer* renderer, struct GBATransformedObj* sprite, int y);
static void _preprocessSprite(struct GBAVideoSoftwareRenderer* renderer, struct GBAObj* sprite, int y);
static void _postprocessSprite(struct GBAVideoSoftwareRenderer* renderer, unsigned priority);

// Palette and color helpers used for the brighten/darken/alpha blend effects.
static void _updatePalettes(struct GBAVideoSoftwareRenderer* renderer);
static inline color_t _brighten(color_t color, int y);
static inline color_t _darken(color_t color, int y);
static color_t _mix(int weightA, color_t colorA, int weightB, color_t colorB);
  42
  43void GBAVideoSoftwareRendererCreate(struct GBAVideoSoftwareRenderer* renderer) {
  44	renderer->d.init = GBAVideoSoftwareRendererInit;
  45	renderer->d.deinit = GBAVideoSoftwareRendererDeinit;
  46	renderer->d.writeVideoRegister = GBAVideoSoftwareRendererWriteVideoRegister;
  47	renderer->d.writeOAM = GBAVideoSoftwareRendererWriteOAM;
  48	renderer->d.writePalette = GBAVideoSoftwareRendererWritePalette;
  49	renderer->d.drawScanline = GBAVideoSoftwareRendererDrawScanline;
  50	renderer->d.finishFrame = GBAVideoSoftwareRendererFinishFrame;
  51}
  52
  53static void GBAVideoSoftwareRendererInit(struct GBAVideoRenderer* renderer) {
  54	struct GBAVideoSoftwareRenderer* softwareRenderer = (struct GBAVideoSoftwareRenderer*) renderer;
  55	int i;
  56
  57	softwareRenderer->dispcnt.packed = 0x0080;
  58
  59	softwareRenderer->target1Obj = 0;
  60	softwareRenderer->target1Bd = 0;
  61	softwareRenderer->target2Obj = 0;
  62	softwareRenderer->target2Bd = 0;
  63	softwareRenderer->blendEffect = BLEND_NONE;
  64	memset(softwareRenderer->normalPalette, 0, sizeof(softwareRenderer->normalPalette));
  65	memset(softwareRenderer->variantPalette, 0, sizeof(softwareRenderer->variantPalette));
  66	memset(softwareRenderer->enabledBitmap, 0, sizeof(softwareRenderer->enabledBitmap));
  67
  68	softwareRenderer->blda = 0;
  69	softwareRenderer->bldb = 0;
  70	softwareRenderer->bldy = 0;
  71
  72	softwareRenderer->win0.priority = 0;
  73	softwareRenderer->win1.priority = 1;
  74	softwareRenderer->objwin.priority = 2;
  75	softwareRenderer->winout.priority = 3;
  76
  77	for (i = 0; i < 4; ++i) {
  78		struct GBAVideoSoftwareBackground* bg = &softwareRenderer->bg[i];
  79		bg->index = i;
  80		bg->enabled = 0;
  81		bg->priority = 0;
  82		bg->charBase = 0;
  83		bg->mosaic = 0;
  84		bg->multipalette = 0;
  85		bg->screenBase = 0;
  86		bg->overflow = 0;
  87		bg->size = 0;
  88		bg->target1 = 0;
  89		bg->target2 = 0;
  90		bg->x = 0;
  91		bg->y = 0;
  92		bg->refx = 0;
  93		bg->refy = 0;
  94		bg->dx = 256;
  95		bg->dmx = 0;
  96		bg->dy = 0;
  97		bg->dmy = 256;
  98		bg->sx = 0;
  99		bg->sy = 0;
 100	}
 101}
 102
 103static void GBAVideoSoftwareRendererDeinit(struct GBAVideoRenderer* renderer) {
 104	struct GBAVideoSoftwareRenderer* softwareRenderer = (struct GBAVideoSoftwareRenderer*) renderer;
 105}
 106
 107static uint16_t GBAVideoSoftwareRendererWriteVideoRegister(struct GBAVideoRenderer* renderer, uint32_t address, uint16_t value) {
 108	struct GBAVideoSoftwareRenderer* softwareRenderer = (struct GBAVideoSoftwareRenderer*) renderer;
 109	switch (address) {
 110	case REG_DISPCNT:
 111		softwareRenderer->dispcnt.packed = value;
 112		GBAVideoSoftwareRendererUpdateDISPCNT(softwareRenderer);
 113		break;
 114	case REG_BG0CNT:
 115		value &= 0xFFCF;
 116		GBAVideoSoftwareRendererWriteBGCNT(softwareRenderer, &softwareRenderer->bg[0], value);
 117		break;
 118	case REG_BG1CNT:
 119		value &= 0xFFCF;
 120		GBAVideoSoftwareRendererWriteBGCNT(softwareRenderer, &softwareRenderer->bg[1], value);
 121		break;
 122	case REG_BG2CNT:
 123		value &= 0xFFCF;
 124		GBAVideoSoftwareRendererWriteBGCNT(softwareRenderer, &softwareRenderer->bg[2], value);
 125		break;
 126	case REG_BG3CNT:
 127		value &= 0xFFCF;
 128		GBAVideoSoftwareRendererWriteBGCNT(softwareRenderer, &softwareRenderer->bg[3], value);
 129		break;
 130	case REG_BG0HOFS:
 131		value &= 0x01FF;
 132		softwareRenderer->bg[0].x = value;
 133		break;
 134	case REG_BG0VOFS:
 135		value &= 0x01FF;
 136		softwareRenderer->bg[0].y = value;
 137		break;
 138	case REG_BG1HOFS:
 139		value &= 0x01FF;
 140		softwareRenderer->bg[1].x = value;
 141		break;
 142	case REG_BG1VOFS:
 143		value &= 0x01FF;
 144		softwareRenderer->bg[1].y = value;
 145		break;
 146	case REG_BG2HOFS:
 147		value &= 0x01FF;
 148		softwareRenderer->bg[2].x = value;
 149		break;
 150	case REG_BG2VOFS:
 151		value &= 0x01FF;
 152		softwareRenderer->bg[2].y = value;
 153		break;
 154	case REG_BG3HOFS:
 155		value &= 0x01FF;
 156		softwareRenderer->bg[3].x = value;
 157		break;
 158	case REG_BG3VOFS:
 159		value &= 0x01FF;
 160		softwareRenderer->bg[3].y = value;
 161		break;
 162	case REG_BG2PA:
 163		GBAVideoSoftwareRendererWriteBGPA(&softwareRenderer->bg[2], value);
 164		break;
 165	case REG_BG2PB:
 166		GBAVideoSoftwareRendererWriteBGPB(&softwareRenderer->bg[2], value);
 167		break;
 168	case REG_BG2PC:
 169		GBAVideoSoftwareRendererWriteBGPC(&softwareRenderer->bg[2], value);
 170		break;
 171	case REG_BG2PD:
 172		GBAVideoSoftwareRendererWriteBGPD(&softwareRenderer->bg[2], value);
 173		break;
 174	case REG_BG2X_LO:
 175		GBAVideoSoftwareRendererWriteBGX_LO(&softwareRenderer->bg[2], value);
 176		break;
 177	case REG_BG2X_HI:
 178		GBAVideoSoftwareRendererWriteBGX_HI(&softwareRenderer->bg[2], value);
 179		break;
 180	case REG_BG2Y_LO:
 181		GBAVideoSoftwareRendererWriteBGY_LO(&softwareRenderer->bg[2], value);
 182		break;
 183	case REG_BG2Y_HI:
 184		GBAVideoSoftwareRendererWriteBGY_HI(&softwareRenderer->bg[2], value);
 185		break;
 186	case REG_BG3PA:
 187		GBAVideoSoftwareRendererWriteBGPA(&softwareRenderer->bg[3], value);
 188		break;
 189	case REG_BG3PB:
 190		GBAVideoSoftwareRendererWriteBGPB(&softwareRenderer->bg[3], value);
 191		break;
 192	case REG_BG3PC:
 193		GBAVideoSoftwareRendererWriteBGPC(&softwareRenderer->bg[3], value);
 194		break;
 195	case REG_BG3PD:
 196		GBAVideoSoftwareRendererWriteBGPD(&softwareRenderer->bg[3], value);
 197		break;
 198	case REG_BG3X_LO:
 199		GBAVideoSoftwareRendererWriteBGX_LO(&softwareRenderer->bg[3], value);
 200		break;
 201	case REG_BG3X_HI:
 202		GBAVideoSoftwareRendererWriteBGX_HI(&softwareRenderer->bg[3], value);
 203		break;
 204	case REG_BG3Y_LO:
 205		GBAVideoSoftwareRendererWriteBGY_LO(&softwareRenderer->bg[3], value);
 206		break;
 207	case REG_BG3Y_HI:
 208		GBAVideoSoftwareRendererWriteBGY_HI(&softwareRenderer->bg[3], value);
 209		break;
 210	case REG_BLDCNT:
 211		GBAVideoSoftwareRendererWriteBLDCNT(softwareRenderer, value);
 212		break;
 213	case REG_BLDALPHA:
 214		softwareRenderer->blda = value & 0x1F;
 215		if (softwareRenderer->blda > 0x10) {
 216			softwareRenderer->blda = 0x10;
 217		}
 218		softwareRenderer->bldb = (value >> 8) & 0x1F;
 219		if (softwareRenderer->bldb > 0x10) {
 220			softwareRenderer->bldb = 0x10;
 221		}
 222		break;
 223	case REG_BLDY:
 224		softwareRenderer->bldy = value & 0x1F;
 225		if (softwareRenderer->bldy > 0x10) {
 226			softwareRenderer->bldy = 0x10;
 227		}
 228		_updatePalettes(softwareRenderer);
 229		break;
 230	case REG_WIN0H:
 231		softwareRenderer->win0H.packed = value;
 232		if (softwareRenderer->win0H.start > softwareRenderer->win0H.end || softwareRenderer->win0H.end > VIDEO_HORIZONTAL_PIXELS) {
 233			softwareRenderer->win0H.end = VIDEO_HORIZONTAL_PIXELS;
 234		}
 235		break;
 236	case REG_WIN1H:
 237		softwareRenderer->win1H.packed = value;
 238		if (softwareRenderer->win1H.start > softwareRenderer->win1H.end || softwareRenderer->win1H.end > VIDEO_HORIZONTAL_PIXELS) {
 239			softwareRenderer->win1H.end = VIDEO_HORIZONTAL_PIXELS;
 240		}
 241		break;
 242	case REG_WIN0V:
 243		softwareRenderer->win0V.packed = value;
 244		if (softwareRenderer->win0V.start > softwareRenderer->win0V.end || softwareRenderer->win0V.end > VIDEO_HORIZONTAL_PIXELS) {
 245			softwareRenderer->win0V.end = VIDEO_VERTICAL_PIXELS;
 246		}
 247		break;
 248	case REG_WIN1V:
 249		softwareRenderer->win1V.packed = value;
 250		if (softwareRenderer->win1V.start > softwareRenderer->win1V.end || softwareRenderer->win1V.end > VIDEO_HORIZONTAL_PIXELS) {
 251			softwareRenderer->win1V.end = VIDEO_VERTICAL_PIXELS;
 252		}
 253		break;
 254	case REG_WININ:
 255		softwareRenderer->win0.packed = value;
 256		softwareRenderer->win1.packed = value >> 8;
 257		break;
 258	case REG_WINOUT:
 259		softwareRenderer->winout.packed = value;
 260		softwareRenderer->objwin.packed = value >> 8;
 261		break;
 262	case REG_MOSAIC:
 263	case REG_GREENSWP:
 264		GBALog(0, GBA_LOG_STUB, "Stub video register write: 0x%03X", address);
 265		break;
 266	default:
 267		GBALog(0, GBA_LOG_GAME_ERROR, "Invalid video register: 0x%03X", address);
 268	}
 269	return value;
 270}
 271
 272static void GBAVideoSoftwareRendererWriteOAM(struct GBAVideoRenderer* renderer, uint32_t oam) {
 273	struct GBAVideoSoftwareRenderer* softwareRenderer = (struct GBAVideoSoftwareRenderer*) renderer;
 274	if ((oam & 0x3) != 0x3) {
 275		oam >>= 2;
 276		struct GBAObj* sprite = &renderer->oam->obj[oam];
 277		int enabled = sprite->transformed || !sprite->disable;
 278		enabled <<= (oam & 0x1F);
 279		softwareRenderer->enabledBitmap[oam >> 5] = (softwareRenderer->enabledBitmap[oam >> 5] & ~(1 << (oam & 0x1F))) | enabled;
 280	}
 281}
 282
 283static void GBAVideoSoftwareRendererWritePalette(struct GBAVideoRenderer* renderer, uint32_t address, uint16_t value) {
 284	struct GBAVideoSoftwareRenderer* softwareRenderer = (struct GBAVideoSoftwareRenderer*) renderer;
 285#ifdef COLOR_16_BIT
 286	color_t color = value;
 287#else
 288	color_t color = 0;
 289	color |= (value << 3) & 0xF8;
 290	color |= (value << 6) & 0xF800;
 291	color |= (value << 9) & 0xF80000;
 292#endif
 293	softwareRenderer->normalPalette[address >> 1] = color;
 294	if (softwareRenderer->blendEffect == BLEND_BRIGHTEN) {
 295		softwareRenderer->variantPalette[address >> 1] = _brighten(color, softwareRenderer->bldy);
 296	} else if (softwareRenderer->blendEffect == BLEND_DARKEN) {
 297		softwareRenderer->variantPalette[address >> 1] = _darken(color, softwareRenderer->bldy);
 298	}
 299}
 300
// Splices the horizontal range of window WIN (win0 or win1) into the current
// scanline's window list, softwareRenderer->windows[0..nWindows).
//
// Each list entry stores the x coordinate where it ends (endX) and the control
// bits that apply up to that point. The macro finds the first entry that ends
// after WINH.start, inserts a split at WINH.start if the range begins inside
// that entry, assigns WIN's control up to WINH.end, and then either removes
// following entries that the new range reaches past or re-inserts the
// remainder of the old entry after it. Nothing happens when WINH.end is 0.
//
// Requires `softwareRenderer` in scope. Declares the locals `activeWindow` and
// `startX`, so it can be expanded at most once per enclosing block.
// NOTE(review): the list appears to be kept sorted by endX and no capacity
// check on windows[] is visible here; confirm the array is large enough.
#define BREAK_WINDOW(WIN) \
	int activeWindow; \
	int startX = 0; \
	if (softwareRenderer->WIN ## H.end > 0) { \
		for (activeWindow = 0; activeWindow < softwareRenderer->nWindows; ++activeWindow) { \
			if (softwareRenderer->WIN ## H.start < softwareRenderer->windows[activeWindow].endX) { \
				struct Window oldWindow = softwareRenderer->windows[activeWindow]; \
				if (softwareRenderer->WIN ## H.start > startX) { \
					int nextWindow = softwareRenderer->nWindows; \
					++softwareRenderer->nWindows; \
					for (; nextWindow > activeWindow; --nextWindow) { \
						softwareRenderer->windows[nextWindow] = softwareRenderer->windows[nextWindow - 1]; \
					} \
					softwareRenderer->windows[activeWindow].endX = softwareRenderer->WIN ## H.start; \
					++activeWindow; \
				} \
				softwareRenderer->windows[activeWindow].control = softwareRenderer->WIN; \
				softwareRenderer->windows[activeWindow].endX = softwareRenderer->WIN ## H.end; \
				if (softwareRenderer->WIN ## H.end >= oldWindow.endX) { \
					for (++activeWindow; softwareRenderer->WIN ## H.end >= softwareRenderer->windows[activeWindow].endX && softwareRenderer->nWindows > 1; ++activeWindow) { \
						softwareRenderer->windows[activeWindow] = softwareRenderer->windows[activeWindow + 1]; \
						--softwareRenderer->nWindows; \
					} \
				} else { \
					++activeWindow; \
					int nextWindow = softwareRenderer->nWindows; \
					++softwareRenderer->nWindows; \
					for (; nextWindow > activeWindow; --nextWindow) { \
						softwareRenderer->windows[nextWindow] = softwareRenderer->windows[nextWindow - 1]; \
					} \
					softwareRenderer->windows[activeWindow] = oldWindow; \
				} \
				break; \
			} \
			startX = softwareRenderer->windows[activeWindow].endX; \
		} \
	}
 338
 339static void GBAVideoSoftwareRendererDrawScanline(struct GBAVideoRenderer* renderer, int y) {
 340	struct GBAVideoSoftwareRenderer* softwareRenderer = (struct GBAVideoSoftwareRenderer*) renderer;
 341
 342	color_t* row = &softwareRenderer->outputBuffer[softwareRenderer->outputBufferStride * y];
 343	if (softwareRenderer->dispcnt.forcedBlank) {
 344		int x;
 345		for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x) {
 346			row[x] = GBA_COLOR_WHITE;
 347		}
 348		return;
 349	}
 350
 351	memset(softwareRenderer->spriteLayer, 0, sizeof(softwareRenderer->spriteLayer));
 352
 353	softwareRenderer->windows[0].endX = VIDEO_HORIZONTAL_PIXELS;
 354	softwareRenderer->nWindows = 1;
 355	if (softwareRenderer->dispcnt.win0Enable || softwareRenderer->dispcnt.win1Enable || softwareRenderer->dispcnt.objwinEnable) {
 356		softwareRenderer->windows[0].control = softwareRenderer->winout;
 357		if (softwareRenderer->dispcnt.win1Enable && y < softwareRenderer->win1V.end && y >= softwareRenderer->win1V.start) {
 358			BREAK_WINDOW(win1);
 359		}
 360		if (softwareRenderer->dispcnt.win0Enable && y < softwareRenderer->win0V.end && y >= softwareRenderer->win0V.start) {
 361			BREAK_WINDOW(win0);
 362		}
 363	} else {
 364		softwareRenderer->windows[0].control.packed = 0xFF;
 365	}
 366
 367	int w;
 368	int x = 0;
 369	for (w = 0; w < softwareRenderer->nWindows; ++w) {
 370		// TOOD: handle objwin on backdrop
 371		uint32_t backdrop = FLAG_UNWRITTEN | FLAG_PRIORITY | FLAG_IS_BACKGROUND;
 372		if (!softwareRenderer->target1Bd || softwareRenderer->blendEffect == BLEND_NONE || softwareRenderer->blendEffect == BLEND_ALPHA || !softwareRenderer->windows[w].control.blendEnable) {
 373			backdrop |= softwareRenderer->normalPalette[0];
 374		} else {
 375			backdrop |= softwareRenderer->variantPalette[0];
 376		}
 377		for (; x < softwareRenderer->windows[w].endX; ++x) {
 378			softwareRenderer->row[x] = backdrop;
 379		}
 380	}
 381
 382	_drawScanline(softwareRenderer, y);
 383
 384	if (softwareRenderer->target2Bd) {
 385		x = 0;
 386		for (w = 0; w < softwareRenderer->nWindows; ++w) {
 387		uint32_t backdrop = FLAG_UNWRITTEN | FLAG_PRIORITY | FLAG_IS_BACKGROUND;
 388			if (!softwareRenderer->target1Bd || softwareRenderer->blendEffect == BLEND_NONE || softwareRenderer->blendEffect == BLEND_ALPHA || !softwareRenderer->windows[w].control.blendEnable) {
 389				backdrop |= softwareRenderer->normalPalette[0];
 390			} else {
 391				backdrop |= softwareRenderer->variantPalette[0];
 392			}
 393			for (; x < softwareRenderer->windows[w].endX; ++x) {
 394				uint32_t color = softwareRenderer->row[x];
 395				if (color & FLAG_TARGET_1 && !(color & FLAG_FINALIZED)) {
 396					softwareRenderer->row[x] = _mix(softwareRenderer->bldb, backdrop, softwareRenderer->blda, color);
 397				}
 398			}
 399		}
 400	}
 401
 402#ifdef COLOR_16_BIT
 403	for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x) {
 404		row[x] = softwareRenderer->row[x];
 405	}
 406#else
 407	memcpy(row, softwareRenderer->row, VIDEO_HORIZONTAL_PIXELS * sizeof(*row));
 408#endif
 409}
 410
 411static void GBAVideoSoftwareRendererFinishFrame(struct GBAVideoRenderer* renderer) {
 412	struct GBAVideoSoftwareRenderer* softwareRenderer = (struct GBAVideoSoftwareRenderer*) renderer;
 413
 414	softwareRenderer->bg[2].sx = softwareRenderer->bg[2].refx;
 415	softwareRenderer->bg[2].sy = softwareRenderer->bg[2].refy;
 416	softwareRenderer->bg[3].sx = softwareRenderer->bg[3].refx;
 417	softwareRenderer->bg[3].sy = softwareRenderer->bg[3].refy;
 418}
 419
 420static void GBAVideoSoftwareRendererUpdateDISPCNT(struct GBAVideoSoftwareRenderer* renderer) {
 421	renderer->bg[0].enabled = renderer->dispcnt.bg0Enable;
 422	renderer->bg[1].enabled = renderer->dispcnt.bg1Enable;
 423	renderer->bg[2].enabled = renderer->dispcnt.bg2Enable;
 424	renderer->bg[3].enabled = renderer->dispcnt.bg3Enable;
 425}
 426
 427static void GBAVideoSoftwareRendererWriteBGCNT(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 428	(void)(renderer);
 429	union GBARegisterBGCNT reg = { .packed = value };
 430	bg->priority = reg.priority;
 431	bg->charBase = reg.charBase << 14;
 432	bg->mosaic = reg.mosaic;
 433	bg->multipalette = reg.multipalette;
 434	bg->screenBase = reg.screenBase << 11;
 435	bg->overflow = reg.overflow;
 436	bg->size = reg.size;
 437}
 438
 439static void GBAVideoSoftwareRendererWriteBGPA(struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 440	bg->dx = value;
 441}
 442
 443static void GBAVideoSoftwareRendererWriteBGPB(struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 444	bg->dmx = value;
 445}
 446
 447static void GBAVideoSoftwareRendererWriteBGPC(struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 448	bg->dy = value;
 449}
 450
 451static void GBAVideoSoftwareRendererWriteBGPD(struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 452	bg->dmy = value;
 453}
 454
 455static void GBAVideoSoftwareRendererWriteBGX_LO(struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 456	bg->refx = (bg->refx & 0xFFFF0000) | value;
 457	bg->sx = bg->refx;
 458}
 459
 460static void GBAVideoSoftwareRendererWriteBGX_HI(struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 461	bg->refx = (bg->refx & 0x0000FFFF) | (value << 16);
 462	bg->refx <<= 4;
 463	bg->refx >>= 4;
 464	bg->sx = bg->refx;
 465}
 466
 467static void GBAVideoSoftwareRendererWriteBGY_LO(struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 468	bg->refy = (bg->refy & 0xFFFF0000) | value;
 469	bg->sy = bg->refy;
 470}
 471
 472static void GBAVideoSoftwareRendererWriteBGY_HI(struct GBAVideoSoftwareBackground* bg, uint16_t value) {
 473	bg->refy = (bg->refy & 0x0000FFFF) | (value << 16);
 474	bg->refy <<= 4;
 475	bg->refy >>= 4;
 476	bg->sy = bg->refy;
 477}
 478
 479static void GBAVideoSoftwareRendererWriteBLDCNT(struct GBAVideoSoftwareRenderer* renderer, uint16_t value) {
 480	union {
 481		struct {
 482			unsigned target1Bg0 : 1;
 483			unsigned target1Bg1 : 1;
 484			unsigned target1Bg2 : 1;
 485			unsigned target1Bg3 : 1;
 486			unsigned target1Obj : 1;
 487			unsigned target1Bd : 1;
 488			enum BlendEffect effect : 2;
 489			unsigned target2Bg0 : 1;
 490			unsigned target2Bg1 : 1;
 491			unsigned target2Bg2 : 1;
 492			unsigned target2Bg3 : 1;
 493			unsigned target2Obj : 1;
 494			unsigned target2Bd : 1;
 495		};
 496		uint16_t packed;
 497	} bldcnt = { .packed = value };
 498
 499	enum BlendEffect oldEffect = renderer->blendEffect;
 500
 501	renderer->bg[0].target1 = bldcnt.target1Bg0;
 502	renderer->bg[1].target1 = bldcnt.target1Bg1;
 503	renderer->bg[2].target1 = bldcnt.target1Bg2;
 504	renderer->bg[3].target1 = bldcnt.target1Bg3;
 505	renderer->bg[0].target2 = bldcnt.target2Bg0;
 506	renderer->bg[1].target2 = bldcnt.target2Bg1;
 507	renderer->bg[2].target2 = bldcnt.target2Bg2;
 508	renderer->bg[3].target2 = bldcnt.target2Bg3;
 509
 510	renderer->blendEffect = bldcnt.effect;
 511	renderer->target1Obj = bldcnt.target1Obj;
 512	renderer->target1Bd = bldcnt.target1Bd;
 513	renderer->target2Obj = bldcnt.target2Obj;
 514	renderer->target2Bd = bldcnt.target2Bd;
 515
 516	if (oldEffect != renderer->blendEffect) {
 517		_updatePalettes(renderer);
 518	}
 519}
 520
 521#define TEST_LAYER_ENABLED(X) \
 522	(renderer->bg[X].enabled && \
 523	(renderer->currentWindow.bg ## X ## Enable || \
 524	(renderer->dispcnt.objwinEnable && renderer->objwin.bg ## X ## Enable)) && \
 525	renderer->bg[X].priority == priority)
 526
 527static void _drawScanline(struct GBAVideoSoftwareRenderer* renderer, int y) {
 528	int w;
 529	renderer->end = 0;
 530	if (renderer->dispcnt.objEnable) {
 531		for (w = 0; w < renderer->nWindows; ++w) {
 532			renderer->start = renderer->end;
 533			renderer->end = renderer->windows[w].endX;
 534			renderer->currentWindow = renderer->windows[w].control;
 535			if (!renderer->currentWindow.objEnable) {
 536				continue;
 537			}
 538			int i, j;
 539			for (j = 0; j < 4; ++j) {
 540				uint32_t bitmap = renderer->enabledBitmap[j];
 541				if (!bitmap) {
 542					continue;
 543				}
 544				for (i = j * 32; i < (j + 1) * 32; ++i) {
 545					if (bitmap & 1) {
 546						struct GBAObj* sprite = &renderer->d.oam->obj[i];
 547						if (sprite->transformed) {
 548							_preprocessTransformedSprite(renderer, &renderer->d.oam->tobj[i], y);
 549						} else {
 550							_preprocessSprite(renderer, sprite, y);
 551						}
 552					}
 553					bitmap >>= 1;
 554				}
 555			}
 556		}
 557	}
 558
 559	int priority;
 560	for (priority = 0; priority < 4; ++priority) {
 561		_postprocessSprite(renderer, priority);
 562		renderer->end = 0;
 563		for (w = 0; w < renderer->nWindows; ++w) {
 564			renderer->start = renderer->end;
 565			renderer->end = renderer->windows[w].endX;
 566			renderer->currentWindow = renderer->windows[w].control;
 567			if (TEST_LAYER_ENABLED(0) && renderer->dispcnt.mode < 2) {
 568				_drawBackgroundMode0(renderer, &renderer->bg[0], y);
 569			}
 570			if (TEST_LAYER_ENABLED(1) && renderer->dispcnt.mode < 2) {
 571				_drawBackgroundMode0(renderer, &renderer->bg[1], y);
 572			}
 573			if (TEST_LAYER_ENABLED(2)) {
 574				switch (renderer->dispcnt.mode) {
 575				case 0:
 576					_drawBackgroundMode0(renderer, &renderer->bg[2], y);
 577					break;
 578				case 1:
 579				case 2:
 580					_drawBackgroundMode2(renderer, &renderer->bg[2], y);
 581					break;
 582				case 3:
 583					_drawBackgroundMode3(renderer, &renderer->bg[2], y);
 584					break;
 585				case 4:
 586					_drawBackgroundMode4(renderer, &renderer->bg[2], y);
 587					break;
 588				case 5:
 589					_drawBackgroundMode5(renderer, &renderer->bg[2], y);
 590					break;
 591				}
 592				renderer->bg[2].sx += renderer->bg[2].dmx;
 593				renderer->bg[2].sy += renderer->bg[2].dmy;
 594			}
 595			if (TEST_LAYER_ENABLED(3)) {
 596				switch (renderer->dispcnt.mode) {
 597				case 0:
 598					_drawBackgroundMode0(renderer, &renderer->bg[3], y);
 599					break;
 600				case 2:
 601					_drawBackgroundMode2(renderer, &renderer->bg[3], y);
 602					break;
 603				}
 604				renderer->bg[3].sx += renderer->bg[3].dmx;
 605				renderer->bg[3].sy += renderer->bg[3].dmy;
 606			}
 607		}
 608	}
 609}
 610
 611static void _composite(struct GBAVideoSoftwareRenderer* renderer, int offset, uint32_t color, uint32_t current) {
 612	// We stash the priority on the top bits so we can do a one-operator comparison
 613	// The lower the number, the higher the priority, and sprites take precendence over backgrounds
 614	// We want to do special processing if the color pixel is target 1, however
 615	if ((color & 0xF8000000) < (current & 0xF8000000)) {
 616		if (current & FLAG_UNWRITTEN) {
 617			renderer->row[offset] = color | (current & FLAG_OBJWIN);
 618		} else if (!(color & FLAG_TARGET_1) || !(current & FLAG_TARGET_2)) {
 619			renderer->row[offset] = color | FLAG_FINALIZED;
 620		} else {
 621			renderer->row[offset] = _mix(renderer->bldb, current, renderer->blda, color) | FLAG_FINALIZED;
 622		}
 623	} else {
 624		if (current & FLAG_TARGET_1 && color & FLAG_TARGET_2) {
 625			renderer->row[offset] = _mix(renderer->blda, current, renderer->bldb, color) | FLAG_FINALIZED;
 626		} else {
 627			renderer->row[offset] = current | FLAG_FINALIZED;
 628		}
 629	}
 630}
 631
 632#define BACKGROUND_DRAW_PIXEL_16_NORMAL \
 633	pixelData = tileData & 0xF; \
 634	current = renderer->row[outX]; \
 635	if (pixelData && !(current & FLAG_FINALIZED)) { \
 636		if (!objwinSlowPath || !(current & FLAG_OBJWIN) != objwinOnly) { \
 637			_composite(renderer, outX, renderer->normalPalette[pixelData | paletteData] | flags, current); \
 638		} \
 639	} \
 640	tileData >>= 4;
 641
 642#define BACKGROUND_DRAW_PIXEL_16_VARIANT \
 643	pixelData = tileData & 0xF; \
 644	current = renderer->row[outX]; \
 645	if (tileData & 0xF && !(current & FLAG_FINALIZED)) { \
 646		if (!objwinSlowPath || !(current & FLAG_OBJWIN) != objwinOnly) { \
 647			_composite(renderer, outX, renderer->variantPalette[pixelData | paletteData] | flags, current); \
 648		} \
 649	} \
 650	tileData >>= 4;
 651
 652#define BACKGROUND_DRAW_PIXEL_256_NORMAL \
 653	pixelData = tileData & 0xFF; \
 654	current = renderer->row[outX]; \
 655	if (pixelData && !(current & FLAG_FINALIZED)) { \
 656		if (!objwinSlowPath || !(current & FLAG_OBJWIN) != objwinOnly) { \
 657			_composite(renderer, outX, renderer->normalPalette[pixelData] | flags, current); \
 658		} \
 659	} \
 660	tileData >>= 8;
 661
 662#define BACKGROUND_DRAW_PIXEL_256_VARIANT \
 663	pixelData = tileData & 0xFF; \
 664	current = renderer->row[outX]; \
 665	if (pixelData && !(renderer->row[outX] & FLAG_FINALIZED)) { \
 666		if (!objwinSlowPath || !(current & FLAG_OBJWIN) != objwinOnly) { \
 667			_composite(renderer, outX, renderer->variantPalette[pixelData] | flags, current); \
 668		} \
 669	} \
 670	tileData >>= 8;
 671
 672#define BACKGROUND_TEXT_SELECT_CHARACTER \
 673	localX = tileX * 8 + inX; \
 674	xBase = localX & 0xF8; \
 675	if (background->size & 1) { \
 676		xBase += (localX & 0x100) << 5; \
 677	} \
 678	screenBase = (background->screenBase >> 1) + (xBase >> 3) + (yBase << 2); \
 679	mapData.packed = renderer->d.vram[screenBase]; \
 680	if (!mapData.vflip) { \
 681		localY = inY & 0x7; \
 682	} else { \
 683		localY = 7 - (inY & 0x7); \
 684	}
 685
// Draws whole 8-pixel tiles with 4 bits per pixel, from tileX up to (not
// including) tileEnd. TYPE is NORMAL or VARIANT and selects the
// BACKGROUND_DRAW_PIXEL_16_* step to use.
//
// For each tile the map entry is looked up, one 32-bit word holding the eight
// pixels of the selected tile row is read from VRAM, and the pixels are drawn
// left to right, or right to left when the entry's hflip bit is set. A row
// word of zero is skipped. outX always advances by 8 per tile. The drawing is
// written out eight times rather than looped.
//
// Declares tileData, current, paletteData and pixelData. Require in scope:
// renderer, background, mapData, charBase, outX, tileX, tileEnd, plus
// everything BACKGROUND_TEXT_SELECT_CHARACTER and the pixel step need.
#define BACKGROUND_MODE_0_TILE_16_LOOP(TYPE) \
	uint32_t tileData; \
	uint32_t current; \
	int paletteData, pixelData; \
	for (; tileX < tileEnd; ++tileX) { \
		BACKGROUND_TEXT_SELECT_CHARACTER; \
		paletteData = mapData.palette << 4; \
		charBase = ((background->charBase + (mapData.tile << 5)) >> 2) + localY; \
		tileData = ((uint32_t*)renderer->d.vram)[charBase]; \
		if (tileData) { \
			if (!mapData.hflip) { \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				++outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				++outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				++outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				++outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				++outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				++outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				++outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				++outX; \
			} else { \
				outX += 7; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				--outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				--outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				--outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				--outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				--outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				--outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				--outX; \
				BACKGROUND_DRAW_PIXEL_16_ ## TYPE; \
				outX += 8; \
			} \
		} else { \
			outX += 8; \
		} \
	}
 736
 737#define BACKGROUND_MODE_0_TILE_256_LOOP(TYPE) \
 738	uint32_t tileData; \
 739	uint32_t current; \
 740	int pixelData; \
 741	for (; tileX < tileEnd; ++tileX) { \
 742		BACKGROUND_TEXT_SELECT_CHARACTER; \
 743		charBase = ((background->charBase + (mapData.tile << 6)) >> 2) + (localY << 1); \
 744		if (!mapData.hflip) { \
 745			tileData = ((uint32_t*)renderer->d.vram)[charBase]; \
 746			if (tileData) { \
 747					BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 748					++outX; \
 749					BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 750					++outX; \
 751					BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 752					++outX; \
 753					BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 754					++outX; \
 755			} else { \
 756				outX += 4; \
 757			} \
 758			tileData = ((uint32_t*)renderer->d.vram)[charBase + 1]; \
 759			if (tileData) { \
 760					BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 761					++outX; \
 762					BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 763					++outX; \
 764					BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 765					++outX; \
 766					BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 767					++outX; \
 768			} else { \
 769				outX += 4; \
 770			} \
 771		} else { \
 772			uint32_t tileData = ((uint32_t*)renderer->d.vram)[charBase + 1]; \
 773			if (tileData) { \
 774				outX += 3; \
 775				BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 776				--outX; \
 777				BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 778				--outX; \
 779				BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 780				--outX; \
 781				BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 782				outX += 4; \
 783			} else { \
 784				outX += 4; \
 785			} \
 786			tileData = ((uint32_t*)renderer->d.vram)[charBase]; \
 787			if (tileData) { \
 788				outX += 3; \
 789				BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 790				--outX; \
 791				BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 792				--outX; \
 793				BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 794				--outX; \
 795				BACKGROUND_DRAW_PIXEL_256_ ## TYPE; \
 796				outX += 4; \
 797			} else { \
 798				outX += 4; \
 799			} \
 800		} \
 801	}
 802
 803#define PREPARE_OBJWIN \
 804	int objwinSlowPath = renderer->dispcnt.objwinEnable; \
 805	int objwinOnly = 0; \
 806	if (objwinSlowPath) { \
 807		switch (background->index) { \
 808		case 0: \
 809			objwinSlowPath = renderer->objwin.bg0Enable != renderer->currentWindow.bg0Enable; \
 810			objwinOnly = renderer->objwin.bg0Enable; \
 811			break; \
 812		case 1: \
 813			objwinSlowPath = renderer->objwin.bg1Enable != renderer->currentWindow.bg1Enable; \
 814			objwinOnly = renderer->objwin.bg1Enable; \
 815			break; \
 816		case 2: \
 817			objwinSlowPath = renderer->objwin.bg2Enable != renderer->currentWindow.bg2Enable; \
 818			objwinOnly = renderer->objwin.bg2Enable; \
 819			break; \
 820		case 3: \
 821			objwinSlowPath = renderer->objwin.bg3Enable != renderer->currentWindow.bg3Enable; \
 822			objwinOnly = renderer->objwin.bg3Enable; \
 823			break; \
 824		} \
 825	}
 826
 827static void _drawBackgroundMode0(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int y) {
 828	int inX = renderer->start + background->x;
 829	int inY = y + background->y;
 830	union GBATextMapData mapData;
 831	PREPARE_OBJWIN;
 832
 833	unsigned yBase = inY & 0xF8;
 834	if (background->size == 2) {
 835		yBase += inY & 0x100;
 836	} else if (background->size == 3) {
 837		yBase += (inY & 0x100) << 1;
 838	}
 839
 840	int localX;
 841	int localY;
 842
 843	unsigned xBase;
 844
 845	int flags = (background->priority << OFFSET_PRIORITY) | FLAG_IS_BACKGROUND;
 846	flags |= FLAG_TARGET_1 * (background->target1 && renderer->blendEffect == BLEND_ALPHA);
 847	flags |= FLAG_TARGET_2 * background->target2;
 848
 849	uint32_t screenBase;
 850	uint32_t charBase;
 851	int variant = background->target1 && renderer->currentWindow.blendEnable && (renderer->blendEffect == BLEND_BRIGHTEN || renderer->blendEffect == BLEND_DARKEN);
 852
 853	int outX = renderer->start;
 854	int tileX = 0;
 855	int tileEnd = (renderer->end - renderer->start + (inX & 0x7)) >> 3;
 856	if (inX & 0x7) {
 857		uint32_t tileData;
 858		uint32_t current;
 859		int pixelData, paletteData;
 860		int mod8 = inX & 0x7;
 861		BACKGROUND_TEXT_SELECT_CHARACTER;
 862
 863		int end = outX + 0x8 - mod8;
 864		if (!background->multipalette) {
 865			paletteData = mapData.palette << 4;
 866			charBase = ((background->charBase + (mapData.tile << 5)) >> 2) + localY;
 867			tileData = ((uint32_t*)renderer->d.vram)[charBase];
 868			if (!mapData.hflip) {
 869				tileData >>= 4 * mod8;
 870				if (!variant) {
 871					for (; outX < end; ++outX) {
 872						BACKGROUND_DRAW_PIXEL_16_NORMAL;
 873					}
 874				} else {
 875					for (; outX < end; ++outX) {
 876						BACKGROUND_DRAW_PIXEL_16_VARIANT;
 877					}
 878				}
 879			} else {
 880				if (!variant) {
 881					for (outX = end - 1; outX >= renderer->start; --outX) {
 882						BACKGROUND_DRAW_PIXEL_16_NORMAL;
 883					}
 884				} else {
 885					for (outX = end - 1; outX >= renderer->start; --outX) {
 886						BACKGROUND_DRAW_PIXEL_16_VARIANT;
 887					}
 888				}
 889			}
 890		} else {
 891			// TODO: hflip
 892			charBase = ((background->charBase + (mapData.tile << 6)) >> 2) + (localY << 1);
 893			int end2 = end - 4;
 894			int shift = inX & 0x3;
 895			if (end2 > 0) {
 896				tileData = ((uint32_t*)renderer->d.vram)[charBase];
 897				tileData >>= 8 * shift;
 898				shift = 0;
 899				if (!variant) {
 900					for (; outX < end2; ++outX) {
 901						BACKGROUND_DRAW_PIXEL_256_NORMAL;
 902					}
 903				} else {
 904					for (; outX < end2; ++outX) {
 905						BACKGROUND_DRAW_PIXEL_256_VARIANT;
 906					}
 907				}
 908			}
 909
 910			tileData = ((uint32_t*)renderer->d.vram)[charBase + 1];
 911			tileData >>= 8 * shift;
 912			if (!variant) {
 913				for (; outX < end; ++outX) {
 914					BACKGROUND_DRAW_PIXEL_256_NORMAL;
 915				}
 916			} else {
 917				for (; outX < end; ++outX) {
 918					BACKGROUND_DRAW_PIXEL_256_VARIANT;
 919				}
 920			}
 921		}
 922	}
 923	if (inX & 0x7 || (renderer->end - renderer->start) & 0x7) {
 924		tileX = tileEnd;
 925		uint32_t tileData;
 926		uint32_t current;
 927		int pixelData, paletteData;
 928		int mod8 = (inX + renderer->end - renderer->start) & 0x7;
 929		BACKGROUND_TEXT_SELECT_CHARACTER;
 930
 931		int end = 0x8 - mod8;
 932		if (!background->multipalette) {
 933			charBase = ((background->charBase + (mapData.tile << 5)) >> 2) + localY;
 934			tileData = ((uint32_t*)renderer->d.vram)[charBase];
 935			paletteData = mapData.palette << 4;
 936			if (!mapData.hflip) {
 937				outX = renderer->end - mod8;
 938				if (outX < 0) {
 939					tileData >>= 4 * -outX;
 940					outX = 0;
 941				}
 942				if (!variant) {
 943					for (; outX < renderer->end; ++outX) {
 944						BACKGROUND_DRAW_PIXEL_16_NORMAL;
 945					}
 946				} else {
 947					for (; outX < renderer->end; ++outX) {
 948						BACKGROUND_DRAW_PIXEL_16_VARIANT;
 949					}
 950				}
 951			} else {
 952				tileData >>= 4 * (0x8 - mod8);
 953				int end2 = renderer->end - 8;
 954				if (end2 < -1) {
 955					end2 = -1;
 956				}
 957				if (!variant) {
 958					for (outX = renderer->end - 1; outX > end2; --outX) {
 959						BACKGROUND_DRAW_PIXEL_16_NORMAL;
 960					}
 961				} else {
 962					for (outX = renderer->end - 1; outX > end2; --outX) {
 963						BACKGROUND_DRAW_PIXEL_16_VARIANT;
 964					}
 965				}
 966			}
 967		} else {
 968			// TODO: hflip
 969			charBase = ((background->charBase + (mapData.tile << 6)) >> 2) + (localY << 1);
 970			outX = renderer->end - 8 + end;
 971			int end2 = 4 - end;
 972			if (end2 > 0) {
 973				tileData = ((uint32_t*)renderer->d.vram)[charBase];
 974				if (!variant) {
 975					for (; outX < renderer->end - end2; ++outX) {
 976						BACKGROUND_DRAW_PIXEL_256_NORMAL;
 977					}
 978				} else {
 979					for (; outX < renderer->end - end2; ++outX) {
 980						BACKGROUND_DRAW_PIXEL_256_VARIANT;
 981					}
 982				}
 983				++charBase;
 984			}
 985
 986			tileData = ((uint32_t*)renderer->d.vram)[charBase];
 987			if (!variant) {
 988				for (; outX < renderer->end; ++outX) {
 989					BACKGROUND_DRAW_PIXEL_256_NORMAL;
 990				}
 991			} else {
 992				for (; outX < renderer->end; ++outX) {
 993					BACKGROUND_DRAW_PIXEL_256_VARIANT;
 994				}
 995			}
 996		}
 997
 998		tileX = (inX & 0x7) != 0;
 999		outX = renderer->start + tileX * 8 - (inX & 0x7);
1000	}
1001
1002	if (!background->multipalette) {
1003		if (!variant) {
1004			BACKGROUND_MODE_0_TILE_16_LOOP(NORMAL);
1005		 } else {
1006			BACKGROUND_MODE_0_TILE_16_LOOP(VARIANT);
1007		 }
1008	} else {
1009		if (!variant) {
1010			BACKGROUND_MODE_0_TILE_256_LOOP(NORMAL);
1011		 } else {
1012			BACKGROUND_MODE_0_TILE_256_LOOP(VARIANT);
1013		 }
1014	}
1015}
1016
// Common prologue for the affine/bitmap background drawers. Declares in the
// expanding scope:
//   x, y           - texture coordinate one step (dx, dy) before the reference
//                    point (sx, sy); the per-pixel code advances it before use
//   localX, localY - scratch coordinates filled in per pixel
//   flags          - priority, background and blend-target bits for _composite
//   variant        - nonzero when brighten/darken applies to this layer
// Also marks the `unused` parameter of the enclosing function as used.
#define BACKGROUND_BITMAP_INIT \
	(void)(unused); \
	int32_t localX; \
	int32_t localY; \
	int32_t x = background->sx - background->dx; \
	int32_t y = background->sy - background->dy; \
	\
	int flags = (background->priority << OFFSET_PRIORITY) | FLAG_IS_BACKGROUND \
		| FLAG_TARGET_1 * (background->target1 && renderer->blendEffect == BLEND_ALPHA) \
		| FLAG_TARGET_2 * background->target2; \
	int variant = 0; \
	if (background->target1 && renderer->currentWindow.blendEnable) { \
		variant = renderer->blendEffect == BLEND_BRIGHTEN || renderer->blendEffect == BLEND_DARKEN; \
	}
1028
// Per-pixel step for bitmap backgrounds; must be expanded inside a loop.
// Advances the 8.8 fixed-point texture coordinate (x, y) by (dx, dy), skips the
// current loop iteration when the sample falls outside a W x H pixel bitmap,
// and otherwise publishes the coordinate through localX / localY.
#define BACKGROUND_BITMAP_ITERATE(W, H) \
	x += background->dx; \
	y += background->dy; \
	\
	if (!(x >= 0 && y >= 0 && (x >> 8) < (W) && (y >> 8) < (H))) { \
		continue; \
	} \
	localX = x; \
	localY = y;
1039
1040static void _drawBackgroundMode2(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int unused) {
1041	int sizeAdjusted = 0x8000 << background->size;
1042
1043	BACKGROUND_BITMAP_INIT;
1044	PREPARE_OBJWIN;
1045
1046	uint32_t screenBase = background->screenBase;
1047	uint32_t charBase = background->charBase;
1048	uint8_t mapData;
1049	uint8_t tileData;
1050
1051	int outX;
1052	for (outX = renderer->start; outX < VIDEO_HORIZONTAL_PIXELS; ++outX) {
1053		x += background->dx;
1054		y += background->dy;
1055
1056		if (background->overflow) {
1057			localX = x & (sizeAdjusted - 1);
1058			localY = y & (sizeAdjusted - 1);
1059		} else if (x < 0 || y < 0 || x >= sizeAdjusted || y >= sizeAdjusted) {
1060			continue;
1061		} else {
1062			localX = x;
1063			localY = y;
1064		}
1065		mapData = ((uint8_t*)renderer->d.vram)[screenBase + (localX >> 11) + (((localY >> 7) & 0x7F0) << background->size)];
1066		tileData = ((uint8_t*)renderer->d.vram)[charBase + (mapData << 6) + ((localY & 0x700) >> 5) + ((localX & 0x700) >> 8)];
1067
1068		uint32_t current = renderer->row[outX];
1069		if (tileData && !(current & FLAG_FINALIZED) && (!objwinSlowPath || !(current & FLAG_OBJWIN) != objwinOnly)) {
1070			if (!variant) {
1071				_composite(renderer, outX, renderer->normalPalette[tileData] | flags, current);
1072			} else {
1073				_composite(renderer, outX, renderer->variantPalette[tileData] | flags, current);
1074			}
1075		}
1076	}
1077}
1078
1079static void _drawBackgroundMode3(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int unused) {
1080	BACKGROUND_BITMAP_INIT;
1081	PREPARE_OBJWIN;
1082
1083	uint16_t color;
1084	uint32_t color32;
1085
1086	int outX;
1087	for (outX = 0; outX < VIDEO_HORIZONTAL_PIXELS; ++outX) {
1088		BACKGROUND_BITMAP_ITERATE(VIDEO_HORIZONTAL_PIXELS, VIDEO_VERTICAL_PIXELS);
1089
1090		color = ((uint16_t*)renderer->d.vram)[(localX >> 8) + (localY >> 8) * VIDEO_HORIZONTAL_PIXELS];
1091		color32 = 0;
1092		color32 |= (color << 3) & 0xF8;
1093		color32 |= (color << 6) & 0xF800;
1094		color32 |= (color << 9) & 0xF80000;
1095
1096		uint32_t current = renderer->row[outX];
1097		if (!(current & FLAG_FINALIZED) && (!objwinSlowPath || !(current & FLAG_OBJWIN) != objwinOnly)) {
1098			if (!variant) {
1099				_composite(renderer, outX, color32 | flags, current);
1100			} else if (renderer->blendEffect == BLEND_BRIGHTEN) {
1101				_composite(renderer, outX, _brighten(color32, renderer->bldy) | flags, current);
1102			} else if (renderer->blendEffect == BLEND_DARKEN) {
1103				_composite(renderer, outX, _darken(color32, renderer->bldy) | flags, current);
1104			}
1105		}
1106	}
1107}
1108
1109static void _drawBackgroundMode4(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int unused) {
1110	BACKGROUND_BITMAP_INIT;
1111	PREPARE_OBJWIN;
1112
1113	uint16_t color;
1114	uint32_t offset = 0;
1115	if (renderer->dispcnt.frameSelect) {
1116		offset = 0xA000;
1117	}
1118
1119	int outX;
1120	for (outX = 0; outX < VIDEO_HORIZONTAL_PIXELS; ++outX) {
1121		BACKGROUND_BITMAP_ITERATE(VIDEO_HORIZONTAL_PIXELS, VIDEO_VERTICAL_PIXELS);
1122
1123		color = ((uint8_t*)renderer->d.vram)[offset + (localX >> 8) + (localY >> 8) * VIDEO_HORIZONTAL_PIXELS];
1124
1125		uint32_t current = renderer->row[outX];
1126		if (color && !(current & FLAG_FINALIZED) && (!objwinSlowPath || !(current & FLAG_OBJWIN) != objwinOnly)) {
1127			if (!variant) {
1128				_composite(renderer, outX, renderer->normalPalette[color] | flags, current);
1129			} else {
1130				_composite(renderer, outX, renderer->variantPalette[color] | flags, current);
1131			}
1132		}
1133	}
1134}
1135
1136static void _drawBackgroundMode5(struct GBAVideoSoftwareRenderer* renderer, struct GBAVideoSoftwareBackground* background, int unused) {
1137	BACKGROUND_BITMAP_INIT;
1138	PREPARE_OBJWIN;
1139
1140	uint32_t color;
1141	uint32_t offset = 0;
1142	if (renderer->dispcnt.frameSelect) {
1143		offset = 0xA000;
1144	}
1145
1146	int outX;
1147	for (outX = 0; outX < VIDEO_HORIZONTAL_PIXELS; ++outX) {
1148		BACKGROUND_BITMAP_ITERATE(160, 128);
1149
1150		color = ((uint16_t*)renderer->d.vram)[offset + (localX >> 8) + (localY >> 8) * 160];
1151#ifndef COLOR_16_BIT
1152		color_t color32 = 0;
1153		color32 |= (color << 9) & 0xF80000;
1154		color32 |= (color << 3) & 0xF8;
1155		color32 |= (color << 6) & 0xF800;
1156		color = color32;
1157#endif
1158
1159		uint32_t current = renderer->row[outX];
1160		if (!(current & FLAG_FINALIZED) && (!objwinSlowPath || !(current & FLAG_OBJWIN) != objwinOnly)) {
1161			if (!variant) {
1162				_composite(renderer, outX, color | flags, current);
1163			} else if (renderer->blendEffect == BLEND_BRIGHTEN) {
1164				_composite(renderer, outX, _brighten(color, renderer->bldy) | flags, current);
1165			} else if (renderer->blendEffect == BLEND_DARKEN) {
1166				_composite(renderer, outX, _darken(color, renderer->bldy) | flags, current);
1167			}
1168		}
1169	}
1170}
1171
// Sprite dimensions in pixels, stored as (width, height) pairs.
// The pair for a sprite starts at index shape * 8 + size * 2.
static const int _objSizes[32] = {
	// shape 0: width == height
	8, 8,   16, 16,   32, 32,   64, 64,
	// shape 1: wider than tall
	16, 8,   32, 8,   32, 16,   64, 32,
	// shape 2: taller than wide
	8, 16,   8, 32,   16, 32,   32, 64,
	// shape 3: all zero
	0, 0,   0, 0,   0, 0,   0, 0
};
1190
// Draws one row of an untransformed sprite, clipped to [start, end).
// Expects renderer, sprite, x, width, inY, start and end in the expanding scope.
// DEPTH (16 or 256) and TYPE (NORMAL, VARIANT or OBJWIN) select the
// SPRITE_YBASE_*, SPRITE_XBASE_* and SPRITE_DRAW_PIXEL_* macros to use.
// Columns whose row entry no longer has FLAG_UNWRITTEN set are skipped.
#define SPRITE_NORMAL_LOOP(DEPTH, TYPE) \
		SPRITE_YBASE_ ## DEPTH(inY); \
		int stopX = end < x + width ? end : x + width; \
		int outX; \
		for (outX = start > x ? start : x; outX < stopX; ++outX) { \
			if (renderer->row[outX] & FLAG_UNWRITTEN) { \
				int srcX = sprite->hflip ? width - (outX - x) - 1 : outX - x; \
				SPRITE_XBASE_ ## DEPTH(srcX); \
				SPRITE_DRAW_PIXEL_ ## DEPTH ## _ ## TYPE(srcX); \
			} \
		}
1209
// Draws one row of an affine-transformed sprite, clipped to [start, end).
// Expects renderer, mat, x, inY, width, height, totalWidth, totalHeight, start
// and end in the expanding scope. Each output pixel is mapped through the 8.8
// fixed-point matrix `mat` about the centre of the sprite's bounding box;
// samples that fall outside the width x height source are skipped, as are
// columns whose row entry no longer has FLAG_UNWRITTEN set.
#define SPRITE_TRANSFORMED_LOOP(DEPTH, TYPE) \
	int outX = x >= start ? x : start; \
	for (; outX < x + totalWidth && outX < end; ++outX) { \
		if (!(renderer->row[outX] & FLAG_UNWRITTEN)) { \
			continue; \
		} \
		int relX = outX - x - (totalWidth >> 1); \
		int relY = inY - (totalHeight >> 1); \
		int localX = ((mat->a * relX + mat->b * relY) >> 8) + (width >> 1); \
		int localY = ((mat->c * relX + mat->d * relY) >> 8) + (height >> 1); \
		\
		if (localX >= 0 && localX < width && localY >= 0 && localY < height) { \
			SPRITE_YBASE_ ## DEPTH(localY); \
			SPRITE_XBASE_ ## DEPTH(localX); \
			SPRITE_DRAW_PIXEL_ ## DEPTH ## _ ## TYPE(localX); \
		} \
	}
1228
// 16-color sprite addressing: a tile is 8 rows of 4 bytes, two pixels per byte.
// xBase / yBase are byte offsets relative to charBase; the halfword holding the
// pixel is fetched and the pixel's 4-bit index extracted from it.
// With dispcnt.objCharacterMapping set, tile rows follow each other directly
// (width / 8 tiles per row); otherwise rows are 0x400 bytes apart.
#define SPRITE_XBASE_16(localX) unsigned xBase = ((localX) & ~0x7) * 4 + (((localX) >> 1) & 2);
#define SPRITE_YBASE_16(localY) unsigned yBase = ((localY) & ~0x7) * (renderer->dispcnt.objCharacterMapping ? width >> 1 : 0x80) + ((localY) & 0x7) * 4;

// Writes the palette color into spriteLayer unless the pixel is transparent
// (index 0) or the slot is already occupied.
#define SPRITE_DRAW_PIXEL_16_NORMAL(localX) \
	uint16_t tileData = (renderer->d.vram[(yBase + charBase + xBase) >> 1] >> (((localX) & 3) << 2)) & 0xF; \
	if (tileData && !(renderer->spriteLayer[outX])) { \
		renderer->spriteLayer[outX] = renderer->normalPalette[0x100 | tileData | (sprite->palette << 4)] | flags; \
	}

// Same as the NORMAL form, but looks the color up in variantPalette.
#define SPRITE_DRAW_PIXEL_16_VARIANT(localX) \
	uint16_t tileData = (renderer->d.vram[(yBase + charBase + xBase) >> 1] >> (((localX) & 3) << 2)) & 0xF; \
	if (tileData && !(renderer->spriteLayer[outX])) { \
		renderer->spriteLayer[outX] = renderer->variantPalette[0x100 | tileData | (sprite->palette << 4)] | flags; \
	}

// Object-window sprites draw no color; opaque pixels only tag the row entry.
#define SPRITE_DRAW_PIXEL_16_OBJWIN(localX) \
	uint16_t tileData = (renderer->d.vram[(yBase + charBase + xBase) >> 1] >> (((localX) & 3) << 2)) & 0xF; \
	if (tileData) { \
		renderer->row[outX] |= FLAG_OBJWIN; \
	}
1252
// 256-color sprite addressing: a tile is 8 rows of 8 bytes, one pixel per byte.
// xBase / yBase are byte offsets relative to charBase; the halfword holding the
// pixel is fetched and the pixel's 8-bit index extracted from it.
// With dispcnt.objCharacterMapping set, tile rows follow each other directly
// (width / 8 tiles per row); otherwise rows are 0x400 bytes apart.
#define SPRITE_XBASE_256(localX) unsigned xBase = ((localX) & ~0x7) * 8 + ((localX) & 6);
#define SPRITE_YBASE_256(localY) unsigned yBase = ((localY) & ~0x7) * (renderer->dispcnt.objCharacterMapping ? width : 0x80) + ((localY) & 0x7) * 8;

// Writes the palette color into spriteLayer unless the pixel is transparent
// (index 0) or the slot is already occupied.
#define SPRITE_DRAW_PIXEL_256_NORMAL(localX) \
	uint16_t tileData = (renderer->d.vram[(yBase + charBase + xBase) >> 1] >> (((localX) & 1) << 3)) & 0xFF; \
	if (tileData && !(renderer->spriteLayer[outX])) { \
		renderer->spriteLayer[outX] = renderer->normalPalette[0x100 | tileData] | flags; \
	}

// Same as the NORMAL form, but looks the color up in variantPalette.
#define SPRITE_DRAW_PIXEL_256_VARIANT(localX) \
	uint16_t tileData = (renderer->d.vram[(yBase + charBase + xBase) >> 1] >> (((localX) & 1) << 3)) & 0xFF; \
	if (tileData && !(renderer->spriteLayer[outX])) { \
		renderer->spriteLayer[outX] = renderer->variantPalette[0x100 | tileData] | flags; \
	}

// Object-window sprites draw no color; opaque pixels only tag the row entry.
#define SPRITE_DRAW_PIXEL_256_OBJWIN(localX) \
	uint16_t tileData = (renderer->d.vram[(yBase + charBase + xBase) >> 1] >> (((localX) & 1) << 3)) & 0xFF; \
	if (tileData) { \
		renderer->row[outX] |= FLAG_OBJWIN; \
	}
1276
1277static void _preprocessSprite(struct GBAVideoSoftwareRenderer* renderer, struct GBAObj* sprite, int y) {
1278	int width = _objSizes[sprite->shape * 8 + sprite->size * 2];
1279	int height = _objSizes[sprite->shape * 8 + sprite->size * 2 + 1];
1280	int start = renderer->start;
1281	int end = renderer->end;
1282	if ((y < sprite->y && (sprite->y + height - 256 < 0 || y >= sprite->y + height - 256)) || y >= sprite->y + height) {
1283		return;
1284	}
1285	int flags = (sprite->priority << OFFSET_PRIORITY) | FLAG_FINALIZED;
1286	flags |= FLAG_TARGET_1 * ((renderer->target1Obj && renderer->blendEffect == BLEND_ALPHA) || sprite->mode == OBJ_MODE_SEMITRANSPARENT);
1287	flags |= FLAG_TARGET_2 *renderer->target2Obj;
1288	flags |= FLAG_OBJWIN * (sprite->mode == OBJ_MODE_OBJWIN);
1289	int x = sprite->x;
1290	int inY = y - sprite->y;
1291	if (sprite->y + height - 256 >= 0) {
1292		inY += 256;
1293	}
1294	if (sprite->vflip) {
1295		inY = height - inY - 1;
1296	}
1297	unsigned charBase = BASE_TILE + sprite->tile * 0x20;
1298	int variant = renderer->target1Obj && renderer->currentWindow.blendEnable && (renderer->blendEffect == BLEND_BRIGHTEN || renderer->blendEffect == BLEND_DARKEN);
1299	if (!sprite->multipalette) {
1300		if (flags & FLAG_OBJWIN) {
1301			SPRITE_NORMAL_LOOP(16, OBJWIN);
1302		} else if (!variant) {
1303			SPRITE_NORMAL_LOOP(16, NORMAL);
1304		} else {
1305			SPRITE_NORMAL_LOOP(16, VARIANT);
1306		}
1307	} else {
1308		if (flags & FLAG_OBJWIN) {
1309			SPRITE_NORMAL_LOOP(256, OBJWIN);
1310		} else if (!variant) {
1311			SPRITE_NORMAL_LOOP(256, NORMAL);
1312		} else {
1313			SPRITE_NORMAL_LOOP(256, VARIANT);
1314		}
1315	}
1316}
1317
1318static void _preprocessTransformedSprite(struct GBAVideoSoftwareRenderer* renderer, struct GBATransformedObj* sprite, int y) {
1319	int width = _objSizes[sprite->shape * 8 + sprite->size * 2];
1320	int totalWidth = width << sprite->doublesize;
1321	int height = _objSizes[sprite->shape * 8 + sprite->size * 2 + 1];
1322	int totalHeight = height << sprite->doublesize;
1323	int start = renderer->start;
1324	int end = renderer->end;
1325	if ((y < sprite->y && (sprite->y + totalHeight - 256 < 0 || y >= sprite->y + totalHeight - 256)) || y >= sprite->y + totalHeight) {
1326		return;
1327	}
1328	int flags = (sprite->priority << OFFSET_PRIORITY) | FLAG_FINALIZED;
1329	flags |= FLAG_TARGET_1 * ((renderer->target1Obj && renderer->blendEffect == BLEND_ALPHA) || sprite->mode == OBJ_MODE_SEMITRANSPARENT);
1330	flags |= FLAG_TARGET_2 * renderer->target2Obj;
1331	flags |= FLAG_OBJWIN * (sprite->mode == OBJ_MODE_OBJWIN);
1332	int x = sprite->x;
1333	unsigned charBase = BASE_TILE + sprite->tile * 0x20;
1334	struct GBAOAMMatrix* mat = &renderer->d.oam->mat[sprite->matIndex];
1335	int variant = renderer->target1Obj && renderer->currentWindow.blendEnable && (renderer->blendEffect == BLEND_BRIGHTEN || renderer->blendEffect == BLEND_DARKEN);
1336	int inY = y - sprite->y;
1337	if (inY < 0) {
1338		inY += 256;
1339	}
1340	if (!sprite->multipalette) {
1341		if (flags & FLAG_OBJWIN) {
1342			SPRITE_TRANSFORMED_LOOP(16, OBJWIN);
1343		} else if (!variant) {
1344			SPRITE_TRANSFORMED_LOOP(16, NORMAL);
1345		} else {
1346			SPRITE_TRANSFORMED_LOOP(16, VARIANT);
1347		}
1348	} else {
1349		if (flags & FLAG_OBJWIN) {
1350			SPRITE_TRANSFORMED_LOOP(256, OBJWIN);
1351		} else if (!variant) {
1352			SPRITE_TRANSFORMED_LOOP(256, NORMAL);
1353		} else {
1354			SPRITE_TRANSFORMED_LOOP(256, VARIANT);
1355		}
1356	}
1357}
1358
1359static void _postprocessSprite(struct GBAVideoSoftwareRenderer* renderer, unsigned priority) {
1360	int x;
1361	for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x) {
1362		uint32_t color = renderer->spriteLayer[x];
1363		uint32_t current = renderer->row[x];
1364		if ((color & FLAG_FINALIZED) && (color & FLAG_PRIORITY) >> OFFSET_PRIORITY == priority && !(current & FLAG_FINALIZED)) {
1365			_composite(renderer, x, color & ~FLAG_FINALIZED, current);
1366		}
1367	}
1368}
1369
1370static void _updatePalettes(struct GBAVideoSoftwareRenderer* renderer) {
1371	int i;
1372	if (renderer->blendEffect == BLEND_BRIGHTEN) {
1373		for (i = 0; i < 512; ++i) {
1374			renderer->variantPalette[i] = _brighten(renderer->normalPalette[i], renderer->bldy);
1375		}
1376	} else if (renderer->blendEffect == BLEND_DARKEN) {
1377		for (i = 0; i < 512; ++i) {
1378			renderer->variantPalette[i] = _darken(renderer->normalPalette[i], renderer->bldy);
1379		}
1380	} else {
1381		for (i = 0; i < 512; ++i) {
1382			renderer->variantPalette[i] = renderer->normalPalette[i];
1383		}
1384	}
1385}
1386
1387static inline color_t _brighten(color_t color, int y) {
1388	color_t c = 0;
1389	color_t a;
1390#ifdef COLOR_16_BIT
1391	a = color & 0x1F;
1392	c |= (a + ((0x1F - a) * y) / 16) & 0x1F;
1393
1394	a = color & 0x3E0;
1395	c |= (a + ((0x3E0 - a) * y) / 16) & 0x3E0;
1396
1397	a = color & 0x7C00;
1398	c |= (a + ((0x7C00 - a) * y) / 16) & 0x7C00;
1399#else
1400	a = color & 0xF8;
1401	c |= (a + ((0xF8 - a) * y) / 16) & 0xF8;
1402
1403	a = color & 0xF800;
1404	c |= (a + ((0xF800 - a) * y) / 16) & 0xF800;
1405
1406	a = color & 0xF80000;
1407	c |= (a + ((0xF80000 - a) * y) / 16) & 0xF80000;
1408#endif
1409	return c;
1410}
1411
1412static inline color_t _darken(color_t color, int y) {
1413	color_t c = 0;
1414	color_t a;
1415#ifdef COLOR_16_BIT
1416	a = color & 0x1F;
1417	c |= (a - (a * y) / 16) & 0x1F;
1418
1419	a = color & 0x3E0;
1420	c |= (a - (a * y) / 16) & 0x3E0;
1421
1422	a = color & 0x7C00;
1423	c |= (a - (a * y) / 16) & 0x7C00;
1424#else
1425	a = color & 0xF8;
1426	c |= (a - (a * y) / 16) & 0xF8;
1427
1428	a = color & 0xF800;
1429	c |= (a - (a * y) / 16) & 0xF800;
1430
1431	a = color & 0xF80000;
1432	c |= (a - (a * y) / 16) & 0xF80000;
1433#endif
1434	return c;
1435}
1436
1437static color_t _mix(int weightA, color_t colorA, int weightB, color_t colorB) {
1438	color_t c = 0;
1439	color_t a, b;
1440#ifdef COLOR_16_BIT
1441	a = colorA & 0x1F;
1442	b = colorB & 0x1F;
1443	c |= ((a * weightA + b * weightB) / 16) & 0x3F;
1444	if (c & 0x0020) {
1445		c = 0x001F;
1446	}
1447
1448	a = colorA & 0x3E0;
1449	b = colorB & 0x3E0;
1450	c |= ((a * weightA + b * weightB) / 16) & 0x7E0;
1451	if (c & 0x0400) {
1452		c |= 0x03E0;
1453	}
1454
1455	a = colorA & 0x7C00;
1456	b = colorB & 0x7C00;
1457	c |= ((a * weightA + b * weightB) / 16) & 0xFC00;
1458	if (c & 0x8000) {
1459		c |= 0x7C00;
1460	}
1461#else
1462	a = colorA & 0xF8;
1463	b = colorB & 0xF8;
1464	c |= ((a * weightA + b * weightB) / 16) & 0x1F8;
1465	if (c & 0x00000100) {
1466		c = 0x000000F8;
1467	}
1468
1469	a = colorA & 0xF800;
1470	b = colorB & 0xF800;
1471	c |= ((a * weightA + b * weightB) / 16) & 0x1F800;
1472	if (c & 0x00010000) {
1473		c = (c & 0x000000F8) | 0x0000F800;
1474	}
1475
1476	a = colorA & 0xF80000;
1477	b = colorB & 0xF80000;
1478	c |= ((a * weightA + b * weightB) / 16) & 0x1F80000;
1479	if (c & 0x01000000) {
1480		c = (c & 0x0000F8F8) | 0x00F80000;
1481	}
1482#endif
1483	return c;
1484}