GBA BIOS: Make HLE BIOS calls interruptable (fixes #1711, fixes #1823)
@@ -15,6 +15,7 @@ - GBA: Fix timing advancing too quickly in rare cases
- GBA BIOS: Implement dummy sound driver calls - GBA BIOS: Improve HLE BIOS timing - GBA BIOS: Fix reloading video registers after reset (fixes mgba.io/i/1808) + - GBA BIOS: Make HLE BIOS calls interruptable (fixes mgba.io/i/1711 and mgba.io/i/1823) - GBA DMA: Linger last DMA on bus (fixes mgba.io/i/301 and mgba.io/i/1320) - GBA Memory: Improve gamepak prefetch timing - GBA Memory: Stall on VRAM access in mode 2 (fixes mgba.io/i/190)
@@ -109,6 +109,8 @@ bool haltPending;
bool cpuBlocked; bool earlyExit; uint32_t dmaPC; + uint32_t biosStall; + int idleDetectionStep; int idleDetectionFailures; int32_t cachedRegisters[16];
@@ -201,7 +201,8 @@ * | bit 0: Is CPU halted?
* | bit 1: POSTFLG * | bit 2: Is IRQ pending? * 0x00320 - 0x00323: Next IRQ event - * 0x00324 - 0x003FF: Reserved (leave zero) + * 0x00324 - 0x00327: Interruptable BIOS stall cycles + * 0x00328 - 0x003FF: Reserved (leave zero) * 0x00400 - 0x007FF: I/O memory * 0x00800 - 0x00BFF: Palette * 0x00C00 - 0x00FFF: OAM@@ -334,8 +335,9 @@ uint64_t globalCycles;
uint32_t lastPrefetchedPc; GBASerializedMiscFlags miscFlags; uint32_t nextIrq; + int32_t biosStall; - uint32_t reserved[55]; + uint32_t reserved[54]; uint16_t io[SIZE_IO >> 1]; uint16_t pram[SIZE_PALETTE_RAM >> 1];
@@ -298,10 +298,10 @@ int loops = clz32(denom) - clz32(num);
if (loops < 1) { loops = 1; } - cpu->cycles += 4 /* prologue */ + 13 * loops + 7 /* epilogue */; + gba->biosStall = 4 /* prologue */ + 13 * loops + 7 /* epilogue */; } -static int16_t _ArcTan(int32_t i, int32_t* r1, int32_t* r3, int32_t* cycles) { +static int16_t _ArcTan(int32_t i, int32_t* r1, int32_t* r3, uint32_t* cycles) { int currentCycles = 37; currentCycles += _mulWait(i * i); int32_t a = -((i * i) >> 14);@@ -325,11 +325,11 @@ }
if (r3) { *r3 = b; } - *cycles += currentCycles; + *cycles = currentCycles; return (i * b) >> 16; } -static int16_t _ArcTan2(int32_t x, int32_t y, int32_t* r1, int32_t* cycles) { +static int16_t _ArcTan2(int32_t x, int32_t y, int32_t* r1, uint32_t* cycles) { if (!y) { if (x >= 0) { return 0;@@ -363,9 +363,9 @@ return 0xC000 - _ArcTan((x << 14) / y, r1, NULL, cycles);
} } -static int32_t _Sqrt(uint32_t x, int32_t* cycles) { +static int32_t _Sqrt(uint32_t x, uint32_t* cycles) { if (!x) { - *cycles += 53; + *cycles = 53; return 0; } int32_t currentCycles = 15;@@ -412,7 +412,7 @@ bound = oldBound;
break; } } - *cycles += currentCycles; + *cycles = currentCycles; return bound; }@@ -422,6 +422,9 @@ mLOG(GBA_BIOS, DEBUG, "SWI: %02X r0: %08X r1: %08X r2: %08X r3: %08X",
immediate, cpu->gprs[0], cpu->gprs[1], cpu->gprs[2], cpu->gprs[3]); switch (immediate) { + case 0xF0: // Used for internal stall counting + cpu->gprs[4] = gba->biosStall; + return; case 0xFA: GBAPrintFlush(gba); return;@@ -431,6 +434,8 @@ if (gba->memory.fullBios) {
ARMRaiseSWI(cpu); return; } + + bool useStall = false; switch (immediate) { case GBA_SWI_SOFT_RESET: _SoftReset(gba);@@ -452,19 +457,24 @@ // IntrWait
ARMRaiseSWI(cpu); return; case GBA_SWI_DIV: + useStall = true; _Div(gba, cpu->gprs[0], cpu->gprs[1]); break; case GBA_SWI_DIV_ARM: + useStall = true; _Div(gba, cpu->gprs[1], cpu->gprs[0]); break; case GBA_SWI_SQRT: - cpu->gprs[0] = _Sqrt(cpu->gprs[0], &cpu->cycles); + useStall = true; + cpu->gprs[0] = _Sqrt(cpu->gprs[0], &gba->biosStall); break; case GBA_SWI_ARCTAN: - cpu->gprs[0] = _ArcTan(cpu->gprs[0], &cpu->gprs[1], &cpu->gprs[3], &cpu->cycles); + useStall = true; + cpu->gprs[0] = _ArcTan(cpu->gprs[0], &cpu->gprs[1], &cpu->gprs[3], &gba->biosStall); break; case GBA_SWI_ARCTAN2: - cpu->gprs[0] = (uint16_t) _ArcTan2(cpu->gprs[0], cpu->gprs[1], &cpu->gprs[1], &cpu->cycles); + useStall = true; + cpu->gprs[0] = (uint16_t) _ArcTan2(cpu->gprs[0], cpu->gprs[1], &cpu->gprs[1], &gba->biosStall); cpu->gprs[3] = 0x170; break; case GBA_SWI_CPU_SET:@@ -589,12 +599,25 @@ return;
default: mLOG(GBA_BIOS, STUB, "Stub software interrupt: %02X", immediate); } - gba->cpu->cycles += 45 + cpu->memory.activeNonseqCycles16 /* 8 bit load for SWI # */; - // Return cycles - if (gba->cpu->executionMode == MODE_ARM) { - gba->cpu->cycles += cpu->memory.activeNonseqCycles32 + cpu->memory.activeSeqCycles32; - } else { - gba->cpu->cycles += cpu->memory.activeNonseqCycles16 + cpu->memory.activeSeqCycles16; + if (useStall) { + if (gba->biosStall >= 18) { + gba->biosStall -= 18; + gba->cpu->cycles += gba->biosStall & 3; + gba->biosStall &= ~3; + ARMRaiseSWI(cpu); + } else { + gba->cpu->cycles += gba->biosStall; + useStall = false; + } + } + if (!useStall) { + gba->cpu->cycles += 45 + cpu->memory.activeNonseqCycles16 /* 8 bit load for SWI # */; + // Return cycles + if (gba->cpu->executionMode == MODE_ARM) { + gba->cpu->cycles += cpu->memory.activeNonseqCycles32 + cpu->memory.activeSeqCycles32; + } else { + gba->cpu->cycles += cpu->memory.activeNonseqCycles16 + cpu->memory.activeSeqCycles16; + } } gba->memory.biosPrefetch = 0xE3A02004; }
@@ -201,6 +201,7 @@
gba->cpuBlocked = false; gba->earlyExit = false; gba->dmaPC = 0; + gba->biosStall = 0; if (gba->yankedRomSize) { gba->memory.romSize = gba->yankedRomSize; gba->memory.romMask = toPow2(gba->memory.romSize) - 1;@@ -928,4 +929,4 @@
static void _clearSoftwareBreakpoint(struct ARMDebugger* debugger, const struct ARMDebugBreakpoint* breakpoint) { GBAClearBreakpoint((struct GBA*) debugger->cpu->master, breakpoint->d.address, breakpoint->sw.mode, breakpoint->sw.opcode); } -#endif+#endif
@@ -3,28 +3,28 @@
#include <mgba/internal/gba/memory.h> const uint8_t hleBios[SIZE_BIOS] = { - 0x06, 0x00, 0x00, 0xea, 0x66, 0x00, 0x00, 0xea, 0x0b, 0x00, 0x00, 0xea, + 0x06, 0x00, 0x00, 0xea, 0x66, 0x00, 0x00, 0xea, 0x0c, 0x00, 0x00, 0xea, 0xfe, 0xff, 0xff, 0xea, 0xfe, 0xff, 0xff, 0xea, 0x00, 0x00, 0xa0, 0xe1, 0x59, 0x00, 0x00, 0xea, 0xfe, 0xff, 0xff, 0xea, 0x02, 0x03, 0xa0, 0xe3, 0x03, 0x10, 0xd0, 0xe5, 0xea, 0x00, 0x51, 0xe3, 0x4c, 0x01, 0x9f, 0x15, 0x10, 0xff, 0x2f, 0xe1, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x29, 0xe1, - 0x00, 0x00, 0x5d, 0xe3, 0x01, 0xd3, 0xa0, 0x03, 0x20, 0xd0, 0x4d, 0x02, - 0x00, 0x58, 0x2d, 0xe9, 0x02, 0xb0, 0x5e, 0xe5, 0xd4, 0xc0, 0xa0, 0xe3, - 0x0b, 0xb1, 0x9c, 0xe7, 0x00, 0x00, 0x5b, 0xe3, 0x00, 0xc0, 0x4f, 0xe1, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5d, 0xe3, 0x01, 0xd3, 0xa0, 0x03, + 0x20, 0xd0, 0x4d, 0x02, 0x10, 0x58, 0x2d, 0xe9, 0x02, 0xb0, 0x5e, 0xe5, + 0xd4, 0xc0, 0xa0, 0xe3, 0x0b, 0xb1, 0x9c, 0xe7, 0xd2, 0xcf, 0xa0, 0xe3, + 0x0b, 0x00, 0x5c, 0xe1, 0x00, 0x00, 0xf0, 0x0f, 0x00, 0xc0, 0x4f, 0xe1, 0x00, 0x10, 0x2d, 0xe9, 0x80, 0xc0, 0x0c, 0xe2, 0x1f, 0xc0, 0x8c, 0xe3, - 0x0c, 0xf0, 0x29, 0xe1, 0x00, 0x40, 0x2d, 0xe9, 0x00, 0x00, 0xa0, 0xe1, - 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, + 0x0c, 0xf0, 0x21, 0xe1, 0x00, 0x40, 0x2d, 0xe9, 0x00, 0x00, 0x5b, 0xe3, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x0f, 0xe0, 0xa0, 0xe1, 0x1b, 0xff, 0x2f, 0x11, 0x00, 0x00, 0xa0, 0xe1, - 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, - 0x00, 0x40, 0xbd, 0xe8, 0x93, 0xf0, 0x29, 0xe3, 0x00, 0x10, 0xbd, 0xe8, - 0x0c, 0xf0, 0x69, 0xe1, 0x00, 0x58, 0xbd, 0xe8, 0x0e, 0xf0, 0xb0, 0xe1, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x20, 0xa0, 0xe3, 0xb0, 0x01, 0x00, 0x00, + 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x00, 0xa0, 0xe1, 0x00, 0x40, 0xbd, 0xe8, + 0x93, 0xf0, 0x29, 0xe3, 0x00, 0x10, 0xbd, 0xe8, 0x0c, 0xf0, 0x69, 0xe1, + 0x10, 0x58, 
0xbd, 0xe8, 0x0e, 0xf0, 0xb0, 0xe1, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x20, 0xa0, 0xe3, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb4, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, - 0xcc, 0x01, 0x00, 0x00, 0xc4, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, - 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, - 0xb0, 0x01, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0xa8, 0x02, 0x00, 0x00, + 0xcc, 0x01, 0x00, 0x00, 0xc4, 0x01, 0x00, 0x00, 0x48, 0x03, 0x00, 0x00, + 0x48, 0x03, 0x00, 0x00, 0x48, 0x03, 0x00, 0x00, 0x48, 0x03, 0x00, 0x00, + 0x48, 0x03, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0xa8, 0x02, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00, 0xb0, 0x01, 0x00, 0x00,@@ -72,5 +72,6 @@ 0x01, 0x40, 0xa0, 0xe1, 0x01, 0x50, 0xa0, 0xe1, 0x01, 0x60, 0xa0, 0xe1,
0x01, 0x70, 0xa0, 0xe1, 0x01, 0x80, 0xa0, 0xe1, 0x01, 0x90, 0xa0, 0xe1, 0x01, 0xa0, 0xa0, 0xe1, 0xfa, 0x07, 0xa0, 0xe8, 0xfa, 0x07, 0xa0, 0xe8, 0xfa, 0x07, 0xa0, 0xe8, 0xfa, 0x07, 0xa0, 0xe8, 0x00, 0x10, 0xa0, 0xe3, - 0xf0, 0x07, 0xbd, 0xe8, 0x1e, 0xff, 0x2f, 0xe1, 0xb0, 0x01, 0x00, 0x00 + 0xf0, 0x07, 0xbd, 0xe8, 0x1e, 0xff, 0x2f, 0xe1, 0xb0, 0x01, 0x00, 0x00, + 0x04, 0x40, 0x54, 0xe2, 0xfd, 0xff, 0xff, 0x8a, 0x1e, 0xff, 0x2f, 0xe1 };
@@ -25,25 +25,26 @@ bx r0
.word 0 .word 0xE129F000 +.word 0 @ Padding for back-compat + swiBase: cmp sp, #0 moveq sp, #0x04000000 subeq sp, #0x20 -stmfd sp!, {r11-r12, lr} +stmfd sp!, {r4, r11-r12, lr} ldrb r11, [lr, #-2] mov r12, #swiTable ldr r11, [r12, r11, lsl #2] -cmp r11, #0 +mov r12, #StallCall +cmp r12, r11 +swieq 0xF00000 @ Special mGBA-internal call to load the stall count into r4 mrs r12, spsr stmfd sp!, {r12} and r12, #0x80 orr r12, #0x1F -msr cpsr, r12 +msr cpsr_c, r12 stmfd sp!, {lr} -nop -nop -nop -nop +cmp r11, #0 nop nop nop@@ -55,15 +56,16 @@ bxne r11
nop nop nop -nop ldmfd sp!, {lr} msr cpsr, #0x93 ldmfd sp!, {r12} msr spsr, r12 -ldmfd sp!, {r11-r12, lr} +ldmfd sp!, {r4, r11-r12, lr} movs pc, lr .word 0 .word 0xE3A02004 + +.word 0 @ Padding for back-compat swiTable: .word SoftReset @ 0x00@@ -131,11 +133,6 @@ @ Unimplemented
SoftReset: RegisterRamReset: Stop: -Div: -DivArm: -Sqrt: -ArcTan: -ArcTan2: GetBiosChecksum: BgAffineSet: ObjAffineSet:@@ -305,3 +302,14 @@ ldmfd sp!, {r4-r10}
bx lr .ltorg + +Div: +DivArm: +Sqrt: +ArcTan: +ArcTan2: + +StallCall: +subs r4, #4 +bhi StallCall +bx lr
@@ -69,6 +69,7 @@ STORE_32(gba->irqEvent.when - mTimingCurrentTime(&gba->timing), 0, &state->nextIrq);
} miscFlags = GBASerializedMiscFlagsSetBlocked(miscFlags, gba->cpuBlocked); STORE_32(miscFlags, 0, &state->miscFlags); + STORE_32(gba->biosStall, 0, &state->biosStall); /* dedicated slot; nextIrq already holds the pending IRQ delay */ GBAMemorySerialize(&gba->memory, state); GBAIOSerialize(gba, state);@@ -187,6 +188,7 @@ LOAD_32(when, 0, &state->nextIrq);
mTimingSchedule(&gba->timing, &gba->irqEvent, when); } gba->cpuBlocked = GBASerializedMiscFlagsGetBlocked(miscFlags); + LOAD_32(gba->biosStall, 0, &state->biosStall); /* must read the same field the serializer writes */ GBAVideoDeserialize(&gba->video, state); GBAMemoryDeserialize(&gba->memory, state);