GB: Redo double speed emulation (closes #1515)
@@ -96,6 +96,7 @@ - Core: Rework thread state synchronization
- Core: Improve support for ROM patch cheats, supporting disabling overlapping patches - GB: Allow pausing event loop while CPU is blocked - GB: Add support for sleep and shutdown callbacks + - GB: Redo double speed emulation (closes mgba.io/i/1515) - GB Core: Return the current number of banks for ROM/SRAM, not theoretical max - GB I/O: Implement preliminary support for PCM12/PCM34 (closes mgba.io/i/1468) - GBA: Allow pausing event loop while CPU is blocked
@@ -133,6 +133,7 @@ #pragma pack(pop)
uint16_t index; + int tMultiplier; int32_t cycles; int32_t nextEvent; enum SM83ExecutionState executionState;
@@ -65,7 +65,7 @@ audio->style = style;
if (style == GB_AUDIO_GBA) { audio->timingFactor = 4; } else { - audio->timingFactor = 1; + audio->timingFactor = 2; } audio->frameEvent.context = audio;@@ -339,7 +339,7 @@ mTimingDeschedule(audio->timing, &audio->ch3Event);
if (audio->playingCh3) { audio->ch3.readable = audio->style != GB_AUDIO_DMG; // TODO: Where does this cycle delay come from? - mTimingSchedule(audio->timing, &audio->ch3Event, audio->timingFactor * 4 + 2 * (2048 - audio->ch3.rate)); + mTimingSchedule(audio->timing, &audio->ch3Event, audio->timingFactor * (4 + 2 * (2048 - audio->ch3.rate))); } *audio->nr52 &= ~0x0004; *audio->nr52 |= audio->playingCh3 << 2;@@ -477,11 +477,8 @@ } else if (!wasEnable) {
audio->skipFrame = false; audio->frame = 7; - if (audio->p) { - unsigned timingFactor = 0x400 >> !audio->p->doubleSpeed; - if (audio->p->timer.internalDiv & timingFactor) { - audio->skipFrame = true; - } + if (audio->p && audio->p->timer.internalDiv & 0x400) { + audio->skipFrame = true; } } }@@ -914,7 +911,7 @@ ch->sample >>= volume;
audio->ch3.readable = true; if (audio->style == GB_AUDIO_DMG) { mTimingDeschedule(audio->timing, &audio->ch3Fade); - mTimingSchedule(timing, &audio->ch3Fade, 2 - cyclesLate); + mTimingSchedule(timing, &audio->ch3Fade, 4 - cyclesLate); } int cycles = 2 * (2048 - ch->rate); mTimingSchedule(timing, &audio->ch3Event, audio->timingFactor * cycles - cyclesLate);
@@ -742,7 +742,7 @@ if (!enable) {
gb->memory.ime = false; GBUpdateIRQs(gb); } else { - mTimingSchedule(&gb->timing, &gb->eiPending, 4); + mTimingSchedule(&gb->timing, &gb->eiPending, 4 * cpu->tMultiplier); } }@@ -796,7 +796,7 @@ void GBStop(struct SM83Core* cpu) {
struct GB* gb = (struct GB*) cpu->master; if (gb->model >= GB_MODEL_CGB && gb->memory.io[GB_REG_KEY1] & 1) { gb->doubleSpeed ^= 1; - gb->audio.timingFactor = gb->doubleSpeed + 1; + gb->cpu->tMultiplier = 2 - gb->doubleSpeed; gb->memory.io[GB_REG_KEY1] = 0; gb->memory.io[GB_REG_KEY1] |= gb->doubleSpeed << 7; } else {
@@ -417,15 +417,15 @@ _writeSGBBits(gb, (value >> 4) & 3);
} return; case GB_REG_TIMA: - if (value && mTimingUntil(&gb->timing, &gb->timer.irq) > 1) { + if (value && mTimingUntil(&gb->timing, &gb->timer.irq) > 2 - (int) gb->doubleSpeed) { mTimingDeschedule(&gb->timing, &gb->timer.irq); } - if (mTimingUntil(&gb->timing, &gb->timer.irq) == -1) { + if (mTimingUntil(&gb->timing, &gb->timer.irq) == (int) gb->doubleSpeed - 2) { return; } break; case GB_REG_TMA: - if (mTimingUntil(&gb->timing, &gb->timer.irq) == -1) { + if (mTimingUntil(&gb->timing, &gb->timer.irq) == (int) gb->doubleSpeed - 2) { gb->memory.io[GB_REG_TIMA] = value; } break;
@@ -531,10 +531,7 @@ if (base >= 0xE000) {
base &= 0xDFFF; } mTimingDeschedule(&gb->timing, &gb->memory.dmaEvent); - mTimingSchedule(&gb->timing, &gb->memory.dmaEvent, 8); - if (gb->cpu->cycles + 8 < gb->cpu->nextEvent) { - gb->cpu->nextEvent = gb->cpu->cycles + 8; - } + mTimingSchedule(&gb->timing, &gb->memory.dmaEvent, 8 * (2 - gb->doubleSpeed)); gb->memory.dmaSource = base; gb->memory.dmaDest = 0; gb->memory.dmaRemaining = 0xA0;@@ -580,7 +577,7 @@ ++gb->memory.dmaSource;
++gb->memory.dmaDest; gb->memory.dmaRemaining = dmaRemaining - 1; if (gb->memory.dmaRemaining) { - mTimingSchedule(timing, &gb->memory.dmaEvent, 4 - cyclesLate); + mTimingSchedule(timing, &gb->memory.dmaEvent, 4 * (2 - gb->doubleSpeed) - cyclesLate); } }@@ -594,7 +591,7 @@ ++gb->memory.hdmaDest;
--gb->memory.hdmaRemaining; if (gb->memory.hdmaRemaining) { mTimingDeschedule(timing, &gb->memory.hdmaEvent); - mTimingSchedule(timing, &gb->memory.hdmaEvent, 2 - cyclesLate); + mTimingSchedule(timing, &gb->memory.hdmaEvent, 4 - cyclesLate); } else { gb->cpuBlocked = false; gb->memory.io[GB_REG_HDMA1] = gb->memory.hdmaSource >> 8;
@@ -175,8 +175,6 @@ gb->doubleSpeed = GBSerializedCpuFlagsGetDoubleSpeed(flags);
gb->cpu->halted = GBSerializedCpuFlagsGetHalted(flags); gb->cpuBlocked = GBSerializedCpuFlagsGetBlocked(flags); - gb->audio.timingFactor = gb->doubleSpeed + 1; - LOAD_32LE(gb->cpu->cycles, 0, &state->cpu.cycles); LOAD_32LE(gb->cpu->nextEvent, 0, &state->cpu.nextEvent); gb->timing.root = NULL;
@@ -77,7 +77,7 @@ GBUpdateIRQs(sio->p);
sio->pendingSB = 0xFF; } } else { - mTimingSchedule(timing, &sio->event, sio->period); + mTimingSchedule(timing, &sio->event, sio->period * (2 - sio->p->doubleSpeed)); } }@@ -93,7 +93,7 @@ sio->period = GBSIOCyclesPerTransfer[GBRegisterSCGetClockSpeed(sc)]; // TODO Shift Clock
if (GBRegisterSCIsEnable(sc)) { mTimingDeschedule(&sio->p->timing, &sio->event); if (GBRegisterSCIsShiftClock(sc)) { - mTimingSchedule(&sio->p->timing, &sio->event, sio->period); + mTimingSchedule(&sio->p->timing, &sio->event, sio->period * (2 - sio->p->doubleSpeed)); sio->remainingBits = 8; } }
@@ -128,7 +128,7 @@ break;
case TRANSFER_FINISHING: // Finish the transfer // We need to make sure the other GBs catch up so they don't get behind - node->nextEvent += node->d.p->period - 8; // Split the cycles to avoid waiting too long + node->nextEvent += node->d.p->period * (2 - node->d.p->p->doubleSpeed) - 8; // Split the cycles to avoid waiting too long #ifndef NDEBUG ATOMIC_ADD(node->p->d.transferId, 1); #endif@@ -208,7 +208,7 @@ static void _GBSIOLockstepNodeProcessEvents(struct mTiming* timing, void* user, uint32_t cyclesLate) {
struct GBSIOLockstepNode* node = user; mLockstepLock(&node->p->d); if (node->p->d.attached < 2) { - mTimingSchedule(timing, &node->event, (GBSIOCyclesPerTransfer[0] >> 1) - cyclesLate); + mTimingSchedule(timing, &node->event, (GBSIOCyclesPerTransfer[0] >> 1) * (2 - node->d.p->p->doubleSpeed) - cyclesLate); mLockstepUnlock(&node->p->d); return; }
@@ -20,17 +20,18 @@ GBUpdateIRQs(timer->p);
} static void _GBTimerDivIncrement(struct GBTimer* timer, uint32_t cyclesLate) { - while (timer->nextDiv >= GB_DMG_DIV_PERIOD) { - timer->nextDiv -= GB_DMG_DIV_PERIOD; + int tMultiplier = 2 - timer->p->doubleSpeed; + while (timer->nextDiv >= GB_DMG_DIV_PERIOD * tMultiplier) { + timer->nextDiv -= GB_DMG_DIV_PERIOD * tMultiplier; // Make sure to trigger when the correct bit is a falling edge if (timer->timaPeriod > 0 && (timer->internalDiv & (timer->timaPeriod - 1)) == timer->timaPeriod - 1) { ++timer->p->memory.io[GB_REG_TIMA]; if (!timer->p->memory.io[GB_REG_TIMA]) { - mTimingSchedule(&timer->p->timing, &timer->irq, 7 - ((timer->p->cpu->executionState - cyclesLate) & 3)); + mTimingSchedule(&timer->p->timing, &timer->irq, 7 * tMultiplier - ((timer->p->cpu->executionState * tMultiplier - cyclesLate) & (3 * tMultiplier))); } } - unsigned timingFactor = 0x3FF >> !timer->p->doubleSpeed; + unsigned timingFactor = 0x1FF; if ((timer->internalDiv & timingFactor) == timingFactor) { GBAudioUpdateFrame(&timer->p->audio, &timer->p->timing); }@@ -52,7 +53,7 @@ }
if (timaToGo < divsToGo) { divsToGo = timaToGo; } - timer->nextDiv = GB_DMG_DIV_PERIOD * divsToGo; + timer->nextDiv = GB_DMG_DIV_PERIOD * divsToGo * (2 - timer->p->doubleSpeed); mTimingSchedule(timing, &timer->event, timer->nextDiv - cyclesLate); }@@ -66,7 +67,7 @@ timer->irq.name = "GB Timer IRQ";
timer->irq.callback = _GBTimerIRQ; timer->event.priority = 0x21; - timer->nextDiv = GB_DMG_DIV_PERIOD; // TODO: GBC differences + timer->nextDiv = GB_DMG_DIV_PERIOD * 2; timer->timaPeriod = 1024 >> 4; }@@ -74,27 +75,27 @@ void GBTimerDivReset(struct GBTimer* timer) {
timer->nextDiv -= mTimingUntil(&timer->p->timing, &timer->event); mTimingDeschedule(&timer->p->timing, &timer->event); _GBTimerDivIncrement(timer, 0); - if (((timer->internalDiv << 1) | ((timer->nextDiv >> 3) & 1)) & timer->timaPeriod) { + int tMultiplier = 2 - timer->p->doubleSpeed; + if (((timer->internalDiv << 1) | ((timer->nextDiv >> (4 - timer->p->doubleSpeed)) & 1)) & timer->timaPeriod) { ++timer->p->memory.io[GB_REG_TIMA]; if (!timer->p->memory.io[GB_REG_TIMA]) { - mTimingSchedule(&timer->p->timing, &timer->irq, 7 - (timer->p->cpu->executionState & 3)); + mTimingSchedule(&timer->p->timing, &timer->irq, (7 - (timer->p->cpu->executionState & 3)) * tMultiplier); } } - unsigned timingFactor = 0x400 >> !timer->p->doubleSpeed; - if (timer->internalDiv & timingFactor) { + if (timer->internalDiv & 0x200) { GBAudioUpdateFrame(&timer->p->audio, &timer->p->timing); } timer->p->memory.io[GB_REG_DIV] = 0; timer->internalDiv = 0; - timer->nextDiv = GB_DMG_DIV_PERIOD; - mTimingSchedule(&timer->p->timing, &timer->event, timer->nextDiv - ((timer->p->cpu->executionState + 1) & 3)); + timer->nextDiv = GB_DMG_DIV_PERIOD * (2 - timer->p->doubleSpeed); + mTimingSchedule(&timer->p->timing, &timer->event, timer->nextDiv - ((timer->p->cpu->executionState + 1) & 3) * tMultiplier); } uint8_t GBTimerUpdateTAC(struct GBTimer* timer, GBRegisterTAC tac) { if (GBRegisterTACIsRun(tac)) { timer->nextDiv -= mTimingUntil(&timer->p->timing, &timer->event); mTimingDeschedule(&timer->p->timing, &timer->event); - _GBTimerDivIncrement(timer, (timer->p->cpu->executionState + 2) & 3); + _GBTimerDivIncrement(timer, ((timer->p->cpu->executionState + 2) & 3) * (2 - timer->p->doubleSpeed)); switch (GBRegisterTACGetClock(tac)) { case 0:@@ -111,7 +112,7 @@ timer->timaPeriod = 256 >> 4;
break; } - timer->nextDiv += GB_DMG_DIV_PERIOD; + timer->nextDiv += GB_DMG_DIV_PERIOD * (2 - timer->p->doubleSpeed); mTimingSchedule(&timer->p->timing, &timer->event, timer->nextDiv); } else { timer->timaPeriod = 0;
@@ -253,7 +253,7 @@ video->p->memory.io[GB_REG_IF] |= (1 << GB_IRQ_VBLANK);
GBUpdateIRQs(video->p); video->p->memory.io[GB_REG_STAT] = video->stat; mTimingDeschedule(&video->p->timing, &video->modeEvent); - mTimingSchedule(&video->p->timing, &video->modeEvent, next); + mTimingSchedule(&video->p->timing, &video->modeEvent, next << 1); } void _endMode0(struct mTiming* timing, void* context, uint32_t cyclesLate) {@@ -297,7 +297,7 @@ }
GBUpdateIRQs(video->p); video->p->memory.io[GB_REG_STAT] = video->stat; - mTimingSchedule(timing, &video->modeEvent, (next << video->p->doubleSpeed) - cyclesLate); + mTimingSchedule(timing, &video->modeEvent, (next << 1) - cyclesLate); } void _endMode1(struct mTiming* timing, void* context, uint32_t cyclesLate) {@@ -334,14 +334,14 @@ video->p->memory.io[GB_REG_IF] |= (1 << GB_IRQ_LCDSTAT);
GBUpdateIRQs(video->p); } video->p->memory.io[GB_REG_STAT] = video->stat; - mTimingSchedule(timing, &video->modeEvent, (next << video->p->doubleSpeed) - cyclesLate); + mTimingSchedule(timing, &video->modeEvent, (next << 1) - cyclesLate); } void _endMode2(struct mTiming* timing, void* context, uint32_t cyclesLate) { struct GBVideo* video = context; _cleanOAM(video, video->ly); video->x = -(video->p->memory.io[GB_REG_SCX] & 7); - video->dotClock = mTimingCurrentTime(timing) - cyclesLate + 5 - (video->x << video->p->doubleSpeed); + video->dotClock = mTimingCurrentTime(timing) - cyclesLate + 10 - (video->x << 1); int32_t next = GB_VIDEO_MODE_3_LENGTH_BASE + video->objMax * 6 - video->x; video->mode = 3; video->modeEvent.callback = _endMode3;@@ -352,7 +352,7 @@ video->p->memory.io[GB_REG_IF] |= (1 << GB_IRQ_LCDSTAT);
GBUpdateIRQs(video->p); } video->p->memory.io[GB_REG_STAT] = video->stat; - mTimingSchedule(timing, &video->modeEvent, (next << video->p->doubleSpeed) - cyclesLate); + mTimingSchedule(timing, &video->modeEvent, (next << 1) - cyclesLate); } void _endMode3(struct mTiming* timing, void* context, uint32_t cyclesLate) {@@ -375,18 +375,18 @@ }
video->p->memory.io[GB_REG_STAT] = video->stat; // TODO: Cache SCX & 7 in case it changes int32_t next = GB_VIDEO_MODE_0_LENGTH_BASE - video->objMax * 6 - (video->p->memory.io[GB_REG_SCX] & 7); - mTimingSchedule(timing, &video->modeEvent, (next << video->p->doubleSpeed) - cyclesLate); + mTimingSchedule(timing, &video->modeEvent, (next << 1) - cyclesLate); } void _updateFrameCount(struct mTiming* timing, void* context, uint32_t cyclesLate) { UNUSED(cyclesLate); struct GBVideo* video = context; if (video->p->cpu->executionState != SM83_CORE_FETCH) { - mTimingSchedule(timing, &video->frameEvent, 4 - ((video->p->cpu->executionState + 1) & 3)); + mTimingSchedule(timing, &video->frameEvent, (4 - ((video->p->cpu->executionState + 1) & 3)) * (2 - video->p->doubleSpeed)); return; } if (!GBRegisterLCDCIsEnable(video->p->memory.io[GB_REG_LCDC])) { - mTimingSchedule(timing, &video->frameEvent, GB_VIDEO_TOTAL_LENGTH); + mTimingSchedule(timing, &video->frameEvent, GB_VIDEO_TOTAL_LENGTH << 1); } --video->frameskipCounter;@@ -424,7 +424,7 @@ if (video->mode != 3) {
return; } int oldX = video->x; - video->x = (int32_t) (mTimingCurrentTime(&video->p->timing) - cyclesLate - video->dotClock) >> video->p->doubleSpeed; + video->x = ((int32_t) (mTimingCurrentTime(&video->p->timing) - cyclesLate - video->dotClock)) >> 1; if (video->x > GB_VIDEO_HORIZONTAL_PIXELS) { video->x = GB_VIDEO_HORIZONTAL_PIXELS; } else if (video->x < 0) {@@ -444,7 +444,7 @@ video->mode = 2;
video->modeEvent.callback = _endMode2; int32_t next = GB_VIDEO_MODE_2_LENGTH - 5; // TODO: Why is this fudge factor needed? Might be related to T-cycles for load/store differing mTimingDeschedule(&video->p->timing, &video->modeEvent); - mTimingSchedule(&video->p->timing, &video->modeEvent, next << video->p->doubleSpeed); + mTimingSchedule(&video->p->timing, &video->modeEvent, next << 1); video->ly = 0; video->p->memory.io[GB_REG_LY] = 0;@@ -471,7 +471,7 @@ video->renderer->writePalette(video->renderer, 0, video->dmgPalette[0]);
mTimingDeschedule(&video->p->timing, &video->modeEvent); mTimingDeschedule(&video->p->timing, &video->frameEvent); - mTimingSchedule(&video->p->timing, &video->frameEvent, GB_VIDEO_TOTAL_LENGTH); + mTimingSchedule(&video->p->timing, &video->frameEvent, GB_VIDEO_TOTAL_LENGTH << 1); } video->p->memory.io[GB_REG_STAT] = video->stat; }
@@ -61,6 +61,7 @@ cpu->pc = 0;
cpu->instruction = 0; + cpu->tMultiplier = 2; cpu->cycles = 0; cpu->nextEvent = 0; cpu->executionState = SM83_CORE_FETCH;@@ -102,7 +103,7 @@ cpu->instruction = _SM83InstructionIRQDelay;
} static void _SM83Step(struct SM83Core* cpu) { - ++cpu->cycles; + cpu->cycles += cpu->tMultiplier; enum SM83ExecutionState state = cpu->executionState; cpu->executionState = SM83_CORE_IDLE_0; switch (state) {@@ -147,23 +148,31 @@ break;
} } -void SM83Tick(struct SM83Core* cpu) { - while (cpu->cycles >= cpu->nextEvent) { - cpu->irqh.processEvents(cpu); - } +static inline bool _SM83TickInternal(struct SM83Core* cpu) { + bool running = true; _SM83Step(cpu); - if (cpu->cycles + 2 >= cpu->nextEvent) { + int t = cpu->tMultiplier; + if (cpu->cycles + t * 2 >= cpu->nextEvent) { int32_t diff = cpu->nextEvent - cpu->cycles; cpu->cycles = cpu->nextEvent; - cpu->executionState += diff; + cpu->executionState += diff >> (t - 1); // NB: This assumes tMultiplier is either 1 or 2 cpu->irqh.processEvents(cpu); - cpu->cycles += SM83_CORE_EXECUTE - cpu->executionState; + cpu->cycles += (SM83_CORE_EXECUTE - cpu->executionState) * t; + running = false; } else { - cpu->cycles += 2; + cpu->cycles += t * 2; } cpu->executionState = SM83_CORE_FETCH; cpu->instruction(cpu); - ++cpu->cycles; + cpu->cycles += t; + return running; +} + +void SM83Tick(struct SM83Core* cpu) { + while (cpu->cycles >= cpu->nextEvent) { + cpu->irqh.processEvents(cpu); + } + _SM83TickInternal(cpu); } void SM83Run(struct SM83Core* cpu) {@@ -173,19 +182,6 @@ if (cpu->cycles >= cpu->nextEvent) {
cpu->irqh.processEvents(cpu); break; } - _SM83Step(cpu); - if (cpu->cycles + 2 >= cpu->nextEvent) { - int32_t diff = cpu->nextEvent - cpu->cycles; - cpu->cycles = cpu->nextEvent; - cpu->executionState += diff; - cpu->irqh.processEvents(cpu); - cpu->cycles += SM83_CORE_EXECUTE - cpu->executionState; - running = false; - } else { - cpu->cycles += 2; - } - cpu->executionState = SM83_CORE_FETCH; - cpu->instruction(cpu); - ++cpu->cycles; + running = _SM83TickInternal(cpu) && running; } }