GBA Memory: More sensible approach to prefetch. It reports fewer successes but on average is more accurate.
Jeffrey Pfau jeffrey@endrift.com
Fri, 26 Jun 2015 23:53:11 -0700
3 files changed, 36 insertions(+), 12 deletions(-)
M src/gba/gba.c → src/gba/gba.c
@@ -451,8 +451,12 @@
void GBATimerUpdateRegister(struct GBA* gba, int timer) { struct GBATimer* currentTimer = &gba->timers[timer]; if (currentTimer->enable && !currentTimer->countUp) { + int32_t prefetchSkew = 0; + if ((gba->memory.lastPrefetchedPc - gba->cpu->gprs[ARM_PC]) < gba->memory.lastPrefetchedLoads * WORD_SIZE_THUMB) { + prefetchSkew = (gba->memory.lastPrefetchedPc - gba->cpu->gprs[ARM_PC]) / gba->cpu->memory.activeSeqCycles16; + } // Reading this takes two cycles (1N+1I), so let's remove them preemptively - gba->memory.io[(REG_TM0CNT_LO + (timer << 2)) >> 1] = currentTimer->oldReload + ((gba->cpu->cycles - currentTimer->lastEvent - 2) >> currentTimer->prescaleBits); + gba->memory.io[(REG_TM0CNT_LO + (timer << 2)) >> 1] = currentTimer->oldReload + ((gba->cpu->cycles - currentTimer->lastEvent - 2 + prefetchSkew) >> currentTimer->prescaleBits); } }
M src/gba/memory.c → src/gba/memory.c
@@ -79,7 +79,6 @@ cpu->memory.activeSeqCycles16 = 0;
cpu->memory.activeNonseqCycles32 = 0; cpu->memory.activeNonseqCycles16 = 0; gba->memory.biosPrefetch = 0; - gba->memory.prefetch = false; } void GBAMemoryDeinit(struct GBA* gba) {@@ -113,6 +112,9 @@ gba->memory.dma[3].count = 0x10000;
gba->memory.activeDMA = -1; gba->memory.nextDMA = INT_MAX; gba->memory.eventDiff = 0; + + gba->memory.prefetch = false; + gba->memory.lastPrefetchedPc = 0; if (!gba->memory.wram || !gba->memory.iwram) { GBAMemoryDeinit(gba);@@ -1526,23 +1528,39 @@ struct GBA* gba = (struct GBA*) cpu->master;
struct GBAMemory* memory = &gba->memory; if (!memory->prefetch || memory->activeRegion < REGION_CART0) { + // The wait is the stall return wait; } - int32_t stall = memory->waitstatesNonseq16[memory->activeRegion] - memory->waitstatesSeq16[memory->activeRegion] + 1; + int32_t s = cpu->memory.activeSeqCycles16 + 1; + int32_t n2s = cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16 + 1; - // Base number of cycles for this insn is N - int32_t base = memory->waitstatesSeq16[memory->activeRegion] + 1; - if (cpu->executionMode == MODE_ARM) { - base <<= 1; + // Figure out how many sequential loads we can jam in + int32_t stall = s; + int32_t loads = 1; + int32_t previousLoads = 0; + + // Don't prefetch too much if we're overlapping with a previous prefetch + if ((memory->lastPrefetchedPc - cpu->gprs[ARM_PC]) < memory->lastPrefetchedLoads * WORD_SIZE_THUMB) { + previousLoads = (memory->lastPrefetchedPc - cpu->gprs[ARM_PC]) >> 1; } - if (base <= wait) { - --base; - } else { - base = wait; + while (stall < wait && loads + previousLoads < 8) { + stall += s; + ++loads; + } + if (stall > wait && loads == 1) { + // We might need to stall a bit extra if we haven't finished the first S cycle + wait = stall; } + // This instruction used to have an N, convert it to an S. + wait -= n2s; - cpu->cycles -= stall + base - 1; + // TODO: Invalidate prefetch on branch + memory->lastPrefetchedLoads = loads; + memory->lastPrefetchedPc = cpu->gprs[ARM_PC] + WORD_SIZE_THUMB * loads; + + // The next |loads|S waitstates disappear entirely, so long as they're all in a row + cpu->cycles -= (s - 1) * loads; return wait; }
M src/gba/memory.h → src/gba/memory.h
@@ -132,6 +132,8 @@ char waitstatesPrefetchNonseq32[16];
char waitstatesPrefetchNonseq16[16]; int activeRegion; bool prefetch; + uint32_t lastPrefetchedPc; + uint32_t lastPrefetchedLoads; uint32_t biosPrefetch; struct GBADMA dma[4];