GBA Memory: Revert lazy prefetch. This reverts commits d4ecdfc2ac29913867c9e2b91c8a946ffa312f50 and aed62605cf22974807b93fb4f219c0a74a570cc6.
Jeffrey Pfau jeffrey@endrift.com
Fri, 03 Jul 2015 00:22:04 -0700
3 files changed, 38 insertions(+), 44 deletions(-)
M
src/gba/gba.c
→
src/gba/gba.c
@@ -176,9 +176,8 @@ }
} static void GBAProcessEvents(struct ARMCore* cpu) { - struct GBA* gba = (struct GBA*) cpu->master; - GBAMemoryInvalidatePrefetch(gba); - while (cpu->cycles >= cpu->nextEvent) { + do { + struct GBA* gba = (struct GBA*) cpu->master; int32_t cycles = cpu->nextEvent; int32_t nextEvent = INT_MAX; int32_t testEvent;@@ -224,7 +223,7 @@
if (cpu->halted) { cpu->cycles = cpu->nextEvent; } - } + } while (cpu->cycles >= cpu->nextEvent); } static int32_t GBATimersProcessEvents(struct GBA* gba, int32_t cycles) {@@ -451,10 +450,13 @@ }
void GBATimerUpdateRegister(struct GBA* gba, int timer) { struct GBATimer* currentTimer = &gba->timers[timer]; - GBAMemoryInvalidatePrefetch(gba); if (currentTimer->enable && !currentTimer->countUp) { + int32_t prefetchSkew = 0; + if (gba->memory.lastPrefetchedPc - gba->memory.lastPrefetchedLoads * WORD_SIZE_THUMB >= (uint32_t) gba->cpu->gprs[ARM_PC]) { + prefetchSkew = (gba->memory.lastPrefetchedPc - gba->cpu->gprs[ARM_PC]) * (gba->cpu->memory.activeSeqCycles16 + 1) / WORD_SIZE_THUMB; + } // Reading this takes two cycles (1N+1I), so let's remove them preemptively - gba->memory.io[(REG_TM0CNT_LO + (timer << 2)) >> 1] = currentTimer->oldReload + ((gba->cpu->cycles - currentTimer->lastEvent - 2) >> currentTimer->prescaleBits); + gba->memory.io[(REG_TM0CNT_LO + (timer << 2)) >> 1] = currentTimer->oldReload + ((gba->cpu->cycles - currentTimer->lastEvent - 2 + prefetchSkew) >> currentTimer->prescaleBits); } }
M
src/gba/memory.c
→
src/gba/memory.c
@@ -114,8 +114,6 @@ gba->memory.nextDMA = INT_MAX;
gba->memory.eventDiff = 0; gba->memory.prefetch = false; - gba->memory.prefetchCycles = 0; - gba->memory.prefetchStalls = 0; gba->memory.lastPrefetchedPc = 0; if (!gba->memory.wram || !gba->memory.iwram) {@@ -237,7 +235,6 @@ }
} gba->lastJump = address; - GBAMemoryInvalidatePrefetch(gba); memory->lastPrefetchedPc = 0; memory->lastPrefetchedLoads = 0; if (newRegion == memory->activeRegion && (newRegion < REGION_CART0 || (address & (SIZE_CART0 - 1)) < memory->romSize)) {@@ -1537,40 +1534,41 @@ // The wait is the stall
return wait; } - // Offload the prefetch timing until the next event, which will happen too early. - memory->prefetchCycles += wait; - ++memory->prefetchStalls; - return wait; -} + int32_t s = cpu->memory.activeSeqCycles16 + 1; + int32_t n2s = cpu->memory.activeNonseqCycles16 - cpu->memory.activeSeqCycles16 + 1; -void GBAMemoryInvalidatePrefetch(struct GBA* gba) { - int32_t waited = gba->memory.prefetchCycles; - int32_t nWaits = gba->memory.prefetchStalls; - gba->memory.prefetchCycles = 0; - gba->memory.prefetchStalls = 0; + // Figure out how many sequential loads we can jam in + int32_t stall = s; + int32_t loads = 1; + int32_t previousLoads = 0; - if (!waited) { - return; + // Don't prefetch too much if we're overlapping with a previous prefetch + uint32_t dist = (memory->lastPrefetchedPc - cpu->gprs[ARM_PC]) >> 1; + if (dist < memory->lastPrefetchedLoads) { + previousLoads = dist; + } + while (stall < wait) { + stall += s; + ++loads; + } + if (loads + previousLoads > 8) { + int diff = (loads + previousLoads) - 8; + loads -= diff; + stall -= s * diff; + } else if (stall > wait && loads == 1) { + // We might need to stall a bit extra if we haven't finished the first S cycle + wait = stall; } + // This instruction used to have an N, convert it to an S. + wait -= n2s; - int32_t s = gba->cpu->memory.activeSeqCycles16 + 1; - int32_t n2s = gba->cpu->memory.activeNonseqCycles16 - gba->cpu->memory.activeSeqCycles16; + // TODO: Invalidate prefetch on branch + memory->lastPrefetchedLoads = loads; + memory->lastPrefetchedPc = cpu->gprs[ARM_PC] + WORD_SIZE_THUMB * loads; - // Figure out how many sequential loads we can jam in - int32_t loads = (waited - 1) / s; - int32_t diff = waited - loads * s; - - // The next |loads|S waitstates disappear entirely, so long as they're all in a row. - // Each instruction that waited has an N cycle that was converted into an S, so those - // disappear as well. 
- int32_t toRemove = (s - 1) * loads + n2s * nWaits + diff; - if (toRemove > gba->cpu->cycles) { - // We have to delay invalidating... - gba->memory.prefetchCycles = waited; - gba->memory.prefetchStalls = nWaits; - return; - } - gba->cpu->cycles -= toRemove; + // The next |loads|S waitstates disappear entirely, so long as they're all in a row + cpu->cycles -= (s - 1) * loads; + return wait; } void GBAMemorySerialize(const struct GBAMemory* memory, struct GBASerializedState* state) {
M
src/gba/memory.h
→
src/gba/memory.h
@@ -130,13 +130,9 @@ char waitstatesPrefetchSeq16[16];
char waitstatesPrefetchNonseq32[16]; char waitstatesPrefetchNonseq16[16]; int activeRegion; - bool prefetch; uint32_t lastPrefetchedPc; uint32_t lastPrefetchedLoads; - int32_t prefetchCycles; - int prefetchStalls; - uint32_t biosPrefetch; struct GBADMA dma[4];@@ -179,8 +175,6 @@ void GBAMemoryRunHblankDMAs(struct GBA* gba, int32_t cycles);
void GBAMemoryRunVblankDMAs(struct GBA* gba, int32_t cycles); void GBAMemoryUpdateDMAs(struct GBA* gba, int32_t cycles); int32_t GBAMemoryRunDMAs(struct GBA* gba, int32_t cycles); - -void GBAMemoryInvalidatePrefetch(struct GBA* gba); struct GBASerializedState; void GBAMemorySerialize(const struct GBAMemory* memory, struct GBASerializedState* state);