Implement CpuSet/CpuFastSet in assembly (fixes #31 and #76)
Jeffrey Pfau jeffrey@endrift.com
Thu, 03 Jul 2014 03:51:53 -0700
3 files changed,
163 insertions(+),
128 deletions(-)
M
src/gba/gba-bios.c
→
src/gba/gba-bios.c
@@ -17,92 +17,6 @@ (void)(registers);
GBALog(gba, GBA_LOG_STUB, "RegisterRamReset unimplemented"); } -static void _CpuSet(struct GBA* gba) { - struct ARMCore* cpu = gba->cpu; - uint32_t source = cpu->gprs[0]; - uint32_t dest = cpu->gprs[1]; - uint32_t mode = cpu->gprs[2]; - int count = mode & 0x000FFFFF; - int fill = mode & 0x01000000; - int wordsize = (mode & 0x04000000) ? 4 : 2; - int i; - if (fill) { - if (wordsize == 4) { - source &= 0xFFFFFFFC; - dest &= 0xFFFFFFFC; - int32_t word = cpu->memory.load32(cpu, source, &cpu->cycles); - for (i = 0; i < count; ++i) { - cpu->memory.store32(cpu, dest + (i << 2), word, &cpu->cycles); - cpu->irqh.processEvents(cpu); - } - } else { - source &= 0xFFFFFFFE; - dest &= 0xFFFFFFFE; - uint16_t word = cpu->memory.load16(cpu, source, &cpu->cycles); - for (i = 0; i < count; ++i) { - cpu->memory.store16(cpu, dest + (i << 1), word, &cpu->cycles); - cpu->irqh.processEvents(cpu); - } - } - } else { - if (wordsize == 4) { - source &= 0xFFFFFFFC; - dest &= 0xFFFFFFFC; - for (i = 0; i < count; ++i) { - int32_t word = cpu->memory.load32(cpu, source + (i << 2), &cpu->cycles); - cpu->memory.store32(cpu, dest + (i << 2), word, &cpu->cycles); - cpu->irqh.processEvents(cpu); - } - } else { - source &= 0xFFFFFFFE; - dest &= 0xFFFFFFFE; - for (i = 0; i < count; ++i) { - uint16_t word = cpu->memory.load16(cpu, source + (i << 1), &cpu->cycles); - cpu->memory.store16(cpu, dest + (i << 1), word, &cpu->cycles); - cpu->irqh.processEvents(cpu); - } - } - } -} - -static void _FastCpuSet(struct GBA* gba) { - struct ARMCore* cpu = gba->cpu; - uint32_t source = cpu->gprs[0] & 0xFFFFFFFC; - uint32_t dest = cpu->gprs[1] & 0xFFFFFFFC; - uint32_t mode = cpu->gprs[2]; - int count = mode & 0x000FFFFF; - int storeCycles = cpu->memory.waitMultiple(cpu, dest, 4); - count = ((count + 7) >> 3) << 3; - int i; - if (mode & 0x01000000) { - int32_t word = cpu->memory.load32(cpu, source, &cpu->cycles); - for (i = 0; i < count; i += 4) { - cpu->memory.store32(cpu, dest + ((i + 0) << 2), word, 0); - cpu->memory.store32(cpu, dest + ((i + 1) << 2), word, 0); - cpu->memory.store32(cpu, dest + ((i + 2) << 2), word, 0); - cpu->memory.store32(cpu, dest + ((i + 3) << 2), word, 0); - cpu->cycles += storeCycles; - cpu->irqh.processEvents(cpu); - } - } else { - int loadCycles = cpu->memory.waitMultiple(cpu, source, 4); - for (i = 0; i < count; i += 4) { - int32_t word0 = cpu->memory.load32(cpu, source + ((i + 0) << 2), 0); - int32_t word1 = cpu->memory.load32(cpu, source + ((i + 1) << 2), 0); - int32_t word2 = cpu->memory.load32(cpu, source + ((i + 2) << 2), 0); - int32_t word3 = cpu->memory.load32(cpu, source + ((i + 3) << 2), 0); - cpu->cycles += loadCycles; - cpu->irqh.processEvents(cpu); - cpu->memory.store32(cpu, dest + ((i + 0) << 2), word0, 0); - cpu->memory.store32(cpu, dest + ((i + 1) << 2), word1, 0); - cpu->memory.store32(cpu, dest + ((i + 2) << 2), word2, 0); - cpu->memory.store32(cpu, dest + ((i + 3) << 2), word3, 0); - cpu->cycles += storeCycles; - cpu->irqh.processEvents(cpu); - } - } -} - static void _BgAffineSet(struct GBA* gba) { struct ARMCore* cpu = gba->cpu; int i = cpu->gprs[2];@@ -237,10 +151,8 @@ case 0xA:
cpu->gprs[0] = atan2f(cpu->gprs[1] / 16384.f, cpu->gprs[0] / 16384.f) / (2 * M_PI) * 0x10000; break; case 0xB: - _CpuSet(gba); - break; case 0xC: - _FastCpuSet(gba); + ARMRaiseSWI(cpu); break; case 0xD: cpu->gprs[0] = GBAChecksum(gba->memory.bios, SIZE_BIOS);
M
src/gba/hle-bios.c
→
src/gba/hle-bios.c
@@ -2,23 +2,47 @@ #include "hle-bios.h"
#include "gba-memory.h" -const size_t hleBiosLength = 196; +const size_t hleBiosLength = 488; const uint8_t hleBios[SIZE_BIOS] = { 0x06, 0x00, 0x00, 0xea, 0xfe, 0xff, 0xff, 0xea, 0x05, 0x00, 0x00, 0xea, 0xfe, 0xff, 0xff, 0xea, 0xfe, 0xff, 0xff, 0xea, 0x00, 0x00, 0xa0, 0xe1, - 0x0e, 0x00, 0x00, 0xea, 0xfe, 0xff, 0xff, 0xea, 0x02, 0xf3, 0xa0, 0xe3, + 0x1a, 0x00, 0x00, 0xea, 0xfe, 0xff, 0xff, 0xea, 0x02, 0xf3, 0xa0, 0xe3, 0x00, 0x00, 0x5d, 0xe3, 0x01, 0xd3, 0xa0, 0x03, 0x20, 0xd0, 0x4d, 0x02, - 0x04, 0x40, 0x2d, 0xe9, 0x02, 0x20, 0x5e, 0xe5, 0x04, 0x00, 0x52, 0xe3, - 0x0b, 0x00, 0x00, 0x0b, 0x05, 0x00, 0x52, 0xe3, 0x01, 0x00, 0xa0, 0x03, - 0x01, 0x10, 0xa0, 0x03, 0x07, 0x00, 0x00, 0x0b, 0x04, 0x40, 0xbd, 0xe8, - 0x0e, 0xf0, 0xb0, 0xe1, 0x0f, 0x50, 0x2d, 0xe9, 0x01, 0x03, 0xa0, 0xe3, + 0x30, 0x40, 0x2d, 0xe9, 0x02, 0x40, 0x5e, 0xe5, 0x54, 0x50, 0xa0, 0xe3, + 0x04, 0x41, 0x95, 0xe7, 0x00, 0x00, 0x54, 0xe3, 0x0f, 0xe0, 0xa0, 0xe1, + 0x14, 0xff, 0x2f, 0x11, 0x30, 0x40, 0xbd, 0xe8, 0x0e, 0xf0, 0xb0, 0xe1, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xa8, 0x00, 0x00, 0x00, 0xa0, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf4, 0x00, 0x00, 0x00, + 0x7c, 0x01, 0x00, 0x00, 0x0f, 0x50, 0x2d, 0xe9, 0x01, 0x03, 0xa0, 0xe3, 0x00, 0xe0, 0x8f, 0xe2, 0x04, 0xf0, 0x10, 0xe5, 0x0f, 0x50, 0xbd, 0xe8, - 0x04, 0xf0, 0x5e, 0xe2, 0x10, 0x40, 0x2d, 0xe9, 0x00, 0x30, 0x0f, 0xe1, - 0x80, 0x30, 0xc3, 0xe3, 0x03, 0xf0, 0x29, 0xe1, 0x01, 0x43, 0xa0, 0xe3, - 0x00, 0x00, 0x50, 0xe3, 0x00, 0x00, 0xa0, 0xe3, 0x01, 0x20, 0xa0, 0xe3, - 0x00, 0x00, 0x00, 0x0a, 0x01, 0x03, 0xc4, 0xe5, 0x04, 0x22, 0xc4, 0xe5, - 0xb8, 0x30, 0x54, 0xe1, 0x01, 0x30, 0x13, 0xe0, 0x01, 0x30, 0x23, 0x10, - 0xb8, 0x30, 0x44, 0x11, 0x04, 0x02, 0xc4, 0xe5, 0xf7, 0xff, 0xff, 0x0a, - 0x00, 0x00, 0x0f, 0xe1, 0x80, 0x00, 0x80, 0xe3, 0x00, 0xf0, 0x29, 0xe1, - 0x10, 0x80, 0xbd, 0xe8 + 0x04, 0xf0, 0x5e, 0xe2, 0x01, 0x00, 0xa0, 0xe3, 0x01, 0x10, 0xa0, 0xe3, + 0x00, 0x40, 0x2d, 0xe9, 0x00, 0x50, 0x4f, 0xe1, 0x1f, 0xf0, 0x29, 0xe3, + 0x01, 0x43, 0xa0, 0xe3, 0x00, 0x00, 0x50, 0xe3, 0x00, 0x00, 0xa0, 0xe3, + 0x01, 0x20, 0xa0, 0xe3, 0x00, 0x00, 0x00, 0x0a, 0x01, 0x03, 0xc4, 0xe5, + 0x08, 0x02, 0xc4, 0xe5, 0xb8, 0x30, 0x54, 0xe1, 0x01, 0x30, 0x13, 0xe0, + 0x01, 0x30, 0x23, 0x10, 0xb8, 0x30, 0x44, 0x11, 0x08, 0x22, 0xc4, 0xe5, + 0xf7, 0xff, 0xff, 0x0a, 0x93, 0xf0, 0x29, 0xe3, 0x05, 0xf0, 0x69, 0xe1, + 0x00, 0x80, 0xbd, 0xe8, 0x9f, 0xf0, 0x29, 0xe3, 0x02, 0x36, 0xa0, 0xe1, + 0x23, 0x36, 0xa0, 0xe1, 0x01, 0x04, 0x12, 0xe3, 0x0d, 0x00, 0x00, 0x0a, + 0x01, 0x03, 0x12, 0xe3, 0x04, 0x00, 0x00, 0x0a, 0x04, 0x00, 0xb0, 0xe8, + 0x04, 0x00, 0xa1, 0xe8, 0x01, 0x30, 0x53, 0xe2, 0xfc, 0xff, 0xff, 0x1a, + 0x13, 0x00, 0x00, 0xea, 0x01, 0x00, 0xc0, 0xe3, 0x01, 0x10, 0xc1, 0xe3, + 0xb0, 0x20, 0xd0, 0xe1, 0xb2, 0x20, 0xc1, 0xe0, 0x01, 0x30, 0x53, 0xe2, + 0xfc, 0xff, 0xff, 0x1a, 0x0c, 0x00, 0x00, 0xea, 0x01, 0x03, 0x12, 0xe3, + 0x04, 0x00, 0x00, 0x0a, 0x04, 0x00, 0xb0, 0xe8, 0x04, 0x00, 0xa1, 0xe8, + 0x01, 0x30, 0x53, 0xe2, 0xfb, 0xff, 0xff, 0x1a, 0x05, 0x00, 0x00, 0xea, + 0x01, 0x00, 0xc0, 0xe3, 0x01, 0x10, 0xc1, 0xe3, 0xb2, 0x20, 0xd0, 0xe0, + 0xb2, 0x20, 0xc1, 0xe0, 0x01, 0x30, 0x53, 0xe2, 0xfb, 0xff, 0xff, 0x1a, + 0x93, 0xf0, 0x29, 0xe3, 0x1e, 0xff, 0x2f, 0xe1, 0x9f, 0xf0, 0x29, 0xe3, + 0xf0, 0x07, 0x2d, 0xe9, 0x02, 0x36, 0xa0, 0xe1, 0x23, 0x36, 0xa0, 0xe1, + 0x01, 0x04, 0x12, 0xe3, 0x0c, 0x00, 0x00, 0x0a, 0x10, 0x00, 0xb0, 0xe8, + 0x03, 0x20, 0xa0, 0xe1, 0x04, 0x50, 0xa0, 0xe1, 0x04, 0x30, 0xa0, 0xe1, + 0x04, 0x60, 0xa0, 0xe1, 0x04, 0x70, 0xa0, 0xe1, 0x04, 0x80, 0xa0, 0xe1, + 0x04, 0x90, 0xa0, 0xe1, 0x04, 0xa0, 0xa0, 0xe1, 0xf8, 0x07, 0xa1, 0xe8, + 0x08, 0x20, 0x52, 0xe2, 0xfc, 0xff, 0xff, 0xca, 0x04, 0x00, 0x00, 0xea, + 0x03, 0x20, 0xa0, 0xe1, 0xf8, 0x07, 0xb0, 0xe8, 0xf8, 0x07, 0xa1, 0xe8, + 0x08, 0x20, 0x52, 0xe2, 0xfb, 0xff, 0xff, 0xca, 0xf0, 0x07, 0xbd, 0xe8, + 0x93, 0xf0, 0x29, 0xe3, 0x1e, 0xff, 0x2f, 0xe1 };
M
src/gba/hle-bios.s
→
src/gba/hle-bios.s
@@ -18,17 +18,32 @@ swiBase:
cmp sp, #0 moveq sp, #0x04000000 subeq sp, #0x20 -stmfd sp!, {r2, lr} -ldrb r2, [lr, #-2] -cmp r2, #4 -bleq IntrWait -cmp r2, #5 -moveq r0, #1 -moveq r1, #1 -bleq IntrWait -ldmfd sp!, {r2, lr} +stmfd sp!, {r4-r5, lr} +ldrb r4, [lr, #-2] +mov r5, #swiTable +ldr r4, [r5, r4, lsl #2] +cmp r4, #0 +mov lr, pc +bxne r4 +ldmfd sp!, {r4-r5, lr} movs pc, lr +swiTable: +.word SoftReset +.word RegisterRamReset +.word Halt +.word Stop +.word IntrWait +.word VBlankIntrWait +.word Div +.word DivArm +.word Sqrt +.word ArcTan +.word ArcTan2 +.word CpuSet +.word CpuFastSet +# ... The rest of this table isn't needed if the rest aren't implemented + irqBase: stmfd sp!, {r0-r3, r12, lr} mov r0, #0x04000000@@ -37,33 +52,117 @@ ldr pc, [r0, #-4]
ldmfd sp!, {r0-r3, r12, lr} subs pc, lr, #4 +VBlankIntrWait: +mov r0, #1 +mov r1, #1 IntrWait: -stmfd sp!, {r4, lr} -# Save inputs -mrs r3, cpsr -bic r3, #0x80 -msr cpsr, r3 +stmfd sp!, {lr} +mrs r5, spsr +msr cpsr, #0x1F # Pull current interrupts enabled and add the ones we need mov r4, #0x04000000 # See if we want to return immediately cmp r0, #0 mov r0, #0 mov r2, #1 -beq .L1 +beq 1f # Halt -.L0: +0: strb r0, [r4, #0x301] -.L1: +1: # Check which interrupts were acknowledged -strb r2, [r4, #0x204] +strb r0, [r4, #0x208] ldrh r3, [r4, #-8] ands r3, r1 eorne r3, r1 strneh r3, [r4, #-8] -strb r0, [r4, #0x204] -beq .L0 -#Restore state -mrs r0, cpsr -orr r0, #0x80 -msr cpsr, r0 -ldmfd sp!, {r4, pc} +strb r2, [r4, #0x208] +beq 0b +msr cpsr, #0x93 +msr spsr, r5 +ldmfd sp!, {pc} + +CpuSet: +msr cpsr, #0x9F +mov r3, r2, lsl #12 +mov r3, r3, lsr #12 +tst r2, #0x01000000 +beq 0f +# Fill +tst r2, #0x04000000 +beq 1f +# Word +ldmia r0!, {r2} +2: +stmia r1!, {r2} +subs r3, #1 +bne 2b +b 3f +# Halfword +1: +bic r0, #1 +bic r1, #1 +ldrh r2, [r0] +2: +strh r2, [r1], #2 +subs r3, #1 +bne 2b +b 3f +# Copy +0: +tst r2, #0x04000000 +beq 1f +# Word +2: +ldmia r0!, {r2} +stmia r1!, {r2} +subs r3, #1 +bne 2b +b 3f +# Halfword +1: +bic r0, #1 +bic r1, #1 +2: +ldrh r2, [r0], #2 +strh r2, [r1], #2 +subs r3, #1 +bne 2b +3: +msr cpsr, #0x93 +bx lr + +CpuFastSet: +msr cpsr, #0x9F +stmfd sp!, {r4-r10} +mov r3, r2, lsl #12 +mov r3, r3, lsr #12 +tst r2, #0x01000000 +beq 0f +# Fill +ldmia r0!, {r4} +mov r2, r3 +mov r5, r4 +mov r3, r4 +mov r6, r4 +mov r7, r4 +mov r8, r4 +mov r9, r4 +mov r10, r4 +1: +stmia r1!, {r3-r10} +subs r2, #8 +bgt 1b +b 2f +# Copy +0: +mov r2, r3 +1: +ldmia r0!, {r3-r10} +stmia r1!, {r3-r10} +subs r2, #8 +bgt 1b +2: +ldmfd sp!, {r4-r10} +msr cpsr, #0x93 +bx lr