all repos — mgba @ 0c6061965146725057e163029ca330777257dae4

mGBA Game Boy Advance Emulator

DS GX: Precompute reciprocals
Vicki Pfau <vi@endrift.com>
Wed, 01 Mar 2017 20:42:04 -0800
commit

0c6061965146725057e163029ca330777257dae4

parent

4920ab36902ccf3fe2490741e5dea6c8289bcf7a

1 file changed, 22 insertions(+), 18 deletions(-)

jump to
M src/ds/gx/software.c

@@ -141,20 +141,22 @@ yw = 0;

} else if (yw > height) { yw = height; } - span->ep[index].x = ((int64_t) (edge->x1 - edge->x0) * yw) / height + edge->x0; + int64_t heightRecip = 0x100000000LL / height; + span->ep[index].x = ((((int64_t) (edge->x1 - edge->x0) * yw) * heightRecip) >> 32) + edge->x0; if (index && span->ep[0].x > span->ep[index].x) { int32_t temp = span->ep[index].x; span->ep[index] = span->ep[0]; span->ep[0].x = temp; index = 0; } - int32_t w = ((int64_t) (edge->w1 - edge->w0) * yw) / height + edge->w0; + int32_t w = ((((int64_t) (edge->w1 - edge->w0) * yw) * heightRecip) >> 32) + edge->w0; + int64_t wRecip = 0x1000000000000LL / w; span->ep[index].w = w; - span->ep[index].cr = (((int32_t) (edge->cr1 * edge->w1 - edge->cr0 * edge->w0) * yw) / height + edge->cr0 * edge->w0) / w; - span->ep[index].cg = (((int32_t) (edge->cg1 * edge->w1 - edge->cg0 * edge->w0) * yw) / height + edge->cg0 * edge->w0) / w; - span->ep[index].cb = (((int32_t) (edge->cb1 * edge->w1 - edge->cb0 * edge->w0) * yw) / height + edge->cb0 * edge->w0) / w; - span->ep[index].s = (((int32_t) (edge->s1 * edge->w1 - edge->s0 * edge->w0) * yw) / height + edge->s0 * edge->w0) / w; - span->ep[index].t = (((int32_t) (edge->t1 * edge->w1 - edge->t0 * edge->w0) * yw) / height + edge->t0 * edge->w0) / w; + span->ep[index].cr = ((((((int32_t) (edge->cr1 * edge->w1 - edge->cr0 * edge->w0) * yw) * heightRecip) >> 32) + edge->cr0 * edge->w0) * wRecip) >> 48; + span->ep[index].cg = ((((((int32_t) (edge->cg1 * edge->w1 - edge->cg0 * edge->w0) * yw) * heightRecip) >> 32) + edge->cg0 * edge->w0) * wRecip) >> 48; + span->ep[index].cb = ((((((int32_t) (edge->cb1 * edge->w1 - edge->cb0 * edge->w0) * yw) * heightRecip) >> 32) + edge->cb0 * edge->w0) * wRecip) >> 48; + span->ep[index].s = ((((((int32_t) (edge->s1 * edge->w1 - edge->s0 * edge->w0) * yw) * heightRecip) >> 32) + edge->s0 * edge->w0) * wRecip) >> 48; + span->ep[index].t = ((((((int32_t) (edge->t1 * edge->w1 - edge->t0 * edge->w0) * yw) * heightRecip) >> 32) + edge->t0 * 
edge->w0) * wRecip) >> 48; return true; }

@@ -193,20 +195,22 @@ xw = width;

} int32_t w0 = span->ep[0].w; int32_t w1 = span->ep[1].w; - int32_t w = ((int64_t) (w1 - w0) * xw) / width + w0; + int64_t widthRecip = 0x100000000LL / width; + int32_t w = ((((int64_t) (w1 - w0) * xw) * widthRecip) >> 32) + w0; ep->w = w; + int64_t wRecip = 0x1000000000000LL / w; - uint64_t r = ((span->ep[1].cr * (int64_t) w1 - span->ep[0].cr * (int64_t) w0) * xw) / width + span->ep[0].cr * (int64_t) w0; - uint64_t g = ((span->ep[1].cg * (int64_t) w1 - span->ep[0].cg * (int64_t) w0) * xw) / width + span->ep[0].cg * (int64_t) w0; - uint64_t b = ((span->ep[1].cb * (int64_t) w1 - span->ep[0].cb * (int64_t) w0) * xw) / width + span->ep[0].cb * (int64_t) w0; - ep->cr = r / w; - ep->cg = g / w; - ep->cb = b / w; + uint64_t r = ((((span->ep[1].cr * (int64_t) w1 - span->ep[0].cr * (int64_t) w0) * xw) * widthRecip) >> 32) + span->ep[0].cr * (int64_t) w0; + uint64_t g = ((((span->ep[1].cg * (int64_t) w1 - span->ep[0].cg * (int64_t) w0) * xw) * widthRecip) >> 32) + span->ep[0].cg * (int64_t) w0; + uint64_t b = ((((span->ep[1].cb * (int64_t) w1 - span->ep[0].cb * (int64_t) w0) * xw) * widthRecip) >> 32) + span->ep[0].cb * (int64_t) w0; + ep->cr = (r * wRecip) >> 48; + ep->cg = (g * wRecip) >> 48; + ep->cb = (b * wRecip) >> 48; - int32_t s = ((span->ep[1].s * (int64_t) w1 - span->ep[0].s * (int64_t) w0) * xw) / width + span->ep[0].s * (int64_t) w0; - int32_t t = ((span->ep[1].t * (int64_t) w1 - span->ep[0].t * (int64_t) w0) * xw) / width + span->ep[0].t * (int64_t) w0; - ep->s = s / w; - ep->t = t / w; + int32_t s = ((((span->ep[1].s * (int64_t) w1 - span->ep[0].s * (int64_t) w0) * xw) * widthRecip) >> 32) + span->ep[0].s * (int64_t) w0; + int32_t t = ((((span->ep[1].t * (int64_t) w1 - span->ep[0].t * (int64_t) w0) * xw) * widthRecip) >> 32) + span->ep[0].t * (int64_t) w0; + ep->s = (s * wRecip) >> 48; + ep->t = (t * wRecip) >> 48; } void DSGXSoftwareRendererCreate(struct DSGXSoftwareRenderer* renderer) {