Add some NEON resizing code
Jeffrey Pfau jeffrey@endrift.com
Sat, 26 Apr 2014 03:31:53 -0700
2 files changed,
103 insertions(+),
9 deletions(-)
M
src/platform/sdl/sw-main.c
→
src/platform/sdl/sw-main.c
@@ -18,6 +18,11 @@ #include <errno.h>
#include <signal.h> #include <sys/time.h> +#ifdef __ARM_NEON +void _neon2x(void* dest, void* src, int width, int height); +void _neon4x(void* dest, void* src, int width, int height); +#endif + struct SoftwareRenderer { struct GBAVideoSoftwareRenderer d; struct GBASDLAudio audio;@@ -26,9 +31,11 @@ #if SDL_VERSION_ATLEAST(2, 0, 0)
SDL_Window* window; SDL_Texture* tex; SDL_Renderer* sdlRenderer; +#else + int ratio; +#endif int viewportWidth; int viewportHeight; -#endif }; static int _GBASDLInit(struct SoftwareRenderer* renderer);@@ -47,6 +54,9 @@ usage(argv[0], GRAPHICS_USAGE);
return 1; } + renderer.viewportWidth = opts.width; + renderer.viewportHeight = opts.height; + if (!_GBASDLInit(&renderer)) { return 1; }@@ -65,8 +75,6 @@
GBAMapOptionsToContext(&opts, &context); #if SDL_VERSION_ATLEAST(2, 0, 0) - renderer.viewportWidth = opts.width; - renderer.viewportHeight = opts.height; renderer.events.fullscreen = opts.fullscreen; renderer.window = SDL_CreateWindow("GBAc", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, renderer.viewportWidth, renderer.viewportHeight, SDL_WINDOW_OPENGL | (SDL_WINDOW_FULLSCREEN_DESKTOP * renderer.events.fullscreen)); SDL_GetWindowSize(renderer.window, &renderer.viewportWidth, &renderer.viewportHeight);@@ -91,12 +99,23 @@ #endif
#else SDL_Surface* surface = SDL_GetVideoSurface(); SDL_LockSurface(surface); - renderer.d.outputBuffer = surface->pixels; + + renderer.ratio = renderer.viewportWidth / VIDEO_HORIZONTAL_PIXELS; + if (renderer.ratio == 1) { + renderer.d.outputBuffer = surface->pixels; +#ifdef COLOR_16_BIT + renderer.d.outputBufferStride = surface->pitch / 2; +#else + renderer.d.outputBufferStride = surface->pitch / 4; +#endif + } else { #ifdef COLOR_16_BIT - renderer.d.outputBufferStride = surface->pitch / 2; + renderer.d.outputBuffer = malloc(240 * 160 * 2); #else - renderer.d.outputBufferStride = surface->pitch / 4; + renderer.d.outputBuffer = malloc(240 * 160 * 4); #endif + renderer.d.outputBufferStride = 240; + } #endif GBAThreadStart(&context);@@ -128,9 +147,9 @@ GBASDLInitAudio(&renderer->audio);
#if !SDL_VERSION_ATLEAST(2, 0, 0) #ifdef COLOR_16_BIT - SDL_SetVideoMode(240, 160, 16, SDL_DOUBLEBUF | SDL_HWSURFACE); + SDL_SetVideoMode(renderer->viewportWidth, renderer->viewportHeight, 16, SDL_DOUBLEBUF | SDL_HWSURFACE); #else - SDL_SetVideoMode(240, 160, 32, SDL_DOUBLEBUF | SDL_HWSURFACE); + SDL_SetVideoMode(renderer->viewportWidth, renderer->viewportHeight, 32, SDL_DOUBLEBUF | SDL_HWSURFACE); #endif #endif@@ -156,6 +175,20 @@ #else
renderer->d.outputBufferStride /= 4; #endif #else + switch (renderer->ratio) { +#if defined(__ARM_NEON) && COLOR_16_BIT + case 2: + _neon2x(surface->pixels, renderer->d.outputBuffer, 240, 160); + break; + case 4: + _neon4x(surface->pixels, renderer->d.outputBuffer, 240, 160); + break; +#endif + case 1: + break; + default: + abort(); + } SDL_UnlockSurface(surface); SDL_Flip(surface); SDL_LockSurface(surface);
M
src/util/arm-algo.S
→
src/util/arm-algo.S
@@ -26,6 +26,67 @@ strh r6, [r8], #2
strh r7, [r8], #2 subs r10, #8 bne .L1 -.L9: pop {r4-r10} bx lr + +# r0: Destination +# r1: Source +# r2: Width +# r3: Height +.global _neon2x +_neon2x: +push {r4-r5} +lsl r4, r2, #2 +.n20: +mov r2, r4, lsr #4 +add r5, r0, r4 +.n21: +vld2.32 {d0[], d1[]}, [r1]! +vmov d2, d0 +vmov d3, d1 +vzip.16 d0, d2 +vzip.16 d1, d3 +vst1.32 {q0}, [r0]! +vst1.32 {q0}, [r5]! +subs r2, #1 +bne .n21 +subs r3, #1 +mov r0, r5 +bne .n20 +pop {r4-r5} +bx lr + +.global _neon4x +_neon4x: +push {r4-r7} +lsl r4, r2, #3 +.n40: +mov r2, r4, lsr #5 +add r5, r0, r4 +add r6, r5, r4 +add r7, r6, r4 +.n41: +vld4.16 {d0[], d1[], d2[], d3[]}, [r1]! +vst1.16 {d0}, [r0]! +vst1.16 {d0}, [r5]! +vst1.16 {d0}, [r6]! +vst1.16 {d0}, [r7]! +vst1.16 {d1}, [r0]! +vst1.16 {d1}, [r5]! +vst1.16 {d1}, [r6]! +vst1.16 {d1}, [r7]! +vst1.16 {d2}, [r0]! +vst1.16 {d2}, [r5]! +vst1.16 {d2}, [r6]! +vst1.16 {d2}, [r7]! +vst1.16 {d3}, [r0]! +vst1.16 {d3}, [r5]! +vst1.16 {d3}, [r6]! +vst1.16 {d3}, [r7]! +subs r2, #1 +bne .n41 +subs r3, #1 +mov r0, r7 +bne .n40 +pop {r4-r7} +bx lr