all repos — mgba @ 7d5dff4fc9737c1174f37fbe8313e6a553d47074

mGBA Game Boy Advance Emulator

Merge pull request #120 from yuriks/3ds-gpu-rewrite

3DS GPU rewrite
endrift jeffrey@endrift.com
Wed, 16 Sep 2015 20:16:37 -0700
commit

7d5dff4fc9737c1174f37fbe8313e6a553d47074

parent

4d24b16735001b96dcee2bebc49560185f969b03

M src/platform/3ds/3ds-memory.c

@@ -5,8 +5,6 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this

* file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "util/memory.h" -#define asm __asm__ - #include <3ds.h> void* anonymousMemoryMap(size_t size) {
M src/platform/3ds/3ds-vfs.h

@@ -8,8 +8,6 @@ #define N3DS_VFS_H

#include "util/vfs.h" -#define asm __asm__ - #include <3ds.h> extern FS_archive sdmcArchive;
M src/platform/3ds/CMakeLists.txt

@@ -5,7 +5,7 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-format" PARENT_SCOPE)

set(OS_DEFINES COLOR_16_BIT COLOR_5_6_5) include_directories(${CMAKE_CURRENT_BINARY_DIR}) -list(APPEND OS_LIB sf2d ctru) +list(APPEND OS_LIB ctru) file(GLOB OS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/3ds-*.c) set(OS_SRC ${OS_SRC} PARENT_SCOPE) source_group("3DS-specific code" FILES ${OS_SRC})

@@ -19,9 +19,22 @@ endif()

set(VFS_SRC ${VFS_SRC} PARENT_SCOPE) set(OS_DEFINES ${OS_DEFINES} PARENT_SCOPE) -list(APPEND GUI_SRC ${CMAKE_CURRENT_BINARY_DIR}/font.c ${CMAKE_CURRENT_SOURCE_DIR}/gui-font.c) +list(APPEND GUI_SRC + ${CMAKE_CURRENT_BINARY_DIR}/font.c + ${CMAKE_CURRENT_BINARY_DIR}/uishader.c + ${CMAKE_CURRENT_BINARY_DIR}/uishader.h + ${CMAKE_CURRENT_BINARY_DIR}/uishader.shbin.h -set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/font.c PROPERTIES GENERATED ON) + ${CMAKE_CURRENT_SOURCE_DIR}/gui-font.c + ${CMAKE_CURRENT_SOURCE_DIR}/ctr-gpu.c + ${CMAKE_CURRENT_SOURCE_DIR}/ctr-gpu.h) + +set_source_files_properties( + ${CMAKE_CURRENT_BINARY_DIR}/font.c + ${CMAKE_CURRENT_BINARY_DIR}/uishader.c + ${CMAKE_CURRENT_BINARY_DIR}/uishader.h + ${CMAKE_CURRENT_BINARY_DIR}/uishader.shbin.h + PROPERTIES GENERATED ON) add_executable(${BINARY_NAME}.elf ${GUI_SRC} main.c ctru-heap.c) set_target_properties(${BINARY_NAME}.elf PROPERTIES COMPILE_DEFINITIONS "${OS_DEFINES};${FEATURE_DEFINES}") target_link_libraries(${BINARY_NAME}.elf ${BINARY_NAME} ${M_LIBRARY} ${OS_LIB})

@@ -37,6 +50,22 @@

add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/font.c COMMAND ${RAW2C} ${CMAKE_SOURCE_DIR}/src/platform/3ds/font.raw DEPENDS ${CMAKE_SOURCE_DIR}/src/platform/3ds/font.raw) + +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/uishader.shbin ${CMAKE_CURRENT_BINARY_DIR}/uishader.shbin.h + MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/uishader.vsh + COMMAND ${PICASSO} + -o ${CMAKE_CURRENT_BINARY_DIR}/uishader.shbin + -h ${CMAKE_CURRENT_BINARY_DIR}/uishader.shbin.h + ${CMAKE_CURRENT_SOURCE_DIR}/uishader.vsh + COMMENT "picasso uishader.vsh") + +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/uishader.c ${CMAKE_CURRENT_BINARY_DIR}/uishader.h + MAIN_DEPENDENCY ${CMAKE_CURRENT_BINARY_DIR}/uishader.shbin + COMMAND ${RAW2C} ${CMAKE_CURRENT_BINARY_DIR}/uishader.shbin + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "raw2c uishader.shbin") add_custom_target(${BINARY_NAME}.3dsx ALL ${3DSXTOOL} ${BINARY_NAME}.elf ${BINARY_NAME}.3dsx --smdh=${BINARY_NAME}.smdh
M src/platform/3ds/CMakeToolchain.txt

@@ -9,12 +9,24 @@ set(DEVKITARM $ENV{DEVKITARM})

else() set(DEVKITARM ${DEVKITPRO}/devkitARM) endif() +set(toolchain_bin_dir ${DEVKITARM}/bin) -set(toolchain_bin_dir ${DEVKITARM}/bin) +if(DEFINED ENV{CTRULIB}) + set(CTRULIB $ENV{CTRULIB}) +else() + set(CTRULIB ${DEVKITPRO}/libctru) +endif() + +if(DEFINED ENV{PICASSO}) + set(PICASSO $ENV{PICASSO}) +else() + set(PICASSO ${toolchain_bin_dir}/picasso) +endif() + set(cross_prefix ${toolchain_bin_dir}/arm-none-eabi-) -set(inc_flags -I${DEVKITPRO}/libctru/include) +set(inc_flags -I${CTRULIB}/include) set(arch_flags "-march=armv6k -mtune=mpcore -mfpu=vfp -mfloat-abi=hard") -set(link_flags "-L${DEVKITPRO}/libctru/lib -lctru -lm -specs=3dsx.specs ${arch_flags}") +set(link_flags "-L${CTRULIB}/lib -lctru -lm -specs=3dsx.specs ${arch_flags}") set(CMAKE_SYSTEM_NAME Generic CACHE INTERNAL "system name") set(CMAKE_SYSTEM_PROCESSOR arm CACHE INTERNAL "processor")
A src/platform/3ds/ctr-gpu.c

@@ -0,0 +1,405 @@

+/* Copyright (c) 2015 Yuri Kunde Schlesner + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <3ds.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "ctr-gpu.h" + +#include "uishader.h" +#include "uishader.shbin.h" + +struct ctrUIVertex { + s16 x,y; + s16 u,v; + u32 abgr; +}; + +#define VRAM_BASE 0x18000000u + +#define MAX_NUM_QUADS 1024 +#define COMMAND_LIST_LENGTH (16 * 1024) +// Each quad requires 4 vertices and 2*3 indices for the two triangles used to draw it +#define VERTEX_INDEX_BUFFER_SIZE (MAX_NUM_QUADS * (4 * sizeof(struct ctrUIVertex) + 6 * sizeof(u16))) + +static struct ctrUIVertex* ctrVertexBuffer = NULL; +static u16* ctrIndexBuffer = NULL; +static u16 ctrNumQuads = 0; + +static void* gpuColorBuffer = NULL; +static u32* gpuCommandList = NULL; +static void* screenTexture = NULL; + +static shaderProgram_s gpuShader; +static DVLB_s* passthroughShader = NULL; + +static const struct ctrTexture* activeTexture = NULL; + +static u32 _f24FromFloat(float f) { + u32 i; + memcpy(&i, &f, 4); + + u32 mantissa = (i << 9) >> 9; + s32 exponent = (i << 1) >> 24; + u32 sign = (i << 0) >> 31; + + // Truncate mantissa + mantissa >>= 7; + + // Re-bias exponent + exponent = exponent - 127 + 63; + if (exponent < 0) { + // Underflow: flush to zero + return sign << 23; + } else if (exponent > 0x7F) { + // Overflow: saturate to infinity + return sign << 23 | 0x7F << 16; + } + + return sign << 23 | exponent << 16 | mantissa; +} + +static u32 _f31FromFloat(float f) { + u32 i; + memcpy(&i, &f, 4); + + u32 mantissa = (i << 9) >> 9; + s32 exponent = (i << 1) >> 24; + u32 sign = (i << 0) >> 31; + + // Re-bias exponent + exponent = exponent - 127 + 63; + if (exponent < 0) { + // Underflow: flush to zero + return sign << 30; + } else if (exponent > 0x7F) { + // Overflow: saturate to 
infinity + return sign << 30 | 0x7F << 23; + } + + return sign << 30 | exponent << 23 | mantissa; +} + +// Replacements for the limiting GPU_SetViewport function in ctrulib +static void _GPU_SetFramebuffer(intptr_t colorBuffer, intptr_t depthBuffer, u16 w, u16 h) { + u32 buf[4]; + + // Unknown + GPUCMD_AddWrite(GPUREG_0111, 0x00000001); + GPUCMD_AddWrite(GPUREG_0110, 0x00000001); + + // Set depth/color buffer address and dimensions + buf[0] = depthBuffer >> 3; + buf[1] = colorBuffer >> 3; + buf[2] = (0x01) << 24 | ((h-1) & 0xFFF) << 12 | (w & 0xFFF) << 0; + GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, buf, 3); + GPUCMD_AddWrite(GPUREG_006E, buf[2]); + + // Set depth/color buffer pixel format + GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, 3 /* D248S */ ); + GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, 0 /* RGBA8 */ << 16 | 2 /* Unknown */); + GPUCMD_AddWrite(GPUREG_011B, 0); // Unknown + + // Enable color/depth buffers + buf[0] = colorBuffer != 0 ? 0xF : 0x0; + buf[1] = buf[0]; + buf[2] = depthBuffer != 0 ? 
0x2 : 0x0; + buf[3] = buf[2]; + GPUCMD_AddIncrementalWrites(GPUREG_0112, buf, 4); +} + +static void _GPU_SetViewportEx(u16 x, u16 y, u16 w, u16 h) { + u32 buf[4]; + + buf[0] = _f24FromFloat(w / 2.0f); + buf[1] = _f31FromFloat(2.0f / w) << 1; + buf[2] = _f24FromFloat(h / 2.0f); + buf[3] = _f31FromFloat(2.0f / h) << 1; + GPUCMD_AddIncrementalWrites(GPUREG_0041, buf, 4); + + GPUCMD_AddWrite(GPUREG_0068, (y & 0xFFFF) << 16 | (x & 0xFFFF) << 0); + + buf[0] = 0; + buf[1] = 0; + buf[2] = ((h-1) & 0xFFFF) << 16 | ((w-1) & 0xFFFF) << 0; + GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, buf, 3); +} + +static void _setDummyTexEnv(int id) { + GPU_SetTexEnv(id, + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_TEVOPERANDS(0, 0, 0), + GPU_REPLACE, + GPU_REPLACE, + 0x00000000); +} + +Result ctrInitGpu() { + Result res = -1; + + // Allocate buffers + gpuColorBuffer = vramAlloc(400 * 240 * 4); + gpuCommandList = linearAlloc(COMMAND_LIST_LENGTH * sizeof(u32)); + ctrVertexBuffer = linearAlloc(VERTEX_INDEX_BUFFER_SIZE); + if (gpuColorBuffer == NULL || gpuCommandList == NULL || ctrVertexBuffer == NULL) { + res = -1; + goto error_allocs; + } + // Both buffers share the same allocation, index buffer follows the vertex buffer + ctrIndexBuffer = (u16*)(ctrVertexBuffer + (4 * MAX_NUM_QUADS)); + + // Load vertex shader binary + passthroughShader = DVLB_ParseFile((u32*)uishader, uishader_size); + if (passthroughShader == NULL) { + res = -1; + goto error_dvlb; + } + + // Create shader + shaderProgramInit(&gpuShader); + res = shaderProgramSetVsh(&gpuShader, &passthroughShader->DVLE[0]); + if (res < 0) { + goto error_shader; + } + + // Initialize the GPU in ctrulib and assign the command buffer to accept submission of commands + GPU_Init(NULL); + GPUCMD_SetBuffer(gpuCommandList, COMMAND_LIST_LENGTH, 0); + + return 0; + +error_shader: + shaderProgramFree(&gpuShader); + +error_dvlb: + if (passthroughShader != NULL) { + 
DVLB_Free(passthroughShader); + passthroughShader = NULL; + } + +error_allocs: + if (ctrVertexBuffer != NULL) { + linearFree(ctrVertexBuffer); + ctrVertexBuffer = NULL; + ctrIndexBuffer = NULL; + } + + if (gpuCommandList != NULL) { + GPUCMD_SetBuffer(NULL, 0, 0); + linearFree(gpuCommandList); + gpuCommandList = NULL; + } + + if (gpuColorBuffer != NULL) { + vramFree(gpuColorBuffer); + gpuColorBuffer = NULL; + } + return res; +} + +void ctrDeinitGpu() { + shaderProgramFree(&gpuShader); + + DVLB_Free(passthroughShader); + passthroughShader = NULL; + + linearFree(screenTexture); + screenTexture = NULL; + + linearFree(ctrVertexBuffer); + ctrVertexBuffer = NULL; + ctrIndexBuffer = NULL; + + GPUCMD_SetBuffer(NULL, 0, 0); + linearFree(gpuCommandList); + gpuCommandList = NULL; + + vramFree(gpuColorBuffer); + gpuColorBuffer = NULL; +} + +void ctrGpuBeginFrame(void) { + shaderProgramUse(&gpuShader); + + void* gpuColorBufferEnd = (char*)gpuColorBuffer + 240 * 400 * 4; + + GX_SetMemoryFill(NULL, + gpuColorBuffer, 0x00000000, gpuColorBufferEnd, GX_FILL_32BIT_DEPTH | GX_FILL_TRIGGER, + NULL, 0, NULL, 0); + gspWaitForPSC0(); + + _GPU_SetFramebuffer(osConvertVirtToPhys((u32)gpuColorBuffer), 0, 240, 400); + + // Disable depth and stencil testing + GPU_SetDepthTestAndWriteMask(false, GPU_ALWAYS, GPU_WRITE_COLOR); + GPU_SetStencilTest(false, GPU_ALWAYS, 0, 0xFF, 0); + GPU_SetStencilOp(GPU_STENCIL_KEEP, GPU_STENCIL_KEEP, GPU_STENCIL_KEEP); + GPU_DepthMap(-1.0f, 0.0f); + + // Enable alpha blending + GPU_SetAlphaBlending( + GPU_BLEND_ADD, GPU_BLEND_ADD, // Operation RGB, Alpha + GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, // Color src, dst + GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); // Alpha src, dst + GPU_SetBlendingColor(0, 0, 0, 0); + + // Disable alpha testing + GPU_SetAlphaTest(false, GPU_ALWAYS, 0); + + // Unknown + GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0); + GPUCMD_AddWrite(GPUREG_0118, 0); + + GPU_SetTexEnv(0, + GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // RGB + 
GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0), // Alpha + GPU_TEVOPERANDS(0, 0, 0), // RGB + GPU_TEVOPERANDS(0, 0, 0), // Alpha + GPU_MODULATE, GPU_MODULATE, // Operation RGB, Alpha + 0x00000000); // Constant color + _setDummyTexEnv(1); + _setDummyTexEnv(2); + _setDummyTexEnv(3); + _setDummyTexEnv(4); + _setDummyTexEnv(5); + + // Configure vertex attribute format + u32 bufferOffsets[] = { osConvertVirtToPhys((u32)ctrVertexBuffer) - VRAM_BASE }; + u64 arrayTargetAttributes[] = { 0x210 }; + u8 numAttributesInArray[] = { 3 }; + GPU_SetAttributeBuffers( + 3, // Number of attributes + (u32*)VRAM_BASE, // Base address + GPU_ATTRIBFMT(0, 2, GPU_SHORT) | // Attribute format + GPU_ATTRIBFMT(1, 2, GPU_SHORT) | + GPU_ATTRIBFMT(2, 4, GPU_UNSIGNED_BYTE), + 0xFF8, // Non-fixed vertex inputs + 0x210, // Vertex shader input map + 1, // Use 1 vertex array + bufferOffsets, arrayTargetAttributes, numAttributesInArray); +} + +void ctrGpuEndFrame(void* outputFramebuffer, int w, int h) { + ctrFlushBatch(); + + void* colorBuffer = (u8*)gpuColorBuffer + ((400 - w) * 240 * 4); + + const u32 GX_CROP_INPUT_LINES = (1 << 2); + + GX_SetDisplayTransfer(NULL, + colorBuffer, GX_BUFFER_DIM(240, 400), + outputFramebuffer, GX_BUFFER_DIM(h, w), + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | + GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | + GX_CROP_INPUT_LINES); + gspWaitForPPF(); +} + +void ctrSetViewportSize(s16 w, s16 h) { + // Set up projection matrix mapping (0,0) to the top-left and (w,h) to the + // bottom-right, taking into account the 3DS' screens' portrait + // orientation. 
+ float projectionMtx[4 * 4] = { + // Rows are in the order w z y x, because ctrulib + 1.0f, 0.0f, -2.0f / h, 0.0f, + 1.0f, 0.0f, 0.0f, -2.0f / w, + -0.5f, 0.0f, 0.0f, 0.0f, + 1.0f, 0.0f, 0.0f, 0.0f, + }; + + GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_projectionMtx, (u32*)&projectionMtx, 4); + _GPU_SetViewportEx(0, 0, h, w); +} + +void ctrActivateTexture(const struct ctrTexture* texture) { + if (activeTexture == texture) { + return; + } + + ctrFlushBatch(); + + GPU_SetTextureEnable(GPU_TEXUNIT0); + GPU_SetTexture( + GPU_TEXUNIT0, (u32*)osConvertVirtToPhys((u32)texture->data), + texture->width, texture->height, + GPU_TEXTURE_MAG_FILTER(texture->filter) | GPU_TEXTURE_MIN_FILTER(texture->filter) | + GPU_TEXTURE_WRAP_S(GPU_CLAMP_TO_BORDER) | GPU_TEXTURE_WRAP_T(GPU_CLAMP_TO_BORDER), + texture->format); + GPU_SetTextureBorderColor(GPU_TEXUNIT0, 0x00000000); + + float textureMtx[2 * 4] = { + // Rows are in the order w z y x, because ctrulib + 0.0f, 0.0f, 0.0f, 1.0f / texture->width, + 0.0f, 0.0f, 1.0f / texture->height, 0.0f, + }; + + GPU_SetFloatUniform(GPU_VERTEX_SHADER, VSH_FVEC_textureMtx, (u32*)&textureMtx, 2); + + activeTexture = texture; +} + +void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh) { + if (ctrNumQuads == MAX_NUM_QUADS) { + ctrFlushBatch(); + } + + u16 index = ctrNumQuads * 4; + struct ctrUIVertex* vtx = &ctrVertexBuffer[index]; + vtx->x = x; vtx->y = y; + vtx->u = u; vtx->v = v; + vtx->abgr = color; + vtx++; + + vtx->x = x + w; vtx->y = y; + vtx->u = u + uw; vtx->v = v; + vtx->abgr = color; + vtx++; + + vtx->x = x; vtx->y = y + h; + vtx->u = u; vtx->v = v + vh; + vtx->abgr = color; + vtx++; + + vtx->x = x + w; vtx->y = y + h; + vtx->u = u + uw; vtx->v = v + vh; + vtx->abgr = color; + + u16* i = &ctrIndexBuffer[ctrNumQuads * 6]; + i[0] = index + 0; i[1] = index + 1; i[2] = index + 2; + i[3] = index + 2; i[4] = index + 1; i[5] = index + 3; + + ctrNumQuads += 1; +} + +void ctrAddRect(u32 color, s16 x, s16 y, s16 
u, s16 v, s16 w, s16 h) { + ctrAddRectScaled(color, + x, y, w, h, + u, v, w, h); +} + +void ctrFlushBatch(void) { + if (ctrNumQuads == 0) { + return; + } + + GSPGPU_FlushDataCache(NULL, (u8*)ctrVertexBuffer, VERTEX_INDEX_BUFFER_SIZE); + GPU_DrawElements(GPU_UNKPRIM, (u32*)(osConvertVirtToPhys((u32)ctrIndexBuffer) - VRAM_BASE), ctrNumQuads * 6); + + GPU_FinishDrawing(); + GPUCMD_Finalize(); + GSPGPU_FlushDataCache(NULL, (u8*)gpuCommandList, COMMAND_LIST_LENGTH * sizeof(u32)); + GPUCMD_FlushAndRun(NULL); + + gspWaitForP3D(); + + GPUCMD_SetBufferOffset(0); + + ctrNumQuads = 0; +}
A src/platform/3ds/ctr-gpu.h

@@ -0,0 +1,41 @@

+/* Copyright (c) 2015 Yuri Kunde Schlesner + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef GUI_GPU_H +#define GUI_GPU_H + +#include <3ds.h> + +struct ctrTexture { + void* data; + u32 format; + u32 filter; + u16 width; + u16 height; +}; + +inline void ctrTexture_Init(struct ctrTexture* tex) { + tex->data = NULL; + tex->format = GPU_RGB565; + tex->filter = GPU_NEAREST; + tex->width = 0; + tex->height = 0; +} + +Result ctrInitGpu(void); +void ctrDeinitGpu(void); + +void ctrGpuBeginFrame(void); +void ctrGpuEndFrame(void* outputFramebuffer, int w, int h); + +void ctrSetViewportSize(s16 w, s16 h); + +void ctrActivateTexture(const struct ctrTexture* texture); +void ctrAddRectScaled(u32 color, s16 x, s16 y, s16 w, s16 h, s16 u, s16 v, s16 uw, s16 vh); +void ctrAddRect(u32 color, s16 x, s16 y, s16 u, s16 v, s16 w, s16 h); +void ctrFlushBatch(void); + +#endif
M src/platform/3ds/gui-font.c

@@ -8,15 +8,14 @@ #include "util/gui/font-metrics.h"

#include "util/png-io.h" #include "util/vfs.h" #include "font.h" - -#include <sf2d.h> +#include "ctr-gpu.h" #define CELL_HEIGHT 16 #define CELL_WIDTH 16 #define GLYPH_HEIGHT 12 struct GUIFont { - sf2d_texture* tex; + struct ctrTexture texture; }; struct GUIFont* GUIFontCreate(void) {

@@ -24,14 +23,23 @@ struct GUIFont* guiFont = malloc(sizeof(struct GUIFont));

if (!guiFont) { return 0; } - guiFont->tex = sf2d_create_texture(256, 128, TEXFMT_RGB5A1, SF2D_PLACE_RAM); - memcpy(guiFont->tex->data, font, font_size); - guiFont->tex->tiled = 1; + + struct ctrTexture* tex = &guiFont->texture; + ctrTexture_Init(tex); + tex->data = vramAlloc(256 * 128 * 2); + tex->format = GPU_RGBA5551; + tex->width = 256; + tex->height = 128; + + GSPGPU_FlushDataCache(NULL, (u8*)font, font_size); + GX_RequestDma(NULL, (u32*)font, tex->data, font_size); + gspWaitForDMA(); + return guiFont; } void GUIFontDestroy(struct GUIFont* font) { - sf2d_free_texture(font->tex); + vramFree(font->texture.data); free(font); }

@@ -48,18 +56,18 @@ }

return defaultFontMetrics[glyph].width; } -void GUIFontDrawGlyph(const struct GUIFont* font, int x, int y, uint32_t color, uint32_t glyph) { +void GUIFontDrawGlyph(const struct GUIFont* font, int glyph_x, int glyph_y, uint32_t color, uint32_t glyph) { + ctrActivateTexture(&font->texture); + if (glyph > 0x7F) { glyph = 0; } - color = (color >> 24) | (color << 8); + struct GUIFontGlyphMetric metric = defaultFontMetrics[glyph]; - sf2d_draw_texture_part_blend(font->tex, - x - metric.padding.left, - y - GLYPH_HEIGHT, - (glyph & 15) * CELL_WIDTH, - (glyph >> 4) * CELL_HEIGHT, - CELL_WIDTH, - CELL_HEIGHT, - color); + u16 x = glyph_x - metric.padding.left; + u16 y = glyph_y - GLYPH_HEIGHT; + u16 u = (glyph % 16u) * CELL_WIDTH; + u16 v = (glyph / 16u) * CELL_HEIGHT; + + ctrAddRect(color, x, y, u, v, CELL_WIDTH, CELL_HEIGHT); }
M src/platform/3ds/main.c

@@ -14,9 +14,9 @@ #include "util/gui/font.h"

#include "util/memory.h" #include "3ds-vfs.h" +#include "ctr-gpu.h" #include <3ds.h> -#include <sf2d.h> static enum ScreenMode { SM_PA_BOTTOM,

@@ -46,23 +46,29 @@ static struct GBAAVStream stream;

static int16_t* audioLeft = 0; static int16_t* audioRight = 0; static size_t audioPos = 0; -static sf2d_texture* tex; +static struct ctrTexture gbaOutputTexture; extern bool allocateRomBuffer(void); static void _postAudioBuffer(struct GBAAVStream* stream, struct GBAAudio* audio); static void _drawStart(void) { + ctrGpuBeginFrame(); if (screenMode < SM_PA_TOP) { - sf2d_start_frame(GFX_BOTTOM, GFX_LEFT); + ctrSetViewportSize(320, 240); } else { - sf2d_start_frame(GFX_TOP, GFX_LEFT); + ctrSetViewportSize(400, 240); } } static void _drawEnd(void) { - sf2d_end_frame(); - sf2d_swapbuffers(); + int screen = screenMode < SM_PA_TOP ? GFX_BOTTOM : GFX_TOP; + u16 width = 0, height = 0; + + void* outputFramebuffer = gfxGetFramebuffer(screen, GFX_LEFT, &height, &width); + ctrGpuEndFrame(outputFramebuffer, width, height); + gfxSwapBuffersGpu(); + gspWaitForEvent(GSPEVENT_VBlank0, false); } static void _setup(struct GBAGUIRunner* runner) {

@@ -117,64 +123,105 @@ }

} static void _drawTex(bool faded) { + u32 color = faded ? 0x3FFFFFFF : 0xFFFFFFFF; + + int screen_w = screenMode < SM_PA_TOP ? 320 : 400; + int screen_h = 240; + + int w, h; + switch (screenMode) { case SM_PA_TOP: - sf2d_draw_texture_scale_blend(tex, 80, 296, 1, -1, 0xFFFFFF3F | (faded ? 0 : 0xC0)); - break; case SM_PA_BOTTOM: default: - sf2d_draw_texture_scale_blend(tex, 40, 296, 1, -1, 0xFFFFFF3F | (faded ? 0 : 0xC0)); + w = VIDEO_HORIZONTAL_PIXELS; + h = VIDEO_VERTICAL_PIXELS; break; case SM_AF_TOP: - sf2d_draw_texture_scale_blend(tex, 20, 384, 1.5, -1.5, 0xFFFFFF3F | (faded ? 0 : 0xC0)); + w = 360; + h = 240; break; case SM_AF_BOTTOM: - sf2d_draw_texture_scale_blend(tex, 0, 368 - 40 / 3, 4 / 3.0, -4 / 3.0, 0xFFFFFF3F | (faded ? 0 : 0xC0)); + // Largest possible size with 3:2 aspect ratio and integer dimensions + w = 318; + h = 212; break; case SM_SF_TOP: - sf2d_draw_texture_scale_blend(tex, 0, 384, 5 / 3.0, -1.5, 0xFFFFFF3F | (faded ? 0 : 0xC0)); - break; case SM_SF_BOTTOM: - sf2d_draw_texture_scale_blend(tex, 0, 384, 4 / 3.0, -1.5, 0xFFFFFF3F | (faded ? 
0 : 0xC0)); + w = screen_w; + h = screen_h; break; } + + int x = (screen_w - w) / 2; + int y = (screen_h - h) / 2; + + ctrAddRectScaled(color, x, y, w, h, 0, 0, VIDEO_HORIZONTAL_PIXELS, VIDEO_VERTICAL_PIXELS); } static void _drawFrame(struct GBAGUIRunner* runner, bool faded) { UNUSED(runner); - GSPGPU_FlushDataCache(0, (u8*) renderer.outputBuffer, 256 * VIDEO_VERTICAL_PIXELS * 2); - GX_SetDisplayTransfer(0, (u32*) renderer.outputBuffer, GX_BUFFER_DIM(256, VIDEO_VERTICAL_PIXELS), tex->data, GX_BUFFER_DIM(256, VIDEO_VERTICAL_PIXELS), 0x000002202); - _drawTex(faded); + + void* outputBuffer = renderer.outputBuffer; + struct ctrTexture* tex = &gbaOutputTexture; + + GSPGPU_FlushDataCache(NULL, outputBuffer, 256 * VIDEO_VERTICAL_PIXELS * 2); + GX_SetDisplayTransfer(NULL, + outputBuffer, GX_BUFFER_DIM(256, VIDEO_VERTICAL_PIXELS), + tex->data, GX_BUFFER_DIM(256, 256), + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGB565) | + GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB565) | + GX_TRANSFER_OUT_TILED(1) | GX_TRANSFER_FLIP_VERT(1)); + #if RESAMPLE_LIBRARY == RESAMPLE_BLIP_BUF if (!hasSound) { blip_clear(runner->context.gba->audio.left); blip_clear(runner->context.gba->audio.right); } #endif + + gspWaitForPPF(); + ctrActivateTexture(tex); + _drawTex(faded); } static void _drawScreenshot(struct GBAGUIRunner* runner, const uint32_t* pixels, bool faded) { UNUSED(runner); - u16* newPixels = linearMemAlign(256 * VIDEO_VERTICAL_PIXELS * 2, 0x80); - unsigned y, x; - for (y = 0; y < VIDEO_VERTICAL_PIXELS; ++y) { - for (x = 0; x < VIDEO_HORIZONTAL_PIXELS; ++x) { - u16 pixel = (*pixels >> 19) & 0x1F; - pixel |= (*pixels >> 5) & 0x7C0; - pixel |= (*pixels << 8) & 0xF800; - newPixels[y * 256 + x] = pixel; - ++pixels; + + struct ctrTexture* tex = &gbaOutputTexture; + + u16* newPixels = linearMemAlign(256 * VIDEO_VERTICAL_PIXELS * sizeof(u32), 0x100); + + // Convert image from RGBX8 to BGR565 + for (unsigned y = 0; y < VIDEO_VERTICAL_PIXELS; ++y) { + for (unsigned x = 0; x < VIDEO_HORIZONTAL_PIXELS; 
++x) { + // 0xXXBBGGRR -> 0bRRRRRGGGGGGBBBBB + u32 p = *pixels++; + newPixels[y * 256 + x] = + (p << 24 >> (24 + 3) << 11) | // R + (p << 16 >> (24 + 2) << 5) | // G + (p << 8 >> (24 + 3) << 0); // B } - memset(&newPixels[y * 256 + VIDEO_HORIZONTAL_PIXELS], 0, 32); + memset(&newPixels[y * 256 + VIDEO_HORIZONTAL_PIXELS], 0, (256 - VIDEO_HORIZONTAL_PIXELS) * sizeof(u32)); } - GSPGPU_FlushDataCache(0, (void*) newPixels, VIDEO_HORIZONTAL_PIXELS * VIDEO_VERTICAL_PIXELS * 2); - GX_SetDisplayTransfer(0, (void*) newPixels, GX_BUFFER_DIM(VIDEO_HORIZONTAL_PIXELS, VIDEO_VERTICAL_PIXELS), tex->data, GX_BUFFER_DIM(256, VIDEO_VERTICAL_PIXELS), 0x000002202); + + GSPGPU_FlushDataCache(NULL, (void*)newPixels, 256 * VIDEO_VERTICAL_PIXELS * sizeof(u32)); + GX_SetDisplayTransfer(NULL, + (void*)newPixels, GX_BUFFER_DIM(256, VIDEO_VERTICAL_PIXELS), + tex->data, GX_BUFFER_DIM(256, 256), + GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGB565) | + GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB565) | + GX_TRANSFER_OUT_TILED(1) | GX_TRANSFER_FLIP_VERT(1)); + gspWaitForPPF(); linearFree(newPixels); + + ctrActivateTexture(tex); _drawTex(faded); } static uint16_t _pollGameInput(struct GBAGUIRunner* runner) { UNUSED(runner); + hidScanInput(); uint32_t activeKeys = hidKeysHeld() & 0xF00003FF; activeKeys |= activeKeys >> 24;

@@ -305,9 +352,25 @@ audioLeft = linearMemAlign(AUDIO_SAMPLE_BUFFER * sizeof(int16_t), 0x80);

audioRight = linearMemAlign(AUDIO_SAMPLE_BUFFER * sizeof(int16_t), 0x80); } - sf2d_init(); - sf2d_set_clear_color(0); - tex = sf2d_create_texture(256, 256, TEXFMT_RGB565, SF2D_PLACE_VRAM); + gfxInit(GSP_BGR8_OES, GSP_BGR8_OES, false); + + if (ctrInitGpu() < 0) { + goto cleanup; + } + + ctrTexture_Init(&gbaOutputTexture); + gbaOutputTexture.format = GPU_RGB565; + gbaOutputTexture.filter = GPU_LINEAR; + gbaOutputTexture.width = 256; + gbaOutputTexture.height = 256; + gbaOutputTexture.data = vramAlloc(256 * 256 * 2); + void* outputTextureEnd = (u8*)gbaOutputTexture.data + 256 * 256 * 2; + + // Zero texture data to make sure no garbage around the border interferes with filtering + GX_SetMemoryFill(NULL, + gbaOutputTexture.data, 0x0000, outputTextureEnd, GX_FILL_16BIT_DEPTH | GX_FILL_TRIGGER, + NULL, 0, NULL, 0); + gspWaitForPSC0(); sdmcArchive = (FS_archive) { ARCH_SDMC,

@@ -352,8 +415,10 @@

cleanup: linearFree(renderer.outputBuffer); - sf2d_free_texture(tex); - sf2d_fini(); + ctrDeinitGpu(); + vramFree(gbaOutputTexture.data); + + gfxExit(); if (hasSound) { linearFree(audioLeft);
A src/platform/3ds/uishader.vsh

@@ -0,0 +1,41 @@

; Copyright (c) 2015 Yuri Kunde Schlesner
;
; This Source Code Form is subject to the terms of the Mozilla Public
; License, v. 2.0. If a copy of the MPL was not distributed with this
; file, You can obtain one at http://mozilla.org/MPL/2.0/.

; uishader.vsh - Transforms the input position and texture coordinates by
; their respective matrices and rescales the vertex color.

; Uniforms
.fvec projectionMtx[4]
.fvec textureMtx[2]

; Constants: x = 0.0, y = 1.0, z = 1 / 255
.constf consts1(0.0, 1.0, 0.0039215686, 0.0)

; Outputs: position, texture coordinate 0 and color
.out out_pos position
.out out_tc0 texcoord0
.out out_col color

; Inputs: position, texture coordinate and color attributes
.alias in_pos v0
.alias in_tc0 v1
.alias in_col v2

.proc main
	; out_pos = projectionMtx * in_pos, one row per component
	dp4 out_pos.x, projectionMtx[0], in_pos
	dp4 out_pos.y, projectionMtx[1], in_pos
	dp4 out_pos.z, projectionMtx[2], in_pos
	dp4 out_pos.w, projectionMtx[3], in_pos

	; out_tc0.xy = textureMtx * in_tc0; z and w are filled with 0.0 and 1.0
	dp4 out_tc0.x, textureMtx[0], in_tc0
	dp4 out_tc0.y, textureMtx[1], in_tc0
	mov out_tc0.zw, consts1.xxxy

	; Normalize color by multiplying by 1 / 255
	mul out_col, consts1.z, in_col

	end
.end