Hack back flat ROM pointer support

Flat ROM pointer support was present in the prerelease alpha version, but when porting to the RP2040 it was converted to double indirection, as that worked with the crude LRU algorithm I used at the time.
However, double indirection is slower not only because it requires an extra load instruction, but also because it trashes the cache. On Windows, I get a 10% performance decrease with double indirection, which is pretty wild.
This commit is contained in:
SonoSooS 2024-06-03 06:05:03 +02:00
parent 93f95219fb
commit 136f4c1cfe
6 changed files with 45 additions and 3 deletions

View File

@ -29,6 +29,8 @@
//#define CONFIG_APU_MONO
#define CONFIG_APU_N_PER_TICK 8
#define CONFIG_APU_N_BUFSIZE 32768
//#define CONFIG_ENABLE_LRU
//#define CONFIG_USE_FLAT_ROM
//#define CONFIG_PPU_ACTION_ON_START
@ -201,6 +203,13 @@
#define CONFIG_ENABLE_LRU 1
#endif
/*
 * Normalise CONFIG_USE_FLAT_ROM to a plain 0/1 value so that it can be
 * tested with `#if CONFIG_USE_FLAT_ROM`: defined in any form -> 1,
 * not defined -> 0.
 */
#ifdef CONFIG_USE_FLAT_ROM
#undef CONFIG_USE_FLAT_ROM
#define CONFIG_USE_FLAT_ROM 1
#else
#define CONFIG_USE_FLAT_ROM 0
#endif
#ifndef CONFIG_PPU_ACTION_ON_START
#define CONFIG_PPU_ACTION_ON_START 0
#else

View File

@ -75,9 +75,14 @@ static inline const r8* pgf_resolve_ROM_internal(void* userdata, word addr, word
if(ud->mb->mi->ROM != NULL)
#endif
{
#if CONFIG_USE_FLAT_ROM
res = &ud->mb->mi->ROM[bank * 0x4000];
return &res[addr & 0x3FFF];
#else
res = ud->mb->mi->ROM[bank];
if(res != NULL)
return &res[addr & 0x3FFF];
#endif
}
#if CONFIG_ENABLE_LRU

4
mi.h
View File

@ -42,7 +42,11 @@ struct mb_mi_cache
struct mi_dispatch
{
#if CONFIG_USE_FLAT_ROM
const r8* __restrict ROM;
#else
const r8* __restrict const * __restrict ROM;
#endif
r8* __restrict WRAM;
r8* __restrict VRAM;
r8* __restrict SRAM;

View File

@ -58,18 +58,29 @@ PGB_FUNC ATTR_FORCE_NOINLINE __attribute__((optimize("Os"))) static const r8* __
{
if(r_addr < MICACHE_R_VALUE(0x4000))
{
#if CONFIG_USE_FLAT_ROM
ret = &mi->ROM[0];
return &ret[r_addr << MICACHE_R_BITS];
#else
ret = mi->ROM[0];
if(ret != NULL)
return &ret[r_addr << MICACHE_R_BITS];
#endif
}
else
{
#if CONFIG_USE_FLAT_ROM
r_addr &= MICACHE_R_VALUE(0x3FFF);
ret = &mi->ROM[mi->BANK_ROM << 14];
return &ret[r_addr << MICACHE_R_BITS];
#else
ret = mi->ROM[mi->BANK_ROM];
if(ret != NULL)
{
r_addr &= MICACHE_R_VALUE(0x3FFF);
return &ret[r_addr << MICACHE_R_BITS];
}
#endif
}
}

View File

@ -20,6 +20,7 @@
#define CONFIG_APU_N_PER_TICK 8
#define CONFIG_APU_N_BUFSIZE 32768
//#define CONFIG_ENABLE_LRU
//#define CONFIG_USE_FLAT_ROM
//#define CONFIG_PPU_ACTION_ON_START
// 0 - none

View File

@ -633,7 +633,10 @@ static struct mi_dispatch dis;
static struct mb_state mb;
static struct ppu_t pp;
static struct apu_t apu;
#if !CONFIG_USE_FLAT_ROM
static const r8* __restrict rommap[512];
#endif
static pixel_t* __restrict fb_lines[145];
@ -692,7 +695,10 @@ int main(int argc, char** argv)
audbuf = malloc(sizeof(*audbuf) * apu.outbuf_size);
apu.outbuf = audbuf;
#if !CONFIG_USE_FLAT_ROM
memset((void*)&rommap[0], 0, sizeof(rommap));
#endif
pixel_t* __restrict framebuffer = malloc(sizeof(pixel_t) * 256 * 145);
memset(framebuffer, 0, sizeof(pixel_t) * 256 * 145);
for(i = 0; i != 145; ++i)
@ -776,11 +782,17 @@ int main(int argc, char** argv)
fs = ff;
}
#if !CONFIG_USE_FLAT_ROM
for(i = 0; i != (fs >> 14); i++)
rommap[i] = &img[i << 14];
#endif
#if !CONFIG_ENABLE_LRU
dis.ROM = rommap;
#if CONFIG_USE_FLAT_ROM
dis.ROM = img;
#else
dis.ROM = rommap;
#endif
#else
dis.ROM = 0;
dis.dispatch_ROM_Bank = cb_ROM_LRU;
@ -800,7 +812,7 @@ int main(int argc, char** argv)
mb.mi = &dis;
micache_invalidate(&mb.micache);
mi_init_params_from_header(mb.mi, rommap[0]);
mi_init_params_from_header(mb.mi, img);
mb.IE = 0;
mb.IF = 0;
@ -929,7 +941,7 @@ int main(int argc, char** argv)
fread(&dis.WRAM[0x6000], 256, 1, f);
fclose(f);
memcpy(&dis.WRAM[0x6100], &dis.ROM[0][0x100], 0x50);
memcpy(&dis.WRAM[0x6100], &img[0x100], 0x50);
mb.micache.mc_read[0] = &dis.WRAM[0x6000];
mb.micache.mc_execute[0] = &dis.WRAM[0x6000];