Use NDMA to clear RAM and clear DSi registers

This commit is contained in:
RocketRobz 2024-02-27 13:55:38 -07:00
parent 5a69abae92
commit d030ddc16a
5 changed files with 426 additions and 75 deletions

View File

@ -1,70 +0,0 @@
/*-----------------------------------------------------------------
Copyright (C) 2005 Michael "Chishm" Chisholm
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
If you use this code, please give due credit and email me about your
project at chishm@hotmail.com
------------------------------------------------------------------*/
// arm7clearRAM: zero-fills two RAM regions with 32-byte store-multiple bursts.
// Takes no arguments and returns nothing; r0-r9 are saved on entry and
// restored before returning, so no register is changed for the caller.
.arm
.global arm7clearRAM
.type arm7clearRAM STT_FUNC
arm7clearRAM:
push {r0-r9}
// clear exclusive IWRAM
// 037F:8000 up to (not including) 0381:0000, total 96KiB
// r0-r7 hold the eight zero words that every stmia below writes
mov r0, #0
mov r1, #0
mov r2, #0
mov r3, #0
mov r4, #0
mov r5, #0
mov r6, #0
mov r7, #0
// r8 = write pointer, starts at 0x03800000 - 0x8000
mov r8, #0x03800000
sub r8, #0x00008000
// r9 = exclusive end address, 0x03800000 | 0x10000
mov r9, #0x03800000
orr r9, r9, #0x10000
clear_IWRAM_loop:
// store 32 bytes of zero and advance r8 by 32, until r8 reaches r9
stmia r8!, {r0, r1, r2, r3, r4, r5, r6, r7}
cmp r8, r9
blt clear_IWRAM_loop
// clear most of EWRAM - except after RAM end - 0xc000, which has the bootstub
mov r8, #0x02000000
// read the 32-bit register at 0x04004008 (presumably SCFG_EXT - TODO confirm)
// and test bit 15 to choose the end of RAM
ldr r9,=0x4004008
ldr r9,[r9]
ands r9,r9,#0x8000
bne dsi_mode
// bit 15 clear: RAM is treated as ending at 0x02400000 (4 MiB)
mov r9, #0x02400000
b ds_mode
dsi_mode:
// bit 15 set: RAM is treated as ending at 0x03000000 (16 MiB)
mov r9, #0x03000000
ds_mode:
// stop 0xC000 bytes short of the end so the bootstub area is left intact
sub r9, #0x0000c000
clear_EWRAM_loop:
// same 32-byte zero burst as above, from 0x02000000 up to r9
stmia r8!, {r0, r1, r2, r3, r4, r5, r6, r7}
cmp r8, r9
blt clear_EWRAM_loop
pop {r0-r9}
bx lr

View File

@ -47,13 +47,13 @@ Helpful information:
#define ARM7
#include <nds/arm7/audio.h>
#include <nds/arm7/sdmmc.h>
#include "dmaTwl.h"
#include "tonccpy.h"
#include "fat.h"
#include "dldi_patcher.h"
#include "card.h"
#include "boot.h"
void arm7clearRAM();
//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Important things
#define TEMP_MEM 0x02FFD000
@ -159,6 +159,15 @@ void passArgs_ARM7 (void) {
/*-------------------------------------------------------------------------
memset_addrs_arm7
Zero-fills the address range [start, end).
An empty or inverted range (end <= start) is ignored instead of being
turned into a huge or zero length.
When not in DSi mode and bit 16 of REG_SCFG_EXT is clear (presumably the
NDMA access enable bit - confirm against GBATEK), the CPU clears the range
with toncset(); otherwise NDMA channel 0 performs a blocking fill.
The DMA path transfers whole 32-bit words only, so the range length is
expected to be a multiple of 4.
-------------------------------------------------------------------------*/
void memset_addrs_arm7(u32 start, u32 end)
{
	// Unsigned comparison first: a plain (end - start) would wrap around for
	// an inverted range, and a zero length must never reach the DMA engine.
	if (end <= start) {
		return;
	}

	const u32 length = end - start;

	if (!dsiMode && !(REG_SCFG_EXT & BIT(16))) {
		toncset((u32*)start, 0, length);
		return;
	}

	dma_twlFill32(0, 0, (u32*)start, length);
}
/*-------------------------------------------------------------------------
resetMemory_ARM7
Clears all of the NDS's RAM that is visible to the ARM7
@ -182,6 +191,13 @@ void resetMemory_ARM7 (void)
}
REG_SOUNDCNT = 0;
REG_SNDCAP0CNT = 0;
REG_SNDCAP1CNT = 0;
REG_SNDCAP0DAD = 0;
REG_SNDCAP0LEN = 0;
REG_SNDCAP1DAD = 0;
REG_SNDCAP1LEN = 0;
//clear out ARM7 DMA channels and timers
for (i=0; i<4; i++) {
@ -192,12 +208,15 @@ void resetMemory_ARM7 (void)
TIMER_DATA(i) = 0;
}
arm7clearRAM();
memset_addrs_arm7(0x03800000 - 0x8000, 0x03800000 + (dsiMode ? 0xC000 : 0x10000)); // clear exclusive IWRAM
memset_addrs_arm7(0x02004000, (dsiMode ? 0x03000000 : 0x02400000) - 0xC000); // clear part of EWRAM - except before bootstub
REG_IE = 0;
REG_IF = ~0;
(*(vu32*)(0x04000000-4)) = 0; //IRQ_HANDLER ARM7 version
(*(vu32*)(0x04000000-8)) = ~0; //VBLANK_INTR_WAIT_FLAGS, ARM7 version
REG_AUXIE = 0;
REG_AUXIF = ~0;
*(vu32*)0x0380FFFC = 0; // IRQ_HANDLER ARM7 version
*(vu32*)0x0380FFF8 = 0; // VBLANK_INTR_WAIT_FLAGS, ARM7 version
REG_POWERCNT = 1; //turn off power to stuff
// Get settings location

223
bootloader/source/dmaTwl.h Normal file
View File

@ -0,0 +1,223 @@
#pragma once
// Complete register image for one NDMA channel.
// dma_twlSetParams() writes the fields, in declaration order, to the seven
// consecutive 32-bit registers of the selected channel (SAD .. CNT).
typedef struct
{
const void* src; // Source address; not used in fill mode
void* dst; // Destination address
u32 totalWordCount; // For auto-start mode without infinite repeat
u32 wordCount; // Number of words to transfer per start trigger
u32 blockInterval; // Sets prescaler and cycles of delay between physical blocks
u32 fillData; // For fill mode
u32 control; // Value for the channel control register; combine NDMACNT_* flags
} dma_twl_config_t;
// Global NDMA control register, shared by all four NDMA channels.
#define REG_NDMAGCNT (*(vu32*)0x04004100)

// Bits 16-19: number of cycles yielded to the dsp/cpu in round robin mode
// (see dma_twlSetRoundRobinArbitration). The encoded value n selects
// 2^(n-1) cycles, with 0 meaning no cycles.
#define NDMAGCNT_YIELD_CYCLES_0 (0 << 16)
#define NDMAGCNT_YIELD_CYCLES_1 (1 << 16)
#define NDMAGCNT_YIELD_CYCLES_2 (2 << 16)
#define NDMAGCNT_YIELD_CYCLES_4 (3 << 16)
#define NDMAGCNT_YIELD_CYCLES_8 (4 << 16)
#define NDMAGCNT_YIELD_CYCLES_16 (5 << 16)
#define NDMAGCNT_YIELD_CYCLES_32 (6 << 16)
#define NDMAGCNT_YIELD_CYCLES_64 (7 << 16)
#define NDMAGCNT_YIELD_CYCLES_128 (8 << 16)
#define NDMAGCNT_YIELD_CYCLES_256 (9 << 16)
#define NDMAGCNT_YIELD_CYCLES_512 (10 << 16)
#define NDMAGCNT_YIELD_CYCLES_1024 (11 << 16)
#define NDMAGCNT_YIELD_CYCLES_2048 (12 << 16)
#define NDMAGCNT_YIELD_CYCLES_4096 (13 << 16)
#define NDMAGCNT_YIELD_CYCLES_8192 (14 << 16)
#define NDMAGCNT_YIELD_CYCLES_16384 (15 << 16)

// Bit 31: arbitration scheme.
// Unsigned literals are used because shifting a signed 1 into bit 31
// overflows int, which is undefined behaviour in C.
#define NDMAGCNT_ARBITRATION_FIXED (0u << 31)
#define NDMAGCNT_ARBITRATION_ROUND_ROBIN (1u << 31)
// Per-channel NDMA registers. Each channel owns seven consecutive 32-bit
// registers (0x1C bytes), which is why the helpers below index a channel as
// (&REG_NDMA0SAD)[7 * dma]:
//   SAD   - source address
//   DAD   - destination address
//   TCNT  - total word count
//   WCNT  - word count per start trigger
//   BCNT  - block interval / prescaler
//   FDATA - fill data
//   CNT   - control

// Channel 0
#define REG_NDMA0SAD (*(vu32*)0x04004104)
#define REG_NDMA0DAD (*(vu32*)0x04004108)
#define REG_NDMA0TCNT (*(vu32*)0x0400410C)
#define REG_NDMA0WCNT (*(vu32*)0x04004110)
#define REG_NDMA0BCNT (*(vu32*)0x04004114)
#define REG_NDMA0FDATA (*(vu32*)0x04004118)
#define REG_NDMA0CNT (*(vu32*)0x0400411C)
// Channel 1
#define REG_NDMA1SAD (*(vu32*)0x04004120)
#define REG_NDMA1DAD (*(vu32*)0x04004124)
#define REG_NDMA1TCNT (*(vu32*)0x04004128)
#define REG_NDMA1WCNT (*(vu32*)0x0400412C)
#define REG_NDMA1BCNT (*(vu32*)0x04004130)
#define REG_NDMA1FDATA (*(vu32*)0x04004134)
#define REG_NDMA1CNT (*(vu32*)0x04004138)
// Channel 2
#define REG_NDMA2SAD (*(vu32*)0x0400413C)
#define REG_NDMA2DAD (*(vu32*)0x04004140)
#define REG_NDMA2TCNT (*(vu32*)0x04004144)
#define REG_NDMA2WCNT (*(vu32*)0x04004148)
#define REG_NDMA2BCNT (*(vu32*)0x0400414C)
#define REG_NDMA2FDATA (*(vu32*)0x04004150)
#define REG_NDMA2CNT (*(vu32*)0x04004154)
// Channel 3
#define REG_NDMA3SAD (*(vu32*)0x04004158)
#define REG_NDMA3DAD (*(vu32*)0x0400415C)
#define REG_NDMA3TCNT (*(vu32*)0x04004160)
#define REG_NDMA3WCNT (*(vu32*)0x04004164)
#define REG_NDMA3BCNT (*(vu32*)0x04004168)
#define REG_NDMA3FDATA (*(vu32*)0x0400416C)
#define REG_NDMA3CNT (*(vu32*)0x04004170)
// Fields of the per-channel BCNT register (block interval).
// The interval value occupies the low bits and is passed through unchanged.
#define NDMABCNT_INTERVAL(x) (x)
// Prescaler selection, starting at bit 16; the suffix is the divider named
// by each constant.
#define NDMABCNT_PRESCALER_1 (0 << 16)
#define NDMABCNT_PRESCALER_4 (1 << 16)
#define NDMABCNT_PRESCALER_16 (2 << 16)
#define NDMABCNT_PRESCALER_64 (3 << 16)
// Fields of the per-channel CNT (control) register.

// Bits 10-11: how the destination address changes during a transfer.
#define NDMACNT_DST_MODE_INCREMENT (0 << 10)
#define NDMACNT_DST_MODE_DECREMENT (1 << 10)
#define NDMACNT_DST_MODE_FIXED (2 << 10)
// Bit 12: destination reload flag.
#define NDMACNT_DST_RELOAD (1 << 12)

// Bits 13-14: how the source address changes; FILLDATA takes the data from
// the channel's FDATA register instead of reading memory.
#define NDMACNT_SRC_MODE_INCREMENT (0 << 13)
#define NDMACNT_SRC_MODE_DECREMENT (1 << 13)
#define NDMACNT_SRC_MODE_FIXED (2 << 13)
#define NDMACNT_SRC_MODE_FILLDATA (3 << 13)
// Bit 15: source reload flag.
#define NDMACNT_SRC_RELOAD (1 << 15)

// Bits 16-19: physical block size; the suffix is the size named by each
// constant, and the encoded value n selects 2^n.
#define NDMACNT_PHYSICAL_COUNT_1 (0 << 16)
#define NDMACNT_PHYSICAL_COUNT_2 (1 << 16)
#define NDMACNT_PHYSICAL_COUNT_4 (2 << 16)
#define NDMACNT_PHYSICAL_COUNT_8 (3 << 16)
#define NDMACNT_PHYSICAL_COUNT_16 (4 << 16)
#define NDMACNT_PHYSICAL_COUNT_32 (5 << 16)
#define NDMACNT_PHYSICAL_COUNT_64 (6 << 16)
#define NDMACNT_PHYSICAL_COUNT_128 (7 << 16)
#define NDMACNT_PHYSICAL_COUNT_256 (8 << 16)
#define NDMACNT_PHYSICAL_COUNT_512 (9 << 16)
#define NDMACNT_PHYSICAL_COUNT_1024 (10 << 16)
#define NDMACNT_PHYSICAL_COUNT_2048 (11 << 16)
#define NDMACNT_PHYSICAL_COUNT_4096 (12 << 16)
#define NDMACNT_PHYSICAL_COUNT_8192 (13 << 16)
#define NDMACNT_PHYSICAL_COUNT_16384 (14 << 16)
#define NDMACNT_PHYSICAL_COUNT_32768 (15 << 16)

// Bits 24-27: start trigger selection. Values 0-6 are defined for both
// CPUs; values from 7 upwards have a different meaning per CPU and are only
// defined when the matching LIBTWL_ARM9 / LIBTWL_ARM7 macro is set.
#define NDMACNT_MODE_TIMER_0 (0 << 24)
#define NDMACNT_MODE_TIMER_1 (1 << 24)
#define NDMACNT_MODE_TIMER_2 (2 << 24)
#define NDMACNT_MODE_TIMER_3 (3 << 24)
#define NDMACNT_MODE_DS_SLOTA_ROM_XFER (4 << 24)
#define NDMACNT_MODE_DS_SLOTB_ROM_XFER (5 << 24)
#define NDMACNT_MODE_VBLANK (6 << 24)
#ifdef LIBTWL_ARM9
#define NDMACNT_MODE_HBLANK (7 << 24)
#define NDMACNT_MODE_DISPLAY (8 << 24)
#define NDMACNT_MODE_MMEM_DISP_FIFO (9 << 24)
#define NDMACNT_MODE_GX_FIFO (10 << 24)
#define NDMACNT_MODE_CAMERA (11 << 24)
#endif
#ifdef LIBTWL_ARM7
#define NDMACNT_MODE_WIFI (7 << 24)
#define NDMACNT_MODE_SDMMC (8 << 24)
#define NDMACNT_MODE_SDIO (9 << 24)
#define NDMACNT_MODE_AES_IN (10 << 24)
#define NDMACNT_MODE_AES_OUT (11 << 24)
#define NDMACNT_MODE_MIC (12 << 24)
#endif

// Bit 28: start immediately instead of waiting for a trigger.
#define NDMACNT_MODE_IMMEDIATE (1 << 28)
// Bit 29: repeat infinitely.
#define NDMACNT_REPEAT_INFINITELY (1 << 29)
// Bit 30: interrupt request flag.
#define NDMACNT_IRQ (1 << 30)
// Bit 31: channel enable; dma_twlWait() polls this bit until it reads zero.
// Unsigned literal because shifting a signed 1 into bit 31 overflows int,
// which is undefined behaviour in C.
#define NDMACNT_ENABLE (1u << 31)
#ifdef __cplusplus
extern "C" {
#endif
/// @brief Selects fixed-priority arbitration for the twl ndma channels.
/// Priority then runs from ndma0 (highest) down to ndma3 (lowest), as with
/// the nitro dma channels. Even ndma0 ranks below nitro dma channel 3.
/// While any ndma channel is active, neither the dsp nor the cpu can
/// access the bus.
/// The whole global control register is overwritten, so the yield cycle
/// field is cleared as well.
static inline void dma_twlSetFixedArbitration(void)
{
	REG_NDMAGCNT = (NDMAGCNT_ARBITRATION_FIXED);
}
/// @brief Selects round robin arbitration for the twl ndma channels.
/// Nitro dma channels keep their higher priority; the remaining bus time
/// is shared in the order ndma0, ndma1, ndma2, ndma3, dsp/cpu.
/// A candidate with no outstanding request is skipped, and the dsp is
/// served before the cpu (as usual). The number of cycles reserved for the
/// dsp/cpu slot is configurable.
/// @param yieldCycles Cycles handed to the dsp/cpu on their turn; nothing
/// is wasted when they have no request pending. Pass one of
/// NDMAGCNT_YIELD_CYCLES_*.
static inline void dma_twlSetRoundRobinArbitration(u32 yieldCycles)
{
	const u32 globalControl = yieldCycles | NDMAGCNT_ARBITRATION_ROUND_ROBIN;

	REG_NDMAGCNT = globalControl;
}
/// @brief Programs every register of one ndma channel from a config struct.
/// The registers are written in ascending address order, so the control
/// word (which may carry NDMACNT_ENABLE) is stored last, after all other
/// parameters are in place.
/// @param dma Channel index; used unchecked to locate the register block.
/// @param config Register values to apply.
static inline void dma_twlSetParams(int dma, const dma_twl_config_t* config)
{
	// Each channel has 7 consecutive 32-bit registers starting at its SAD.
	vu32* reg = &REG_NDMA0SAD + 7 * dma;

	*reg++ = (u32)config->src;          // SAD
	*reg++ = (u32)config->dst;          // DAD
	*reg++ = config->totalWordCount;    // TCNT
	*reg++ = config->wordCount;         // WCNT
	*reg++ = config->blockInterval;     // BCNT
	*reg++ = config->fillData;          // FDATA
	*reg = config->control;             // CNT
}
/// @brief Busy-waits until the given ndma channel is no longer enabled.
/// Returns immediately when the channel's NDMACNT_ENABLE bit already
/// reads as zero.
/// @param dma Channel index; used unchecked to locate the control register.
static inline void dma_twlWait(int dma)
{
	// Control registers of consecutive channels are 7 words apart.
	vu32* control = &REG_NDMA0CNT + 7 * dma;

	while ((*control & NDMACNT_ENABLE) != 0)
	{
		// spin; the register is volatile, so it is re-read every pass
	}
}
/// @brief Starts an immediate 32-bit word copy on an ndma channel and
/// returns without waiting for it to finish (see dma_twlWait).
/// @param dma Channel index; used unchecked to locate the register block.
/// @param src Source address.
/// @param dst Destination address.
/// @param length Size in bytes. Only whole words are transferred, so the
/// value is rounded down to a multiple of 4. When that leaves no words,
/// the channel is not started at all.
static inline void dma_twlCopy32Async(int dma, const void* src, void* dst, u32 length)
{
	const u32 wordCount = length >> 2;
	if (wordCount == 0)
	{
		// A word count of 0 must not be programmed: per GBATEK the hardware
		// reads it as the maximum block size, not as an empty transfer.
		return;
	}

	vu32* channel = &(&REG_NDMA0SAD)[7 * dma];
	channel[0] = (u32)src; //SAD
	channel[1] = (u32)dst; //DAD
	channel[3] = wordCount; //WCNT
	channel[4] = NDMABCNT_PRESCALER_1 | NDMABCNT_INTERVAL(0); //BCNT
	// Control is written last because setting NDMACNT_ENABLE starts the copy.
	channel[6] = NDMACNT_DST_MODE_INCREMENT | NDMACNT_SRC_MODE_INCREMENT |
		NDMACNT_PHYSICAL_COUNT_1 | NDMACNT_MODE_IMMEDIATE | NDMACNT_ENABLE;
}
/// @brief Blocking 32-bit word copy on an ndma channel.
/// Starts the transfer with dma_twlCopy32Async and then waits with
/// dma_twlWait until the channel is idle again.
/// @param dma Channel index.
/// @param src Source address.
/// @param dst Destination address.
/// @param length Size in bytes.
static inline void dma_twlCopy32(int dma, const void* src, void* dst, u32 length)
{
	// Kick off the transfer...
	dma_twlCopy32Async(dma, src, dst, length);

	// ...and hold the caller until the channel reports completion.
	dma_twlWait(dma);
}
/// @brief Starts an immediate 32-bit word fill on an ndma channel and
/// returns without waiting for it to finish (see dma_twlWait).
/// @param dma Channel index; used unchecked to locate the register block.
/// @param value Word written to every destination word.
/// @param dst Destination address.
/// @param length Size in bytes. Only whole words are filled, so the value
/// is rounded down to a multiple of 4. When that leaves no words, the
/// channel is not started at all.
static inline void dma_twlFill32Async(int dma, u32 value, void* dst, u32 length)
{
	const u32 wordCount = length >> 2;
	if (wordCount == 0)
	{
		// A word count of 0 must not be programmed: per GBATEK the hardware
		// reads it as the maximum block size, not as an empty transfer.
		return;
	}

	vu32* channel = &(&REG_NDMA0SAD)[7 * dma];
	channel[1] = (u32)dst; //DAD
	channel[3] = wordCount; //WCNT
	channel[4] = NDMABCNT_PRESCALER_1 | NDMABCNT_INTERVAL(0); //BCNT
	channel[5] = value; //FDATA
	// Control is written last because setting NDMACNT_ENABLE starts the fill.
	channel[6] = NDMACNT_DST_MODE_INCREMENT | NDMACNT_SRC_MODE_FILLDATA |
		NDMACNT_PHYSICAL_COUNT_1 | NDMACNT_MODE_IMMEDIATE | NDMACNT_ENABLE;
}
/// @brief Blocking 32-bit word fill on an ndma channel.
/// Starts the fill with dma_twlFill32Async and then waits with
/// dma_twlWait until the channel is idle again.
/// @param dma Channel index.
/// @param value Word written to every destination word.
/// @param dst Destination address.
/// @param length Size in bytes.
static inline void dma_twlFill32(int dma, u32 value, void* dst, u32 length)
{
	// Kick off the fill...
	dma_twlFill32Async(dma, value, dst, length);

	// ...and hold the caller until the channel reports completion.
	dma_twlWait(dma);
}
#ifdef __cplusplus
}
#endif

136
bootloader/source/tonccpy.c Normal file
View File

@ -0,0 +1,136 @@
#include "tonccpy.h"
//# tonccpy.c
//! VRAM-safe cpy.
/*! This version mimics memcpy in functionality, with
the benefit of working for VRAM as well. It is also
slightly faster than the original memcpy, but faster
implementations can be made.
Every store is 32 or 16 bits wide; a lone byte is merged into
its containing halfword with a read-modify-write, so no 8-bit
write is ever issued. Halfwords are assembled low byte first
(little-endian layout). Nothing is returned, and the call is a
no-op when size is 0 or either pointer is null.
\param dst Destination pointer.
\param src Source pointer.
\param size Copy-length in bytes.
\note The pointers and size need not be word-aligned.
*/
void tonccpy(void *dst, const void *src, uint size)
{
if(size==0 || dst==0 || src==0)
return;
uint count;
u16 *dst16; // hword destination
u8 *src8; // byte source
// Ideal case: copy by 4x words. Leaves tail for later.
// Taken only when BOTH pointers are 4-byte aligned and at least one word remains.
if( ((u32)src|(u32)dst)%4==0 && size>=4)
{
u32 *src32= (u32*)src, *dst32= (u32*)dst;
// count = whole words; tmp = words left over after grouping them in fours.
count= size/4;
uint tmp= count&3;
count /= 4;
// Duff's Device, good friend!
// The switch jumps into the middle of the loop body to copy the tmp
// leftover words first; each later pass of the do/while copies four.
switch(tmp) {
do { *dst32++ = *src32++;
case 3: *dst32++ = *src32++;
case 2: *dst32++ = *src32++;
case 1: *dst32++ = *src32++;
case 0: ; } while(count--);
}
// Check for tail
// 0-3 bytes remain; they are handled by the halfword code below.
size &= 3;
if(size == 0)
return;
src8= (u8*)src32;
dst16= (u16*)dst32;
}
else // Unaligned.
{
// dstOfs is 1 when dst sits on an odd address; dst16 is rounded down to
// the halfword that contains it.
uint dstOfs= (u32)dst&1;
src8= (u8*)src;
dst16= (u16*)(dst-dstOfs);
// Head: 1 byte.
// Keep the existing low byte and place the first source byte in the high byte.
if(dstOfs != 0)
{
*dst16= (*dst16 & 0xFF) | *src8++<<8;
dst16++;
if(--size==0)
return;
}
}
// Unaligned main: copy by 2x byte.
// Source is read one byte at a time (it may be at any alignment); the
// destination is halfword aligned from here on.
count= size/2;
while(count--)
{
*dst16++ = src8[0] | src8[1]<<8;
src8 += 2;
}
// Tail: 1 byte.
// Replace only the low byte of the last halfword, preserving its high byte.
if(size&1)
*dst16= (*dst16 &~ 0xFF) | *src8;
}
//# toncset.c
//! VRAM-safe memset, internal routine.
/*! This version mimics memset in functionality, with
the benefit of working for VRAM as well. It is also
slightly faster than the original memset.
Every store is a full 32-bit word; partial words at the start
and end are merged with the existing memory contents through a
byte mask. Nothing is returned, and the call is a no-op when
size is 0 or dst is null.
\param dst Destination pointer.
\param fill Word to fill with.
\param size Fill-length in bytes.
\note The \a dst pointer and \a size need not be
word-aligned. In the case of unaligned fills, \a fill
will be masked off to match the situation.
*/
void __toncset(void *dst, u32 fill, uint size)
{
if(size==0 || dst==0)
return;
// left = byte offset of dst inside its word; dst32 = that word's address.
uint left= (u32)dst&3;
u32 *dst32= (u32*)(dst-left);
u32 count, mask;
// Unaligned head.
if(left != 0)
{
// Adjust for very small stint.
// The whole fill lies inside this one word and stops before its end:
// build a mask of size bytes shifted up to offset left.
if(left+size<4)
{
mask= BIT_MASK(size*8)<<(left*8);
*dst32= (*dst32 &~ mask) | (fill & mask);
return;
}
// Keep the low left bytes of the word, fill the rest of it.
mask= BIT_MASK(left*8);
*dst32= (*dst32 & mask) | (fill&~mask);
dst32++;
size -= 4-left;
}
// Main stint.
// count = whole words; tmp = words left over after grouping them in fours.
count= size/4;
uint tmp= count&3;
count /= 4;
// Duff's device: the switch jumps into the loop body to store the tmp
// leftover words first; each later pass of the do/while stores four.
switch(tmp) {
do { *dst32++ = fill;
case 3: *dst32++ = fill;
case 2: *dst32++ = fill;
case 1: *dst32++ = fill;
case 0: ; } while(count--);
}
// Tail
// 1-3 trailing bytes: overwrite only the low size bytes of the last word.
size &= 3;
if(size)
{
mask= BIT_MASK(size*8);
*dst32= (*dst32 &~ mask) | (fill & mask);
}
}

View File

@ -0,0 +1,43 @@
//# Stuff you may not have yet.
#ifndef TONCCPY_H
#define TONCCPY_H
#ifdef __cplusplus
extern "C" {
#endif
#include <nds/ndstypes.h>
//! Shorthand for unsigned int, used for sizes and counts in this module.
typedef unsigned int uint;
//! Mask with the lowest \a len bits set, for \a len from 0 to 31.
/*! The shift is done on an unsigned 1 so that len == 31 does not
	overflow a signed int. A \a len of 32 or more is still invalid.
*/
#define BIT_MASK(len) ( (1u<<(len))-1u )
//! Replicates a byte value into all four bytes of a word.
/*! Meant for \a x in the range 0..0xFF, e.g. 0xAB becomes 0xABABABAB.
	The value is widened to u32 before the 16-bit shift: shifting the
	int-promoted halfword would overflow a signed int for x >= 0x8000.
	\param x Byte value to replicate.
	\return The replicated 32-bit pattern.
*/
static inline u32 quad8(u16 x)
{
	x |= x<<8;
	return x | (u32)x<<16;
}
//# Declarations and inlines.
// Copy size bytes from src to dst without issuing 8-bit stores (defined in tonccpy.c).
void tonccpy(void *dst, const void *src, uint size);
// Fill size bytes at dst with the 32-bit pattern fill (defined in tonccpy.c).
void __toncset(void *dst, u32 fill, uint size);
// Typed wrappers around __toncset; size is counted in bytes, halfwords and
// words respectively. Their definitions follow below.
static inline void toncset(void *dst, u8 src, uint size);
static inline void toncset16(void *dst, u16 src, uint size);
static inline void toncset32(void *dst, u32 src, uint size);
//! VRAM-safe memset, byte version.
/*! Expands the byte to a full word with quad8() and hands the
	fill over to __toncset().
	\param dst Destination pointer.
	\param src Byte value to fill with.
	\param size Fill-length in bytes.
*/
static inline void toncset(void *dst, u8 src, uint size)
{
	__toncset(dst, quad8(src), size);
}
//! VRAM-safe memset, halfword version.
/*! Duplicates the halfword into both halves of a word and hands the
	fill over to __toncset(). The halfword is converted to unsigned
	before shifting: shifting the int-promoted value would overflow a
	signed int for src >= 0x8000.
	\param dst Destination pointer.
	\param src Halfword value to fill with.
	\param size Fill-length in halfwords.
*/
static inline void toncset16(void *dst, u16 src, uint size)
{
	__toncset(dst, src | ((uint)src << 16), size*2);
}
//! VRAM-safe memset, word version.
/*! Passes the word pattern straight to __toncset(), converting the
	word count to a byte count.
	\param dst Destination pointer.
	\param src Word value to fill with.
	\param size Fill-length in words.
*/
static inline void toncset32(void *dst, u32 src, uint size)
{
	__toncset(dst, src, size << 2);
}
#ifdef __cplusplus
}
#endif
#endif