mirror of
https://github.com/ApacheThunder/NTR_Launcher.git
synced 2025-06-19 03:25:38 -04:00

* Overhaul using improved bootloader from NitrohaxTWL. * Alternate bootloader removed as it appears unneeded. * Improved debug output using text instead of colored boxes from NitroHax's original bootloader design. Debug mode will now show enhanced status readouts of the cart load process. Text is displayed in red if an error occurs (visible even when debug mode is off if an error occurred).
127 lines
3.5 KiB
C
127 lines
3.5 KiB
C
#include <stddef.h>
|
|
#include "tonccpy.h"
|
|
//# tonccpy.c
|
|
|
|
//! VRAM-safe cpy.
|
|
/*! This version mimics memcpy in functionality, with
|
|
the benefit of working for VRAM as well. It is also
|
|
slightly faster than the original memcpy, but faster
|
|
implementations can be made.
|
|
\param dst Destination pointer.
|
|
\param src Source pointer.
|
|
\param size Fill-length in bytes.
|
|
\note The pointers and size need not be word-aligned.
|
|
*/
|
|
void tonccpy(void *dst, const void *src, uint size) {
|
|
|
|
if(size==0 || dst==NULL || src==NULL) { return; }
|
|
|
|
uint count;
|
|
u16 *dst16; // hword destination
|
|
u8 *src8; // byte source
|
|
|
|
// Ideal case: copy by 4x words. Leaves tail for later.
|
|
if( ((u32)src|(u32)dst)%4==0 && size>=4) {
|
|
u32 *src32= (u32*)src, *dst32= (u32*)dst;
|
|
|
|
count= size/4;
|
|
uint tmp= count&3;
|
|
count /= 4;
|
|
|
|
// Duff's Device, good friend!
|
|
// Added fall through attribute to silance the compiler about this. ;)
|
|
switch(tmp) {
|
|
do { *dst32++ = *src32++; // fallthrough
|
|
case 3: *dst32++ = *src32++; // fallthrough
|
|
case 2: *dst32++ = *src32++; // fallthrough
|
|
case 1: *dst32++ = *src32++; // fallthrough
|
|
case 0: ;} while(count--); // fallthrough
|
|
}
|
|
|
|
// Check for tail
|
|
size &= 3;
|
|
if(size == 0) { return; }
|
|
src8= (u8*)src32;
|
|
dst16= (u16*)dst32;
|
|
} else {
|
|
// Unaligned.
|
|
uint dstOfs= (u32)dst&1;
|
|
src8= (u8*)src;
|
|
dst16= (u16*)(dst-dstOfs);
|
|
|
|
// Head: 1 byte.
|
|
if(dstOfs != 0) {
|
|
*dst16= (*dst16 & 0xFF) | *src8++<<8;
|
|
dst16++;
|
|
if(--size==0) { return; }
|
|
}
|
|
}
|
|
|
|
// Unaligned main: copy by 2x byte.
|
|
count= size/2;
|
|
while(count--) {
|
|
*dst16++ = src8[0] | src8[1]<<8;
|
|
src8 += 2;
|
|
}
|
|
|
|
// Tail: 1 byte.
|
|
if(size&1) { *dst16= (*dst16 &~ 0xFF) | *src8; }
|
|
}
|
|
//# toncset.c
|
|
|
|
//! VRAM-safe memset, internal routine.
|
|
/*! This version mimics memset in functionality, with
|
|
the benefit of working for VRAM as well. It is also
|
|
slightly faster than the original memset.
|
|
\param dst Destination pointer.
|
|
\param fill Word to fill with.
|
|
\param size Fill-length in bytes.
|
|
\note The \a dst pointer and \a size need not be
|
|
word-aligned. In the case of unaligned fills, \a fill
|
|
will be masked off to match the situation.
|
|
*/
|
|
void __toncset(void *dst, u32 fill, uint size) {
|
|
if(size==0 || dst==NULL) { return; }
|
|
|
|
uint left= (u32)dst&3;
|
|
u32 *dst32= (u32*)(dst-left);
|
|
u32 count, mask;
|
|
|
|
// Unaligned head.
|
|
if(left != 0) {
|
|
// Adjust for very small stint.
|
|
if(left+size<4) {
|
|
mask= BIT_MASK(size*8)<<(left*8);
|
|
*dst32= (*dst32 &~ mask) | (fill & mask);
|
|
return;
|
|
}
|
|
|
|
mask= BIT_MASK(left*8);
|
|
*dst32= (*dst32 & mask) | (fill&~mask);
|
|
dst32++;
|
|
size -= 4-left;
|
|
}
|
|
|
|
// Main stint.
|
|
count= size/4;
|
|
uint tmp= count&3;
|
|
count /= 4;
|
|
|
|
// Added fall through attribute to silance the compiler about this. ;)
|
|
switch(tmp) {
|
|
do { *dst32++ = fill; // fallthrough
|
|
case 3: *dst32++ = fill; // fallthrough
|
|
case 2: *dst32++ = fill; // fallthrough
|
|
case 1: *dst32++ = fill; // fallthrough
|
|
case 0: ;} while(count--); // fallthrough
|
|
}
|
|
|
|
// Tail
|
|
size &= 3;
|
|
if(size) {
|
|
mask= BIT_MASK(size*8);
|
|
*dst32= (*dst32 &~ mask) | (fill & mask);
|
|
}
|
|
}
|
|
|