XuluMenu/udiskloader/source/tonccpy.c
ApacheThunder 2550af0c99 Add stage2 loader
* Can now boot stage2 (in safe block mode) if holding L-Shoulder +
R-Shoulder + A + B + D-Pad Up on boot. This allows using the built-in stage2
USB update mode for "bootleg" style N-Cards that have XuluMenu
installed.

* This button combo will not do anything for regular N-Card users, as
they already have a proper stage2 section on NAND and will always end
up booting to USB update mode with this button combo before XuluMenu can
boot.
2024-10-26 00:18:57 -05:00

127 lines
3.5 KiB
C

#include <stddef.h>
#include <stdint.h>
#include "tonccpy.h"
//# tonccpy.c
//! VRAM-safe cpy.
/*! Copies \a size bytes from \a src to \a dst, mimicking memcpy.
	Every store to the destination is 16 or 32 bits wide, never a
	single byte, which is what makes the routine usable on VRAM.
	Partial halfwords at the start and end of the destination are
	updated with a read-modify-write so the neighbouring byte is
	preserved.
	\param dst	Destination pointer.
	\param src	Source pointer.
	\param size	Copy length in bytes.
	\note The pointers and size need not be word-aligned.
	\note Nothing is done when \a size is 0 or either pointer is NULL.
	\note Data is copied front to back; overlapping regions get no
		special treatment.
	\note Halfwords are assembled as (low byte | high byte << 8), which
		matches a byte-wise copy on a little-endian target only.
*/
void tonccpy(void *dst, const void *src, uint size) {
	if(size==0 || dst==NULL || src==NULL) { return; }

	uint count;
	u16 *dst16;			// hword destination
	const u8 *src8;		// byte source

	// Ideal case: both pointers word-aligned; copy by 4x words.
	// Leaves tail for later. The alignment test goes through uintptr_t
	// so the address is never truncated before its low bits are read.
	if( ((uintptr_t)src|(uintptr_t)dst)%4==0 && size>=4) {
		const u32 *src32= src;
		u32 *dst32= dst;

		count= size/4;			// total words to copy
		uint tmp= count&3;		// words handled by the partial first pass
		count /= 4;				// remaining full passes of 4 words

		// Duff's Device, good friend!
		// The "fallthrough" comments mark the intentional fall-through
		// for compilers that warn about it.
		switch(tmp) {
			do {	*dst32++ = *src32++;	// fallthrough
		case 3:		*dst32++ = *src32++;	// fallthrough
		case 2:		*dst32++ = *src32++;	// fallthrough
		case 1:		*dst32++ = *src32++;	// fallthrough
		case 0:		; } while(count--);
		}

		// Check for tail
		size &= 3;
		if(size == 0) { return; }

		src8= (const u8*)src32;
		dst16= (u16*)dst32;
	} else {
		// Unaligned.
		uint dstOfs= (uint)((uintptr_t)dst&1);
		src8= src;

		// Step back to the enclosing halfword. The subtraction is done
		// on a byte pointer because arithmetic on void* is not standard C.
		void *hwordBase= (u8*)dst-dstOfs;
		dst16= hwordBase;

		// Head: 1 byte. Keep the low byte already in memory and place
		// the first source byte in the high half.
		if(dstOfs != 0) {
			*dst16= (*dst16 & 0xFF) | *src8++<<8;
			dst16++;
			if(--size==0) { return; }
		}
	}

	// Unaligned main: copy by 2x byte.
	count= size/2;
	while(count--) {
		*dst16++ = src8[0] | src8[1]<<8;
		src8 += 2;
	}

	// Tail: 1 byte. Keep the high byte already in memory.
	if(size&1) { *dst16= (*dst16 &~ 0xFF) | *src8; }
}
//# toncset.c
//! VRAM-safe memset, internal routine.
/*! Fills \a size bytes at \a dst from the word \a fill, mimicking
	memset. Every store to the destination is a full 32-bit word;
	partially covered words at the start and end are updated with a
	read-modify-write so bytes outside the range are preserved.
	\param dst	Destination pointer.
	\param fill	Word to fill with.
	\param size	Fill-length in bytes.
	\note The \a dst pointer and \a size need not be
		word-aligned. In the case of unaligned fills, \a fill
		will be masked off to match the situation: each partially
		written word takes only the bits of \a fill selected by the
		mask, so \a fill is not shifted or replicated here.
	\note Nothing is done when \a size is 0 or \a dst is NULL.
	\note BIT_MASK is not defined in this file; it is presumably a
		"low n bits set" mask from tonccpy.h -- confirm there.
	\note NOTE(review): identifiers starting with a double underscore
		are reserved in C; the name is kept because callers use it.
*/
void __toncset(void *dst, u32 fill, uint size) {
	if(size==0 || dst==NULL) { return; }

	// Byte offset of dst inside its word. Taken via uintptr_t so the
	// address is never truncated before its low bits are read.
	uint left= (uint)((uintptr_t)dst&3);

	// Step back to the enclosing word. The subtraction is done on a
	// byte pointer because arithmetic on void* is not standard C.
	void *wordBase= (u8*)dst-left;
	u32 *dst32= wordBase;
	u32 count, mask;

	// Unaligned head.
	if(left != 0) {
		// Adjust for very small stint: the whole range lies inside
		// this one word and stops short of its end.
		if(left+size<4) {
			mask= BIT_MASK(size*8)<<(left*8);
			*dst32= (*dst32 &~ mask) | (fill & mask);
			return;
		}

		// Keep the bits below the offset, fill the rest of the word.
		mask= BIT_MASK(left*8);
		*dst32= (*dst32 & mask) | (fill&~mask);
		dst32++;
		size -= 4-left;
	}

	// Main stint.
	count= size/4;			// total whole words to fill
	uint tmp= count&3;		// words handled by the partial first pass
	count /= 4;				// remaining full passes of 4 words

	// Duff's Device; the "fallthrough" comments mark the intentional
	// fall-through for compilers that warn about it.
	switch(tmp) {
		do {	*dst32++ = fill;	// fallthrough
	case 3:		*dst32++ = fill;	// fallthrough
	case 2:		*dst32++ = fill;	// fallthrough
	case 1:		*dst32++ = fill;	// fallthrough
	case 0:		; } while(count--);
	}

	// Tail: fill the remaining low bits, keep the rest of the word.
	size &= 3;
	if(size) {
		mask= BIT_MASK(size*8);
		*dst32= (*dst32 &~ mask) | (fill & mask);
	}
}