
/* Based on functions in linux/string.h */

#include <linux/types.h>	/* for size_t */


/*
 * Copy n bytes from 'from' to 'to' with the x86 string instructions and
 * return 'to'.  The regions must not overlap.
 *
 * At most one leading byte is copied so that the destination becomes
 * 16-bit aligned, the bulk is moved as longwords, and the 0-3 remaining
 * bytes are copied as one word and/or one byte.
 *
 * The registers modified by the string instructions are declared as
 * in/out operands (GCC does not allow a clobber to overlap an operand),
 * and the "memory" clobber tells the compiler that memory is read and
 * written behind its back.
 */
static inline void * __memcpy(void * to, const void * from, size_t n)
{
	void *dst = to;
	const void *src = from;
	/*
	 * Head: one byte when the destination is odd.  Never take it from an
	 * empty copy, otherwise the remaining length would wrap around.
	 */
	size_t cnt = (n != 0 && ((unsigned long) to & 1)) ? 1 : 0;
	size_t rest = n - cnt;		/* bytes left after the head */

	__asm__ __volatile__("cld\n\t"
		"rep ; movsb\n\t"		/* 16-bit align destination */
		"mov %3,%2\n\t"
		"shr $2,%2\n\t"
		"rep ; movsl\n\t"		/* copy longwords */
		"testb $2,%%dl\n\t"		/* word first: keeps its store aligned */
		"je 1f\n\t"
		"movsw\n"
		"1:\ttestb $1,%%dl\n\t"
		"je 2f\n\t"
		"movsb\n"
		"2:"
		: "+D" (dst), "+S" (src), "+c" (cnt)
		: "d" (rest)
		: "memory", "cc");
	return (to);
}

/*
 * Copy n bytes from 'from' to 'to' strictly one byte at a time
 * ("rep movsb") and return 'to'.  The regions must not overlap.
 *
 * The count and both pointers are in/out operands because the
 * instruction updates ecx, edi and esi; GCC does not accept clobbers that
 * overlap operands.  The "memory" clobber keeps the compiler from caching
 * or reordering accesses to the copied regions around the asm.
 */
static inline void * __memcpyb(void * to, const void * from, size_t n)
{
	void *dst = to;
	const void *src = from;
	size_t cnt = n;

	__asm__ __volatile__("cld\n\t"
		"rep ; movsb"
		: "+D" (dst), "+S" (src), "+c" (cnt)
		:
		: "memory", "cc");
	return (to);
}

/*
 * Fill 'count' bytes starting at 's' with the byte 'c', one byte at a
 * time ("rep stosb"), and return 's'.
 *
 * edi and ecx are modified by the instruction, so they are in/out
 * operands rather than clobbers (GCC does not allow a clobber to overlap
 * an operand).  The "memory" clobber tells the compiler that the asm
 * writes memory.
 */
static inline void * __memsetb(void * s,char c,size_t count)
{
	void *dst = s;
	size_t cnt = count;

	__asm__ __volatile__("cld\n\t"
		"rep\n\t"
		"stosb"
		: "+D" (dst), "+c" (cnt)
		: "a" (c)
		: "memory", "cc");
	return s;
}

/*
 * Fill 'count' bytes starting at 's' with the byte 'c' and return 's'.
 *
 * Fills shorter than 12 bytes are written with a single "rep stosb".
 * Longer fills write 0-3 head bytes to bring the destination to a
 * longword boundary, then whole longwords holding four copies of the
 * byte, then the 0-3 trailing bytes.
 *
 * The head length is derived from the destination address, so the
 * longword stores really are aligned.  The bookkeeping is done in C; the
 * asm only runs the string instructions, with the registers they modify
 * declared as in/out operands and a "memory" clobber for the stores.
 */
static inline void * __memset(void * s,char c,size_t count)
{
	void *dst = s;
	unsigned int fill = (unsigned char) c;
	size_t cnt = count;		/* length of the first byte-wise run */
	size_t rest = 0;		/* bytes left after that run */

	if (count >= 12) {
		fill |= fill << 8;	/* c |= c << 8 */
		fill |= fill << 16;	/* c |= c << 16 */
		cnt = (0 - (unsigned long) s) & 3;	/* (-s % 4) */
		rest = count - cnt;
	}

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosb\n\t"		/* short fill, or align to longword boundary */

		"mov %3,%1\n\t"
		"shr $2,%1\n\t"
		"rep ; stosl\n\t"		/* fill longwords */

		"mov %3,%1\n\t"
		"and $3,%1\n\t"
		"rep ; stosb"			/* fill last few bytes */
		: "+D" (dst), "+c" (cnt)
		: "a" (fill), "d" (rest)
		: "memory", "cc");
	return s;
}

/*
 * Fill 'count' 16-bit pixels starting at 's' with the value 'c' and
 * return 's'.
 *
 * count is in 16-bit pixels.
 * s is assumed to be 16-bit aligned.
 *
 * Fills shorter than 12 pixels are written with a single "rep stosw".
 * Longer fills write one leading pixel if that is needed to reach a
 * longword boundary, then longwords holding two copies of the pixel,
 * then the one pixel that may be left over.
 *
 * The registers modified by the string instructions are in/out operands
 * (GCC does not allow a clobber to overlap an operand) and the "memory"
 * clobber tells the compiler that the asm writes memory.
 */
static inline void * __memset2(void * s,short c,size_t count)
{
	void *dst = s;
	unsigned int fill = (unsigned short) c;
	size_t cnt = count;		/* length of the first word-wise run */
	size_t rest = 0;		/* pixels left after that run */

	if (count >= 12) {
		fill |= fill << 16;	/* c |= c << 16 */
		cnt = ((unsigned long) s & 2) ? 1 : 0;	/* s & 2 */
		rest = count - cnt;
	}

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosw\n\t"		/* short fill, or align to longword boundary */

		"mov %3,%1\n\t"
		"shr $1,%1\n\t"
		"rep ; stosl\n\t"		/* fill longwords */

		"mov %3,%1\n\t"
		"and $1,%1\n\t"			/* one 16-bit word left? */
		"rep ; stosw"
		: "+D" (dst), "+c" (cnt)
		: "a" (fill), "d" (rest)
		: "memory", "cc");
	return s;
}

/*
 * Fill 'count' 24-bit pixels starting at 's' and return 's'.
 *
 * count is in 24-bit pixels (3 bytes per pixel).
 *
 * Each pixel receives the three low-order bytes of 'c', least
 * significant byte first; bits 24-31 of 'c' are ignored.
 *
 * This is the per-pixel loop, written in C.  The former assembler version
 * jumped unconditionally to that loop, so its unrolled longword path never
 * ran.  That path also had broken jump targets and wrong rotated fill
 * patterns, so it has been dropped.
 */
static inline void * __memset3(void * s,int c,size_t count)
{
	unsigned char *dst = s;
	const unsigned int pixel = (unsigned int) c;
	const unsigned char byte0 = (unsigned char) (pixel & 0xff);
	const unsigned char byte1 = (unsigned char) ((pixel >> 8) & 0xff);
	const unsigned char byte2 = (unsigned char) ((pixel >> 16) & 0xff);
	size_t i;

	for (i = 0; i < count; i++) {
		dst[0] = byte0;
		dst[1] = byte1;
		dst[2] = byte2;
		dst += 3;
	}
	return s;
}
