Цитата(xelax @ Jun 25 2009, 17:54)

в зависимости от разрядности копируемых данных(байтное, 2байтное и 4байтное)?
5, 6, 7, 8, 9...... а еще начинающиеся с 0,1,2,.... а еще заканчивающиеся на 0,1,2,3....

Вот, когда реализация в IAR была какая-то тормозная, пользовал вместо библиотечной такое:
Код
//---------------------------------------------------------------------------
// void *(memcpy)(void *p1, const void *p2, size_t n)
// Copy char p2[n] to p1[n]
//
// In:    r0 = p1 (destination), r1 = p2 (source), r2 = n (byte count)
// Out:   r0 = p1 (never written; r12 is the running destination pointer)
// Clobb: r1, r2, r3, r12, flags
// Saved: lr (pushed on entry, then reused as a data scratch register),
//        r4-r10 (pushed only around the 8-word loop)
// Note:  Bytes are copied in ascending address order; overlapping buffers
//        are not specially handled.
//        n is a size_t, so every comparison and shift on it is unsigned.
//---------------------------------------------------------------------------
memcpy:
        teq     r2,#0                   // Is n == 0 ?
        bxeq    lr                      // If n == 0, return (r0 still holds p1)
        stmdb   sp!,{lr}                // Push return address; lr is scratch from here on
        mov     r12,r0                  // r12 = write pointer, r0 kept as return value
        cmp     r2,#8                   // Is buffer long or short?
        bls     byteserial              // n <= 8 (unsigned): plain byte loop
        sub     r3,r0,r1                // r3 = p1 - p2
        tst     r3,#3                   // Same offset inside a word?
        bne     byteserial              // No: pointers can never both be word-aligned

// Both buffers are similarly aligned WRT word boundaries.
// At least a portion of the data can be copied an entire
// word at a time, which is faster than copying bytes.
wordserial:
        ands    r3,r0,#3                // r3 = p1 & 3 (offset inside the word)
        beq     wordaligned             // Already word-aligned
        rsb     r3,r3,#4                // m = 4 - offset = no. of odd initial bytes (1..3)
        sub     r2,r2,r3                // n = n - m (n > 8 here, so no underflow)

// If the two buffers do not begin on word boundaries, begin
// by copying the odd bytes that precede the first full word.
preloop:
        ldrb    lr,[r1],#1              // Read byte from source
        subs    r3,r3,#1                // --m (decrement loop count)
        strb    lr,[r12],#1             // Write byte to destination (flags untouched)
        bne     preloop                 // Loop if more bytes to move

wordaligned:
#if WORDS8_TRANSFER == 1
        movs    r3,r2,lsr #5            // r3 = n / 32 = number of 8-word chunks
        beq     octsdone                // Jump if no 8-word chunks
        and     r2,r2,#0x1F             // n = n % 32 (bytes left after the chunks)
        stmdb   sp!,{r4-r10}            // Save registers on stack

// The buffers are long enough that we can transfer at least
// some portion of the data in 8-word chunks.
octloop:
        ldmia   r1!,{r4-r10,lr}         // Load 8 words from source
        subs    r3,r3,#1                // More 8-word chunks to move?
        stmia   r12!,{r4-r10,lr}        // Write 8 words to destination
        bne     octloop                 // Loop if more chunks
        ldmia   sp!,{r4-r10}            // Restore registers from stack
octsdone:
#endif
        movs    r3,r2,lsr #2            // r3 = n / 4 = whole words still to move
        beq     wordsdone               // Jump if no more whole words

// Copy as much of the remaining data as possible one word at
// a time.
wordloop2:
        ldr     lr,[r1],#4              // Read next word from source
        subs    r3,r3,#1                // Decrement word count
        str     lr,[r12],#4             // Write next word to destination
        bne     wordloop2               // Loop while more words to move

wordsdone:
        ands    r2,r2,#3                // n = n % 4: any last bytes to transfer?
        beq     theend                  // Return if already done

// Either the buffers do not end on a word boundary, or the
// word path was skipped altogether (short or mutually
// misaligned buffers). Copy the remaining n (> 0) bytes
// one at a time.
byteserial:
        ldrb    lr,[r1],#1              // Read byte from source
        subs    r2,r2,#1                // --n (decrement loop count)
        strb    lr,[r12],#1             // Write byte to destination
        bne     byteserial              // Loop if more bytes to move

theend:
        ldmia   sp!,{lr}                // Pop return address
        bx      lr                      // Return, r0 = p1
Цитата(xelax @ Jun 25 2009, 18:45)

но всё равно основное то копирование идёт пословно.
Зависит от реализации; пример, когда копирование идёт не только пословно, приведён выше (под ключом WORDS8_TRANSFER).