/* * ustrlen.c * Pieter Droogendijk (gin@binky.homeunix.org) * * As far as I can tell the fastest possible implementation of strlen(). * If anyone can improve it, I'll be glad to hear it. I also hope people will try * to, and send their findings to me (as well as the good people at GNU :) */ #include "ustring.h" /* * Possible ANSI violation? * This will make sure the magic spans far enough for 64 bit machines. */ static const longword submagic = 0x01010101 | (0x01010101<<16)<<16; static const longword andmagic = 0x80808080 | (0x80808080<<16)<<16; unsigned int ustrlen (str) const char *str; { const char *chptr = str; const longword *lwptr; longword lw; /* * First, we go through every character one by one until we hit the longword * boundary. We'll use the standard method, slightly optimized (no unnessacery * instruction cache clears because of unnessacery jumps). * This will never loop on a 32 bit system. */ for (;;) { if (!((longword)chptr & (sizeof (longword)-1))) break; if (!*chptr) return chptr-str; if (!((longword)++chptr & (sizeof (longword)-1))) break; if (!*chptr) return chptr-str; if (!((longword)++chptr & (sizeof (longword)-1))) break; if (!*chptr) return chptr-str; /* * Can't remove the last check, since 64 bit machines may need to loop. * Also, conditionally removing it will result in the optimizer making a mess. * Checks have been swapped here to remove a possible jump. */ if (!*++chptr) return chptr-str; if (!((longword)chptr & (sizeof (longword)-1))) break; } /* * Any valid ascii character will occupy only the least significant 7 bits. If * one were to substract '1' from any character but null, nothing special would * happen. However, if the character IS null, according to two's complement, * the all bits will become 1, including the 8th: * * 0-1 = -1 = ~1+1 = 0b11111110+1 = 0b11111111 * ^'high bit' * If, however, the character is an EXTENDED ascii character, then the 8th bit * is already set before subtracting, and we get a misfire. In a string * containing multiple 8-bit character codes, performance will drop * dramatically. * So every time we hit, check if any high bits where changed by the * subtraction. If none where, continue with the next longword. */ lwptr = (longword*) chptr; /* check string a longword at a time */ for (;;) { /* * Doing *++lwptr and simply using lwptr instead of (lwptr-1) in code ahead, * will add instructions here, to this performance-sensitive loop. Case of the * optimizer making a mess. * * We store the result of the subtraction, so the and can be performed as a * test, here as well as in code to come. I'm almost positive this can not be * improved any further. It takes four instructions to increment lwptr, * subtract submagic, store this value and AND it with andmagic. */ lw = *lwptr++-submagic; /* See if any high bits are set after subtracting. */ if (lw & andmagic) { /* * Where any of the bits changed by the subtraction? * If not, continue with the next cycle. * * This takes two instructions, an XOR and a test. It does have to fetch the * longword from memory however... But it's for the best, since otherwise it * would have to be stored in the code above, resulting in an extra * instruction. */ if (!((lw^*(lwptr-1))&andmagic)) continue; /* * At least one of the high bits changed; this means one of the characters * was null. */ chptr = (const char*)(lwptr-1); if (!*chptr) return chptr-str; chptr++; if (!*chptr) return chptr-str; chptr++; if (!*chptr) return chptr-str; chptr++; /* For 64 bit machines */ if (sizeof (longword) > 4) { if (!*chptr) return chptr-str; chptr++; if (!*chptr) return chptr-str; chptr++; if (!*chptr) return chptr-str; chptr++; if (!*chptr) return chptr-str; chptr++; } /* Since all other checks failed, it must be the last one. */ return chptr-str; } } }