linux.conf.au
Optimization

 	/* Copy the first CPU's data into the buffer. */
 	*start = buffer;
 	memcpy(buffer, IP_RT_ACCT_CPU(0), length);
 
 	/* Add the other CPUs in, one 32-bit word at a time */
 	for (i = 1; i < smp_num_cpus; i++) {
 		unsigned int j;
 		for (j = 0; j < length/4; j++)
 			((u32*)buffer)[j] += ((u32*)IP_RT_ACCT_CPU(i))[j];
 	}
 	return length;