/* Ask for the POSIX.1b declarations (clock_gettime, CLOCK_PROCESS_CPUTIME_ID)
   even in strict ISO builds such as -std=c11.  Must precede every #include. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 199309L
#endif

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Forward declarations of the spread variants defined below.
   Each carries the GCC/Clang `noinline` attribute, which forbids inlining the
   function into its callers.
   NOTE(review): presumably this keeps every call in the timing loop a real
   call so the variants can be compared fairly -- confirm intent. */
uint32_t spread(uint32_t v) __attribute__((noinline));
uint32_t spread2(uint32_t v) __attribute__((noinline));
uint32_t spread3(uint32_t v) __attribute__((noinline));
uint32_t spread4(uint32_t v) __attribute__((noinline));

/*
 * Replicate the low byte of v into all four bytes of the result,
 * e.g. 0xAB -> 0xABABABAB.  Bits 8..31 of the input are ignored.
 *
 * On ARM targets the work is done by three hand-written instructions
 * (mask, OR with self shifted by 8, OR with self shifted by 16).  On any
 * other target an equivalent C sequence is used so the file still builds.
 */
uint32_t spread4(uint32_t v)
{
#if defined(__aarch64__)
   /* %w0 selects the 32-bit view of the register holding v. */
   __asm__ volatile("and %w0, %w0, #255\n\t"
                    "orr %w0, %w0, %w0, lsl #8\n\t"
                    "orr %w0, %w0, %w0, lsl #16\n\t"
                    : "+r" (v));
#elif defined(__arm__)
   /* "+r": the instructions both read and overwrite v, so it has to be an
      in/out operand; an input-only operand must never be modified.
      volatile keeps the statement from being dropped or merged. */
   __asm__ volatile("and %0, %0, #255\n\t"
                    "orr %0, %0, %0, lsl #8\n\t"
                    "orr %0, %0, %0, lsl #16\n\t"
                    : "+r" (v));
#else
   /* Portable fallback.  The empty asm matches the other spread variants in
      this file. */
   __asm__ volatile("");
   v &= 0xFF;
   v |= (v << 8);
   v |= (v << 16);
#endif
   return v;
}

/*
 * Replicate the low byte of v into all four bytes of the result,
 * e.g. 0xAB -> 0xABABABAB.
 *
 * This variant moves the byte to the top of the word first and then fills
 * downwards with right shifts.
 */
uint32_t spread(uint32_t v)
{
   /* Empty asm statement: emits no instructions.  Spelled __asm__ because the
      plain `asm` keyword is unavailable in strict ISO modes (-std=c11).
      NOTE(review): presumably here so the optimizer cannot treat the function
      as side-effect free and delete calls whose result is unused -- confirm. */
   __asm__ volatile("");

   v <<= 24;            /* low byte to bits 24..31; the upper 24 input bits fall off */
   v |= (v >> 8);       /* bits 16..31 now hold two copies */
   v |= (v >> 16);      /* all four bytes now hold the byte */

   return v;
}

/*
 * Replicate the low byte of v into all four bytes of the result,
 * e.g. 0xAB -> 0xABABABAB.
 *
 * This variant masks the byte in place and then fills upwards with left
 * shifts.
 */
uint32_t spread2(uint32_t v)
{
   /* Empty asm statement: emits no instructions.  Spelled __asm__ because the
      plain `asm` keyword is unavailable in strict ISO modes (-std=c11).
      NOTE(review): presumably here so the optimizer cannot treat the function
      as side-effect free and delete calls whose result is unused -- confirm. */
   __asm__ volatile("");

   v &= 0xFF;           /* keep only the low byte */
   v |= (v << 8);       /* bits 0..15 now hold two copies */
   v |= (v << 16);      /* all four bytes now hold the byte */

   return v;
}

/*
 * Return v unchanged.
 * NOTE(review): looks like a baseline for measuring bare call overhead
 * against the other spread variants -- confirm.
 */
uint32_t spread3(uint32_t v)
{
   /* Empty asm statement: emits no instructions.  Spelled __asm__ because the
      plain `asm` keyword is unavailable in strict ISO modes (-std=c11). */
   __asm__ volatile("");
   return v;
}

/*
 * Benchmark driver: calls spread4() four times per iteration of a
 * 10000 x 10000 nested loop, then prints the process CPU time consumed by
 * the loop, in seconds with six decimals, on its own line.
 *
 * Returns 0 on success, 1 if the CPU-time clock cannot be read.
 */
int main(void)
{
   enum { OUTER_ITERATIONS = 10000, INNER_ITERATIONS = 10000 };
   struct timespec start, end;

   if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start) != 0)
     {
        perror("clock_gettime");
        return 1;
     }

   for (int i = 0; i < OUTER_ITERATIONS; i++)
     {
        for (int j = 0; j < INNER_ITERATIONS; j++)
          {
             /* Results are deliberately discarded; only the time matters. */
             spread4(123);
             spread4(56);
             spread4(45);
             spread4(99);
          }
     }

   if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &end) != 0)
     {
        perror("clock_gettime");
        return 1;
     }

   /* The nanosecond difference may be negative; the signed subtraction
      followed by the addition still gives the correct total. */
   double elapsed = (double)(end.tv_sec - start.tv_sec)
                  + (double)(end.tv_nsec - start.tv_nsec) * 1e-9;
   printf("%.6f\n", elapsed);
   return 0;
}
