+ start = sf->pixel_data;
+ data = start + 1;
+
+ for (y1 = h; y1 > 0; --y1) {
+ /* for really small things, just copy ourselves */
+ if (w < 8) {
+ for (x = w-1; x > 0; --x)
+ *(data++) = *start;
+ }
+ /* for w >= 8, use O(log n) memcpy calls instead */
+ else {
+ gint len = 4;
+ gint lenbytes = 4 * sizeof(RrPixel32);
+
+ /* copy the first 3 * 32 bits (3 words) ourselves - then we have
+ 3 + the original 1 = 4 words to make copies of at a time
+
+ this is faster than doing memcpy for 1 or 2 words at a time
+ */
+ for (x = 3; x > 0; --x)
+ *(data++) = *start;
+
+ for (x = w - 4; x > 0;) {
+ memcpy(data, start, lenbytes);
+ x -= len;
+ data += len;
+ len <<= 1;
+ lenbytes <<= 1;
+ if (len > x) {
+ len = x;
+ lenbytes = x * sizeof(RrPixel32);
+ }
+ }
+ }
+
+ start += w;
+ ++data;