Context Navigation

Back to Ticket #8656

Ticket #8656: diff

File diff, 7.8 KB (added by SF/robinwatts, 18 years ago)
ARM version of rotation functions, plus C changes to call it

src/video/wingapi/SDL_gapivideo.

-              old
+              new
         return;
+}
+#ifdef ARM /* Rotating 16-bit blit routine implemented in ARM assembly. */
+extern void ARM_rotate(unsigned char *dstptr,       /* first destination pixel to write */
+                       unsigned char *srcPtr,       /* first source pixel to read */
+                       int            w,            /* pixels per destination line (must be >= 1) */
+                       int            h,            /* number of destination lines (must be >= 1) */
+                       int            dstLineStep,  /* bytes between destination lines */
+                       int            srcPixStep,   /* bytes between source pixels along one destination line */
+                       int            srcLineStep); /* bytes between source pixels on successive destination lines */
+#endif
 static void GAPI_UpdateRects(_THIS, int numrects, SDL_Rect *rects)
+{
         static int height=0, width=0, w4=0, w4loop=0, w2=0, w1=0, x1=0, h4=0, h4loop=0, h2=0, h1=0, dstPixelstep2=0, dstLinestep2=0, aligned=0, srcPixelstep2=0, dstLinestep2c=0;
 …
         dstLinestep2c = dstLinestep2 - dstPixelstep;
         while (numrects--) {
+#ifdef ARM
+                /* In practice, if we are rotating, we should always fall into one of these two cases... */
+                if ((rotation == SDL_ROTATE_RIGHT) && (!convertPalette))
+                {
+                    /* Typical values: sps=2 sls=480 dps=480 dls=-2
+                     * We are going to rotate that to be:
+                     * sps = -480 sls=2 dps=2 dls=480
+                     *
+                     * so sps = -OLDsls
+                     *    sls =  OLDsps
+                     *    dps = -OLDdls
+                     *    dls =  OLDdps
+                     */
+                    /* Could do: assert(dstLinestep == -2); */
+                    ARM_rotate(screenBuffer + startOffset - 2 + (rects->x * dstPixelstep) + ((rects->y+rects->h-1) * dstLinestep),
+                               (unsigned char*)gapiBuffer     + (rects->x * srcPixelstep) + ((rects->y+rects->h-1) * srcLinestep),
+                               rects->h, rects->w,
+                               dstPixelstep,  /*  480 */
+                               -srcLinestep,  /* -480 */
+                               srcPixelstep); /*    2 */
+                    rects++;
+                    continue;
+                }
+                else if ((rotation == SDL_ROTATE_LEFT) && (!convertPalette))
+                {
+                    /* Typical values: sps=2 sls=640 dps=-480 dls=2
+                     * We are going to rotate that to be:
+                     * sps=640 sls=-2 dps=2 dls=480
+                     *
+                     * so sps =  OLDsls
+                     *    sls = -OLDsps
+                     *    dps =  OLDdls
+                     *    dls = -OLDdps
+                     */
+                    /* Could do: assert(dstLinestep == 2); */
+                    ARM_rotate(screenBuffer + startOffset + ((rects->x+rects->w-1) * dstPixelstep) + (rects->y * dstLinestep),
+                               (unsigned char*)gapiBuffer + ((rects->x+rects->w-1) * srcPixelstep) + (rects->y * srcLinestep),
+                               rects->h, rects->w,
+                               -dstPixelstep,  /* 480 */
+                               srcLinestep,    /* 640 */
+                               -srcPixelstep); /*  -2 */
+                    rects++;
+                    continue;
+                }
+#endif /* ARM */
                 destPointer = screenBuffer + startOffset + (rects->x * dstPixelstep) + (rects->y * dstLinestep);
                 srcPointer = (unsigned char*)gapiBuffer + (rects->x * srcPixelstep) + (rects->y * srcLinestep);
                 width = rects->w;

src/video/wingapi/ARM_rot.

-              old
+              new
+@ ARM code version of rotation routines.
+@
+@ @author Robin Watts (robin@wss.co.uk)
+@
+@ When rotating a block of memory to the screen, the key facts to bear in
+@ mind are:
+@  * Screen memory is uncached - therefore to get best performance we want
+@    to write runs of horizontal pixels so the write buffer can kick in and
+@    merge the buffered writes.
+@  * Reading large numbers of pixels to be written out horizontally, pulls
+@    in lots of cache lines. We need to be careful not to cache bust here.
+@  * The 16 or 32 way set associativity for screens can hurt us here too.
+@
+@ A good compromise is therefore to write out in bursts of 4 horizontal
+@ pixels at once.
+        .text
+        .global ARM_rotate
+        @ Copies a block of w*h 16-bit pixels from srcPtr to destPtr.
+        @ Source pixels are addressed by srcPixStep/srcLineStep (in bytes).
+        @ Destination pixels within a line are always 2 bytes apart and
+        @ destination lines are dstLineStep bytes apart. Pixels are copied
+        @ unchanged - no palette conversion is done by this routine.
+        @
+        @ w and h must both be at least 1: every loop below tests its
+        @ counter only after the first pass has been done.
+ARM_rotate:
+        @ r0 = destPtr
+        @ r1 = srcPtr
+        @ r2 = w
+        @ r3 = h
+        @ r4 = dstLineStep
+        @ r5 = srcPixStep  - e.g. 480
+        @ r6 = srcLineStep - e.g. 2 or -2
+        MOV     r12,r13         @ r12 = entry stack pointer (-> stacked args)
+        STMFD   r13!,{r4-r11,r14}
+        LDMFD   r12,{r4-r6}     @ fetch dstLineStep, srcPixStep, srcLineStep
+        @ For simplicity, we will think about width/height in terms of
+        @ destination.
+        AND     r7,r0,#6
+        MOV     r7,r7,LSR #1
+        AND     r7,r7,#3        @ r7 = Number over a multiple of 4 we start on
+        RSB     r7,r7,#4        @ r7 = Number to do first time (1..4), so that
+                                @ later stripes start on an 8 byte boundary.
+rotate_loop:
+        CMP     r7,r2
+        MOVGT   r7,r2           @ r7 = width to do this time
+        SUBS    r7,r7,#4        @ r7 = width-4
+        BLT     thin            @ less than 4 pixels wide
+        SUB     r8,r4,#6        @ r8 = dstLineStep-6
+x_loop_4:
+        @ Copy a 4 pixel wide stripe, h lines high.
+        ADD     r9,r5,r5,LSL #1 @ r9 = 3*srcPixStep
+        SUB     r9,r6,r9        @ r9 = srcLineStep-3*srcPixStep
+        MOV     r7,r3           @ r7 = h
+y_loop_4:
+        @ One destination line of the stripe: read 4 pixels, write 4 pixels.
+        LDRH    r10,[r1],r5     @ r10 = *(src)
+        LDRH    r11,[r1],r5     @ r11 = *(src+srcPixStep)
+        LDRH    r12,[r1],r5     @ r12 = *(src+srcPixStep*2)
+        LDRH    r14,[r1],r9     @ r14 = *(src+srcPixStep*3)  src+=srcLineStep
+        STRH    r10,[r0],#2     @ *(ptr) = r10
+        STRH    r11,[r0],#2     @ *(ptr+2) = r11
+        STRH    r12,[r0],#2     @ *(ptr+4) = r12
+        STRH    r14,[r0],r8     @ *(ptr+6) = r14    ptr += dstLineStep
+        SUBS    r7,r7,#1        @ h--
+        BGT     y_loop_4
+        MUL     r10,r3,r6       @ How much to step r1 back to undo this stripe?
+        ADD     r1,r1,r5,LSL #2 @ Move r1 on by srcPixStep*4
+        SUB     r1,r1,r10       @ Move r1 back by amount just added on
+        MUL     r10,r3,r4       @ How much to step r0 back to undo this stripe?
+        ADD     r0,r0,#8        @ Move r0 on by dstPixStep*4
+        SUB     r0,r0,r10       @ Move r0 back by amount just added on
+        SUBS    r2,r2,#4        @ r2 = w -= 4
+        BEQ     rotate_end      @ if w = 0, none left.
+        SUBS    r7,r2,#4        @ r7 = w - 4
+        BGE     x_loop_4        @ if 4 or more left, go back.
+thin:
+        @ Here r7 = (width to do) - 4, i.e. -3, -2 or -1 for 1, 2 or 3 pixels.
+        MOV     r14,r3          @ r14 = h
+thin_lp:
+        ADDS    r7,r7,#2        @ LT => do 1. EQ => do 2. GT => do 3
+        BEQ     just_2          @ Must be EQ, not GE: GE would also catch the
+        BGT     just_3          @ GT case and make just_3 unreachable.
+        @ Just do a 1 pixel wide stripe. Either the last pixel stripe, or
+        @ the first pixel stripe to align us.
+y_loop_1:
+        LDRH    r10,[r1],r6
+        SUBS    r14,r14,#1
+        STRH    r10,[r0],r4
+        BGT     y_loop_1
+        MUL     r10,r3,r6       @ How much to step r1 back to undo this line?
+        ADD     r1,r1,r5        @ Move r1 on by srcPixStep
+        SUB     r1,r1,r10       @ Move r1 back by amount just added on
+        MUL     r10,r3,r4       @ How much to step r0 back to undo this line?
+        ADD     r0,r0,#2        @ Move r0 on by dstPixStep
+        SUB     r0,r0,r10       @ Move r0 back by amount just added on
+        SUBS    r2,r2,#1        @ If we havent finished (i.e. we were doing
+        MOV     r7,r2           @ the first pixel rather than the last one)
+        BGT     rotate_loop     @ then jump back to do some more
+rotate_end:
+        LDMFD   r13!,{r4-r11,PC}
+just_2:
+        @ Just do a 2 pixel wide stripe. Either the last stripe, or
+        @ the first stripe to align us.
+        SUB     r9,r6,r5        @ r9 = srcLineStep - srcPixStep
+        SUB     r8,r4,#2        @ r8 = dstLineStep - 2
+y_loop_2:
+        LDRH    r10,[r1],r5
+        LDRH    r11,[r1],r9
+        SUBS    r14,r14,#1
+        STRH    r10,[r0],#2
+        STRH    r11,[r0],r8
+        BGT     y_loop_2
+        MUL     r10,r3,r6       @ How much to step r1 back to undo this line?
+        ADD     r1,r1,r5,LSL #1 @ Move r1 on by srcPixStep*2
+        SUB     r1,r1,r10       @ Move r1 back by amount just added on
+        MUL     r10,r3,r4       @ How much to step r0 back to undo this line?
+        ADD     r0,r0,#4        @ Move r0 on by dstPixStep*2
+        SUB     r0,r0,r10       @ Move r0 back by amount just added on
+        SUBS    r2,r2,#2        @ If we havent finished (i.e. we were doing
+        MOV     r7,r2           @ the first stripe rather than the last one)
+        BGT     rotate_loop     @ then jump back to do some more
+        LDMFD   r13!,{r4-r11,PC}
+just_3:
+        @ Just do a 3 pixel wide stripe. Either the last stripe, or
+        @ the first stripe to align us.
+        SUB     r9,r6,r5,LSL #1 @ r9 = srcLineStep - 2*srcPixStep
+        SUB     r8,r4,#4        @ r8 = dstLineStep - 4
+y_loop_3:
+        LDRH    r10,[r1],r5
+        LDRH    r11,[r1],r5
+        LDRH    r12,[r1],r9
+        STRH    r10,[r0],#2
+        STRH    r11,[r0],#2
+        STRH    r12,[r0],r8
+        SUBS    r14,r14,#1
+        BGT     y_loop_3
+        MUL     r10,r3,r6       @ How much to step r1 back to undo this line?
+        ADD     r1,r1,r5        @ Move r1 on by srcPixStep*3
+        ADD     r1,r1,r5,LSL #1
+        SUB     r1,r1,r10       @ Move r1 back by amount just added on
+        MUL     r10,r3,r4       @ How much to step r0 back to undo this line?
+        ADD     r0,r0,#6        @ Move r0 on by dstPixStep*3
+        SUB     r0,r0,r10       @ Move r0 back by amount just added on
+        SUBS    r2,r2,#3        @ If we havent finished (i.e. we were doing
+        MOV     r7,r2           @ the first stripe rather than the last one)
+        BGT     rotate_loop     @ then jump back to do some more
+        LDMFD   r13!,{r4-r11,PC}

Download in other formats:

Original Format