1 | @ ScummVM Scumm Interpreter
|
---|
2 | @ Copyright (C) 2007 The ScummVM project
|
---|
3 | @
|
---|
4 | @ This program is free software; you can redistribute it and/or
|
---|
5 | @ modify it under the terms of the GNU General Public License
|
---|
6 | @ as published by the Free Software Foundation; either version 2
|
---|
7 | @ of the License, or (at your option) any later version.
|
---|
8 | @
|
---|
9 | @ This program is distributed in the hope that it will be useful,
|
---|
10 | @ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
11 | @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
12 | @ GNU General Public License for more details.
|
---|
13 | @
|
---|
14 | @ You should have received a copy of the GNU General Public License
|
---|
15 | @ along with this program; if not, write to the Free Software
|
---|
16 | @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
---|
17 | @
|
---|
18 | @ $URL: $
|
---|
19 | @ $Id: $
|
---|
20 | @
|
---|
21 | @ @author Robin Watts (robin@wss.co.uk)
|
---|
22 |
|
---|
@ Everything below is code and lives in the text section.
	.text

@ Entry points exported to the linker.
@ Strip drawing / column copy helpers:
	.global	asmCopy8Col
	.global	asmDrawStripToScreen
@ 320 -> 256 pixel horizontal rescalers:
	.global	Rescale_320x256x1555_To_256x256x1555
	.global	Rescale_320x256xPAL8_To_256x256x1555
29 |
|
---|
@ ARM implementation of asmDrawStripToScreen.
@
@ C prototype would be:
@
@ extern "C" void asmDrawStripToScreen(int         height,
@                                      int         width,
@                                      byte const *text,
@                                      byte const *src,
@                                      byte       *dst,
@                                      int         vsPitch,
@                                      int         vsScreenWidth,
@                                      int         textSurfacePitch);
@
@ Composes the text surface over the game graphics, one 32 bit word (four
@ 8 bit pixels) at a time: every text pixel equal to 253 is treated as
@ transparent and replaced by the corresponding src pixel, every other
@ text pixel is copied to dst unchanged.
@
@ In addition, we assume that text, src and dst are all word (4 byte)
@ aligned. This is the same assumption that the old 'inline' version
@ made.
@
@ Behaviour at the edges:
@   * height <= 0 is treated as height = 1.
@   * width  <  4 returns without drawing anything.
@   * width is rounded down to a multiple of 4.
@
@ The first four arguments arrive in r0-r3, the remaining four are read
@ from the caller's stack. r4-r7 and r9-r11 are saved and restored.
asmDrawStripToScreen:
	@ r0 = height
	@ r1 = width
	@ r2 = text
	@ r3 = src
	MOV	r12,r13			@ r12 -> stacked args (taken before we push)
	STMFD	r13!,{r4-r7,r9-r11,r14}
	LDMIA	r12,{r4,r5,r6,r7}
	@ r4 = dst
	@ r5 = vsPitch
	@ r6 = vsScreenWidth
	@ r7 = textSurfacePitch

	CMP	r0,#0			@ If height<=0
	MOVLE	r0,#1			@    height=1
	CMP	r1,#4			@ If width<4
	BLT	end			@    return

	@ Round width down to a whole number of words. This used to be
	@ "BIC r1,r1,#4" (width &= ~4), which turned a width of 4 into 0
	@ (breaking the per-row pointer adjustment below) and a width of 12
	@ into 8. For widths that are multiples of 8 nothing changes.
	BIC	r1,r1,#3

	@ The x loop advances each pointer by width bytes, so turn the
	@ pitches into "bytes to skip at the end of each row".
	SUB	r5,r5,r1		@ vsPitch          -= width
	SUB	r6,r6,r1		@ vsScreenWidth    -= width
	SUB	r7,r7,r1		@ textSurfacePitch -= width
	MOV	r10,#253
	ORR	r10,r10,r10,LSL #8
	ORR	r10,r10,r10,LSL #16	@ r10 = 0xFDFDFDFD (4 transparent pixels)
yLoop:
	MOV	r14,r1			@ r14 = bytes left in this row
xLoop:
	LDR	r12,[r2],#4		@ r12 = [text]
	LDR	r11,[r3],#4		@ r11 = [src]
	CMP	r12,r10
	BNE	singleByteCompare	@ some text pixel is opaque: go per byte
	SUBS	r14,r14,#4
	STR	r11,[r4], #4		@ all transparent: [dst] = [src]
	BGT	xLoop

	ADD	r2,r2,r7		@ text += textSurfacePitch
	ADD	r3,r3,r5		@ src  += vsPitch
	ADD	r4,r4,r6		@ dst  += vsScreenWidth
	SUBS	r0,r0,#1
	BGT	yLoop
	LDMFD	r13!,{r4-r7,r9-r11,PC}

singleByteCompare:
	@ Resolve the four pixels one at a time, starting with the byte in
	@ bits 31..24. After each step r12 has been shifted left by 8 and
	@ the resolved pixel has been inserted at the bottom, so after four
	@ steps r12 holds the four resolved pixels in their original lanes.
	@ The src pixel has to come from the SAME lane as the text pixel,
	@ hence the different shift of r11 in each step. MOVEQ/ANDEQ do not
	@ touch the flags set by the CMP.
	MOV	r9,r12,LSR #24		@ r9 = text pixel, bits 31..24
	CMP	r9,r10,LSR #24		@ if (r9 == transparent)
	MOVEQ	r9,r11,LSR #24		@     r9 = src pixel, bits 31..24
	ORR	r12,r9,r12,LSL #8	@ r12 = (r12<<8) | resolved pixel

	MOV	r9,r12,LSR #24		@ r9 = text pixel, originally bits 23..16
	CMP	r9,r10,LSR #24		@ if (r9 == transparent)
	MOVEQ	r9,r11,LSR #16		@     r9 = src pixel, bits 23..16
	ANDEQ	r9,r9,#0xFF
	ORR	r12,r9,r12,LSL #8	@ r12 = (r12<<8) | resolved pixel

	MOV	r9,r12,LSR #24		@ r9 = text pixel, originally bits 15..8
	CMP	r9,r10,LSR #24		@ if (r9 == transparent)
	MOVEQ	r9,r11,LSR #8		@     r9 = src pixel, bits 15..8
	ANDEQ	r9,r9,#0xFF
	ORR	r12,r9,r12,LSL #8	@ r12 = (r12<<8) | resolved pixel

	MOV	r9,r12,LSR #24		@ r9 = text pixel, originally bits 7..0
	CMP	r9,r10,LSR #24		@ if (r9 == transparent)
	ANDEQ	r9,r11,#0xFF		@     r9 = src pixel, bits 7..0
	ORR	r12,r9,r12,LSL #8	@ r12 = the four resolved pixels

	STR	r12,[r4],#4
	SUBS	r14,r14,#4
	BGT	xLoop

	ADD	r2,r2,r7		@ text += textSurfacePitch
	ADD	r3,r3,r5		@ src  += vsPitch
	ADD	r4,r4,r6		@ dst  += vsScreenWidth
	SUBS	r0,r0,#1
	BGT	yLoop
end:
	LDMFD	r13!,{r4-r7,r9-r11,PC}
124 |
|
---|
125 |
|
---|
@ ARM implementation of asmCopy8Col
@
@ C prototype would be:
@
@ extern "C" void asmCopy8Col(byte       *dst,
@                             int         dstPitch,
@                             const byte *src,
@                             int         height);
@
@ Copies an 8 byte wide column (two words per row) of 'height' rows from
@ src to dst. Both pointers are stepped by dstPitch bytes per row, i.e.
@ src is read with the same pitch as dst is written.
@
@ In addition, we assume that src and dst are both word (4 byte)
@ aligned. This is the same assumption that the old 'inline' version
@ made.
@
@ height <= 0 copies nothing. (Without the check below the row counter
@ would step past zero and the loop would not terminate.)
asmCopy8Col:
	@ r0 = dst
	@ r1 = dstPitch
	@ r2 = src
	@ r3 = height
	STMFD	r13!,{r14}		@ r14 is used as a second data register
	CMP	r3,#0			@ If height<=0
	BLE	end2			@    nothing to copy
	SUB	r1,r1,#4		@ r1 = step from 2nd word to the next row

	@ The loop is unrolled to two rows per iteration. For an odd height
	@ bump the counter by one and enter in the middle, so that the first
	@ pass copies a single row.
	TST	r3,#1
	ADDNE	r3,r3,#1
	BNE	roll2
yLoop2:
	LDR	r12,[r2],#4		@ r12 = 1st word of row
	LDR	r14,[r2],r1		@ r14 = 2nd word of row, src -> next row
	STR	r12,[r0],#4
	STR	r14,[r0],r1		@ dst -> next row
roll2:
	LDR	r12,[r2],#4
	LDR	r14,[r2],r1
	SUBS	r3,r3,#2		@ two rows accounted for
	STR	r12,[r0],#4
	STR	r14,[r0],r1
	BNE	yLoop2

end2:
	LDMFD	r13!,{PC}
163 |
|
---|
164 |
|
---|
@ ARM implementation of Rescale_320x256x1555_To_256x256x1555
@
@ C prototype would be:
@
@ extern "C" void Rescale_320x256x1555_To_256x256x1555(
@                                      u16        *dst,
@                                      const u16  *src,
@                                      int         dstStride,
@                                      int         srcStride);
@
@ Shrinks each line from 320 to 256 pixels by turning every group of 5
@ source pixels into 4 destination pixels:
@
@     dst0 = (3*src0 +   src1) / 4
@     dst1 = (  src1 +   src2) / 2
@     dst2 = (  src2 +   src3) / 2
@     dst3 = (  src3 + 3*src4) / 4
@
@ The three 5 bit colour components are averaged together by spreading a
@ pixel over a 32 bit word (mask 0x03E07C1F) so that each component has
@ room to grow. Bit 15 of every output pixel is set.
@
@ Both strides are counted in 16 bit pixels (they are doubled when the
@ pointers are advanced). The loop processes 200 lines.
@
@ NOTE: earlier revisions of this routine read through r0 and wrote
@ through r1 (and rebuilt r1 from r2 at the end of each line), i.e. the
@ opposite of the prototype above. The code below follows the prototype.
Rescale_320x256x1555_To_256x256x1555:
	@ r0 = dst
	@ r1 = src
	@ r2 = dstStride
	@ r3 = srcStride
	STMFD	r13!,{r4-r5,r8-r11,r14}

	@ The x loop advances dst by 256 and src by 320 pixels, so turn the
	@ strides into "pixels to skip at the end of each line".
	SUB	r2,r2,#64*4		@ dstStride -= line length
	SUB	r3,r3,#64*5		@ srcStride -= line length

	MOV	r8,    #0x0000001F
	ORR	r8, r8,#0x00007C00
	ORR	r8, r8,#0x03E00000	@ r8 = mask
	MOV	r5, #200		@ r5 = y
yLoop3:
	MOV	r4, #64			@ r4 = x (groups of 5 source pixels)
xLoop3:
	LDRH	r9, [r1],#2		@ r9 = src0
	LDRH	r10,[r1],#2		@ r10= src1
	LDRH	r11,[r1],#2		@ r11= src2
	LDRH	r12,[r1],#2		@ r12= src3
	LDRH	r14,[r1],#2		@ r14= src4

	ORR	r9, r9, r9, LSL #16	@ r9 = src0 | src0
	ORR	r10,r10,r10,LSL #16	@ r10= src1 | src1
	ORR	r11,r11,r11,LSL #16	@ r11= src2 | src2
	ORR	r12,r12,r12,LSL #16	@ r12= src3 | src3
	ORR	r14,r14,r14,LSL #16	@ r14= src4 | src4

	AND	r9, r9, r8		@ r9 = 0 | G0 | 0 | B0 | 0 | R0
	AND	r10,r10,r8		@ r10= 0 | G1 | 0 | B1 | 0 | R1
	AND	r11,r11,r8		@ r11= 0 | G2 | 0 | B2 | 0 | R2
	AND	r12,r12,r8		@ r12= 0 | G3 | 0 | B3 | 0 | R3
	AND	r14,r14,r8		@ r14= 0 | G4 | 0 | B4 | 0 | R4

	ADD	r9, r9, r9, LSL #1	@ r9 = 3*src0
	ADD	r9, r9, r10		@ r9 = dst0<<2
	ADD	r10,r10,r11		@ r10= dst1<<1
	ADD	r11,r11,r12		@ r11= dst2<<1
	ADD	r12,r12,r14		@ r12= src3 + src4
	ADD	r12,r12,r14,LSL #1	@ r12= src3 + src4*3 = dst3<<2

	AND	r9, r8, r9, LSR #2	@ r9 = dst0 (split)
	AND	r10,r8, r10,LSR #1	@ r10= dst1 (split)
	AND	r11,r8, r11,LSR #1	@ r11= dst2 (split)
	AND	r12,r8, r12,LSR #2	@ r12= dst3 (split)

	ORR	r9, r9, r9, LSR #16	@ r9 = dst0
	ORR	r10,r10,r10,LSR #16	@ r10= dst1
	ORR	r11,r11,r11,LSR #16	@ r11= dst2
	ORR	r12,r12,r12,LSR #16	@ r12= dst3

	ORR	r9, r9, #0x8000		@ set bit 15 of every output pixel
	ORR	r10,r10,#0x8000
	ORR	r11,r11,#0x8000
	ORR	r12,r12,#0x8000

	STRH	r9, [r0],#2		@ STRH only stores the low 16 bits
	STRH	r10,[r0],#2
	STRH	r11,[r0],#2
	STRH	r12,[r0],#2

	SUBS	r4,r4,#1
	BGT	xLoop3

	ADD	r0,r0,r2,LSL #1		@ dst += 2*(dstStride - 256) bytes
	ADD	r1,r1,r3,LSL #1		@ src += 2*(srcStride - 320) bytes
	SUBS	r5,r5,#1
	BGT	yLoop3

	LDMFD	r13!,{r4-r5,r8-r11,PC}
245 |
|
---|
@ ARM implementation of Rescale_320x256xPAL8_To_256x256x1555
@
@ C prototype would be:
@
@ extern "C" void Rescale_320x256xPAL8_To_256x256x1555(
@                                      u16        *dst,
@                                      const u8   *src,
@                                      int         dstStride,
@                                      int         srcStride,
@                                      const u16  *pal);
@
@ This is a slight reordering of the params from the existing C one.
@ Sorry, but it makes the code easier.
@
@ Shrinks each line of 8 bit paletted pixels from 320 to 256 pixels,
@ looking every pixel up in 'pal' (256 entries) and turning every group
@ of 5 source pixels into 4 destination pixels:
@
@     dst0 = (3*src0 +   src1) / 4
@     dst1 = (  src1 +   src2) / 2
@     dst2 = (  src2 +   src3) / 2
@     dst3 = (  src3 + 3*src4) / 4
@
@ To keep the inner loop short the palette is first expanded into a 1K
@ table on the stack, holding each entry already spread over a 32 bit
@ word (mask 0x03E07C1F) so that the colour components can be summed
@ without overflowing into each other. Bit 15 of every output pixel is
@ set.
@
@ dstStride is counted in 16 bit pixels, srcStride in bytes. The loop
@ processes 200 lines. Stack usage: 7 saved registers + 1024 bytes.
@
@ NOTE: earlier revisions of this routine read through r0 and wrote
@ through r1 (and rebuilt r1 from r2 at the end of each line), i.e. the
@ opposite of the prototype above. The code below follows the prototype.
Rescale_320x256xPAL8_To_256x256x1555:
	@ r0 = dst
	@ r1 = src
	@ r2 = dstStride
	@ r3 = srcStride
	STMFD	r13!,{r4-r5,r8-r11,r14}
	MOV	r8,    #0x0000001F
	ORR	r8, r8,#0x00007C00
	ORR	r8, r8,#0x03E00000	@ r8 = mask
	LDR	r9, [r13,#7*4]		@ r9 = palette (5th arg, just above the
					@      7 registers pushed above)

	SUB	r13,r13,#256*4		@ r13 = 1K of space on the stack.
	MOV	r5, r13			@ r5 points to this space
	MOV	r14,#256
palLoop:
	LDRH	r10,[r9],#2		@ r10 = palette entry
	SUBS	r14,r14,#1
	ORR	r10,r10,r10,LSL #16
	AND	r10,r10,r8		@ r10 = separated palette entry
	STR	r10,[r5], #4
	BGT	palLoop

	@ The x loop advances dst by 256 pixels and src by 320 bytes, so
	@ turn the strides into "amount to skip at the end of each line".
	SUB	r2,r2,#64*4		@ dstStride -= line length
	SUB	r3,r3,#64*5		@ srcStride -= line length

	MOV	r5,#200			@ r5 = y
yLoop4:
	MOV	r4,#64			@ r4 = x (groups of 5 source pixels)
xLoop4:
	LDRB	r9, [r1],#1		@ r9 = src0
	LDRB	r10,[r1],#1		@ r10= src1
	LDRB	r11,[r1],#1		@ r11= src2
	LDRB	r12,[r1],#1		@ r12= src3
	LDRB	r14,[r1],#1		@ r14= src4

	LDR	r9, [r13,r9, LSL #2]	@ r9 = pal[src0]
	LDR	r10,[r13,r10,LSL #2]	@ r10= pal[src1]
	LDR	r11,[r13,r11,LSL #2]	@ r11= pal[src2]
	LDR	r12,[r13,r12,LSL #2]	@ r12= pal[src3]
	LDR	r14,[r13,r14,LSL #2]	@ r14= pal[src4]

	ADD	r9, r9, r9, LSL #1	@ r9 = 3*src0
	ADD	r9, r9, r10		@ r9 = dst0<<2
	ADD	r10,r10,r11		@ r10= dst1<<1
	ADD	r11,r11,r12		@ r11= dst2<<1
	ADD	r12,r12,r14		@ r12= src3 + src4
	ADD	r12,r12,r14,LSL #1	@ r12= src3 + src4*3 = dst3<<2

	AND	r9, r8, r9, LSR #2	@ r9 = dst0 (split)
	AND	r10,r8, r10,LSR #1	@ r10= dst1 (split)
	AND	r11,r8, r11,LSR #1	@ r11= dst2 (split)
	AND	r12,r8, r12,LSR #2	@ r12= dst3 (split)

	ORR	r9, r9, r9, LSR #16	@ r9 = dst0
	ORR	r10,r10,r10,LSR #16	@ r10= dst1
	ORR	r11,r11,r11,LSR #16	@ r11= dst2
	ORR	r12,r12,r12,LSR #16	@ r12= dst3

	ORR	r9, r9, #0x8000		@ set bit 15 of every output pixel
	ORR	r10,r10,#0x8000
	ORR	r11,r11,#0x8000
	ORR	r12,r12,#0x8000

	STRH	r9, [r0],#2		@ STRH only stores the low 16 bits
	STRH	r10,[r0],#2
	STRH	r11,[r0],#2
	STRH	r12,[r0],#2

	SUBS	r4,r4,#1
	BGT	xLoop4

	ADD	r0,r0,r2,LSL #1		@ dst += 2*(dstStride - 256) bytes
	ADD	r1,r1,r3		@ src += srcStride - 320 bytes
	SUBS	r5,r5,#1
	BGT	yLoop4

	ADD	r13,r13,#256*4		@ release the expanded palette

	LDMFD	r13!,{r4-r5,r8-r11,PC}