Index: sound/rate_arm.cpp =================================================================== --- sound/rate_arm.cpp (revision 0) +++ sound/rate_arm.cpp (revision 0) @@ -0,0 +1,428 @@ +/* ScummVM - Scumm Interpreter + * Copyright (C) 2001-2006 The ScummVM project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * $URL$ + * $Id$ + * + */ + +/* + * The code in this file, together with the rate_arm_asm.s file offers + * an ARM optimised version of the code in rate.cpp. The operation of this + * code should be identical to that of rate.cpp, but faster. The heavy + * lifting is done in the assembler file. + * + * To be as portable as possible we implement the core routines with C + * linkage in assembly, and implement the C++ routines that call into + * the C here. The C++ symbol mangling varies wildly between compilers, + * so this is the simplest way to ensure that the C/C++ combination should + * work on as many ARM based platforms as possible. + * + * Essentially the algorithm herein is the same as that in rate.cpp, so + * anyone seeking to understand this should attempt to understand that + * first. That code was based in turn on code with Copyright 1998 Fabrice + * Bellard - part of SoX (http://sox.sourceforge.net). 
+ * Max Horn adapted that code to the needs of ScummVM and partially rewrote
+ * it, in the process removing any use of floating point arithmetic. Various
+ * other improvements over the original code were made.
+ */
+
+#include "common/stdafx.h"
+#include "sound/audiostream.h"
+#include "sound/rate.h"
+#include "sound/mixer.h"
+#include "common/util.h"
+
+namespace Audio {
+
+/**
+ * The precision of the fractional computations used by the rate converter.
+ * Normally you should never have to modify this value.
+ */
+#define FRAC_BITS 16
+
+/**
+ * The size of the intermediate input cache. Bigger values may increase
+ * performance, but only until some point (depends largely on cache size,
+ * target processor and various other factors), at which it will decrease
+ * again.
+ */
+#define INTERMEDIATE_BUFFER_SIZE 512
+
+
+/**
+ * Audio rate converter based on simple resampling. Used when no
+ * interpolation is required.
+ * NOTE: rate_arm_asm.s addresses the fields of this struct by fixed offset; keep the layout in sync.
+ * Limited to sampling frequency <= 65535 Hz.
+ */
+typedef struct {
+	const st_sample_t *inPtr;
+	int inLen;
+
+	/** position of how far output is ahead of input */
+	/** Holds what would have been opos-ipos */
+	long opos;
+
+	/** whole-sample position increment per output sample (inrate / outrate) */
+	long opos_inc;
+
+	st_sample_t inBuf[INTERMEDIATE_BUFFER_SIZE];
+} SimpleRateDetails;
+
+template<bool stereo, bool reverseStereo>
+class SimpleRateConverter : public RateConverter {
+protected:
+	SimpleRateDetails sr;
+public:
+	SimpleRateConverter(st_rate_t inrate, st_rate_t outrate);
+	int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r);
+	int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
+		return (ST_SUCCESS);
+	}
+};
+
+
+/*
+ * Prepare processing.
+ */
+template<bool stereo, bool reverseStereo>
+SimpleRateConverter<stereo, reverseStereo>::SimpleRateConverter(st_rate_t inrate, st_rate_t outrate) {
+	if (inrate == outrate) {
+		error("Input and Output rates must be different to use rate effect");
+	}
+
+	if ((inrate % outrate) != 0) {
+		error("Input rate must be a multiple of Output rate to use rate effect");
+	}
+
+	if (inrate >= 65536 || outrate >= 65536) {
+		error("rate effect can only handle rates < 65536");
+	}
+
+	sr.opos = 1;
+
+	/* whole input samples consumed per output sample (exact, see the modulo check above) */
+	sr.opos_inc = inrate / outrate;
+
+	sr.inLen = 0;
+}
+
+extern "C" void ARM_SimpleRate_M(AudioStream &input,
+		int (*fn)(Audio::AudioStream&,int16*,int),
+		SimpleRateDetails *sr,
+		st_sample_t *obuf,
+		st_size_t osamp,
+		st_volume_t vol_l,
+		st_volume_t vol_r);
+
+extern "C" void ARM_SimpleRate_S(AudioStream &input,
+		int (*fn)(Audio::AudioStream&,int16*,int),
+		SimpleRateDetails *sr,
+		st_sample_t *obuf,
+		st_size_t osamp,
+		st_volume_t vol_l,
+		st_volume_t vol_r);
+
+extern "C" void ARM_SimpleRate_R(AudioStream &input,
+		int (*fn)(Audio::AudioStream&,int16*,int),
+		SimpleRateDetails *sr,
+		st_sample_t *obuf,
+		st_size_t osamp,
+		st_volume_t vol_l,
+		st_volume_t vol_r);
+
+extern "C" int SimpleRate_readFudge(Audio::AudioStream &input,
+		int16 *a, int b)
+{
+	return input.readBuffer(a, b);
+}
+
+template<bool stereo, bool reverseStereo>
+int SimpleRateConverter<stereo, reverseStereo>::flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
+
+#ifdef DEBUG_RATECONV
+fprintf(stderr, "Simple st=%d rev=%d\n", stereo, reverseStereo);
+fflush(stderr);
+#endif
+	if (!stereo) {
+		ARM_SimpleRate_M(input,
+				&SimpleRate_readFudge,
+				&sr,
+				obuf, osamp, vol_l, vol_r);
+	} else if (reverseStereo) {
+		ARM_SimpleRate_R(input,
+				&SimpleRate_readFudge,
+				&sr,
+				obuf, osamp, vol_l, vol_r);
+	} else {
+		ARM_SimpleRate_S(input,
+				&SimpleRate_readFudge,
+				&sr,
+				obuf, osamp, vol_l, vol_r);
+	}
+	return (ST_SUCCESS);
+}
+
+/**
+ * Audio rate converter based on simple linear Interpolation.
+ *
+ * The use of fractional increment allows us to use no buffer. It
+ * avoids the problems at the end of the buffer we had with the old
+ * method which stored a possibly big buffer of size
+ * lcm(in_rate,out_rate).
+ * NOTE: rate_arm_asm.s addresses the fields of the struct below by fixed offset; keep the layout in sync.
+ * Limited to sampling frequency <= 65535 Hz.
+ */
+
+typedef struct {
+	const st_sample_t *inPtr;
+	int inLen;
+
+	/** position of how far output is ahead of input */
+	/** Holds what would have been opos-ipos */
+	long opos;
+
+	/** integer position increment in the output stream */
+	long opos_inc;
+
+	/** current sample(s) in the input stream (left/right channel) */
+	st_sample_t icur[2];
+	/** last sample(s) in the input stream (left/right channel) */
+	st_sample_t ilast[2];
+
+	/** fractional position in the output stream */
+	long opos_frac;
+
+	/** fractional position increment in the output stream */
+	long opos_inc_frac;
+
+	st_sample_t inBuf[INTERMEDIATE_BUFFER_SIZE];
+} LinearRateDetails;
+
+extern "C" void ARM_LinearRate_M(AudioStream &input,
+		int (*fn)(Audio::AudioStream&,int16*,int),
+		LinearRateDetails *lr,
+		st_sample_t *obuf,
+		st_size_t osamp,
+		st_volume_t vol_l,
+		st_volume_t vol_r);
+
+extern "C" void ARM_LinearRate_S(AudioStream &input,
+		int (*fn)(Audio::AudioStream&,int16*,int),
+		LinearRateDetails *lr,
+		st_sample_t *obuf,
+		st_size_t osamp,
+		st_volume_t vol_l,
+		st_volume_t vol_r);
+
+extern "C" void ARM_LinearRate_R(AudioStream &input,
+		int (*fn)(Audio::AudioStream&,int16*,int),
+		LinearRateDetails *lr,
+		st_sample_t *obuf,
+		st_size_t osamp,
+		st_volume_t vol_l,
+		st_volume_t vol_r);
+
+template<bool stereo, bool reverseStereo>
+class LinearRateConverter : public RateConverter {
+protected:
+	LinearRateDetails lr;
+
+public:
+	LinearRateConverter(st_rate_t inrate, st_rate_t outrate);
+	int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r);
+	int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
+		return (ST_SUCCESS);
+	}
+};
+
+
+/*
+ * Prepare processing.
+ */
+template<bool stereo, bool reverseStereo>
+LinearRateConverter<stereo, reverseStereo>::LinearRateConverter(st_rate_t inrate, st_rate_t outrate) {
+	unsigned long incr;
+
+	if (inrate == outrate) {
+		error("Input and Output rates must be different to use rate effect");
+	}
+
+	if (inrate >= 65536 || outrate >= 65536) {
+		error("rate effect can only handle rates < 65536");
+	}
+
+	lr.opos_frac = 0;
+	lr.opos = 1;
+
+	/* increment as FRAC_BITS fixed point; split into integer and fractional parts below */
+	incr = (inrate << FRAC_BITS) / outrate;
+
+	lr.opos_inc_frac = incr & ((1UL << FRAC_BITS) - 1);
+	lr.opos_inc = incr >> FRAC_BITS;
+
+	lr.ilast[0] = lr.ilast[1] = 0;
+	lr.icur[0] = lr.icur[1] = 0;
+
+	lr.inLen = 0;
+}
+
+/*
+ * Hand 'input', the converter state and obuf to the matching ARM_LinearRate_* routine.
+ * Always returns ST_SUCCESS.
+ */
+template<bool stereo, bool reverseStereo>
+int LinearRateConverter<stereo, reverseStereo>::flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
+
+#ifdef DEBUG_RATECONV
+fprintf(stderr, "Linear st=%d rev=%d\n", stereo, reverseStereo);
+fflush(stderr);
+#endif
+	if (!stereo) {
+		ARM_LinearRate_M(input,
+				&SimpleRate_readFudge,
+				&lr,
+				obuf, osamp, vol_l, vol_r);
+	} else if (reverseStereo) {
+		ARM_LinearRate_R(input,
+				&SimpleRate_readFudge,
+				&lr,
+				obuf, osamp, vol_l, vol_r);
+	} else {
+		ARM_LinearRate_S(input,
+				&SimpleRate_readFudge,
+				&lr,
+				obuf, osamp, vol_l, vol_r);
+	}
+	return (ST_SUCCESS);
+}
+
+
+#pragma mark -
+
+
+/**
+ * Simple audio rate converter for the case that the inrate equals the outrate.
+ */
+extern "C" void ARM_CopyRate_M(st_size_t len,
+		st_sample_t *obuf,
+		st_volume_t vol_l,
+		st_volume_t vol_r,
+		st_sample_t *_buffer);
+
+extern "C" void ARM_CopyRate_S(st_size_t len,
+		st_sample_t *obuf,
+		st_volume_t vol_l,
+		st_volume_t vol_r,
+		st_sample_t *_buffer);
+
+extern "C" void ARM_CopyRate_R(st_size_t len,
+		st_sample_t *obuf,
+		st_volume_t vol_l,
+		st_volume_t vol_r,
+		st_sample_t *_buffer);
+
+
+template<bool stereo, bool reverseStereo>
+class CopyRateConverter : public RateConverter {
+	st_sample_t *_buffer;
+	st_size_t _bufferSize;
+public:
+	CopyRateConverter() : _buffer(0), _bufferSize(0) {}
+	~CopyRateConverter() {
+		free(_buffer);
+	}
+
+	virtual int flow(AudioStream &input, st_sample_t *obuf, st_size_t osamp, st_volume_t vol_l, st_volume_t vol_r) {
+		assert(input.isStereo() == stereo);
+
+#ifdef DEBUG_RATECONV
+fprintf(stderr, "Copy st=%d rev=%d\n", stereo, reverseStereo);
+fflush(stderr);
+#endif
+		st_size_t len;
+
+		if (stereo)
+			osamp *= 2;
+
+		// Reallocate temp buffer, if necessary
+		if (osamp > _bufferSize) {
+			free(_buffer);
+			_buffer = (st_sample_t *)malloc(osamp * sizeof(st_sample_t));
+			// Record no capacity on failure so the next call retries the allocation
+			_bufferSize = _buffer ? osamp : 0;
+		}
+
+		// Read up to 'osamp' samples into our temporary buffer (nothing if we have no buffer)
+		len = _buffer ? input.readBuffer(_buffer, osamp) : 0;
+		if (len <= 0)
+			return (ST_SUCCESS);
+
+		// Mix the data into the output buffer
+		if (stereo && reverseStereo)
+			ARM_CopyRate_R(len, obuf, vol_l, vol_r, _buffer);
+		else if (stereo)
+			ARM_CopyRate_S(len, obuf, vol_l, vol_r, _buffer);
+		else
+			ARM_CopyRate_M(len, obuf, vol_l, vol_r, _buffer);
+
+		return (ST_SUCCESS);
+	}
+	virtual int drain(st_sample_t *obuf, st_size_t osamp, st_volume_t vol) {
+		return (ST_SUCCESS);
+	}
+};
+
+
+#pragma mark -
+
+
+/**
+ * Create and return a RateConverter object for the specified input and output rates.
+ */
+RateConverter *makeRateConverter(st_rate_t inrate, st_rate_t outrate, bool stereo, bool reverseStereo) {
+	if (inrate != outrate) {
+		if ((inrate % outrate) == 0) {
+			if (stereo) {
+				if (reverseStereo)
+					return new SimpleRateConverter<true, true>(inrate, outrate);
+				else
+					return new SimpleRateConverter<true, false>(inrate, outrate);
+			} else
+				return new SimpleRateConverter<false, false>(inrate, outrate);
+		} else {
+			if (stereo) {
+				if (reverseStereo)
+					return new LinearRateConverter<true, true>(inrate, outrate);
+				else
+					return new LinearRateConverter<true, false>(inrate, outrate);
+			} else
+				return new LinearRateConverter<false, false>(inrate, outrate);
+		}
+	} else {
+		if (stereo) {
+			if (reverseStereo)
+				return new CopyRateConverter<true, true>();
+			else
+				return new CopyRateConverter<true, false>();
+		} else
+			return new CopyRateConverter<false, false>();
+	}
+}
+
+} // End of namespace Audio

Property changes on: sound/rate_arm.cpp
___________________________________________________________________
Name: svn:executable
   + *

Index: sound/rate_arm_asm.s
===================================================================
--- sound/rate_arm_asm.s	(revision 0)
+++ sound/rate_arm_asm.s	(revision 0)
@@ -0,0 +1,689 @@
+@ ScummVM Scumm Interpreter
+@ Copyright (C) 2007 The ScummVM project
+@
+@ This program is free software@ you can redistribute it and/or
+@ modify it under the terms of the GNU General Public License
+@ as published by the Free Software Foundation@ either version 2
+@ of the License, or (at your option) any later version.
+@
+@ This program is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY@ without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+@ GNU General Public License for more details.
+@
+@ You should have received a copy of the GNU General Public License
+@ along with this program@ if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+@ +@ $URL: $ +@ $Id: $ +@ +@ @author Robin Watts (robin@wss.co.uk) +@ +@ This file, together with rate_arm.cpp, provides an ARM optimised version +@ of rate.cpp. The algorithm is essentially the same as that within rate.cpp +@ so to understand this file you should understand rate.cpp first. + + .text + + .global ARM_CopyRate_M + .global ARM_CopyRate_S + .global ARM_CopyRate_R + .global ARM_SimpleRate_M + .global ARM_SimpleRate_S + .global ARM_SimpleRate_R + .global ARM_LinearRate_M + .global ARM_LinearRate_S + .global ARM_LinearRate_R + +ARM_CopyRate_M: + @ r0 = len + @ r1 = obuf + @ r2 = vol_l + @ r3 = vol_r + @ <> = ptr + LDR r12,[r13] + STMFD r13!,{r4-r7,r14} + + MOV r14,#0 @ r14= 0 + ORR r2, r2, r2, LSL #8 @ r2 = vol_l as 16 bits + ORR r3, r3, r3, LSL #8 @ r3 = vol_r as 16 bits +CopyRate_M_loop: + LDRSH r5, [r12], #2 @ r5 = tmp0 = tmp1 = *ptr++ + LDRSH r6, [r1] @ r6 = obuf[0] + LDRSH r7, [r1, #2] @ r7 = obuf[1] + MUL r4, r2, r5 @ r4 = tmp0*vol_l + MUL r5, r3, r5 @ r5 = tmp1*vol_r + + ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l + RSCVS r6, r14,#1<<31 @ Clamp r6 + ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r + RSCVS r7, r14,#1<<31 @ Clamp r7 + + MOV r6, r6, LSR #16 @ Shift back to halfword + MOV r7, r7, LSR #16 @ Shift back to halfword + + STRH r6, [r1], #2 @ Store output value + STRH r7, [r1], #2 @ Store output value + + SUBS r0,r0,#1 @ len-- + BGT CopyRate_M_loop @ and loop + + LDMFD r13!,{r4-r7,PC} + +ARM_CopyRate_S: + @ r0 = len + @ r1 = obuf + @ r2 = vol_l + @ r3 = vol_r + @ <> = ptr + LDR r12,[r13] + STMFD r13!,{r4-r7,r14} + + MOV r14,#0 @ r14= 0 + ORR r2, r2, r2, LSL #8 @ r2 = vol_l as 16 bits + ORR r3, r3, r3, LSL #8 @ r3 = vol_r as 16 bits +CopyRate_S_loop: + LDRSH r4, [r12],#2 @ r4 = tmp0 = *ptr++ + LDRSH r5, [r12],#2 @ r5 = tmp1 = *ptr++ + LDRSH r6, [r1] @ r6 = obuf[0] + LDRSH r7, [r1,#2] @ r7 = obuf[1] + MUL r4, r2, r4 @ r5 = tmp0*vol_l + MUL r5, r3, r5 @ r6 = tmp1*vol_r + + ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + 
tmp0*vol_l + RSCVS r6, r14,#1<<31 @ Clamp r6 + ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r + RSCVS r7, r14,#1<<31 @ Clamp r7 + + MOV r6, r6, LSR #16 @ Shift back to halfword + MOV r7, r7, LSR #16 @ Shift back to halfword + + STRH r6, [r1],#2 @ Store output value + STRH r7, [r1],#2 @ Store output value + + SUBS r0,r0,#2 @ len -= 2 + BGT CopyRate_S_loop @ and loop + + LDMFD r13!,{r4-r7,PC} + +ARM_CopyRate_R: + @ r0 = len + @ r1 = obuf + @ r2 = vol_l + @ r3 = vol_r + @ <> = ptr + LDR r12,[r13] + STMFD r13!,{r4-r7,r14} + + MOV r14,#0 @ r14= 0 + ORR r2, r2, r2, LSL #8 @ r2 = vol_l as 16 bits + ORR r3, r3, r3, LSL #8 @ r3 = vol_r as 16 bits +CopyRate_R_loop: + LDRSH r5, [r12],#2 @ r5 = tmp1 = *ptr++ + LDRSH r4, [r12],#2 @ r4 = tmp0 = *ptr++ + LDRSH r6, [r1] @ r6 = obuf[0] + LDRSH r7, [r1,#2] @ r7 = obuf[1] + MUL r4, r2, r4 @ r4 = tmp0*vol_l + MUL r5, r3, r5 @ r5 = tmp1*vol_r + + ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l + RSCVS r6, r14,#1<<31 @ Clamp r6 + ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r + RSCVS r7, r14,#1<<31 @ Clamp r7 + + MOV r6, r6, LSR #16 @ Shift back to halfword + MOV r7, r7, LSR #16 @ Shift back to halfword + + STRH r6, [r1],#2 @ Store output value + STRH r7, [r1],#2 @ Store output value + + SUBS r0,r0,#2 @ len -= 2 + BGT CopyRate_R_loop @ and loop + + LDMFD r13!,{r4-r7,PC} + +ARM_SimpleRate_M: + @ r0 = AudioStream &input + @ r1 = input.readBuffer + @ r2 = input->sr + @ r3 = obuf + @ <> = osamp + @ <> = vol_l + @ <> = vol_r + MOV r12,r13 + STMFD r13!,{r0-r2,r4-r8,r10-r11,r14} + LDMFD r12,{r11,r12,r14} @ r11= osamp + @ r12= vol_l + @ r14= vol_r + LDMIA r2,{r0,r1,r2,r8} @ r0 = inPtr + @ r1 = inLen + @ r2 = opos + @ r8 = opos_inc + CMP r11,#0 @ if (osamp <= 0) + BLE SimpleRate_M_end @ bale + MOV r10,#0 + ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits + ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits +SimpleRate_M_loop: + SUBS r1, r1, #1 @ r1 = inLen -= 1 + BLT SimpleRate_M_read + SUBS r2, r2, #1 @ r2 = opos-- + 
ADDGE r0, r0, #2 @ if (r2 >= 0) { sr.inPtr++ + BGE SimpleRate_M_loop @ and loop } +SimpleRate_M_read_return: + LDRSH r5, [r0],#2 @ r5 = tmp1 = *inPtr++ + LDRSH r6, [r3] @ r6 = obuf[0] + LDRSH r7, [r3,#2] @ r7 = obuf[1] + ADD r2, r2, r8 @ r2 = opos += opos_inc + MUL r4, r12,r5 @ r4 = tmp0*vol_l + MUL r5, r14,r5 @ r5 = tmp1*vol_r + + ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l + RSCVS r6, r10,#1<<31 @ Clamp r6 + ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r + RSCVS r7, r10,#1<<31 @ Clamp r7 + + MOV r6, r6, LSR #16 @ Shift back to halfword + MOV r7, r7, LSR #16 @ Shift back to halfword + + STRH r6, [r3],#2 @ Store output value + STRH r7, [r3],#2 @ Store output value + + SUBS r11,r11,#1 @ len-- + BGT SimpleRate_M_loop @ and loop +SimpleRate_M_end: + LDR r14,[r13,#8] @ r14 = sr + ADD r13,r13,#12 @ Skip over r0-r2 on stack + STMIA r14,{r0,r1,r2} @ Store back updated values + LDMFD r13!,{r4-r8,r10-r11,PC} +SimpleRate_M_read: + LDR r0, [r13,#4*2] @ r0 = sr + ADD r0, r0, #16 @ r0 = inPtr = inBuf + STMFD r13!,{r0,r2-r3,r12,r14} + + MOV r1, r0 @ r1 = inBuf + LDR r0, [r13,#4*5] @ r0 = AudioStream & input + MOV r2, #512 @ r2 = ARRAYSIZE(inBuf) + + @ Calling back into C++ here. WinCE is fairly easy about such things + @ but other OS are more awkward. r9 is preserved for Symbian, and + @ we have 3+8+5 = 16 things on the stack (an even number). 
+ MOV r14,PC + LDR PC,[r13,#4*6] @ inLen = input.readBuffer(inBuf,512) + SUBS r1, r0, #1 @ r1 = inLen-1 + LDMFD r13!,{r0,r2-r3,r12,r14} + BLT SimpleRate_M_end + SUBS r2, r2, #1 @ r2 = opos-- + ADDGE r0, r0, #2 @ if (r2 >= 0) { sr.inPtr++ + BGE SimpleRate_M_loop @ and loop } + B SimpleRate_M_read_return + + +ARM_SimpleRate_S: + @ r0 = AudioStream &input + @ r1 = input.readBuffer + @ r2 = input->sr + @ r3 = obuf + @ <> = osamp + @ <> = vol_l + @ <> = vol_r + MOV r12,r13 + STMFD r13!,{r0-r2,r4-r8,r10-r11,r14} + LDMFD r12,{r11,r12,r14} @ r11= osamp + @ r12= vol_l + @ r14= vol_r + LDMIA r2,{r0,r1,r2,r8} @ r0 = inPtr + @ r1 = inLen + @ r2 = opos + @ r8 = opos_inc + CMP r11,#0 @ if (osamp <= 0) + BLE SimpleRate_S_end @ bale + MOV r10,#0 + ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits + ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits +SimpleRate_S_loop: + SUBS r1, r1, #2 @ r1 = inLen -= 2 + BLT SimpleRate_S_read + SUBS r2, r2, #1 @ r2 = opos-- + ADDGE r0, r0, #4 @ if (r2 >= 0) { sr.inPtr += 2 + BGE SimpleRate_S_loop @ and loop } +SimpleRate_S_read_return: + LDRSH r4, [r0],#2 @ r4 = tmp0 = *inPtr++ + LDRSH r5, [r0],#2 @ r5 = tmp1 = *inPtr++ + LDRSH r6, [r3] @ r6 = obuf[0] + LDRSH r7, [r3,#2] @ r7 = obuf[1] + ADD r2, r2, r8 @ r2 = opos += opos_inc + MUL r4, r12,r4 @ r5 = tmp0*vol_l + MUL r5, r14,r5 @ r6 = tmp1*vol_r + + ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l + RSCVS r6, r10,#1<<31 @ Clamp r6 + ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r + RSCVS r7, r10,#1<<31 @ Clamp r7 + + MOV r6, r6, LSR #16 @ Shift back to halfword + MOV r7, r7, LSR #16 @ Shift back to halfword + + STRH r6, [r3],#2 @ Store output value + STRH r7, [r3],#2 @ Store output value + + SUBS r11,r11,#1 @ osamp-- + BGT SimpleRate_S_loop @ and loop +SimpleRate_S_end: + LDR r14,[r13,#8] @ r14 = sr + ADD r13,r13,#12 @ skip over r0-r2 on stack + STMIA r14,{r0,r1,r2} @ store back updated values + LDMFD r13!,{r4-r8,r10-r11,PC} +SimpleRate_S_read: + LDR r0, [r13,#4*2] @ r0 = sr + ADD r0, 
r0, #16 @ r0 = inPtr = inBuf + STMFD r13!,{r0,r2-r3,r12,r14} + + MOV r1, r0 @ r1 = inBuf + LDR r0, [r13,#4*5] @ r0 = AudioStream & input + MOV r2, #512 @ r2 = ARRAYSIZE(inBuf) + + @ Calling back into C++ here. WinCE is fairly easy about such things + @ but other OS are more awkward. r9 is preserved for Symbian, and + @ we have 3+8+5 = 16 things on the stack (an even number). + MOV r14,PC + LDR PC,[r13,#4*6] @ inLen = input.readBuffer(inBuf,512) + SUBS r1, r0, #2 @ r1 = inLen-2 + LDMFD r13!,{r0,r2-r3,r12,r14} + BLT SimpleRate_S_end + SUBS r2, r2, #1 @ r2 = opos-- + ADDGE r0, r0, #4 @ if (r2 >= 0) { sr.inPtr += 2 + BGE SimpleRate_S_loop @ and loop } + B SimpleRate_S_read_return + + + +ARM_SimpleRate_R: + @ r0 = AudioStream &input + @ r1 = input.readBuffer + @ r2 = input->sr + @ r3 = obuf + @ <> = osamp + @ <> = vol_l + @ <> = vol_r + MOV r12,r13 + STMFD r13!,{r0-r2,r4-r8,r10-r11,r14} + LDMFD r12,{r11,r12,r14} @ r11= osamp + @ r12= vol_l + @ r14= vol_r + LDMIA r2,{r0,r1,r2,r8} @ r0 = inPtr + @ r1 = inLen + @ r2 = opos + @ r8 = opos_inc + CMP r11,#0 @ if (osamp <= 0) + BLE SimpleRate_R_end @ bale + MOV r10,#0 + ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits + ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits +SimpleRate_R_loop: + SUBS r1, r1, #2 @ r1 = inLen -= 2 + BLT SimpleRate_R_read + SUBS r2, r2, #1 @ r2 = opos-- + ADDGE r0, r0, #4 @ if (r2 >= 0) { sr.inPtr += 2 + BGE SimpleRate_R_loop @ and loop } +SimpleRate_R_read_return: + LDRSH r5, [r0],#2 @ r5 = tmp0 = *inPtr++ + LDRSH r4, [r0],#2 @ r4 = tmp1 = *inPtr++ + LDRSH r6, [r3] @ r6 = obuf[0] + LDRSH r7, [r3,#2] @ r7 = obuf[1] + ADD r2, r2, r8 @ r2 = opos += opos_inc + MUL r4, r12,r4 @ r5 = tmp0*vol_l + MUL r5, r14,r5 @ r6 = tmp1*vol_r + + ADDS r6, r4, r6, LSL #16 @ r6 = obuf[0]<<16 + tmp0*vol_l + RSCVS r6, r10,#1<<31 @ Clamp r6 + ADDS r7, r5, r7, LSL #16 @ r7 = obuf[1]<<16 + tmp1*vol_r + RSCVS r7, r10,#1<<31 @ Clamp r7 + + MOV r6, r6, LSR #16 @ Shift back to halfword + MOV r7, r7, LSR #16 @ Shift back to halfword + + 
STRH r6, [r3],#2 @ Store output value + STRH r7, [r3],#2 @ Store output value + + SUBS r11,r11,#1 @ osamp-- + BGT SimpleRate_R_loop @ and loop +SimpleRate_R_end: + LDR r14,[r13,#8] @ r14 = sr + ADD r13,r13,#12 @ Skip over r0-r2 on stack + STMIA r14,{r0,r1,r2} @ Store back updated values + LDMFD r13!,{r4-r8,r10-r11,PC} +SimpleRate_R_read: + LDR r0, [r13,#4*2] @ r0 = sr + ADD r0, r0, #16 @ r0 = inPtr = inBuf + STMFD r13!,{r0,r2-r3,r12,r14} + + MOV r1, r0 @ r1 = inBuf + LDR r0, [r13,#4*5] @ r0 = AudioStream & input + MOV r2, #512 @ r2 = ARRAYSIZE(inBuf) + + @ Calling back into C++ here. WinCE is fairly easy about such things + @ but other OS are more awkward. r9 is preserved for Symbian, and + @ we have 3+8+5 = 16 things on the stack (an even number). + MOV r14,PC + LDR PC,[r13,#4*6] @ inLen = input.readBuffer(inBuf,512) + SUBS r1, r0, #2 @ r1 = inLen-2 + LDMFD r13!,{r0,r2-r3,r12,r14} + BLT SimpleRate_R_end + SUBS r2, r2, #1 @ r2 = opos-- + ADDGE r0, r0, #4 @ if (r2 >= 0) { sr.inPtr += 2 + BGE SimpleRate_R_loop @ and loop } + B SimpleRate_R_read_return + + +ARM_LinearRate_M: + @ r0 = AudioStream &input + @ r1 = input.readBuffer + @ r2 = input->sr + @ r3 = obuf + @ <> = osamp + @ <> = vol_l + @ <> = vol_r + MOV r12,r13 + STMFD r13!,{r0-r1,r4-r11,r14} + LDMFD r12,{r11,r12,r14} @ r11= osamp + @ r12= vol_l + @ r14= vol_r + LDMIA r2,{r0,r1,r8} @ r0 = inPtr + @ r1 = inLen + @ r8 = opos + CMP r11,#0 @ if (osamp <= 0) + BLE LinearRate_M_end @ bale + ORR r12,r12,r12,LSL #8 @ r12= vol_l as 16 bits + ORR r14,r14,r14,LSL #8 @ r14= vol_r as 16 bits + CMP r1,#0 + BGT LinearRate_M_part2 + + @ part1 - read input samples +LinearRate_M_loop: + SUBS r1, r1, #1 @ r1 = inLen -= 1 + BLT LinearRate_M_read +LinearRate_M_read_return: + LDR r10,[r2, #16] @ r10= icur[0,1] + LDRSH r5, [r0],#2 @ r5 = tmp1 = *inPtr++ + SUBS r8, r8, #1 @ r8 = opos-- + STR r10,[r2,#20] @ ilast[0,1] = icur[0,1] + STRH r5, [r2,#16] @ icur[0] = tmp1 + BGE LinearRate_M_loop + + @ part2 - form output samples 
+LinearRate_M_part2: + @ We are guaranteed that opos < 0 here + LDRSH r6, [r2,#20] @ r6 = ilast[0] + LDRSH r5, [r2,#16] @ r5 = icur[0] + LDRH r4, [r2,#24] @ r4 = opos_frac + LDR r10,[r2,#28] @ r10= opos_frac_inc + MOV r6, r6, LSL #16 @ r6 = ilast[0]<<16 + SUB r5, r5, r6, ASR #16 @ r5 = icur[0] - ilast[0] + ADD r6, r6, #1<<15 @ r6 = ilast[0]+1<<(FRAC_BITS-1) + MLA r6, r4, r5, r6 @ r6 = (icur[0]-ilast[0])*opos_frac+ilast[0] + + ADD r4, r4, r10 @ r4 = tmp = opos_frac+opos_inc_frac + STRH r4,[r2,#24] @ opos_frac &= 65535 + ADD r8, r8, r4, LSR #16 @ opos += (tmp>>FRAC_BITS) + + LDRSH r4, [r3] @ r4 = obuf[0] + LDRSH r5, [r3,#2] @ r5 = obuf[1] + MOV r6, r6, ASR #16 @ r6 = tmp0 = tmp1 >>= 16 + MUL r7, r12,r6 @ r7 = tmp0*vol_l + MUL r6, r14,r6 @ r6 = tmp1*vol_r + + ADDS r7, r7, r4, LSL #16 @ r7 = obuf[0]<<16 + tmp0*vol_l + MOV r4, #0 + RSCVS r7, r4, #1<<31 @ Clamp r7 + ADDS r6, r6, r5, LSL #16 @ r6 = obuf[1]<<16 + tmp1*vol_r + RSCVS r6, r4, #1<<31 @ Clamp r6 + + MOV r7, r7, LSR #16 @ Shift back to halfword + MOV r6, r6, LSR #16 @ Shift back to halfword + + LDR r5, [r2,#12] @ r5 = opos_inc + STRH r7, [r3],#2 @ Store output value + STRH r6, [r3],#2 @ Store output value + SUBS r11, r11,#1 @ opos-- + BLE LinearRate_M_end @ end if needed + + ADDS r8, r8, r5 @ r8 = opos += opos_inc + BLT LinearRate_M_part2 + B LinearRate_M_loop +LinearRate_M_end: + ADD r13,r13,#8 + STMIA r2,{r0,r1,r8} + LDMFD r13!,{r4-r11,PC} +LinearRate_M_read: + ADD r0, r2, #32 @ r0 = inPtr = inBuf + STMFD r13!,{r0,r2-r3,r12,r14} + + MOV r1, r0 @ r1 = inBuf + LDR r0, [r13,#4*5] @ r0 = AudioStream & input + MOV r2, #512 @ r2 = ARRAYSIZE(inBuf) + + @ Calling back into C++ here. WinCE is fairly easy about such things + @ but other OS are more awkward. r9 is preserved for Symbian, and + @ we have 2+9+5 = 16 things on the stack (an even number). 
+        MOV     r14,PC
+        LDR     PC,[r13,#4*6]           @ inLen = input.readBuffer(inBuf,512)
+        SUBS    r1, r0, #1              @ r1 = inLen-1
+        LDMFD   r13!,{r0,r2-r3,r12,r14}
+        BLT     LinearRate_M_end
+        B       LinearRate_M_read_return
+
+ARM_LinearRate_S:
+        @ r0 = AudioStream &input
+        @ r1 = fn (read callback, called as fn(input, inBuf, 512))
+        @ r2 = lr (LinearRateDetails *)
+        @ r3 = obuf
+        @ <> = osamp
+        @ <> = vol_l
+        @ <> = vol_r
+        MOV     r12,r13
+        STMFD   r13!,{r0-r1,r4-r11,r14}
+        LDMFD   r12,{r11,r12,r14}       @ r11= osamp
+                                        @ r12= vol_l
+                                        @ r14= vol_r
+        LDMIA   r2,{r0,r1,r8}           @ r0 = inPtr
+                                        @ r1 = inLen
+                                        @ r8 = opos
+        CMP     r11,#0                  @ if (osamp <= 0)
+        BLE     LinearRate_S_end        @   bail
+        ORR     r12,r12,r12,LSL #8      @ r12= vol_l as 16 bits
+        ORR     r14,r14,r14,LSL #8      @ r14= vol_r as 16 bits
+        CMP     r1,#0
+        BGT     LinearRate_S_part2
+
+        @ part1 - read input samples
+LinearRate_S_loop:
+        SUBS    r1, r1, #2              @ r1 = inLen -= 2
+        BLT     LinearRate_S_read
+LinearRate_S_read_return:
+        LDR     r10,[r2, #16]           @ r10= icur[0,1]
+        LDRSH   r5, [r0],#2             @ r5 = tmp0 = *inPtr++
+        LDRSH   r6, [r0],#2             @ r6 = tmp1 = *inPtr++
+        SUBS    r8, r8, #1              @ r8 = opos-- (flags consumed by the BGE below)
+        STR     r10,[r2,#20]            @ ilast[0,1] = icur[0,1]
+        STRH    r5, [r2,#16]            @ icur[0] = tmp0
+        STRH    r6, [r2,#18]            @ icur[1] = tmp1 (offset 18; part2 reads it back from #18)
+        BGE     LinearRate_S_loop
+
+        @ part2 - form output samples
+LinearRate_S_part2:
+        @ We are guaranteed that opos < 0 here
+        LDRSH   r6, [r2,#20]            @ r6 = ilast[0]
+        LDRSH   r5, [r2,#16]            @ r5 = icur[0]
+        LDRH    r4, [r2,#24]            @ r4 = opos_frac
+        MOV     r6, r6, LSL #16         @ r6 = ilast[0]<<16
+        SUB     r5, r5, r6, ASR #16     @ r5 = icur[0] - ilast[0]
+        ADD     r6, r6, #1<<15          @ r6 = ilast[0]+1<<(FRAC_BITS-1)
+        MLA     r6, r4, r5, r6          @ r6 = (icur[0]-ilast[0])*opos_frac+ilast[0]
+
+        LDRSH   r7, [r2,#22]            @ r7 = ilast[1]
+        LDRSH   r5, [r2,#18]            @ r5 = icur[1]
+        LDR     r10,[r2,#28]            @ r10= opos_inc_frac
+        MOV     r7, r7, LSL #16         @ r7 = ilast[1]<<16
+        SUB     r5, r5, r7, ASR #16     @ r5 = icur[1] - ilast[1]
+        ADD     r7, r7, #1<<15          @ r7 = ilast[1]+1<<(FRAC_BITS-1)
+        MLA     r7, r4, r5, r7          @ r7 = (icur[1]-ilast[1])*opos_frac+ilast[1]
+
+        ADD     r4, r4, r10             @ r4 = tmp = opos_frac+opos_inc_frac
+        STRH    r4,[r2,#24]             @ opos_frac &= 65535
+ ADD r8, r8, r4, LSR #16 @ opos += (tmp>>FRAC_BITS) + + LDRSH r4, [r3] @ r4 = obuf[0] + LDRSH r5, [r3,#2] @ r5 = obuf[1] + MOV r7, r7, ASR #16 @ r7 = tmp0 >>= 16 + MOV r6, r6, ASR #16 @ r6 = tmp1 >>= 16 + MUL r7, r12,r7 @ r7 = tmp0*vol_l + MUL r6, r14,r6 @ r6 = tmp1*vol_r + + ADDS r7, r7, r4, LSL #16 @ r7 = obuf[0]<<16 + tmp0*vol_l + MOV r4, #0 + RSCVS r7, r4, #1<<31 @ Clamp r7 + ADDS r6, r6, r5, LSL #16 @ r6 = obuf[1]<<16 + tmp1*vol_r + RSCVS r6, r4, #1<<31 @ Clamp r6 + + MOV r7, r7, LSR #16 @ Shift back to halfword + MOV r6, r6, LSR #16 @ Shift back to halfword + + LDR r5, [r2,#12] @ r5 = opos_inc + STRH r7, [r3],#2 @ Store output value + STRH r6, [r3],#2 @ Store output value + SUBS r11, r11,#1 @ opos-- + BLE LinearRate_S_end @ and loop + + ADDS r8, r8, r5 @ r8 = opos += opos_inc + BLT LinearRate_S_part2 + B LinearRate_S_loop +LinearRate_S_end: + ADD r13,r13,#8 + STMIA r2,{r0,r1,r8} + LDMFD r13!,{r4-r11,PC} +LinearRate_S_read: + ADD r0, r2, #32 @ r0 = inPtr = inBuf + STMFD r13!,{r0,r2-r3,r12,r14} + + MOV r1, r0 @ r1 = inBuf + LDR r0, [r13,#4*5] @ r0 = AudioStream & input + MOV r2, #512 @ r2 = ARRAYSIZE(inBuf) + + @ Calling back into C++ here. WinCE is fairly easy about such things + @ but other OS are more awkward. r9 is preserved for Symbian, and + @ we have 2+9+5 = 16 things on the stack (an even number). 
+        MOV     r14,PC
+        LDR     PC,[r13,#4*6]           @ inLen = input.readBuffer(inBuf,512)
+        SUBS    r1, r0, #2              @ r1 = inLen-2
+        LDMFD   r13!,{r0,r2-r3,r12,r14}
+        BLT     LinearRate_S_end
+        B       LinearRate_S_read_return
+
+ARM_LinearRate_R:
+        @ r0 = AudioStream &input
+        @ r1 = fn (read callback, called as fn(input, inBuf, 512))
+        @ r2 = lr (LinearRateDetails *)
+        @ r3 = obuf
+        @ <> = osamp
+        @ <> = vol_l
+        @ <> = vol_r
+        MOV     r12,r13
+        STMFD   r13!,{r0-r1,r4-r11,r14}
+        LDMFD   r12,{r11,r12,r14}       @ r11= osamp
+                                        @ r12= vol_l
+                                        @ r14= vol_r
+        LDMIA   r2,{r0,r1,r8}           @ r0 = inPtr
+                                        @ r1 = inLen
+                                        @ r8 = opos
+        CMP     r11,#0                  @ if (osamp <= 0)
+        BLE     LinearRate_R_end        @   bail
+        ORR     r12,r12,r12,LSL #8      @ r12= vol_l as 16 bits
+        ORR     r14,r14,r14,LSL #8      @ r14= vol_r as 16 bits
+        CMP     r1,#0
+        BGT     LinearRate_R_part2
+
+        @ part1 - read input samples
+LinearRate_R_loop:
+        SUBS    r1, r1, #2              @ r1 = inLen -= 2
+        BLT     LinearRate_R_read
+LinearRate_R_read_return:
+        LDR     r10,[r2, #16]           @ r10= icur[0,1]
+        LDRSH   r5, [r0],#2             @ r5 = tmp0 = *inPtr++
+        LDRSH   r6, [r0],#2             @ r6 = tmp1 = *inPtr++
+        SUBS    r8, r8, #1              @ r8 = opos-- (flags consumed by the BGE below)
+        STR     r10,[r2,#20]            @ ilast[0,1] = icur[0,1]
+        STRH    r5, [r2,#16]            @ icur[0] = tmp0
+        STRH    r6, [r2,#18]            @ icur[1] = tmp1 (offset 18; part2 reads it back from #18)
+        BGE     LinearRate_R_loop
+
+        @ part2 - form output samples
+LinearRate_R_part2:
+        @ We are guaranteed that opos < 0 here
+        LDRSH   r6, [r2,#20]            @ r6 = ilast[0]
+        LDRSH   r5, [r2,#16]            @ r5 = icur[0]
+        LDRH    r4, [r2,#24]            @ r4 = opos_frac
+        MOV     r6, r6, LSL #16         @ r6 = ilast[0]<<16
+        SUB     r5, r5, r6, ASR #16     @ r5 = icur[0] - ilast[0]
+        ADD     r6, r6, #1<<15          @ r6 = ilast[0]+1<<(FRAC_BITS-1)
+        MLA     r6, r4, r5, r6          @ r6 = (icur[0]-ilast[0])*opos_frac+ilast[0]
+
+        LDRSH   r7, [r2,#22]            @ r7 = ilast[1]
+        LDRSH   r5, [r2,#18]            @ r5 = icur[1]
+        LDR     r10,[r2,#28]            @ r10= opos_inc_frac
+        MOV     r7, r7, LSL #16         @ r7 = ilast[1]<<16
+        SUB     r5, r5, r7, ASR #16     @ r5 = icur[1] - ilast[1]
+        ADD     r7, r7, #1<<15          @ r7 = ilast[1]+1<<(FRAC_BITS-1)
+        MLA     r7, r4, r5, r7          @ r7 = (icur[1]-ilast[1])*opos_frac+ilast[1]
+
+        ADD     r4, r4, r10             @ r4 = tmp = opos_frac+opos_inc_frac
+        STRH    r4,[r2,#24]             @ opos_frac &= 65535
+ ADD r8, r8, r4, LSR #16 @ opos += (tmp>>FRAC_BITS) + + LDRSH r4, [r3] @ r4 = obuf[0] + LDRSH r5, [r3,#2] @ r5 = obuf[1] + MOV r7, r7, ASR #16 @ r7 = tmp0 >>= 16 + MOV r6, r6, ASR #16 @ r6 = tmp1 >>= 16 + MUL r7, r12,r7 @ r7 = tmp0*vol_l + MUL r6, r14,r6 @ r6 = tmp1*vol_r + + ADDS r7, r7, r4, LSL #16 @ r7 = obuf[0]<<16 + tmp0*vol_l + MOV r4, #0 + RSCVS r7, r4, #1<<31 @ Clamp r7 + ADDS r6, r6, r5, LSL #16 @ r6 = obuf[1]<<16 + tmp1*vol_r + RSCVS r6, r4, #1<<31 @ Clamp r6 + + MOV r7, r7, LSR #16 @ Shift back to halfword + MOV r6, r6, LSR #16 @ Shift back to halfword + + LDR r5, [r2,#12] @ r5 = opos_inc + STRH r6, [r3],#2 @ Store output value + STRH r7, [r3],#2 @ Store output value + SUBS r11, r11,#1 @ opos-- + BLE LinearRate_R_end @ and loop + + ADDS r8, r8, r5 @ r8 = opos += opos_inc + BLT LinearRate_R_part2 + B LinearRate_R_loop +LinearRate_R_end: + ADD r13,r13,#8 + STMIA r2,{r0,r1,r8} + LDMFD r13!,{r4-r11,PC} +LinearRate_R_read: + ADD r0, r2, #32 @ r0 = inPtr = inBuf + STMFD r13!,{r0,r2-r3,r12,r14} + + MOV r1, r0 @ r1 = inBuf + LDR r0, [r13,#4*5] @ r0 = AudioStream & input + MOV r2, #512 @ r2 = ARRAYSIZE(inBuf) + + @ Calling back into C++ here. WinCE is fairly easy about such things + @ but other OS are more awkward. r9 is preserved for Symbian, and + @ we have 2+9+5 = 16 things on the stack (an even number). 
+ MOV r14,PC + LDR PC,[r13,#4*6] @ inLen = input.readBuffer(inBuf,512) + SUBS r1, r0, #2 @ r1 = inLen-2 + LDMFD r13!,{r0,r2-r3,r12,r14} + BLT LinearRate_R_end + B LinearRate_R_read_return Property changes on: sound/rate_arm_asm.s ___________________________________________________________________ Name: svn:executable + * Index: sound/module.mk =================================================================== --- sound/module.mk (revision 27465) +++ sound/module.mk (working copy) @@ -16,7 +16,6 @@ mp3.o \ mpu401.o \ null.o \ - rate.o \ voc.o \ vorbis.o \ wave.o \ @@ -31,5 +30,14 @@ softsynth/fluidsynth.o \ softsynth/mt32.o \ +ifndef USE_ARM_SOUND_ASM +MODULE_OBJS += \ + rate.o +else +MODULE_OBJS += \ + rate_arm.o \ + rate_arm_asm.o +endif + # Include common rules include $(srcdir)/rules.mk Index: backends/platform/wince/Makefile =================================================================== --- backends/platform/wince/Makefile (revision 27465) +++ backends/platform/wince/Makefile (working copy) @@ -25,6 +25,8 @@ #DISABLE_HQ_SCALERS = 1 +USE_ARM_SOUND_ASM = 1 + CXX = arm-wince-pe-g++ LD = arm-wince-pe-g++ AR = arm-wince-pe-ar cru