123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548 |
- AREA |.text|, CODE, READONLY ; everything below goes into a read-only code section
- GET celt/arm/armopts.s ; include the option file; presumably it defines the OPUS_ARM_MAY_HAVE_* switches tested below (TODO confirm)
- IF OPUS_ARM_MAY_HAVE_EDSP ; export the EDSP entry point only when that variant is assembled
- EXPORT celt_pitch_xcorr_edsp
- ENDIF
- IF OPUS_ARM_MAY_HAVE_NEON ; export the NEON entry point only when that variant is assembled
- EXPORT celt_pitch_xcorr_neon
- ENDIF
- IF OPUS_ARM_MAY_HAVE_NEON ; NEON implementation starts here; closed by the ENDIF after celt_pitch_xcorr_neon
- xcorr_kernel_neon PROC ; q0[k] += sum over j in 0..len-1 of x[j]*y[j+k], for k = 0..3
- xcorr_kernel_neon_start
- ; Inputs:
- ;   r3 = len, number of samples to accumulate
- ;   r4 = x, pointer to signed 16-bit samples
- ;   r5 = y, pointer to signed 16-bit samples
- ;   q0 = four 32-bit running sums, lane k belongs to lag k
- ; Output:
- ;   q0 = updated sums
- ; Written here: r4, r5 (post-incremented), r12, flags, d3-d7, d16, d17
- ; Register use (x0.., y0.. below are relative to the current position):
- ;   r12 = sample counter, biased by the size of the pending step
- ;   d3  = y0..y3 inside the 8-sample loop
- ;   q2  = d4,d5: the y values that follow
- ;   q3  = d6,d7: x values
- ;   q8  = d16,d17: y windows shifted by 1..3 lanes, built with VEXT
- ;
- ; Load the first four y values into d5.
- ; This load and the one-sample tail run unconditionally, so len is presumably required to be positive (TODO confirm with the callers).
- VLD1.16 {d5}, [r5]!
- SUBS r12, r3, #8
- BLE xcorr_kernel_neon_process4
- xcorr_kernel_neon_process8
- ; Each iteration consumes eight x values and, for every lag k, adds
- ; x[j]*y[j+k] for those eight j to lane k of q0.
- ; On entry d5 holds y0..y3; the loop then loads y4..y11.
- ; The loop body only runs while more than eight samples remain (r12 positive),
- ; so the last 1..8 samples are always left to the tails below.
- ; r12 = samples remaining minus 8 at the top of each iteration.
- ;
- ;
- ; Load x0..x3 into d6 and x4..x7 into d7.
- VLD1.16 {d6, d7}, [r4]!
- ; Move the carried-over y0..y3 from d5 to d3 (VAND of a register with itself is a copy).
- ; NOTE(review): VAND rather than VMOV is presumably a scheduling choice; not verifiable from this file.
- VAND d3, d5, d5
- SUBS r12, r12, #8
- ; Load y4..y7 into d4 and y8..y11 into d5 (VLD1 leaves the SUBS flags for the BGT below).
- VLD1.16 {d4, d5}, [r5]!
- VMLAL.S16 q0, d3, d6[0] ; lane k += y[k] * x0
- VEXT.16 d16, d3, d4, #1 ; d16 = y1..y4
- VMLAL.S16 q0, d4, d7[0] ; lane k += y[4+k] * x4
- VEXT.16 d17, d4, d5, #1 ; d17 = y5..y8
- VMLAL.S16 q0, d16, d6[1] ; lane k += y[1+k] * x1
- VEXT.16 d16, d3, d4, #2 ; d16 = y2..y5
- VMLAL.S16 q0, d17, d7[1] ; lane k += y[5+k] * x5
- VEXT.16 d17, d4, d5, #2 ; d17 = y6..y9
- VMLAL.S16 q0, d16, d6[2] ; lane k += y[2+k] * x2
- VEXT.16 d16, d3, d4, #3 ; d16 = y3..y6
- VMLAL.S16 q0, d17, d7[2] ; lane k += y[6+k] * x6
- VEXT.16 d17, d4, d5, #3 ; d17 = y7..y10
- VMLAL.S16 q0, d16, d6[3] ; lane k += y[3+k] * x3
- VMLAL.S16 q0, d17, d7[3] ; lane k += y[7+k] * x7
- BGT xcorr_kernel_neon_process8
- xcorr_kernel_neon_process4 ; r12 = samples remaining minus 8; d5 = y0..y3
- ADDS r12, r12, #4 ; r12 = samples remaining minus 4
- BLE xcorr_kernel_neon_process2 ; four or fewer left: skip the 4-sample step
- ; Load x0..x3 into d6.
- VLD1.16 d6, [r4]!
- ; Carry y0..y3 over to d4.
- VAND d4, d5, d5
- SUB r12, r12, #4 ; account for the four samples handled here (flags untouched)
- ; Load y4..y7 into d5; they become y0..y3 for the next step.
- VLD1.16 d5, [r5]!
- VMLAL.S16 q0, d4, d6[0] ; lane k += y[k] * x0
- VEXT.16 d16, d4, d5, #1 ; d16 = y1..y4
- VMLAL.S16 q0, d16, d6[1] ; lane k += y[1+k] * x1
- VEXT.16 d16, d4, d5, #2 ; d16 = y2..y5
- VMLAL.S16 q0, d16, d6[2] ; lane k += y[2+k] * x2
- VEXT.16 d16, d4, d5, #3 ; d16 = y3..y6
- VMLAL.S16 q0, d16, d6[3] ; lane k += y[3+k] * x3
- xcorr_kernel_neon_process2 ; r12 = samples remaining minus 4; d5 = y0..y3
- ADDS r12, r12, #2 ; r12 = samples remaining minus 2
- BLE xcorr_kernel_neon_process1 ; two or fewer left: skip the 2-sample step
- ; Load x0 into every lane of d6 and x1 into every lane of d7.
- VLD2.16 {d6[],d7[]}, [r4]!
- ; Carry y0..y3 over to d4.
- VAND d4, d5, d5
- SUB r12, r12, #2 ; account for the two samples handled here (flags untouched)
- ; Load y4,y5 as one 32-bit unit, replicated into both halves of d5.
- VLD1.32 {d5[]}, [r5]!
- VMLAL.S16 q0, d4, d6 ; lane k += y[k] * x0
- VEXT.16 d16, d4, d5, #1 ; d16 = y1..y4
- ; Slide the window by two samples: the low half of d5 receives the high half
- ; of d4 (y2,y3) while its high half keeps y4,y5, so d5 = y2..y5 for the last step.
- VSRI.64 d5, d4, #32
- VMLAL.S16 q0, d16, d7 ; lane k += y[1+k] * x1
- xcorr_kernel_neon_process1 ; r12 = samples remaining minus 2; d5 = y0..y3
- ; Load x0 into every lane of d6.
- VLD1.16 {d6[]}, [r4]!
- ADDS r12, r12, #1 ; r12 = samples remaining minus 1
- ; Accumulate this sample (always executed): lane k += y[k] * x0.
- VMLAL.S16 q0, d5, d6
- MOVLE pc, lr ; return if that was the last sample
- ; One more sample: load y4 into every lane of d4, then shift in y1..y3 below it.
- VLD1.16 {d4[]}, [r5]!
- VSRI.64 d4, d5, #16 ; d4 = y1,y2,y3,y4
- ; Load the last x value into every lane of d6.
- VLD1.16 {d6[]}, [r4]!
- VMLAL.S16 q0, d4, d6 ; lane k += y[1+k] * x1
- MOV pc, lr ; return
- ENDP
- celt_pitch_xcorr_neon PROC ; xcorr[i] = sum over j in 0..len-1 of x[j]*y[i+j], for each requested lag i; returns the maximum
- ; Inputs:
- ;   r0 = x, pointer to signed 16-bit samples
- ;   r1 = y, pointer to signed 16-bit samples
- ;   r2 = xcorr, pointer to the 32-bit output array
- ;   r3 = len, number of samples per correlation
- ;   first stack word on entry = number of lags to compute (presumably max_pitch; the name is not visible here)
- ; Output:
- ;   r0 = largest value stored to xcorr, but never less than 1
- ; Register use:
- ;   r4  = working x pointer handed to xcorr_kernel_neon
- ;   r5  = working y pointer handed to xcorr_kernel_neon
- ;   r6  = lags still to compute (biased by 4 inside the 4-lag loop)
- ;   r12 = sample counter in the one-lag-at-a-time path
- ;   q15 = running maximum (xcorr_kernel_neon does not write q15)
- STMFD sp!, {r4-r6, lr} ; save r4-r6 and the return address (16 bytes)
- LDR r6, [sp, #16] ; first stack argument, now 16 bytes above sp
- VMOV.S32 q15, #1 ; running maximum starts at 1 in all four lanes
- ; Fewer than four lags requested: skip the 4-lag loop.
- SUBS r6, r6, #4
- BLT celt_pitch_xcorr_neon_process4_done
- celt_pitch_xcorr_neon_process4
- ; Four consecutive lags per iteration, computed by xcorr_kernel_neon:
- ; x goes in r4, y in r5, and the four sums in q0 start at zero.
- MOV r4, r0
- MOV r5, r1
- VEOR q0, q0, q0
- ; The kernel adds x[j]*y[j+k] for k = 0..3 into q0.
- ; It writes r4, r5 and r12 but leaves r0-r3 and r6 alone.
- BL xcorr_kernel_neon_start
- SUBS r6, r6, #4
- VST1.32 {q0}, [r2]! ; store the four sums and advance xcorr
- ; Advance y by four samples (8 bytes).
- ADD r1, r1, #8
- VMAX.S32 q15, q15, q0 ; fold the sums into the per-lane maximum
- ; Loop while at least four lags remain (flags still come from the SUBS above).
- BGE celt_pitch_xcorr_neon_process4
- celt_pitch_xcorr_neon_process4_done
- ADDS r6, r6, #4 ; r6 = lags left
- ; Reduce the four lane maxima to a single value in d30[0].
- VMAX.S32 d30, d30, d31
- VPMAX.S32 d30, d30, d30
- ; Flags still come from the ADDS above: finish if no lags are left.
- BLE celt_pitch_xcorr_neon_done
- celt_pitch_xcorr_neon_process_remaining ; one lag per iteration from here on
- MOV r4, r0
- MOV r5, r1
- VMOV.I32 q0, #0
- SUBS r12, r3, #8
- BLT celt_pitch_xcorr_neon_process_remaining4
- celt_pitch_xcorr_neon_process_remaining_loop8
- ; Load eight x values.
- VLD1.16 {q1}, [r4]!
- ; Load eight y values.
- VLD1.16 {q2}, [r5]!
- SUBS r12, r12, #8
- VMLAL.S16 q0, d4, d2 ; four products into the four lanes of q0
- VMLAL.S16 q0, d5, d3 ; four more
- BGE celt_pitch_xcorr_neon_process_remaining_loop8
- celt_pitch_xcorr_neon_process_remaining4 ; r12 = samples left minus 8
- ADDS r12, r12, #4 ; r12 = samples left minus 4
- BLT celt_pitch_xcorr_neon_process_remaining4_done
- ; Load four x values.
- VLD1.16 {d2}, [r4]!
- ; Load four y values.
- VLD1.16 {d3}, [r5]!
- SUB r12, r12, #4 ; account for the four samples handled here (flags untouched)
- VMLAL.S16 q0, d3, d2
- celt_pitch_xcorr_neon_process_remaining4_done
- ; Collapse the four partial sums; the total ends up in d0[0].
- VADD.S32 d0, d0, d1
- VPADDL.S32 d0, d0 ; 64-bit sum of the two lanes; its low word is d0[0]
- ADDS r12, r12, #4 ; r12 = samples left
- BLE celt_pitch_xcorr_neon_process_remaining_loop_done
- celt_pitch_xcorr_neon_process_remaining_loop1
- VLD1.16 {d2[]}, [r4]! ; one x value in every lane of d2
- VLD1.16 {d3[]}, [r5]! ; one y value in every lane of d3
- SUBS r12, r12, #1
- VMLAL.S16 q0, d2, d3 ; adds x*y to every lane of q0; only d0[0] is used afterwards
- BGT celt_pitch_xcorr_neon_process_remaining_loop1
- celt_pitch_xcorr_neon_process_remaining_loop_done
- VST1.32 {d0[0]}, [r2]! ; store this lag's sum and advance xcorr
- VMAX.S32 d30, d30, d0 ; update the maximum; only lane 0 is read at the end
- SUBS r6, r6, #1
- ; Advance y by one sample (2 bytes).
- ADD r1, r1, #2
- ; Loop while lags remain (flags still come from the SUBS above).
- BGT celt_pitch_xcorr_neon_process_remaining
- celt_pitch_xcorr_neon_done
- VMOV.32 r0, d30[0] ; return value = running maximum
- LDMFD sp!, {r4-r6, pc} ; restore r4-r6 and return
- ENDP
- ENDIF ; end of the OPUS_ARM_MAY_HAVE_NEON section
- IF OPUS_ARM_MAY_HAVE_EDSP ; EDSP implementation starts here; closed by the ENDIF just before END
- xcorr_kernel_edsp PROC ; r6..r9 += sum over j in 0..len-1 of x[j]*y[j+k], for k = 0..3
- xcorr_kernel_edsp_start
- ; Inputs:
- ;   r3 = len, number of samples to accumulate
- ;   r4 = x, 16-bit samples read two at a time with LDR (presumably must be 32-bit aligned)
- ;   r5 = y, 16-bit samples read two at a time with LDR (presumably must be 32-bit aligned)
- ;   r6..r9 = four 32-bit running sums, r6 for lag 0 up to r9 for lag 3
- ; Output:
- ;   r6..r9 = updated sums
- ; r0-r5 are unchanged on return (r2, r4, r5 are saved and restored); r10, r11, r12, r14 and the flags are overwritten
- ; Register use (x0.., y0.. are relative to the current position):
- ;   r2 = samples left (reused as a y value at the very end)
- ;   r12, r14 = x values; r10, r11 = y values
- ;   "a|b" in the comments means top halfword a, bottom halfword b; the sample order assumes little-endian loads (TODO confirm)
- STMFD sp!, {r2,r4,r5,lr} ; r2, r4 and r5 are modified below and restored on exit
- LDR r10, [r5], #4 ; r10 = y1|y0
- SUBS r2, r3, #4
- LDR r11, [r5], #4 ; r11 = y3|y2
- BLE xcorr_kernel_edsp_process4_done ; four or fewer samples: tail only
- LDR r12, [r4], #4 ; r12 = x1|x0
- ; LDR leaves the SUBS flags alone, so the loop is entered with them still valid.
- xcorr_kernel_edsp_process4
- ; Four samples per iteration. On entry r10 = y1|y0, r11 = y3|y2, r12 = x1|x0.
- ; SMLA<a><b> Rd, Rn, Rm, Ra: a picks the half of Rn and b the half of Rm (B = bottom, T = top).
- ; Loads for the next products are interleaved with the multiply-accumulates.
- SMLABB r6, r12, r10, r6 ; r6 += x0*y0
- LDR r14, [r4], #4 ; r14 = x3|x2
- SMLABT r7, r12, r10, r7 ; r7 += x0*y1
- SUBS r2, r2, #4
- SMLABB r8, r12, r11, r8 ; r8 += x0*y2
- SMLABT r9, r12, r11, r9 ; r9 += x0*y3
- SMLATT r6, r12, r10, r6 ; r6 += x1*y1
- LDR r10, [r5], #4 ; r10 = y5|y4
- SMLATB r7, r12, r11, r7 ; r7 += x1*y2
- SMLATT r8, r12, r11, r8 ; r8 += x1*y3
- SMLATB r9, r12, r10, r9 ; r9 += x1*y4
- LDRGT r12, [r4], #4 ; next x pair, only if another iteration follows (flags from SUBS)
- SMLABB r6, r14, r11, r6 ; r6 += x2*y2
- SMLABT r7, r14, r11, r7 ; r7 += x2*y3
- SMLABB r8, r14, r10, r8 ; r8 += x2*y4
- SMLABT r9, r14, r10, r9 ; r9 += x2*y5
- SMLATT r6, r14, r11, r6 ; r6 += x3*y3
- LDR r11, [r5], #4 ; r11 = y7|y6
- SMLATB r7, r14, r10, r7 ; r7 += x3*y4
- SMLATT r8, r14, r10, r8 ; r8 += x3*y5
- SMLATB r9, r14, r11, r9 ; r9 += x3*y6
- BGT xcorr_kernel_edsp_process4
- xcorr_kernel_edsp_process4_done ; r10 = y1|y0, r11 = y3|y2 for the remaining samples
- ADDS r2, r2, #4 ; r2 = samples left (at most 4)
- BLE xcorr_kernel_edsp_done
- LDRH r12, [r4], #2 ; r12 = x0 in the bottom half
- SUBS r2, r2, #1
- ; First tail sample.
- SMLABB r6, r12, r10, r6 ; r6 += x0*y0
- LDRHGT r14, [r4], #2 ; r14 = x1, only if a second sample follows
- SMLABT r7, r12, r10, r7 ; r7 += x0*y1
- SMLABB r8, r12, r11, r8 ; r8 += x0*y2
- SMLABT r9, r12, r11, r9 ; r9 += x0*y3
- BLE xcorr_kernel_edsp_done
- SMLABT r6, r14, r10, r6 ; r6 += x1*y1
- SUBS r2, r2, #1
- SMLABB r7, r14, r11, r7 ; r7 += x1*y2
- LDRH r10, [r5], #2 ; r10 = y4 in the bottom half (y0, y1 are no longer needed)
- SMLABT r8, r14, r11, r8 ; r8 += x1*y3
- LDRHGT r12, [r4], #2 ; r12 = x2, only if a third sample follows
- SMLABB r9, r14, r10, r9 ; r9 += x1*y4
- BLE xcorr_kernel_edsp_done
- SMLABB r6, r12, r11, r6 ; r6 += x2*y2
- CMP r2, #1 ; GT when a fourth sample remains
- SMLABT r7, r12, r11, r7 ; r7 += x2*y3
- LDRH r2, [r5], #2 ; r2 = y5; the counter is finished with and r2 is restored on exit
- SMLABB r8, r12, r10, r8 ; r8 += x2*y4
- LDRHGT r14, [r4] ; r14 = x3, only if a fourth sample follows (flags from CMP)
- SMLABB r9, r12, r2, r9 ; r9 += x2*y5
- BLE xcorr_kernel_edsp_done
- SMLABT r6, r14, r11, r6 ; r6 += x3*y3
- LDRH r11, [r5] ; r11 = y6
- SMLABB r7, r14, r10, r7 ; r7 += x3*y4
- SMLABB r8, r14, r2, r8 ; r8 += x3*y5
- SMLABB r9, r14, r11, r9 ; r9 += x3*y6
- xcorr_kernel_edsp_done
- LDMFD sp!, {r2,r4,r5,pc} ; restore r2, r4, r5 and return
- ENDP
- celt_pitch_xcorr_edsp PROC ; xcorr[i] = sum over j in 0..len-1 of x[j]*y[i+j], for each requested lag i; returns the maximum
- ; Inputs:
- ;   r0 = x, 16-bit samples read with word loads (presumably must be 32-bit aligned)
- ;   r1 = y, 16-bit samples; a y that is not 32-bit aligned is handled by the extra single-lag pass below
- ;   r2 = xcorr, pointer to the 32-bit output array
- ;   r3 = len, number of samples per correlation
- ;   first stack word on entry = number of lags to compute (presumably max_pitch; the name is not visible here)
- ; Output:
- ;   r0 = largest value stored to xcorr, but never less than 1
- ; Register use:
- ;   r4 = x pointer, rewound to the start after every pass
- ;   r5 = y pointer for the current lag
- ;   r6..r9 = the four sums of the 4-lag kernel (scratch for samples in the other passes)
- ;   r10, r11 = the two sums of the 2-lag pass; r14 = the sum of the single-lag passes
- ;   r1 = lags left (biased while inside the passes)
- ;   r12 = sample counter
- ;   "a|b" in the comments means top halfword a, bottom halfword b; the sample order assumes little-endian loads (TODO confirm)
- STMFD sp!, {r4-r11, lr} ; save r4-r11 and the return address (36 bytes)
- MOV r5, r1
- LDR r1, [sp, #36] ; first stack argument, now 36 bytes above sp
- MOV r4, r0
- TST r5, #3 ; is y 32-bit aligned?
- ; The running maximum starts at 1 (MOV does not disturb the TST flags).
- MOV r0, #1
- BEQ celt_pitch_xcorr_edsp_process1u_done ; aligned: no extra pass needed
- SUBS r12, r3, #4
- ; Unaligned y: compute one lag alone in r14; afterwards y+1 is the next lag.
- MOV r14, #0
- LDRH r8, [r5], #2 ; r8 = y0; r5 becomes 32-bit aligned if y was 16-bit aligned
- BLE celt_pitch_xcorr_edsp_process1u_loop4_done ; four or fewer samples: tail only
- LDR r6, [r4], #4 ; r6 = x1|x0
- MOV r8, r8, LSL #16 ; move y0 to the top half, where the loop expects it
- celt_pitch_xcorr_edsp_process1u_loop4
- LDR r9, [r5], #4 ; r9 = y2|y1
- SMLABT r14, r6, r8, r14 ; r14 += x0*y0
- LDR r7, [r4], #4 ; r7 = x3|x2
- SMLATB r14, r6, r9, r14 ; r14 += x1*y1
- LDR r8, [r5], #4 ; r8 = y4|y3
- SMLABT r14, r7, r9, r14 ; r14 += x2*y2
- SUBS r12, r12, #4
- SMLATB r14, r7, r8, r14 ; r14 += x3*y3
- LDRGT r6, [r4], #4 ; next x pair, only if another iteration follows
- BGT celt_pitch_xcorr_edsp_process1u_loop4
- MOV r8, r8, LSR #16 ; bring the pending y value down to the bottom half for the tail
- celt_pitch_xcorr_edsp_process1u_loop4_done
- ADDS r12, r12, #4 ; r12 = samples left
- celt_pitch_xcorr_edsp_process1u_loop1
- LDRHGE r6, [r4], #2 ; next x value
- ; The bottom half of r8 holds the matching y value.
- SMLABBGE r14, r6, r8, r14
- SUBSGE r12, r12, #1
- LDRHGT r8, [r5], #2 ; next y value, only if another sample follows
- BGT celt_pitch_xcorr_edsp_process1u_loop1
- ; Rewind x by len samples, back to its start.
- SUB r4, r4, r3, LSL #1
- ; Rewind y by len samples; the ADD below then steps it to the next lag.
- SUB r5, r5, r3, LSL #1
- ; Update the running maximum.
- CMP r0, r14
- ADD r5, r5, #2
- MOVLT r0, r14
- SUBS r1, r1, #1 ; one lag done
- ; Store the sum (STR keeps the SUBS flags for the branch below).
- STR r14, [r2], #4
- BLE celt_pitch_xcorr_edsp_done ; no lags left
- celt_pitch_xcorr_edsp_process1u_done
- ; Fewer than four lags left: skip the 4-lag loop.
- SUBS r1, r1, #4
- BLT celt_pitch_xcorr_edsp_process2
- celt_pitch_xcorr_edsp_process4
- ; Four lags per iteration: clear the sums and call xcorr_kernel_edsp, which
- ; reads x from r4 and y from r5 and returns with r0-r5 unchanged.
- MOV r6, #0
- MOV r7, #0
- MOV r8, #0
- MOV r9, #0
- BL xcorr_kernel_edsp_start
- ; Fold the four sums into the running maximum.
- CMP r0, r6
- ; Advance y by four samples (8 bytes); ADD does not touch the CMP flags.
- ADD r5, r5, #8
- MOVLT r0, r6
- CMP r0, r7
- MOVLT r0, r7
- CMP r0, r8
- MOVLT r0, r8
- CMP r0, r9
- MOVLT r0, r9
- STMIA r2!, {r6-r9} ; store the four sums and advance xcorr
- SUBS r1, r1, #4
- BGE celt_pitch_xcorr_edsp_process4
- celt_pitch_xcorr_edsp_process2 ; r1 = lags left minus 4
- ADDS r1, r1, #2 ; r1 = lags left minus 2
- BLT celt_pitch_xcorr_edsp_process1a ; fewer than two lags left
- SUBS r12, r3, #4
- ; Two lags in one pass: r10 sums lag 0 and r11 sums lag 1.
- MOV r10, #0
- MOV r11, #0
- LDR r8, [r5], #4 ; r8 = y1|y0
- BLE celt_pitch_xcorr_edsp_process2_loop_done ; four or fewer samples: tail only
- LDR r6, [r4], #4 ; r6 = x1|x0
- LDR r9, [r5], #4 ; r9 = y3|y2
- celt_pitch_xcorr_edsp_process2_loop4
- SMLABB r10, r6, r8, r10 ; r10 += x0*y0
- LDR r7, [r4], #4 ; r7 = x3|x2
- SMLABT r11, r6, r8, r11 ; r11 += x0*y1
- SUBS r12, r12, #4
- SMLATT r10, r6, r8, r10 ; r10 += x1*y1
- LDR r8, [r5], #4 ; r8 = y5|y4
- SMLATB r11, r6, r9, r11 ; r11 += x1*y2
- LDRGT r6, [r4], #4 ; next x pair, only if another iteration follows
- SMLABB r10, r7, r9, r10 ; r10 += x2*y2
- SMLABT r11, r7, r9, r11 ; r11 += x2*y3
- SMLATT r10, r7, r9, r10 ; r10 += x3*y3
- LDRGT r9, [r5], #4 ; next y pair, only if another iteration follows
- SMLATB r11, r7, r8, r11 ; r11 += x3*y4
- BGT celt_pitch_xcorr_edsp_process2_loop4
- celt_pitch_xcorr_edsp_process2_loop_done ; r8 = y1|y0 for the remaining samples
- ADDS r12, r12, #2 ; r12 = samples left minus 2
- BLE celt_pitch_xcorr_edsp_process2_1 ; two or fewer left
- LDR r6, [r4], #4 ; r6 = x1|x0
- ; Two samples at once.
- SMLABB r10, r6, r8, r10 ; r10 += x0*y0
- LDR r9, [r5], #4 ; r9 = y3|y2
- SMLABT r11, r6, r8, r11 ; r11 += x0*y1
- SUB r12, r12, #2 ; account for the two samples handled here (flags untouched)
- SMLATT r10, r6, r8, r10 ; r10 += x1*y1
- MOV r8, r9 ; the new pair becomes y1|y0 for the code below
- SMLATB r11, r6, r9, r11 ; r11 += x1*y2
- celt_pitch_xcorr_edsp_process2_1 ; r12 = samples left minus 2
- LDRH r6, [r4], #2 ; r6 = x0
- ADDS r12, r12, #1 ; r12 = samples left minus 1
- ; This sample is always accumulated.
- SMLABB r10, r6, r8, r10 ; r10 += x0*y0
- LDRHGT r7, [r4], #2 ; r7 = x1, only if a last sample follows
- SMLABT r11, r6, r8, r11 ; r11 += x0*y1
- BLE celt_pitch_xcorr_edsp_process2_done
- LDRH r9, [r5], #2 ; r9 = y2
- SMLABT r10, r7, r8, r10 ; r10 += x1*y1
- SMLABB r11, r7, r9, r11 ; r11 += x1*y2
- celt_pitch_xcorr_edsp_process2_done
- ; Rewind x by len samples, back to its start.
- SUB r4, r4, r3, LSL #1
- ; y was advanced by len+1 samples; this SUB and the ADD below leave it two samples past this pass's start.
- SUB r5, r5, r3, LSL #1
- ; Fold the first sum into the running maximum.
- CMP r0, r10
- ADD r5, r5, #2
- MOVLT r0, r10
- SUB r1, r1, #2 ; two lags done (flags untouched)
- ; Fold the second sum into the running maximum.
- CMP r0, r11
- ; Store both sums (STR keeps the CMP flags for the MOVLT).
- STR r10, [r2], #4
- MOVLT r0, r11
- STR r11, [r2], #4
- celt_pitch_xcorr_edsp_process1a ; r1 = lags left minus 2
- ADDS r1, r1, #1 ; r1 = lags left minus 1
- BLT celt_pitch_xcorr_edsp_done ; none left
- SUBS r12, r3, #4
- ; Last lag, x and y both read with word loads; r14 holds the sum.
- MOV r14, #0
- BLT celt_pitch_xcorr_edsp_process1a_loop_done ; fewer than four samples: tail only
- LDR r6, [r4], #4 ; r6 = x1|x0
- LDR r8, [r5], #4 ; r8 = y1|y0
- LDR r7, [r4], #4 ; r7 = x3|x2
- LDR r9, [r5], #4 ; r9 = y3|y2
- celt_pitch_xcorr_edsp_process1a_loop4
- SMLABB r14, r6, r8, r14 ; r14 += x0*y0
- SUBS r12, r12, #4
- SMLATT r14, r6, r8, r14 ; r14 += x1*y1
- LDRGE r6, [r4], #4 ; the four LDRGE reload x and y only if another iteration follows
- SMLABB r14, r7, r9, r14 ; r14 += x2*y2
- LDRGE r8, [r5], #4
- SMLATT r14, r7, r9, r14 ; r14 += x3*y3
- LDRGE r7, [r4], #4
- LDRGE r9, [r5], #4
- BGE celt_pitch_xcorr_edsp_process1a_loop4
- celt_pitch_xcorr_edsp_process1a_loop_done ; r12 = samples left minus 4
- ADDS r12, r12, #2 ; r12 = samples left minus 2
- LDRGE r6, [r4], #4 ; two more samples, only if at least two remain
- LDRGE r8, [r5], #4
- ; The GE-conditional instructions below all use the flags of the ADDS above.
- SMLABBGE r14, r6, r8, r14
- SUBGE r12, r12, #2
- SMLATTGE r14, r6, r8, r14
- ADDS r12, r12, #1 ; r12 = samples left minus 1
- LDRHGE r6, [r4], #2 ; final single sample, only if one remains
- LDRHGE r8, [r5], #2
- ; Accumulate the final sample when present.
- SMLABBGE r14, r6, r8, r14
- ; Fold the sum into the running maximum.
- CMP r0, r14
- ; Store the sum (STR keeps the CMP flags for the MOVLT).
- STR r14, [r2], #4
- MOVLT r0, r14
- celt_pitch_xcorr_edsp_done
- LDMFD sp!, {r4-r11, pc} ; restore r4-r11 and return with the maximum in r0
- ENDP
- ENDIF ; end of the OPUS_ARM_MAY_HAVE_EDSP section
- END
|