[xiph-commits] r11498 - branches/theora-playtime/lib/x86_32_vs
illiminable at svn.xiph.org
illiminable at svn.xiph.org
Fri Jun 2 09:57:01 PDT 2006
Author: illiminable
Date: 2006-06-02 09:56:57 -0700 (Fri, 02 Jun 2006)
New Revision: 11498
Modified:
branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
Log:
* All the recon SSE2 routines are finished - there are still improvements to make
Modified: branches/theora-playtime/lib/x86_32_vs/recon_sse2.c
===================================================================
--- branches/theora-playtime/lib/x86_32_vs/recon_sse2.c 2006-06-02 16:08:20 UTC (rev 11497)
+++ branches/theora-playtime/lib/x86_32_vs/recon_sse2.c 2006-06-02 16:56:57 UTC (rev 11498)
@@ -413,6 +413,7 @@
unsigned char *RefPtr2, ogg_int16_t *ChangePtr,
ogg_uint32_t LineStep)
{
+#if 0
ogg_uint32_t i;
for (i = 8; i; i--){
@@ -430,6 +431,178 @@
RefPtr1 += LineStep;
RefPtr2 += LineStep;
}
+
+#else
+
+ __asm {
+ align 16
+
+ mov eax, LineStep
+ mov ebx, RefPtr1
+ mov ecx, RefPtr2
+ mov edx, ChangePtr
+ mov edi, ReconPtr
+
+ pxor xmm0, xmm0
+
+ /* Iteration 1 */
+ movq xmm1, QWORD PTR [ebx]
+ movq xmm2, QWORD PTR [ecx]
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ paddsw xmm1, xmm2
+ psrlw xmm1, 1
+
+ movdqa xmm2, [edx]
+ paddsw xmm1, xmm2
+
+ packuswb xmm1, xmm0
+
+ movq QWORD PTR [edi], xmm1
+ lea edi, [edi + eax]
+ lea ebx, [ebx + eax]
+ lea ecx, [ecx + eax]
+ lea edx, [edx + 16]
+
+
+ /* Iteration 2 */
+ movq xmm1, QWORD PTR [ebx]
+ movq xmm2, QWORD PTR [ecx]
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ paddsw xmm1, xmm2
+ psrlw xmm1, 1
+
+ movdqa xmm2, [edx]
+ paddsw xmm1, xmm2
+
+ packuswb xmm1, xmm0
+
+ movq QWORD PTR [edi], xmm1
+ lea edi, [edi + eax]
+ lea ebx, [ebx + eax]
+ lea ecx, [ecx + eax]
+ lea edx, [edx + 16]
+
+ /* Iteration 3 */
+ movq xmm1, QWORD PTR [ebx]
+ movq xmm2, QWORD PTR [ecx]
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ paddsw xmm1, xmm2
+ psrlw xmm1, 1
+
+ movdqa xmm2, [edx]
+ paddsw xmm1, xmm2
+
+ packuswb xmm1, xmm0
+
+ movq QWORD PTR [edi], xmm1
+ lea edi, [edi + eax]
+ lea ebx, [ebx + eax]
+ lea ecx, [ecx + eax]
+ lea edx, [edx + 16]
+
+ /* Iteration 4 */
+ movq xmm1, QWORD PTR [ebx]
+ movq xmm2, QWORD PTR [ecx]
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ paddsw xmm1, xmm2
+ psrlw xmm1, 1
+
+ movdqa xmm2, [edx]
+ paddsw xmm1, xmm2
+
+ packuswb xmm1, xmm0
+
+ movq QWORD PTR [edi], xmm1
+ lea edi, [edi + eax]
+ lea ebx, [ebx + eax]
+ lea ecx, [ecx + eax]
+ lea edx, [edx + 16]
+
+ /* Iteration 5 */
+ movq xmm1, QWORD PTR [ebx]
+ movq xmm2, QWORD PTR [ecx]
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ paddsw xmm1, xmm2
+ psrlw xmm1, 1
+
+ movdqa xmm2, [edx]
+ paddsw xmm1, xmm2
+
+ packuswb xmm1, xmm0
+
+ movq QWORD PTR [edi], xmm1
+ lea edi, [edi + eax]
+ lea ebx, [ebx + eax]
+ lea ecx, [ecx + eax]
+ lea edx, [edx + 16]
+
+ /* Iteration 6 */
+ movq xmm1, QWORD PTR [ebx]
+ movq xmm2, QWORD PTR [ecx]
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ paddsw xmm1, xmm2
+ psrlw xmm1, 1
+
+ movdqa xmm2, [edx]
+ paddsw xmm1, xmm2
+
+ packuswb xmm1, xmm0
+
+ movq QWORD PTR [edi], xmm1
+ lea edi, [edi + eax]
+ lea ebx, [ebx + eax]
+ lea ecx, [ecx + eax]
+ lea edx, [edx + 16]
+
+ /* Iteration 7 */
+ movq xmm1, QWORD PTR [ebx]
+ movq xmm2, QWORD PTR [ecx]
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ paddsw xmm1, xmm2
+ psrlw xmm1, 1
+
+ movdqa xmm2, [edx]
+ paddsw xmm1, xmm2
+
+ packuswb xmm1, xmm0
+
+ movq QWORD PTR [edi], xmm1
+ lea edi, [edi + eax]
+ lea ebx, [ebx + eax]
+ lea ecx, [ecx + eax]
+ lea edx, [edx + 16]
+
+ /* Iteration 8 */
+ movq xmm1, QWORD PTR [ebx]
+ movq xmm2, QWORD PTR [ecx]
+ punpcklbw xmm1, xmm0
+ punpcklbw xmm2, xmm0
+ paddsw xmm1, xmm2
+ psrlw xmm1, 1
+
+ movdqa xmm2, [edx]
+ paddsw xmm1, xmm2
+
+ packuswb xmm1, xmm0
+
+ movq QWORD PTR [edi], xmm1
+ /* lea edi, [edi + LineStep]
+ lea ebx, [ebx + LineStep]
+ lea ecx, [ecx + LineStep]
+ lea edx, [edx + 16]*/
+
+
+
+ };
+
+#endif
}
More information about the commits mailing list