@@ -713,16 +713,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
713713 vdivps (x, op1, op2);
714714 }
715715
716- void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
716+ void uni_vaddps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
717717 const Xbyak::Operand &op2) {
718718 if (is_valid_isa (avx))
719719 vaddps (x, op1, op2);
720720 else {
721- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
722- addps (x, op2);
721+ if (x.getIdx () == op1.getIdx ()) {
722+ addps (x, op2);
723+ } else if (x.isEqualIfNotInherited (op2)) {
724+ addps (x, op1);
725+ } else {
726+ movups (x, op1);
727+ addps (x, op2);
728+ }
723729 }
724730 }
725- void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
731+ void uni_vaddps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
726732 const Xbyak::Operand &op2) {
727733 vaddps (x, op1, op2);
728734 }
@@ -817,16 +823,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
817823 vsubss (x, Xbyak::Xmm (op1.getIdx ()), Xbyak::Xmm (op2.getIdx ()));
818824 }
819825
820- void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
826+ void uni_vsubps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
821827 const Xbyak::Operand &op2) {
822828 if (is_valid_isa (avx))
823829 vsubps (x, op1, op2);
824830 else {
825- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
826- subps (x, op2);
831+ if (x.getIdx () == op1.getIdx ()) {
832+ subps (x, op2);
833+ } else if (x.isEqualIfNotInherited (op2)) {
834+ push (op1);
835+ subps (op1, op2);
836+ movups (x, op1);
837+ pop (op1);
838+ } else {
839+ movups (x, op1);
840+ subps (x, op2);
841+ }
827842 }
828843 }
829- void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
844+ void uni_vsubps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
830845 const Xbyak::Operand &op2) {
831846 vsubps (x, op1, op2);
832847 }
@@ -848,30 +863,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
848863 vsubps (x, op1, op2);
849864 }
850865
851- void uni_vpmulld (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
852- const Xbyak::Operand &op ) {
866+ void uni_vpmulld (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
867+ const Xbyak::Operand &op2 ) {
853868 if (is_valid_isa (avx)) {
854- vpmulld (x1, x2, op );
869+ vpmulld (x, op1, op2 );
855870 } else {
856- if (x1.getIdx () != x2.getIdx ()) movdqa (x1, x2);
857- pmulld (x1, op);
871+ if (x.getIdx () == op1.getIdx ()) {
872+ pmulld (x, op2);
873+ } else if (x.isEqualIfNotInherited (op2)) {
874+ pmulld (x, op1);
875+ } else {
876+ movdqa (x, op1);
877+ pmulld (x, op2);
878+ }
858879 }
859880 }
860881 void uni_vpmulld (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
861882 const Xbyak::Operand &op) {
862883 vpmulld (x1, x2, op);
863884 }
864885
865- void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
886+ void uni_vmulps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
866887 const Xbyak::Operand &op2) {
867888 if (is_valid_isa (avx))
868889 vmulps (x, op1, op2);
869890 else {
870- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
871- mulps (x, op2);
891+ if (x.getIdx () == op1.getIdx ()) {
892+ mulps (x, op2);
893+ } else if (x.isEqualIfNotInherited (op2)) {
894+ mulps (x, op1);
895+ } else {
896+ movups (x, op1);
897+ mulps (x, op2);
898+ }
872899 }
873900 }
874- void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
901+ void uni_vmulps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
875902 const Xbyak::Operand &op2) {
876903 vmulps (x, op1, op2);
877904 }
@@ -1273,16 +1300,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12731300 vpsrld (x, op, imm);
12741301 }
12751302
1276- void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1303+ void uni_vmaxps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
12771304 const Xbyak::Operand &op2) {
12781305 if (is_valid_isa (avx))
12791306 vmaxps (x, op1, op2);
12801307 else {
1281- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1282- maxps (x, op2);
1308+ if (x.getIdx () == op1.getIdx ()) {
1309+ maxps (x, op2);
1310+ } else if (x.isEqualIfNotInherited (op2)) {
1311+ push (op1);
1312+ maxps (op1, op2);
1313+ movups (x, op1);
1314+ pop (op1);
1315+ } else {
1316+ movups (x, op1);
1317+ maxps (x, op2);
1318+ }
12831319 }
12841320 }
1285- void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1321+ void uni_vmaxps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
12861322 const Xbyak::Operand &op2) {
12871323 vmaxps (x, op1, op2);
12881324 }
@@ -1297,17 +1333,26 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
12971333 }
12981334 }
12991335
1300- void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Operand &op1,
1336+ void uni_vminps (const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
13011337 const Xbyak::Operand &op2) {
13021338 if (is_valid_isa (avx))
13031339 vminps (x, op1, op2);
13041340 else {
1305- if (!x.isEqualIfNotInherited (op1)) movups (x, op1);
1306- minps (x, op2);
1341+ if (x.getIdx () == op1.getIdx ()) {
1342+ minps (x, op2);
1343+ } else if (x.isEqualIfNotInherited (op2)) {
1344+ push (op1);
1345+ minps (op1, op2);
1346+ movups (x, op1);
1347+ pop (op1);
1348+ } else {
1349+ movups (x, op1);
1350+ minps (x, op2);
1351+ }
13071352 }
13081353 }
13091354
1310- void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Operand &op1,
1355+ void uni_vminps (const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
13111356 const Xbyak::Operand &op2) {
13121357 vminps (x, op1, op2);
13131358 }
@@ -1344,13 +1389,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
13441389 vpmovzxbd (y, op);
13451390 }
13461391
1347- void uni_vcmpps (const Xbyak::Xmm &x1 , const Xbyak::Xmm &x2 ,
1348- const Xbyak::Operand &op , int cmp_predicate) {
1392+ void uni_vcmpps (const Xbyak::Xmm &x , const Xbyak::Xmm &op1 ,
1393+ const Xbyak::Operand &op2 , int cmp_predicate) {
13491394 if (is_valid_isa (avx))
1350- vcmpps (x1, x2, op , cmp_predicate);
1395+ vcmpps (x, op1, op2 , cmp_predicate);
13511396 else {
1352- if (x1.getIdx () != x2.getIdx ()) uni_vmovups (x1, x2);
1353- cmpps (x1, op, cmp_predicate);
1397+ if (x.getIdx () == op1.getIdx ()) {
1398+ cmpps (x, op2, cmp_predicate);
1399+ } else if (x.isEqualIfNotInherited (op2)) {
1400+ push (op1);
1401+ cmpps (op1, op2, cmp_predicate);
1402+ movups (x, op1);
1403+ pop (op1);
1404+ } else {
1405+ movups (x, op1);
1406+ cmpps (x, op2, cmp_predicate);
1407+ }
13541408 }
13551409 }
13561410 void uni_vcmpps (const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
0 commit comments