Skip to content

Commit 85d287c

Browse files
authored
Implement a bunch of missing AVX instructions (#595)
1 parent d48df5e commit 85d287c

File tree

2 files changed

+49
-6
lines changed

2 files changed

+49
-6
lines changed

lib/Arch/X86/Semantics/AVX.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ DEF_SEM(DoVZEROUPPER) {
2828
return memory;
2929
}
3030

31-
template <typename D, typename S1, size_t KL, size_t VL>
31+
template <typename D, typename S1>
3232
DEF_SEM(VPBROADCASTB, D dst, S1 src1) {
3333
auto src_vec = UReadV8(src1);
3434
auto dst_vec = UClearV8(UReadV8(dst));
@@ -42,6 +42,20 @@ DEF_SEM(VPBROADCASTB, D dst, S1 src1) {
4242
return memory;
4343
}
4444

45+
// Semantics for VPBROADCASTQ: copies one 64-bit element of `src1` into every
// 64-bit element of `dst`.
//
//   dst   destination vector operand; it is read, cleared, then overwritten.
//   src1  source vector operand; only element 0 (as seen by UReadV64) is used.
//
// Returns `memory`, which this function does not modify directly.
// NOTE(review): `memory` is not declared here -- presumably supplied by the
// DEF_SEM macro; confirm against the macro definition.
template <typename D, typename S1>
46+
DEF_SEM(VPBROADCASTQ, D dst, S1 src1) {
47+
auto src_vec = UReadV64(src1);
48+
// Start from a cleared copy of the destination so that every element is
// explicitly written by the loop below.
auto dst_vec = UClearV64(UReadV64(dst));
49+
// The element count comes from the destination vector, so the same template
// serves whatever destination width `D` selects.
auto num_groups = NumVectorElems(dst_vec);
50+
// The value to replicate is extracted once, outside the loop.
auto src_val = UExtractV64(src_vec, 0);
51+
52+
for (std::size_t i = 0; i < num_groups; ++i) {
53+
dst_vec = UInsertV64(dst_vec, i, src_val);
54+
}
55+
UWriteV64(dst, dst_vec);
56+
return memory;
57+
}
58+
4559
template <typename S2>
4660
DEF_SEM(VINSERTF128, VV256W dst, V256 src1, S2 src2, I8 src3) {
4761
auto dst_vec = UReadV128(src1);
@@ -70,6 +84,7 @@ DEF_ISEL(VINSERTF128_YMMqq_YMMqq_MEMdq_IMMb) = VINSERTF128<MV128>;
7084
DEF_ISEL(VINSERTF128_YMMqq_YMMqq_XMMdq_IMMb) = VINSERTF128<V128>;
7185

7286
DEF_ISEL(VZEROUPPER) = DoVZEROUPPER;
73-
DEF_ISEL(VPBROADCASTB_YMMqq_XMMb) = VPBROADCASTB<VV256W, V128, 32, 256>;
87+
DEF_ISEL(VPBROADCASTB_YMMqq_XMMb) = VPBROADCASTB<VV256W, V128>;
88+
DEF_ISEL(VPBROADCASTQ_YMMqq_XMMq) = VPBROADCASTQ<VV256W, V128>;
7489

7590
#endif // HAS_FEATURE_AVX

lib/Arch/X86/Semantics/MMX.cpp

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,16 +183,14 @@ DEF_ISEL(PUNPCKLDQ_XMMdq_XMMq) = PUNPCKLDQ<V128W, V128, V128>;
183183

184184
DEF_ISEL(PUNPCKLQDQ_XMMdq_MEMdq) = PUNPCKLQDQ<V128W, V128, MV128>;
185185
DEF_ISEL(PUNPCKLQDQ_XMMdq_XMMq) = PUNPCKLQDQ<V128W, V128, V128>;
186+
IF_AVX(DEF_ISEL(VPUNPCKLQDQ_XMMdq_XMMdq_XMMdq) = PUNPCKLQDQ<V128W, V128, V128>;)
186187

187188
// Adding new MMX Instructions
188189
namespace {
189190

190191
template <typename D, typename S1, typename S2>
191192
DEF_SEM(PADDB, D dst, S1 src1, S2 src2) {
192-
auto lhs_vec = UReadV8(src1);
193-
auto rhs_vec = UReadV8(src2);
194-
auto dst_vec = UAddV8(lhs_vec, rhs_vec);
195-
UWriteV8(dst, dst_vec);
193+
UWriteV8(dst, UAddV8(UReadV8(src1), UReadV8(src2)));
196194
return memory;
197195
}
198196

@@ -230,6 +228,8 @@ DEF_ISEL(PADDB_MMXq_MEMq) = PADDB<V64W, V64, MV64>;
230228
DEF_ISEL(PADDB_XMMdq_XMMdq) = PADDB<V128W, V128, V128>;
231229
DEF_ISEL(PADDB_XMMdq_MEMdq) = PADDB<V128W, V128, MV128>;
232230

231+
IF_AVX(DEF_ISEL(VPADDB_YMMqq_YMMqq_YMMqq) = PADDB<VV256W, VV256, VV256>;)
232+
233233
DEF_ISEL(PADDW_MMXq_MMXq) = PADDW<V64W, V64, V64>;
234234
DEF_ISEL(PADDW_MMXq_MEMq) = PADDW<V64W, V64, MV64>;
235235
DEF_ISEL(PADDW_XMMdq_XMMdq) = PADDW<V128W, V128, V128>;
@@ -240,6 +240,8 @@ DEF_ISEL(PADDD_MMXq_MEMq) = PADDD<V64W, V64, MV64>;
240240
DEF_ISEL(PADDD_XMMdq_XMMdq) = PADDD<V128W, V128, V128>;
241241
DEF_ISEL(PADDD_XMMdq_MEMdq) = PADDD<V128W, V128, MV128>;
242242

243+
IF_AVX(DEF_ISEL(VPADDD_YMMqq_YMMqq_YMMqq) = PADDD<VV256W, VV256, VV256>;)
244+
243245
DEF_ISEL(PADDQ_MMXq_MMXq) = PADDQ<V64W, V64, V64>;
244246
DEF_ISEL(PADDQ_MMXq_MEMq) = PADDQ<V64W, V64, MV64>;
245247
DEF_ISEL(PADDQ_XMMdq_XMMdq) = PADDQ<V128W, V128, V128>;
@@ -438,6 +440,8 @@ DEF_ISEL(PHADDD_MMXq_MEMq) = PHADDD<V64W, V64, MV64>;
438440
DEF_ISEL(PHADDD_XMMdq_XMMdq) = PHADDD<V128W, V128, V128>;
439441
DEF_ISEL(PHADDD_XMMdq_MEMdq) = PHADDD<V128W, V128, MV128>;
440442

443+
IF_AVX(DEF_ISEL(VPHADDD_YMMqq_YMMqq_YMMqq) = PHADDD<VV256W, VV256, VV256>;)
444+
441445
template <typename D, typename S1, typename S2>
442446
DEF_SEM(PHADDSW, D dst, S1 src1, S2 src2) {
443447
auto src1_vec = SReadV16(src1);
@@ -530,6 +534,7 @@ DEF_ISEL(PSUBB_MMXq_MMXq) = PSUBB<V64W, V64, V64>;
530534
DEF_ISEL(PSUBB_MMXq_MEMq) = PSUBB<V64W, V64, MV64>;
531535
DEF_ISEL(PSUBB_XMMdq_XMMdq) = PSUBB<V128W, V128, V128>;
532536
DEF_ISEL(PSUBB_XMMdq_MEMdq) = PSUBB<V128W, V128, MV128>;
537+
IF_AVX(DEF_ISEL(VPSUBB_YMMqq_YMMqq_YMMqq) = PSUBB<VV256W, VV256, VV256>;)
533538

534539
DEF_ISEL(PSUBW_MMXq_MMXq) = PSUBW<V64W, V64, V64>;
535540
DEF_ISEL(PSUBW_MMXq_MEMq) = PSUBW<V64W, V64, MV64>;
@@ -542,13 +547,15 @@ DEF_ISEL(PSUBD_XMMdq_XMMdq) = PSUBD<V128W, V128, V128>;
542547
DEF_ISEL(PSUBD_XMMdq_MEMdq) = PSUBD<V128W, V128, MV128>;
543548
IF_AVX(DEF_ISEL(VPSUBD_XMMdq_XMMdq_MEMdq) = PSUBD<VV128W, V128, MV128>;)
544549
IF_AVX(DEF_ISEL(VPSUBD_XMMdq_XMMdq_XMMdq) = PSUBD<VV128W, V128, V128>;)
550+
IF_AVX(DEF_ISEL(VPSUBD_YMMqq_YMMqq_YMMqq) = PSUBD<VV256W, VV256, VV256>;)
545551

546552
DEF_ISEL(PSUBQ_MMXq_MMXq) = PSUBQ<V64W, V64, V64>;
547553
DEF_ISEL(PSUBQ_MMXq_MEMq) = PSUBQ<V64W, V64, MV64>;
548554
DEF_ISEL(PSUBQ_XMMdq_XMMdq) = PSUBQ<V128W, V128, V128>;
549555
DEF_ISEL(PSUBQ_XMMdq_MEMdq) = PSUBQ<V128W, V128, MV128>;
550556
IF_AVX(DEF_ISEL(VPSUBQ_XMMdq_XMMdq_MEMdq) = PSUBQ<VV128W, V128, MV128>;)
551557
IF_AVX(DEF_ISEL(VPSUBQ_XMMdq_XMMdq_XMMdq) = PSUBQ<VV128W, V128, V128>;)
558+
IF_AVX(DEF_ISEL(VPSUBQ_YMMqq_YMMqq_YMMqq) = PSUBQ<VV256W, VV256, VV256>;)
552559

553560
/*
554561
3305 VPSUBD VPSUBD_YMMqq_YMMqq_MEMqq AVX2 AVX2 AVX2 ATTRIBUTES:
@@ -1760,6 +1767,23 @@ DEF_SEM(PMULUDQ, D dst, S1 src1, S2 src2) {
17601767
return memory;
17611768
}
17621769

1770+
// Semantics for PMULLD: element-wise multiply of the 32-bit elements of `src1`
// and `src2`, both read through the signed accessors (SReadV32).
//
//   dst   destination vector operand; it is read, cleared, then overwritten.
//   src1  first multiplicand vector.
//   src2  second multiplicand vector.
//
// Each pair of elements is widened with SExt, multiplied with SMul, and the
// product is narrowed with Trunc before being stored at the same index of `dst`.
// NOTE(review): presumably SExt widens to 64 bits and Trunc keeps the low
// 32 bits of the product -- confirm against the helper definitions.
//
// Returns `memory`, which this function does not modify directly.
// NOTE(review): `memory` is not declared here -- presumably supplied by the
// DEF_SEM macro; confirm against the macro definition.
template <typename D, typename S1, typename S2>
1771+
DEF_SEM(PMULLD, D dst, S1 src1, S2 src2) {
1772+
auto src1_vec = SReadV32(src1);
1773+
auto src2_vec = SReadV32(src2);
1774+
// Start from a cleared copy of the destination; every element visited by the
// loop is then written explicitly.
auto dst_vec = SClearV32(SReadV32(dst));
1775+
1776+
// The iteration count is taken from the first source vector.
auto vec_count = NumVectorElems(src1_vec);
1777+
_Pragma("unroll") for (size_t i = 0; i < vec_count; i++) {
1778+
auto v1 = SExtractV32(src1_vec, i);
1779+
auto v2 = SExtractV32(src2_vec, i);
1780+
// Multiply at the widened type, then narrow the product on insertion.
auto mul = SMul(SExt(v1), SExt(v2));
1781+
dst_vec = SInsertV32(dst_vec, i, Trunc(mul));
1782+
}
1783+
SWriteV32(dst, dst_vec);
1784+
return memory;
1785+
}
1786+
17631787
template <typename D, typename S1, typename S2>
17641788
DEF_SEM(PMULLW, D dst, S1 src1, S2 src2) {
17651789
auto src1_vec = SReadV16(src1);
@@ -1817,11 +1841,15 @@ DEF_ISEL(PMULUDQ_MMXq_MMXq) = PMULUDQ<V64W, V64, V64>;
18171841
DEF_ISEL(PMULUDQ_MMXq_MEMq) = PMULUDQ<V64W, V64, MV64>;
18181842
DEF_ISEL(PMULUDQ_XMMdq_XMMdq) = PMULUDQ<V128W, V128, V128>;
18191843
DEF_ISEL(PMULUDQ_XMMdq_MEMdq) = PMULUDQ<V128W, V128, MV128>;
1844+
IF_AVX(DEF_ISEL(VPMULUDQ_YMMqq_YMMqq_YMMqq) = PMULUDQ<VV256W, VV256, VV256>;)
1845+
1846+
IF_AVX(DEF_ISEL(VPMULLD_YMMqq_YMMqq_YMMqq) = PMULLD<VV256W, VV256, VV256>;)
18201847

18211848
DEF_ISEL(PMULLW_MMXq_MMXq) = PMULLW<V64W, V64, V64>;
18221849
DEF_ISEL(PMULLW_MMXq_MEMq) = PMULLW<V64W, V64, MV64>;
18231850
DEF_ISEL(PMULLW_XMMdq_XMMdq) = PMULLW<V128W, V128, V128>;
18241851
DEF_ISEL(PMULLW_XMMdq_MEMdq) = PMULLW<V128W, V128, MV128>;
1852+
IF_AVX(DEF_ISEL(VPMULLW_YMMqq_YMMqq_YMMqq) = PMULLW<VV256W, VV256, VV256>;)
18251853

18261854
DEF_ISEL(PMULHW_MMXq_MMXq) = PMULHW<V64W, V64, V64>;
18271855
DEF_ISEL(PMULHW_MMXq_MEMq) = PMULHW<V64W, V64, MV64>;

0 commit comments

Comments
 (0)