@@ -158,6 +158,48 @@ static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
158158 return cgf.getBuilder ().createCast (loc, cir::CastKind::integral, mask, dstTy);
159159}
160160
161+ static mlir::Value emitX86PSRLDQIByteShift (CIRGenFunction &cgf,
162+ const CallExpr *E,
163+ ArrayRef<mlir::Value> Ops) {
164+ auto &builder = cgf.getBuilder ();
165+ auto resultType = cast<cir::VectorType>(Ops[0 ].getType ());
166+ auto loc = cgf.getLoc (E->getExprLoc ());
167+ unsigned shiftVal = getIntValueFromConstOp (Ops[1 ]) & 0xff ;
168+
169+ // If psrldq is shifting the vector more than 15 bytes, emit zero.
170+ if (shiftVal >= 16 )
171+ return builder.getZero (loc, resultType);
172+
173+ auto numElts = resultType.getSize () * 8 ;
174+ assert (numElts % 16 == 0 && " Expected a multiple of 16" );
175+
176+ llvm::SmallVector<int64_t , 64 > indices;
177+
178+ // This correlates to the OG CodeGen
179+ // As stated in the OG, 256/512-bit psrldq operates on 128-bit lanes.
180+ // So we have to make sure we handle it.
181+ for (unsigned l = 0 ; l < numElts; l += 16 ) {
182+ for (unsigned i = 0 ; i < 16 ; ++i) {
183+ unsigned idx = i + shiftVal;
184+ if (idx >= 16 )
185+ idx += numElts - 16 ;
186+ indices.push_back (idx + l);
187+ }
188+ }
189+
190+ auto byteVecTy = cir::VectorType::get (builder.getSInt8Ty (), numElts);
191+ mlir::Value byteCast = builder.createBitcast (Ops[0 ], byteVecTy);
192+ mlir::Value zero = builder.getZero (loc, byteVecTy);
193+
194+ // Perform the shuffle (right shift by inserting zeros from the left)
195+ mlir::Value shuffleResult =
196+ builder.createVecShuffle (loc, byteCast, zero, indices);
197+
198+ // Cast back to original type
199+ return builder.createBitcast (shuffleResult, resultType);
200+ }
201+
202+
161203mlir::Value CIRGenFunction::emitX86BuiltinExpr (unsigned BuiltinID,
162204 const CallExpr *E) {
163205 if (BuiltinID == Builtin::BI__builtin_cpu_is)
@@ -1112,7 +1154,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
11121154 case X86::BI__builtin_ia32_psrldqi128_byteshift:
11131155 case X86::BI__builtin_ia32_psrldqi256_byteshift:
11141156 case X86::BI__builtin_ia32_psrldqi512_byteshift:
1115- llvm_unreachable ( " psrldqi NYI " );
1157+ emitX86PSRLDQIByteShift (* this , E, Ops );
11161158 case X86::BI__builtin_ia32_kshiftliqi:
11171159 case X86::BI__builtin_ia32_kshiftlihi:
11181160 case X86::BI__builtin_ia32_kshiftlisi:
0 commit comments