@@ -1844,7 +1844,10 @@ namespace {
18441844 template <class _Traits , class _Ty >
18451845 const void * __stdcall __std_find_trivial_unsized_impl (const void * _First, const _Ty _Val) noexcept {
18461846#ifndef _M_ARM64EC
1847- if (_Use_avx2 ()) {
1847+ if ((reinterpret_cast <uintptr_t >(_First) & (sizeof (_Ty) - 1 )) != 0 ) {
1848+ // _First isn't aligned to sizeof(_Ty), so we need to use the scalar fallback below.
1849+ // This can happen with 8-byte elements on x86's 4-aligned stack. It can also happen with packed structs.
1850+ } else if (_Use_avx2 ()) {
18481851 _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414
18491852
18501853 // We read by vector-sized pieces, and we align pointers to vector-sized boundary.
@@ -1862,27 +1865,20 @@ namespace {
18621865 unsigned int _Bingo = static_cast <unsigned int >(_mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand)));
18631866
18641867 _Bingo &= _Mask;
1865- if (_Bingo != 0 ) {
1866- unsigned long _Offset = _tzcnt_u32 (_Bingo);
1867- _Advance_bytes (_First, _Offset);
1868- return _First;
1869- }
18701868
18711869 for (;;) {
1872- _Data = _mm256_load_si256 (static_cast <const __m256i*>(_First));
1873- _Bingo = static_cast <unsigned int >(_mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand)));
1874-
18751870 if (_Bingo != 0 ) {
18761871 unsigned long _Offset = _tzcnt_u32 (_Bingo);
18771872 _Advance_bytes (_First, _Offset);
18781873 return _First;
18791874 }
18801875
18811876 _Advance_bytes (_First, 32 );
1882- }
1883- }
18841877
1885- if (_Traits::_Sse_available ()) {
1878+ _Data = _mm256_load_si256 (static_cast <const __m256i*>(_First));
1879+ _Bingo = static_cast <unsigned int >(_mm256_movemask_epi8 (_Traits::_Cmp_avx (_Data, _Comparand)));
1880+ }
1881+ } else if (_Traits::_Sse_available ()) {
18861882 // We read by vector-sized pieces, and we align pointers to vector-sized boundary.
18871883 // From start partial piece we mask out matches that don't belong to the range.
18881884 // This makes sure we never cross page boundary, thus we read 'as if' sequentially.
@@ -1898,17 +1894,8 @@ namespace {
18981894 unsigned int _Bingo = static_cast <unsigned int >(_mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand)));
18991895
19001896 _Bingo &= _Mask;
1901- if (_Bingo != 0 ) {
1902- unsigned long _Offset;
1903- _BitScanForward (&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable]
1904- _Advance_bytes (_First, _Offset);
1905- return _First;
1906- }
19071897
19081898 for (;;) {
1909- _Data = _mm_load_si128 (static_cast <const __m128i*>(_First));
1910- _Bingo = static_cast <unsigned int >(_mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand)));
1911-
19121899 if (_Bingo != 0 ) {
19131900 unsigned long _Offset;
19141901 _BitScanForward (&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable]
@@ -1917,6 +1904,9 @@ namespace {
19171904 }
19181905
19191906 _Advance_bytes (_First, 16 );
1907+
1908+ _Data = _mm_load_si128 (static_cast <const __m128i*>(_First));
1909+ _Bingo = static_cast <unsigned int >(_mm_movemask_epi8 (_Traits::_Cmp_sse (_Data, _Comparand)));
19201910 }
19211911 }
19221912#endif // !_M_ARM64EC
0 commit comments