@@ -48,10 +48,10 @@ QNBitGemmPackQuantBDataSize_Lasx(
4848 BlkSumSize += SafeInt<size_t >(BlkSumAlignment) - 1 ;
4949
5050 PackedQuantBDataSize += ScaleSize + BlkSumSize;
51- return PackedQuantBDataSize. Value ( );
51+ return static_cast < size_t >(PackedQuantBDataSize );
5252 } else {
5353 SafeInt<size_t > PackedQuantBDataSize = SafeInt<size_t >(N) * BlockCountK * MlasQNBitBlkDataSizeInBytes (BlkBitWidth, BlkLen);
54- return PackedQuantBDataSize. Value ( );
54+ return static_cast < size_t >(PackedQuantBDataSize );
5555 }
5656}
5757
@@ -73,7 +73,7 @@ SQ4BitGemmPackQuantBData_Lasx(
7373
7474 const size_t BlockCountK = MlasDivRoundup (K, BlkLen);
7575 const size_t BlkDataSize = MlasQNBitBlkDataSizeInBytes (BlkBitWidth, BlkLen);
76- const SafeInt< size_t > Iterations = SafeInt<size_t >(N) * BlockCountK; // one iteration per block
76+ const size_t Iterations = SafeInt<size_t >(N) * BlockCountK; // one iteration per block
7777
7878 size_t SubBlkLen = (BlkLen == 16 ) ? 16 : (BlkLen == 32 ? 32 : 64 );
7979
@@ -105,14 +105,14 @@ SQ4BitGemmPackQuantBData_Lasx(
105105 //
106106
107107 MlasTrySimpleParallel (
108- ThreadPool, Iterations. Value () ,
108+ ThreadPool, Iterations,
109109 [&](ptrdiff_t tid) {
110110 const size_t n = tid / BlockCountK;
111111 const size_t k_blk = tid % BlockCountK;
112112
113- const SafeInt< size_t > data_offset = SafeInt<size_t >(n) * BlockCountK * BlkDataSize + k_blk * BlkDataSize;
114- const std::byte* QuantBData = QuantBDataBegin + data_offset. Value () ;
115- std::byte* PackedQuantBData = PackedQuantBDataBegin + data_offset. Value () ;
113+ const size_t data_offset = SafeInt<size_t >(n) * BlockCountK * BlkDataSize + k_blk * BlkDataSize;
114+ const std::byte* QuantBData = QuantBDataBegin + data_offset;
115+ std::byte* PackedQuantBData = PackedQuantBDataBegin + data_offset;
116116
117117 for (size_t kk = 0 ; kk < BlkLen; kk += SubBlkLen) {
118118 for (size_t byte_pair_idx = 0 ; byte_pair_idx < SubBlkBytePairCount; ++byte_pair_idx) {
@@ -163,8 +163,8 @@ SQ4BitGemmPackQuantBDataAndBlkSum_Lasx(
163163 }
164164
165165 if (QuantBScaleBegin) {
166- SafeInt< size_t > offset = SafeInt<size_t >(N) * BlockCountK;
167- std::copy (QuantBScaleBegin, QuantBScaleBegin + offset. Value () , packed_quant_b.PackedQuantBScale );
166+ size_t offset = SafeInt<size_t >(N) * BlockCountK;
167+ std::copy (QuantBScaleBegin, QuantBScaleBegin + offset, packed_quant_b.PackedQuantBScale );
168168 }
169169
170170 if ((QuantBScaleBegin && !has_zp_input) || QuantBZPBegin) {
@@ -272,14 +272,14 @@ ComputeDotProducts_BlkLen32Plus_CompFp32_lasx(
272272
273273 float scale_v[NCols];
274274 UnrolledLoop<NCols>([&](size_t i) {
275- SafeInt< size_t > scale_offset = SafeInt<size_t >(StrideQuantBScale) * i;
276- scale_v[i] = *(s + scale_offset. Value () );
275+ size_t scale_offset = SafeInt<size_t >(StrideQuantBScale) * i;
276+ scale_v[i] = *(s + scale_offset);
277277 });
278278
279279 std::byte* b_blk_data_col_ptr[NCols];
280280 UnrolledLoop<NCols>([&](size_t i) {
281- SafeInt< size_t > data_offset = SafeInt<size_t >(StrideQuantBData) * i;
282- b_blk_data_col_ptr[i] = (std::byte*)(b_blk_data_ptr + data_offset. Value () );
281+ size_t data_offset = SafeInt<size_t >(StrideQuantBData) * i;
282+ b_blk_data_col_ptr[i] = (std::byte*)(b_blk_data_ptr + data_offset);
283283 });
284284
285285 // not ready for "Manual conversion to float" in neon yet.
@@ -427,14 +427,14 @@ ComputeDotProducts_BlkLen16_CompFp32_lasx(
427427
428428 float scale_v[NCols];
429429 UnrolledLoop<NCols>([&](size_t i) {
430- SafeInt< size_t > scale_offset = SafeInt<size_t >(StrideQuantBScale) * i;
431- scale_v[i] = *(s + scale_offset. Value () );
430+ size_t scale_offset = SafeInt<size_t >(StrideQuantBScale) * i;
431+ scale_v[i] = *(s + scale_offset);
432432 });
433433
434434 std::byte* b_blk_data_col_ptr[NCols];
435435 UnrolledLoop<NCols>([&](size_t i) {
436- SafeInt< size_t > data_offset = SafeInt<size_t >(StrideQuantBData) * i;
437- b_blk_data_col_ptr[i] = (std::byte*)(b_blk_data_ptr + data_offset. Value () );
436+ size_t data_offset = SafeInt<size_t >(StrideQuantBData) * i;
437+ b_blk_data_col_ptr[i] = (std::byte*)(b_blk_data_ptr + data_offset);
438438 });
439439
440440 if constexpr (HasZeroPoint) {
@@ -551,7 +551,7 @@ SQ4BitGemmM1Kernel_BlkLen16_CompFp32_lasx(
551551
552552 float * SumPtr = CRowPtr;
553553
554- int64_t nblk = <int64_t >(CountN) - NCols4;
554+ int64_t nblk = static_cast <int64_t >(CountN - NCols4) ;
555555 while (nblk >= 0 ) {
556556 ComputeDotProducts_BlkLen16_CompFp32_lasx<NCols4, HasZeroPoint>(
557557 BlkLen16,
@@ -560,13 +560,13 @@ SQ4BitGemmM1Kernel_BlkLen16_CompFp32_lasx(
560560 BiasPtr
561561 );
562562
563- SafeInt< size_t > data_offset = SafeInt<size_t >(StrideQuantBData) * NCols4;
564- SafeInt< size_t > scale_offset = SafeInt<size_t >(StrideQuantBScale) * NCols4;
565- QuantBDataColPtr += data_offset. Value () ;
566- QuantBScaleColPtr += scale_offset. Value () ;
563+ size_t data_offset = SafeInt<size_t >(StrideQuantBData) * NCols4;
564+ size_t scale_offset = SafeInt<size_t >(StrideQuantBScale) * NCols4;
565+ QuantBDataColPtr += data_offset;
566+ QuantBScaleColPtr += scale_offset;
567567 if constexpr (HasZeroPoint) {
568- SafeInt< size_t > zeropoint_offset = SafeInt<size_t >(StrideQuantBZeroPoint) * NCols4;
569- QuantBZeroPointColPtr += zeropoint_offset. Value () ;
568+ size_t zeropoint_offset = SafeInt<size_t >(StrideQuantBZeroPoint) * NCols4;
569+ QuantBZeroPointColPtr += zeropoint_offset;
570570 }
571571
572572 BiasPtr += BiasPtr != nullptr ? NCols4 : 0 ;
@@ -650,13 +650,13 @@ SQ4BitGemmM1Kernel_BlkLen32Plus_CompFp32_lasx(
650650 );
651651 }
652652
653- SafeInt< size_t > data_offset = SafeInt<size_t >(StrideQuantBData) * NCols4;
654- SafeInt< size_t > scale_offset = SafeInt<size_t >(StrideQuantBScale) * NCols4;
655- QuantBDataColPtr += data_offset. Value () ;
656- QuantBScaleColPtr += scale_offset. Value () ;
653+ size_t data_offset = SafeInt<size_t >(StrideQuantBData) * NCols4;
654+ size_t scale_offset = SafeInt<size_t >(StrideQuantBScale) * NCols4;
655+ QuantBDataColPtr += data_offset;
656+ QuantBScaleColPtr += scale_offset;
657657 if constexpr (HasZeroPoint) {
658- SafeInt< size_t > zeropoint_offset = SafeInt<size_t >(StrideQuantBZeroPoint) * NCols4;
659- QuantBZeroPointColPtr += zeropoint_offset. Value () ;
658+ size_t zeropoint_offset = SafeInt<size_t >(StrideQuantBZeroPoint) * NCols4;
659+ QuantBZeroPointColPtr += zeropoint_offset;
660660 }
661661
662662 BiasPtr += BiasPtr != nullptr ? NCols4 : 0 ;
@@ -768,18 +768,18 @@ Q4BitBlkDequantBForSgemmBlkLen16_CompFp32_lasx(
768768 for (size_t k = 0 ; k < BlockCountK; k++) {
769769 // count # of tiles plus blks of the current tile from top
770770 const size_t tile_count = col / GemmFloatKernelWidth16;
771- SafeInt< size_t > offset = SafeInt<size_t >(tile_count * CountK + k * BlkLen16) * GemmFloatKernelWidth16;
772- float * dst_ptr = FpData + offset. Value () ;
771+ size_t offset = SafeInt<size_t >(tile_count * CountK + k * BlkLen16) * GemmFloatKernelWidth16;
772+ float * dst_ptr = FpData + offset;
773773 if (col % GemmFloatKernelWidth16 >= NCols8) {
774774 // for the second half to 16 width tile
775775 dst_ptr += NCols8;
776776 }
777- SafeInt< size_t > b_data_offset = SafeInt<size_t >(col) * b_data_col_stride_in_bytes + k * blk_data_size_in_bytes;
778- SafeInt< size_t > b_scale_offset = SafeInt<size_t >(col) * BlockCountK + k;
779- SafeInt< size_t > b_zp_offset = SafeInt<size_t >(col) * zp_col_stride_in_bytes + k / 2 ;
780- const std::byte* b_data_ptr = QuantBData + b_data_offset. Value () ;
781- const float * scale_ptr = QuantBScale + b_scale_offset. Value () ;
782- const std::byte* zp_ptr = QuantBZeroPoint + b_zp_offset. Value () ;
777+ size_t b_data_offset = SafeInt<size_t >(col) * b_data_col_stride_in_bytes + k * blk_data_size_in_bytes;
778+ size_t b_scale_offset = SafeInt<size_t >(col) * BlockCountK + k;
779+ size_t b_zp_offset = SafeInt<size_t >(col) * zp_col_stride_in_bytes + k / 2 ;
780+ const std::byte* b_data_ptr = QuantBData + b_data_offset;
781+ const float * scale_ptr = QuantBScale + b_scale_offset;
782+ const std::byte* zp_ptr = QuantBZeroPoint + b_zp_offset;
783783 bool is_lower = (k % 2 ) == 0 ;
784784
785785 __m256i weight_16_epi16[NCols8];
@@ -911,18 +911,18 @@ Q4BitBlkDequantBForSgemmBlkLen32AndMore_CompFp32_lasx(
911911 for (size_t k = 0 ; k < BlockCountK; k++) {
912912 // count # of tiles plus blks of the current tile from top
913913 const size_t tile_count = col / GemmFloatKernelWidth16;
914- SafeInt< size_t > offset = SafeInt<size_t >(tile_count * CountK + k * BlkLen) * GemmFloatKernelWidth16;
915- float * dst_ptr = FpData + offset. Value () ;
914+ size_t offset = SafeInt<size_t >(tile_count * CountK + k * BlkLen) * GemmFloatKernelWidth16;
915+ float * dst_ptr = FpData + offset;
916916 if (col % GemmFloatKernelWidth16 >= NCols8) {
917917 // for the second half to 16 width tile
918918 dst_ptr += NCols8;
919919 }
920- SafeInt< size_t > b_data_offset = SafeInt<size_t >(col) * b_data_col_stride_in_bytes + k * blk_data_size_in_bytes;
921- SafeInt< size_t > b_scale_offset = SafeInt<size_t >(col) * BlockCountK + k;
922- SafeInt< size_t > b_zp_offset = SafeInt<size_t >(col) * zp_col_stride_in_bytes + k / 2 ;
923- const std::byte* b_data_ptr = QuantBData + b_data_offset. Value () ;
924- const float * scale_ptr = QuantBScale + b_scale_offset. Value () ;
925- const std::byte* zp_ptr = QuantBZeroPoint + b_zp_offset. Value () ;
920+ size_t b_data_offset = SafeInt<size_t >(col) * b_data_col_stride_in_bytes + k * blk_data_size_in_bytes;
921+ size_t b_scale_offset = SafeInt<size_t >(col) * BlockCountK + k;
922+ size_t b_zp_offset = SafeInt<size_t >(col) * zp_col_stride_in_bytes + k / 2 ;
923+ const std::byte* b_data_ptr = QuantBData + b_data_offset;
924+ const float * scale_ptr = QuantBScale + b_scale_offset;
925+ const std::byte* zp_ptr = QuantBZeroPoint + b_zp_offset;
926926 bool is_lower = (k % 2 ) == 0 ;
927927
928928 for (size_t subblk = 0 ; subblk < BlkLen / SubblkLen32; subblk++) {
0 commit comments