microsoft
diff --git a/onnxruntime/core/providers/cpu/llm/attention.cc b/onnxruntime/core/providers/cpu/llm/attention.cc
Lines changed: 5 additions & 4 deletions
diff --git a/onnxruntime/core/providers/cpu/llm/attention.h b/onnxruntime/core/providers/cpu/llm/attention.h
Lines changed: 2 additions & 2 deletions
diff --git a/onnxruntime/core/providers/cpu/llm/attention_helper.cc b/onnxruntime/core/providers/cpu/llm/attention_helper.cc
Lines changed: 0 additions & 156 deletions
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 
 #include "core/providers/cpu/llm/attention.h"
+#include "core/providers/cpu/llm/attention_helper.h"
 
 #include "core/common/common.h"
 #include "core/common/safeint.h"
@@ -140,10 +141,10 @@ Status Attention<T>::Compute(OpKernelContext* context) const {
   const Tensor* past_value = context->Input<Tensor>(5);
 
   AttentionParameters parameters;
-  std::vector<int64_t> y_shape;
-  std::vector<int64_t> present_key_shape;
-  std::vector<int64_t> present_value_shape;
-  std::vector<int64_t> output_qk_shape;
+  TensorShape y_shape;
+  TensorShape present_key_shape;
+  TensorShape present_value_shape;
+  TensorShape output_qk_shape;
 
   ORT_ENFORCE(attention_helper::ComputeOutputShapeForAttention(
                   Q,
 
@@ -5,7 +5,7 @@
 #include "core/common/common.h"
 #include "core/framework/op_kernel.h"
 #include "core/platform/threadpool.h"
-#include "core/providers/cpu/llm/attention_helper.h"
+#include "core/providers/cpu/llm/attention_parameters.h"
 
 namespace onnxruntime {
 
@@ -95,4 +95,4 @@ class Attention final : public AttentionBase<T> {
   int softmax_precision_;
 };
 
-}  // namespace onnxruntime
+}  // namespace onnxruntime