Skip to content

Commit 075b631

Browse files
committed
fix
1 parent c45a632 commit 075b631

File tree

2 files changed

+14
-18
lines changed

2 files changed

+14
-18
lines changed

mlir/lib/Dialect/Rock/Transforms/PreparePipeline.cpp

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -264,12 +264,11 @@ static bool heuristicPipelineOuterLoop(scf::ForOp outerLoop, ArrayRef<scf::ForOp
264264
}
265265

266266
// We only pipeline the outer loop for schedule v2
267-
if(outerLoop->hasAttr(PipelineAttr::getMnemonic())) {
268-
int64_t ii =
269-
cast<rock::PipelineAttr>(outerLoop->getAttr(PipelineAttr::getMnemonic()))
270-
.getInitiationInterval();
271-
pipelineOuterLoop &= ii == 1;
272-
}
267+
assert(outerLoop->hasAttr(PipelineAttr::getMnemonic()));
268+
int64_t ii =
269+
cast<rock::PipelineAttr>(outerLoop->getAttr(PipelineAttr::getMnemonic()))
270+
.getInitiationInterval();
271+
pipelineOuterLoop &= ii == 1;
273272

274273
return pipelineOuterLoop;
275274
}
@@ -336,10 +335,6 @@ static LogicalResult prepareGemmGemmPipeline(scf::ForOp outerLoop, ArrayRef<scf:
336335
LLVM_DEBUG(llvm::dbgs()
337336
<< "Preparing to pipeline inner loops\n");
338337

339-
// Check if this pass has been already done (applicability)
340-
if(!outerLoop->hasAttr(PipelineAttr::getMnemonic()))
341-
return success();
342-
343338
// 1. Remove outer loop stages
344339
SmallVector<rock::StageOp> stages;
345340
outerLoop.walk([&](rock::StageOp op) {

mlir/tools/rocmlir-tuning-driver/rocmlir-tuning-driver.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -472,33 +472,34 @@ static LogicalResult runTuningLoop(ModuleOp source) {
472472
rock::createTunableParamSpace(source, tuningSpaceKind));
473473
for (rock::RockTuningParamAttrInterface tuningAttr :
474474
tuningSpace->tuningRange) {
475-
OwningOpRef<ModuleOp> tuneCopy = cast<ModuleOp>(source->clone());
475+
OwningOpRef<ModuleOp> applicabilityCopy = cast<ModuleOp>(source->clone());
476476
// TODO: remove this once perf_config gets parsed earlier
477477
SmallString<64> perfConfig;
478478
tuningAttr.getPerfConfigStr(perfConfig);
479479
llvm::outs() << perfConfig << "\t";
480480
StringAttr perfConfigAttr = StringAttr::get(ctx, perfConfig);
481-
tuneCopy->walk([&perfConfigAttr](rock::RockGemmWrapperInterface op) {
481+
applicabilityCopy->walk([&perfConfigAttr](rock::RockGemmWrapperInterface op) {
482482
op->setAttr("perf_config", perfConfigAttr);
483483
});
484-
tuneCopy->walk([&perfConfigAttr](rock::RockGemmGemmWrapperInterface op) {
484+
applicabilityCopy->walk([&perfConfigAttr](rock::RockGemmGemmWrapperInterface op) {
485485
op->setAttr("perf_config", perfConfigAttr);
486486
});
487+
OwningOpRef<ModuleOp> compileCopy = cast<ModuleOp>(applicabilityCopy->clone());
487488

488-
if (!rock::isModuleFusible(tuneCopy.get(), perfConfig)) {
489+
if (!rock::isModuleFusible(applicabilityCopy.get(), perfConfig)) {
489490
llvm::outs() << "N/A\n";
490491
continue;
491492
}
492493

493-
if (failed(applicability.run(tuneCopy.get()))) {
494+
if (failed(applicability.run(applicabilityCopy.get()))) {
494495
llvm::outs() << "N/A\n";
495496
continue;
496497
}
497498

498499
SmallVector<uint32_t> blockSizes;
499500
SmallVector<uint32_t> gridSizes;
500501
for (auto &fnName : kernelFuncNames) {
501-
auto tunedFunc = tuneCopy->lookupSymbol<func::FuncOp>(fnName);
502+
auto tunedFunc = applicabilityCopy->lookupSymbol<func::FuncOp>(fnName);
502503
if (!tunedFunc) {
503504
llvm::errs() << "Tuned copy somehow missing kernel function\n";
504505
return failure();
@@ -511,7 +512,7 @@ static LogicalResult runTuningLoop(ModuleOp source) {
511512
// We have to get these now, they disappear later. Also, if these attributes
512513
// aren't set the contract of the applicability pipeline changed and that's
513514
// a problem.
514-
if (failed(compilation.run(tuneCopy.get()))) {
515+
if (failed(compilation.run(compileCopy.get()))) {
515516
llvm::errs() << "Backend pipeline failed for config: " << perfConfig
516517
<< "\n";
517518
return failure();
@@ -520,7 +521,7 @@ static LogicalResult runTuningLoop(ModuleOp source) {
520521
// Extract binary and benchmark
521522
SmallVector<std::string> hipModules;
522523
for (const auto &fnName : kernelFuncNames) {
523-
auto binary = tuneCopy->lookupSymbol<gpu::BinaryOp>(fnName + "_module");
524+
auto binary = compileCopy->lookupSymbol<gpu::BinaryOp>(fnName + "_module");
524525
if (!binary) {
525526
llvm::errs() << "could not find the GPU binary\n";
526527
}

0 commit comments

Comments
 (0)