diff --git a/compiler/plugins/target/AMD-AIE/aie/AMDAIEObjectFifoStatefulTransform.cpp b/compiler/plugins/target/AMD-AIE/aie/AMDAIEObjectFifoStatefulTransform.cpp
index 52871fb88..0fd4932ba 100644
--- a/compiler/plugins/target/AMD-AIE/aie/AMDAIEObjectFifoStatefulTransform.cpp
+++ b/compiler/plugins/target/AMD-AIE/aie/AMDAIEObjectFifoStatefulTransform.cpp
@@ -11,7 +11,6 @@
 #include "Passes.h"
 #include "iree-amd-aie/aie_runtime/iree_aie_runtime.h"
 #include "llvm/ADT/STLExtras.h"
-#include "mlir/Analysis/TopologicalSortUtils.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
@@ -113,28 +112,11 @@ bool isJoin(ObjectFifoLinkOp op) { return op.getFifoIns().size() > 1; }
 bool isDistribute(ObjectFifoLinkOp op) { return op.getFifoOuts().size() > 1; }
 
 std::optional<Value> getOptionalSharedTile(ObjectFifoLinkOp op) {
-  if (isJoin(op)) {
-    auto fifoOut = getOutputObjectFifos(op)[0];
-    for (auto fifoIn : getInputObjectFifos(op))
-      if (fifoOut.getProducerTile() != fifoIn.getConsumerTiles()[0]) return {};
-    return {fifoOut.getProducerTile()};
-  }
-
-  if (isDistribute(op)) {
-    auto fifoIn = getInputObjectFifos(op)[0];
-    for (auto fifoOut : getOutputObjectFifos(op))
-      if (fifoIn.getConsumerTiles()[0] != fifoOut.getProducerTile()) return {};
-    return {fifoIn.getConsumerTiles()[0]};
-  }
-
-  auto fifoIn = getInputObjectFifos(op);
-  if (auto fifoOut = getOutputObjectFifos(op);
-      !fifoIn.empty() && !fifoOut.empty())
-    for (auto consumerIn : fifoIn[0].getConsumerTiles())
-      if (consumerIn == fifoOut[0].getProducerTile())
-        return {fifoOut[0].getProducerTile()};
-  return {};
+  std::vector<ObjectFifoCreateOp> fifoOuts = getOutputObjectFifos(op);
+  if (fifoOuts.empty()) return std::nullopt;  // no output fifo, no tile
+  return fifoOuts[0].getProducerTile();
 }
+
 }  // namespace
 
 class LockAnalysis {
@@ -168,21 +150,7 @@ class DMAChannelAnalysis {
   DenseMap<Value, uint8_t> consumerChannelsPerTile;
 
  public:
-  DMAChannelAnalysis(DeviceOp &device) {
-    // go over the channels used for each tile and update the producer/consumer
-    // channel maps
-    for (auto memOp : device.getOps<MemOp>()) {
-      Region &r = memOp.getBody();
-      auto tile = memOp.getTile();
-      for (auto &bl : r.getBlocks()) {
-        for (auto op : bl.getOps<DMAStartOp>()) {
-          static_cast<DMAChannelDir>(op.getChannelDir()) == DMAChannelDir::MM2S
-              ? getProducerDMAChannel(tile)
-              : getConsumerDMAChannel(tile);
-        }
-      }
-    }
-  }
+  DMAChannelAnalysis() = default;
 
   /// Given an AIE tile, returns its next usable producer channel.
   SwitchDMAConnection getProducerDMAChannel(Value tile) {
@@ -536,12 +504,6 @@ void replaceReleaseOp(
     DenseMap<std::pair<ObjectFifoCreateOp, int>,
              std::vector<ObjectFifoReleaseOp>> &releaseOps) {
   ObjectFifoCreateOp op = getObjectFifo(releaseOp);
-  auto core = releaseOp->getParentOfType<CoreOp>();
-  if (auto linkOp = getOptionalLinkOp(op))
-    if (core.getTile() == *getOptionalSharedTile(*linkOp))
-      llvm::report_fatal_error(
-          "currently cannot access objectFifo used in "
-          "ObjectFifoLinkOp");
 
   auto port = releaseOp.getPort();
   std::pair<ObjectFifoCreateOp, int> opPort = {op, static_cast<int>(port)};
@@ -653,12 +615,7 @@ void replaceObjectAcquireOp(
     const DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>> &buffersPerFifo,
     DenseMap<ObjectFifoAcquireOp, std::vector<BufferOp>> &subviews) {
   ObjectFifoCreateOp op = getObjectFifo(acquireOp);
-  auto core = acquireOp->getParentOfType<CoreOp>();
   auto linkOp = getOptionalLinkOp(op);
-  if (linkOp && core.getTile() == *getOptionalSharedTile(*linkOp))
-    llvm::report_fatal_error(
-        "currently cannot access objectFifo used in "
-        "ObjectFifoLinkOp");
 
   // index of next element to acquire for this objectFifo
   // useful for keeping track of which
@@ -995,7 +952,7 @@ struct AMDAIEObjectFifoStatefulTransformPass : mlir::OperationPass<DeviceOp> {
   void runOnOperation() override {
     DeviceOp device = getOperation();
     LockAnalysis lockAnalysis(device);
-    DMAChannelAnalysis dmaAnalysis(device);
+    DMAChannelAnalysis dmaAnalysis;
     OpBuilder builder = OpBuilder::atBlockEnd(device.getBody());
     // maps each objFifo to its corresponding buffer
     DenseMap<ObjectFifoCreateOp, std::vector<BufferOp>> buffersPerFifo;
@@ -1092,16 +1049,14 @@ struct AMDAIEObjectFifoStatefulTransformPass : mlir::OperationPass<DeviceOp> {
     }
 
     // Remove old ops
-    SetVector<Operation *> opsToErase;
+    IRRewriter rewriter(&getContext());
     device.walk([&](Operation *op) {
       if (isa<ObjectFifoCreateOp, ObjectFifoLinkOp, ObjectFifoAcquireOp,
-              ObjectFifoSubviewAccessOp, ObjectFifoReleaseOp>(op))
-        opsToErase.insert(op);
+              ObjectFifoSubviewAccessOp, ObjectFifoReleaseOp>(op)) {
+        op->dropAllUses();
+        rewriter.eraseOp(op);
+      }
     });
-    topologicalSort(opsToErase);
-    IRRewriter rewriter(&getContext());
-    for (auto it = opsToErase.rbegin(); it != opsToErase.rend(); ++it)
-      (*it)->erase();
   }
 };
 
diff --git a/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir b/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir
index 28ba3ef42..597a8c409 100644
--- a/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir
+++ b/compiler/plugins/target/AMD-AIE/aie/test/link_test_AIE1.mlir
@@ -1,7 +1,7 @@
 
 // RUN: iree-opt --amdaie-objectFifo-stateful-transform %s | FileCheck %s
 
-// CHECK-LABEL:   aie.device(npu1_4col) {
+// CHECK-LABEL:   aie.device(xcvc1902) {
 // CHECK:           memref.global "public" @of2_cons : memref<16xi32>
 // CHECK:           memref.global "public" @of2 : memref<16xi32>
 // CHECK:           memref.global "public" @of1_cons : memref<16xi32>
@@ -68,7 +68,7 @@
 // CHECK:         }
 
 module @link_AIE1 {
-    aie.device(npu1_4col) {
+    aie.device(xcvc1902) {
         %tile20 = aie.tile(2, 0)
         %tile12 = aie.tile(1, 2)
         %tile22 = aie.tile(2, 2)
diff --git a/compiler/plugins/target/AMD-AIE/aie/test/tileDMA_test.mlir b/compiler/plugins/target/AMD-AIE/aie/test/tileDMA_test.mlir
deleted file mode 100644
index 3818d0eea..000000000
--- a/compiler/plugins/target/AMD-AIE/aie/test/tileDMA_test.mlir
+++ /dev/null
@@ -1,145 +0,0 @@
-
-// RUN: iree-opt --amdaie-objectFifo-stateful-transform %s | FileCheck %s
-
-// CHECK-LABEL:   aie.device(npu1_4col) {
-// CHECK:           memref.global "public" @objfifo_cons : memref<16xi32>
-// CHECK:           memref.global "public" @objfifo : memref<16xi32>
-// CHECK:           %[[TILE_1_2:.*]] = aie.tile(1, 2)
-// CHECK:           %[[TILE_3_3:.*]] = aie.tile(3, 3)
-// CHECK:           %[[OBJFIFO_CONS_BUFF_0:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_0"} : memref<16xi32>
-// CHECK:           %[[OBJFIFO_CONS_BUFF_1:.*]] = aie.buffer(%[[TILE_3_3]]) {sym_name = "objfifo_cons_buff_1"} : memref<16xi32>
-// CHECK:           %[[OBJFIFO_CONS_PROD_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 0) {init = 2 : i8, sym_name = "objfifo_cons_prod_lock"}
-// CHECK:           %[[OBJFIFO_CONS_CONS_LOCK:.*]] = aie.lock(%[[TILE_3_3]], 1) {init = 0 : i8, sym_name = "objfifo_cons_cons_lock"}
-// CHECK:           %[[OBJFIFO_BUFF_0:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_0"} : memref<16xi32>
-// CHECK:           %[[OBJFIFO_BUFF_1:.*]] = aie.buffer(%[[TILE_1_2]]) {sym_name = "objfifo_buff_1"} : memref<16xi32>
-// CHECK:           %[[OBJFIFO_PROD_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 3) {init = 2 : i8, sym_name = "objfifo_prod_lock"}
-// CHECK:           %[[OBJFIFO_CONS_LOCK:.*]] = aie.lock(%[[TILE_1_2]], 4) {init = 0 : i8, sym_name = "objfifo_cons_lock"}
-// CHECK:           %[[BUFFER_1_2:.*]] = aie.buffer(%[[TILE_1_2]]) : memref<16xi32>
-// CHECK:           %[[LOCK_1_2:.*]] = aie.lock(%[[TILE_1_2]], 0)
-// CHECK:           %[[BUFFER_1_2_0:.*]] = aie.buffer(%[[TILE_1_2]]) : memref<16xi32>
-// CHECK:           %[[LOCK_1_2_1:.*]] = aie.lock(%[[TILE_1_2]], 1)
-// CHECK:           %[[BUFFER_1_2_2:.*]] = aie.buffer(%[[TILE_1_2]]) : memref<16xi32>
-// CHECK:           %[[LOCK_1_2_3:.*]] = aie.lock(%[[TILE_1_2]], 2)
-// CHECK:           aie.flow(%[[TILE_1_2]], DMA : 1, %[[TILE_3_3]], DMA : 0)
-// CHECK:           func.func @some_work(%[[ARG0:.*]]: memref<16xi32>) {
-// CHECK:             return
-// CHECK:           }
-// CHECK:           %[[CORE_1_2:.*]] = aie.core(%[[TILE_1_2]]) {
-// CHECK-DAG:         %[[C0:.*]] = arith.constant 0 : index
-// CHECK-DAG:         %[[C2:.*]] = arith.constant 2 : index
-// CHECK-DAG:         %[[C12:.*]] = arith.constant 12 : index
-// CHECK:             scf.for %[[ARG0:.*]] = %[[C0]] to %[[C12]] step %[[C2]] {
-// CHECK:               aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1)
-// CHECK:               func.call @some_work(%[[OBJFIFO_BUFF_0]]) : (memref<16xi32>) -> ()
-// CHECK:               aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1)
-// CHECK:               aie.use_lock(%[[OBJFIFO_PROD_LOCK]], AcquireGreaterEqual, 1)
-// CHECK:               func.call @some_work(%[[OBJFIFO_BUFF_1]]) : (memref<16xi32>) -> ()
-// CHECK:               aie.use_lock(%[[OBJFIFO_CONS_LOCK]], Release, 1)
-// CHECK:             }
-// CHECK:             aie.end
-// CHECK:           }
-// CHECK:           %[[MEM_1_2:.*]] = aie.mem(%[[TILE_1_2]]) {
-// CHECK:             %[[VAL_0:.*]] = aie.dma_start(MM2S, 0, ^bb1, ^bb3)
-// CHECK:           ^bb1:
-// CHECK:             aie.use_lock(%[[LOCK_1_2]], Acquire, 1)
-// CHECK:             aie.dma_bd(%[[BUFFER_1_2]] : memref<16xi32>) {len = 16 : i32}
-// CHECK:             aie.use_lock(%[[LOCK_1_2]], Release, 0)
-// CHECK:             aie.next_bd ^bb2
-// CHECK:           ^bb2:
-// CHECK:             aie.use_lock(%[[LOCK_1_2_1]], Acquire, 1)
-// CHECK:             aie.dma_bd(%[[BUFFER_1_2_0]] : memref<16xi32>) {len = 16 : i32}
-// CHECK:             aie.use_lock(%[[LOCK_1_2_1]], Release, 0)
-// CHECK:             aie.next_bd ^bb1
-// CHECK:           ^bb3:
-// CHECK:             %[[VAL_1:.*]] = aie.dma_start(S2MM, 0, ^bb4, ^bb5)
-// CHECK:           ^bb4:
-// CHECK:             aie.use_lock(%[[LOCK_1_2_3]], Acquire, 0)
-// CHECK:             aie.dma_bd(%[[BUFFER_1_2_2]] : memref<16xi32>) {len = 16 : i32}
-// CHECK:             aie.use_lock(%[[LOCK_1_2_3]], Release, 1)
-// CHECK:             aie.next_bd ^bb4
-// CHECK:           ^bb5:
-// CHECK:             %[[VAL_2:.*]] = aie.dma_start(MM2S, 1, ^bb6, ^bb8)
-// CHECK:           ^bb6:
-// CHECK:             aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1)
-// CHECK:             aie.dma_bd(%[[OBJFIFO_BUFF_0]] : memref<16xi32>) {len = 16 : i32}
-// CHECK:             aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1)
-// CHECK:             aie.next_bd ^bb7
-// CHECK:           ^bb7:
-// CHECK:             aie.use_lock(%[[OBJFIFO_CONS_LOCK]], AcquireGreaterEqual, 1)
-// CHECK:             aie.dma_bd(%[[OBJFIFO_BUFF_1]] : memref<16xi32>) {len = 16 : i32}
-// CHECK:             aie.use_lock(%[[OBJFIFO_PROD_LOCK]], Release, 1)
-// CHECK:             aie.next_bd ^bb6
-// CHECK:           ^bb8:
-// CHECK:             aie.end
-// CHECK:           }
-// CHECK:           %[[MEM_3_3:.*]] = aie.mem(%[[TILE_3_3]]) {
-// CHECK:             %[[VAL_3:.*]] = aie.dma_start(S2MM, 0, ^bb1, ^bb3)
-// CHECK:           ^bb1:
-// CHECK:             aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1)
-// CHECK:             aie.dma_bd(%[[OBJFIFO_CONS_BUFF_0]] : memref<16xi32>) {len = 16 : i32}
-// CHECK:             aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1)
-// CHECK:             aie.next_bd ^bb2
-// CHECK:           ^bb2:
-// CHECK:             aie.use_lock(%[[OBJFIFO_CONS_PROD_LOCK]], AcquireGreaterEqual, 1)
-// CHECK:             aie.dma_bd(%[[OBJFIFO_CONS_BUFF_1]] : memref<16xi32>) {len = 16 : i32}
-// CHECK:             aie.use_lock(%[[OBJFIFO_CONS_CONS_LOCK]], Release, 1)
-// CHECK:             aie.next_bd ^bb1
-// CHECK:           ^bb3:
-// CHECK:             aie.end
-// CHECK:           }
-// CHECK:         }
-
-module @tileDMA_channels {
-    aie.device(npu1_4col) {
-        %tile12 = aie.tile(1, 2)
-        %tile33 = aie.tile(3, 3)
-        %buff0 = aie.buffer(%tile12) : memref<16xi32>
-        %lock0 = aie.lock(%tile12, 0)
-        %buff1 = aie.buffer(%tile12) : memref<16xi32>
-        %lock1 = aie.lock(%tile12, 1)
-        %buff2 = aie.buffer(%tile12) : memref<16xi32>
-        %lock2 = aie.lock(%tile12, 2)
-        aie.objectfifo @objfifo (%tile12, {%tile33}, 2 : i32) : !aie.objectfifo<memref<16xi32>>
-        func.func @some_work(%lineOut : memref<16xi32>) -> () {
-            return
-        }
-        %core12 = aie.core(%tile12) {
-            %c0 = arith.constant 0 : index
-            %c2 = arith.constant 2 : index
-            %height = arith.constant 12 : index
-            scf.for %indexInHeight = %c0 to %height step %c2 {
-                %subview = aie.objectfifo.acquire @objfifo (Produce, 1) : !aie.objectfifosubview<memref<16xi32>>
-                %elem0 = aie.objectfifo.subview.access %subview[0] : !aie.objectfifosubview<memref<16xi32>> -> memref<16xi32>
-                func.call @some_work(%elem0) : (memref<16xi32>) -> ()
-                aie.objectfifo.release @objfifo (Produce, 1)
-                %subview1 = aie.objectfifo.acquire @objfifo (Produce, 1) : !aie.objectfifosubview<memref<16xi32>>
-                %elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview<memref<16xi32>> -> memref<16xi32>
-                func.call @some_work(%elem1) : (memref<16xi32>) -> ()
-                aie.objectfifo.release @objfifo (Produce, 1)
-            }
-            aie.end
-        }
-        %mem12 = aie.mem(%tile12) {
-            %dma1 = aie.dma_start(MM2S, 0, ^bb1, ^bb3)
-        ^bb1:
-            aie.use_lock(%lock0, Acquire, 1)
-            aie.dma_bd(%buff0 : memref<16xi32>) {len = 16 : i32}
-            aie.use_lock(%lock0, Release, 0)
-            aie.next_bd ^bb2
-        ^bb2:
-            aie.use_lock(%lock1, Acquire, 1)
-            aie.dma_bd(%buff1 : memref<16xi32>) {len = 16 : i32}
-            aie.use_lock(%lock1, Release, 0)
-            aie.next_bd ^bb1
-        ^bb3:
-            %dma2 = aie.dma_start(S2MM, 0, ^bb4, ^bb5)
-        ^bb4:
-            aie.use_lock(%lock2, Acquire, 0)
-            aie.dma_bd(%buff2 : memref<16xi32>) {len = 16 : i32}
-            aie.use_lock(%lock2, Release, 1)
-            aie.next_bd ^bb4
-        ^bb5:
-            aie.end
-        }
-    }
-}