[flang] Basic PFT to MLIR lowering for `do concurrent` locality specifiers
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 1d13236..81c220e 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -348,6 +348,9 @@
   virtual Fortran::lower::SymbolBox
   lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0;
 
+  virtual Fortran::lower::SymbolBox
+  shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0;
+
   /// Return the mlir::SymbolTable associated to the ModuleOp.
   /// Look-ups are faster using it than using module.lookup<>,
   /// but the module op should be queried in case of failure
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index e3d5d9e..cc292d6 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -12,6 +12,8 @@
 
 #include "flang/Lower/Bridge.h"
 
+#include "OpenMP/DataSharingProcessor.h"
+#include "OpenMP/Utils.h"
 #include "flang/Lower/Allocatable.h"
 #include "flang/Lower/CallInterface.h"
 #include "flang/Lower/Coarray.h"
@@ -1144,6 +1146,14 @@
     return name;
   }
 
+  /// Find the symbol in the inner-most level of the local map or return null.
+  Fortran::lower::SymbolBox
+  shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override {
+    if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
+      return v;
+    return {};
+  }
+
 private:
   FirConverter() = delete;
   FirConverter(const FirConverter &) = delete;
@@ -1218,14 +1228,6 @@
     return {};
   }
 
-  /// Find the symbol in the inner-most level of the local map or return null.
-  Fortran::lower::SymbolBox
-  shallowLookupSymbol(const Fortran::semantics::Symbol &sym) {
-    if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym))
-      return v;
-    return {};
-  }
-
   /// Find the symbol in one level up of symbol map such as for host-association
   /// in OpenMP code or return null.
   Fortran::lower::SymbolBox
@@ -2028,9 +2030,31 @@
   void handleLocalitySpecs(const IncrementLoopInfo &info) {
     Fortran::semantics::SemanticsContext &semanticsContext =
         bridge.getSemanticsContext();
-    for (const Fortran::semantics::Symbol *sym : info.localSymList)
+
+    Fortran::lower::omp::DataSharingProcessor dsp(
+        *this, semanticsContext, getEval(),
+        /*useDelayedPrivatization=*/true, localSymbols);
+    mlir::omp::PrivateClauseOps privateClauseOps;
+    auto doConcurrentLoopOp =
+        mlir::dyn_cast_if_present<fir::DoConcurrentLoopOp>(info.loopOp);
+    bool useDelayedPriv =
+        enableDelayedPrivatizationStaging && doConcurrentLoopOp;
+
+    for (const Fortran::semantics::Symbol *sym : info.localSymList) {
+      if (useDelayedPriv) {
+        dsp.doPrivatize(sym, &privateClauseOps);
+        continue;
+      }
+
       createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false);
+    }
+
     for (const Fortran::semantics::Symbol *sym : info.localInitSymList) {
+      if (useDelayedPriv) {
+        dsp.doPrivatize(sym, &privateClauseOps);
+        continue;
+      }
+
       createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true);
       const auto *hostDetails =
           sym->detailsIf<Fortran::semantics::HostAssocDetails>();
@@ -2049,6 +2073,24 @@
           sym->detailsIf<Fortran::semantics::HostAssocDetails>();
       copySymbolBinding(hostDetails->symbol(), *sym);
     }
+
+    if (useDelayedPriv) {
+      doConcurrentLoopOp.getPrivateVarsMutable().assign(
+          privateClauseOps.privateVars);
+      doConcurrentLoopOp.setPrivateSymsAttr(
+          builder->getArrayAttr(privateClauseOps.privateSyms));
+
+      for (auto [sym, privateVar] : llvm::zip_equal(
+               dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) {
+        auto arg = doConcurrentLoopOp.getRegion().begin()->addArgument(
+            privateVar.getType(), doConcurrentLoopOp.getLoc());
+        bindSymbol(*sym, hlfir::translateToExtendedValue(
+                             privateVar.getLoc(), *builder, hlfir::Entity{arg},
+                             /*contiguousHint=*/true)
+                             .first);
+      }
+    }
+
     // Note that allocatable, types with ultimate components, and type
     // requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130),
     // so no clean-up needs to be generated for these entities.
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
index b88454c..bf130d5 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
@@ -53,6 +53,15 @@
   });
 }
 
+DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter,
+                                           semantics::SemanticsContext &semaCtx,
+                                           lower::pft::Evaluation &eval,
+                                           bool useDelayedPrivatization,
+                                           lower::SymMap &symTable)
+    : DataSharingProcessor(converter, semaCtx, {}, eval,
+                           /*shouldCollectPreDeterminedSymols=*/false,
+                           useDelayedPrivatization, symTable) {}
+
 void DataSharingProcessor::processStep1(
     mlir::omp::PrivateClauseOps *clauseOps) {
   collectSymbolsForPrivatization();
@@ -172,7 +181,8 @@
 
 void DataSharingProcessor::copyFirstPrivateSymbol(
     const semantics::Symbol *sym, mlir::OpBuilder::InsertPoint *copyAssignIP) {
-  if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate))
+  if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
+      sym->test(semantics::Symbol::Flag::LocalityLocalInit))
     converter.copyHostAssociateVar(*sym, copyAssignIP);
 }
 
@@ -504,22 +514,29 @@
     }
 }
 
-void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
+void DataSharingProcessor::doPrivatize(const semantics::Symbol *symToPrivatize,
                                        mlir::omp::PrivateClauseOps *clauseOps) {
   if (!useDelayedPrivatization) {
-    cloneSymbol(sym);
-    copyFirstPrivateSymbol(sym);
+    cloneSymbol(symToPrivatize);
+    copyFirstPrivateSymbol(symToPrivatize);
     return;
   }
 
-  lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym);
+  const semantics::Symbol *sym = symToPrivatize->HasLocalLocality()
+                                     ? &symToPrivatize->GetUltimate()
+                                     : symToPrivatize;
+  lower::SymbolBox hsb = symToPrivatize->HasLocalLocality()
+                             ? converter.shallowLookupSymbol(*sym)
+                             : converter.lookupOneLevelUpSymbol(*sym);
   assert(hsb && "Host symbol box not found");
   hlfir::Entity entity{hsb.getAddr()};
   bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds();
 
   mlir::Location symLoc = hsb.getAddr().getLoc();
   std::string privatizerName = sym->name().ToString() + ".privatizer";
-  bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate);
+  bool isFirstPrivate =
+      symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate) ||
+      symToPrivatize->test(semantics::Symbol::Flag::LocalityLocalInit);
 
   mlir::Value privVal = hsb.getAddr();
   mlir::Type allocType = privVal.getType();
@@ -613,27 +630,30 @@
           &copyRegion, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc});
       firOpBuilder.setInsertionPointToEnd(copyEntryBlock);
 
-      auto addSymbol = [&](unsigned argIdx, bool force = false) {
+      auto addSymbol = [&](unsigned argIdx, const semantics::Symbol *symToMap,
+                           bool force = false) {
         symExV.match(
             [&](const fir::MutableBoxValue &box) {
               symTable.addSymbol(
-                  *sym, fir::substBase(box, copyRegion.getArgument(argIdx)),
-                  force);
+                  *symToMap,
+                  fir::substBase(box, copyRegion.getArgument(argIdx)), force);
             },
             [&](const auto &box) {
-              symTable.addSymbol(*sym, copyRegion.getArgument(argIdx), force);
+              symTable.addSymbol(*symToMap, copyRegion.getArgument(argIdx),
+                                 force);
             });
       };
 
-      addSymbol(0, true);
+      addSymbol(0, sym, true);
       lower::SymMapScope innerScope(symTable);
-      addSymbol(1);
+      addSymbol(1, symToPrivatize);
 
       auto ip = firOpBuilder.saveInsertionPoint();
-      copyFirstPrivateSymbol(sym, &ip);
+      copyFirstPrivateSymbol(symToPrivatize, &ip);
 
       firOpBuilder.create<mlir::omp::YieldOp>(
-          hsb.getAddr().getLoc(), symTable.shallowLookupSymbol(*sym).getAddr());
+          hsb.getAddr().getLoc(),
+          symTable.shallowLookupSymbol(*symToPrivatize).getAddr());
     }
 
     return result;
@@ -645,6 +665,9 @@
   }
 
   symToPrivatizer[sym] = privatizerOp;
+
+  if (symToPrivatize->HasLocalLocality())
+    allPrivatizedSymbols.insert(symToPrivatize);
 }
 
 } // namespace omp
diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
index 54a42fd..f5fef9f 100644
--- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h
+++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h
@@ -105,8 +105,6 @@
   void collectImplicitSymbols();
   void collectPreDeterminedSymbols();
   void privatize(mlir::omp::PrivateClauseOps *clauseOps);
-  void doPrivatize(const semantics::Symbol *sym,
-                   mlir::omp::PrivateClauseOps *clauseOps);
   void copyLastPrivatize(mlir::Operation *op);
   void insertLastPrivateCompare(mlir::Operation *op);
   void cloneSymbol(const semantics::Symbol *sym);
@@ -125,6 +123,11 @@
                        bool shouldCollectPreDeterminedSymbols,
                        bool useDelayedPrivatization, lower::SymMap &symTable);
 
+  DataSharingProcessor(lower::AbstractConverter &converter,
+                       semantics::SemanticsContext &semaCtx,
+                       lower::pft::Evaluation &eval,
+                       bool useDelayedPrivatization, lower::SymMap &symTable);
+
   // Privatisation is split into two steps.
   // Step1 performs cloning of all privatisation clauses and copying for
   // firstprivates. Step1 is performed at the place where process/processStep1
@@ -151,6 +154,9 @@
                ? allPrivatizedSymbols.getArrayRef()
                : llvm::ArrayRef<const semantics::Symbol *>();
   }
+
+  void doPrivatize(const semantics::Symbol *sym,
+                   mlir::omp::PrivateClauseOps *clauseOps);
 };
 
 } // namespace omp
diff --git a/flang/test/Lower/do_concurrent_delayed_locality.f90 b/flang/test/Lower/do_concurrent_delayed_locality.f90
new file mode 100644
index 0000000..494fbd7
--- /dev/null
+++ b/flang/test/Lower/do_concurrent_delayed_locality.f90
@@ -0,0 +1,58 @@
+! RUN: %flang_fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true -o - %s | FileCheck %s
+
+subroutine do_concurrent_with_locality_specs
+  implicit none
+  integer :: i, local_var, local_init_var
+
+  do concurrent (i=1:10) local(local_var) local_init(local_init_var)
+    if (i < 5) then
+      local_var = 42
+    else 
+      local_init_var = 84
+    end if
+  end do
+end subroutine
+
+! CHECK-LABEL:   omp.private {type = firstprivate} @_QFdo_concurrent_with_locality_specsElocal_init_var_firstprivate_i32 : i32 copy {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>, %[[VAL_1:.*]]: !fir.ref<i32>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<i32>
+! CHECK:           hlfir.assign %[[VAL_2]] to %[[VAL_1]] : i32, !fir.ref<i32>
+! CHECK:           omp.yield(%[[VAL_1]] : !fir.ref<i32>)
+! CHECK:         }
+! CHECK:         omp.private {type = private} @_QFdo_concurrent_with_locality_specsElocal_var_private_i32 : i32
+
+! CHECK-LABEL:   func.func @_QPdo_concurrent_with_locality_specs() {
+! CHECK:           %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdo_concurrent_with_locality_specsEi"}
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFdo_concurrent_with_locality_specsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"}
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"}
+! CHECK:           %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index
+! CHECK:           %[[VAL_9:.*]] = arith.constant 10 : i32
+! CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index
+! CHECK:           %[[VAL_11:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_concurrent {
+! CHECK:             %[[VAL_12:.*]] = fir.alloca i32 {bindc_name = "i"}
+! CHECK:             %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFdo_concurrent_with_locality_specsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             fir.do_concurrent.loop (%[[VAL_14:.*]]) = (%[[VAL_8]]) to (%[[VAL_10]]) step (%[[VAL_11]]) private(@_QFdo_concurrent_with_locality_specsElocal_var_private_i32 %[[VAL_6]]#0 -> %[[VAL_15:.*]], @_QFdo_concurrent_with_locality_specsElocal_init_var_firstprivate_i32 %[[VAL_4]]#0 -> %[[VAL_16:.*]] : !fir.ref<i32>, !fir.ref<i32>) {
+! CHECK:               %[[VAL_17:.*]] = fir.convert %[[VAL_14]] : (index) -> i32
+! CHECK:               fir.store %[[VAL_17]] to %[[VAL_13]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = arith.constant 5 : i32
+! CHECK:               %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_20]], %[[VAL_21]] : i32
+! CHECK:               fir.if %[[VAL_22]] {
+! CHECK:                 %[[VAL_23:.*]] = arith.constant 42 : i32
+! CHECK:                 hlfir.assign %[[VAL_23]] to %[[VAL_18]]#0 : i32, !fir.ref<i32>
+! CHECK:               } else {
+! CHECK:                 %[[VAL_24:.*]] = arith.constant 84 : i32
+! CHECK:                 hlfir.assign %[[VAL_24]] to %[[VAL_19]]#0 : i32, !fir.ref<i32>
+! CHECK:               }
+! CHECK:             }
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }