freepooma-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Speed up guard update.


From: Richard Guenther
Subject: [PATCH] Speed up guard update.
Date: Wed, 14 Jan 2004 21:56:51 +0100 (CET)

Hi!

This is a refined (i.e. shorter) version of the patch, which unifies the
tracking of up-to-date guard faces with the special optimized copy for MPI.

Tested on serial ia32 Linux with gcc 3.4; no regressions.

Ok?

Richard.


2004Jan14  Richard Guenther <address@hidden>

        * src/Engine/Intersector.h: Track used guard faces.
        * src/Engine/MultiPatchEngine.h: Track up-to-dateness per
        face using a bitmask.
        * src/Engine/Stencil.h: Track used guard faces.
        * src/Field/DiffOps/FieldStencil.h: Track used guard faces.
        * src/Layout/GridLayout.cpp: Record face of guard update.
        * src/Layout/LayoutBase.h: Add face_m member to guard update
        struct.
        * src/Layout/UniformGridLayout.cpp: Record face of guard update.
        * src/Engine/MultiPatchEngine.cpp: During fillGuards(), update
        only those faces that are both needed and not up-to-date. Do
        manual Send/Receive of the inner guards domain for MPI.

--- cvs/r2/src/Engine/Intersector.h     2004-01-14 20:08:06.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/Intersector.h      2004-01-14 20:13:32.000000000 
+0100
@@ -129,7 +129,8 @@
   }

   template<class Engine, int Dim2>
-  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard)
+  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard,
+                GuardLayers<Dim2> &usedGuards)
   {
     CTAssert(Engine::dimensions == Dim);

@@ -145,9 +146,7 @@
       // If we've seen this ID before, we're done.

       if (ids_m[i] == layout.ID())
-      {
        return false;
-      }

       // If we've seen the base ID before and the base domain is the same
       // we're done.
@@ -157,10 +156,27 @@
       {
        shared(layout.ID(),ids_m[i]);

-       // In this case we are using the guard cells unless this domain
-       // is exactly the same as one we've seen before.
+       // was: return (!sameBaseDomain(i,layout.baseDomain()));

-       return (!sameBaseDomain(i,layout.baseDomain()));
+        // We should be able to find out the actual shape of the
+       // used internal guards here, rather than just returning bool.
+       // Something like:
+
+       // But what do, if Dim2 > baseDims_m[i]!?
+       if (baseDims_m[i] < Dim2)
+         return true;
+
+       bool used = false;
+       for (int j = 0; j < Dim2; j++)
+       {
+         usedGuards.lower(j) = std::max(0, baseDomains_m[i][j].first() - 
layout.baseDomain()[j].first());
+         if (usedGuards.lower(j) != 0)
+           used = true;
+         usedGuards.upper(j) = std::max(0, layout.baseDomain()[j].last() - 
baseDomains_m[i][j].last());
+         if (usedGuards.upper(j) != 0)
+           used = true;
+       }
+       return used;
       }
     }

@@ -437,9 +453,9 @@

   template<class Engine, int Dim2>
   inline
-  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard)
+  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard, 
GuardLayers<Dim2> &usedGuards)
   {
-    return (data()->intersect(l,guard));
+    return (data()->intersect(l,guard,usedGuards));
   }

 private:
--- cvs/r2/src/Engine/MultiPatchEngine.h        2004-01-14 20:11:36.000000000 
+0100
+++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.h 2004-01-14 20:13:32.000000000 
+0100
@@ -628,13 +628,18 @@
   //---------------------------------------------------------------------------
   /// Fill the internal guard cells.

-  inline void fillGuards() const
+  inline void fillGuards(const GuardLayers<Dim>& g) const
   {
-    fillGuardsHandler(WrappedInt<Layout_t::supportsGuards>());
+    fillGuardsHandler(g, WrappedInt<Layout_t::supportsGuards>());
+  }
+
+  inline void fillGuards() const
+  {
+    fillGuards(layout().internalGuards());
   }

-  inline void fillGuardsHandler(const WrappedInt<false>&) const { };
-  void fillGuardsHandler(const WrappedInt<true>&) const ;
+  inline void fillGuardsHandler(const GuardLayers<Dim>&, const 
WrappedInt<false>&) const { };
+  void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<true>&) 
const ;

   //---------------------------------------------------------------------------
   /// Set the internal guard cells to a particular value.
@@ -650,14 +655,31 @@
   /// Set and get the dirty flag (fillGuards is a no-op unless the
   /// dirty flag is true).

+  inline int dirty() const { return *pDirty_m; }
+
   inline void setDirty() const
   {
-    *pDirty_m = true;
+    *pDirty_m = (1<<(Dim*2))-1;
+  }
+
+  inline void clearDirty(int face = -1) const
+  {
+    if (face == -1)
+      *pDirty_m = 0;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      *pDirty_m &= ~(1<<face);
+    }
   }

-  inline bool isDirty() const
+  inline bool isDirty(int face = -1) const
   {
-    return *pDirty_m;
+    if (face == -1)
+      return *pDirty_m != 0;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      return *pDirty_m & (1<<face);
+    }
   }

   //============================================================
@@ -874,7 +896,7 @@
   /// must share the same flag. We use the reference count in
   /// data_m to decide whether to clean this up.

-  bool *pDirty_m;
+  int *pDirty_m;
 };


@@ -1193,6 +1215,11 @@
     baseEngine_m.fillGuards();
   }

+  inline void fillGuards(const GuardLayers<Dim2>& g) const
+  {
+    baseEngine_m.fillGuards(g);
+  }
+
   //---------------------------------------------------------------------------
   /// Set the internal guard cells to a particular value (default zero)

@@ -1217,10 +1244,15 @@
   {
     baseEngine_m.setDirty();
   }
+
+  inline void clearDirty(int face=-1) const
+  {
+    baseEngine_m.clearDirty(face);
+  }

-  inline bool isDirty() const
+  inline bool isDirty(int face=-1) const
   {
-    return baseEngine_m.isDirty();
+    return baseEngine_m.isDirty(face);
   }

   //---------------------------------------------------------------------------
@@ -1694,12 +1726,13 @@
   apply(const Engine<Dim,T,MultiPatch<LayoutTag,PatchTag> > &engine,
        const ExpressionApply<IntersectorTag<Intersect> > &tag)
   {
+    GuardLayers<Dim> usedGuards;
     bool useGuards =
       tag.tag().intersector_m.intersect(engine,
-                                 engine.layout().internalGuards());
+                                 engine.layout().internalGuards(), usedGuards);

     if (useGuards)
-      engine.fillGuards();
+      engine.fillGuards(usedGuards);

     return 0;
   }
@@ -1725,13 +1758,14 @@
               const ExpressionApply<IntersectorTag<Intersect> > &tag,
               const WrappedInt<true> &)
   {
+    GuardLayers<BD> usedGuards;
     bool useGuards =
       tag.tag().intersector_m.
       intersect(engine,
-               engine.layout().baseLayout().internalGuards());
+               engine.layout().baseLayout().internalGuards(), usedGuards);

     if (useGuards)
-      engine.fillGuards();
+      engine.fillGuards(usedGuards);

     return 0;
   }
@@ -1741,7 +1775,7 @@
               const ExpressionApply<IntersectorTag<Intersect> > &tag,
               const WrappedInt<false> &)
   {
-    tag.tag().intersector_m.intersect(engine, GuardLayers<Dim>());
+    tag.tag().intersector_m.intersect(engine);
     return 0;
   }
 };
--- cvs/r2/src/Engine/Stencil.h 2004-01-14 20:08:07.000000000 +0100
+++ pooma-mpi3/r2/src/Engine/Stencil.h  2004-01-14 20:13:32.000000000 +0100
@@ -752,11 +752,14 @@

   StencilIntersector(const This_t &model)
     : domain_m(model.domain_m),
+      stencilExtent_m(model.stencilExtent_m),
       intersector_m(model.intersector_m)
   { }

-  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect)
+  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect,
+                 const GuardLayers<Dim> &stencilExtent)
     : domain_m(domain),
+      stencilExtent_m(stencilExtent),
       intersector_m(intersect)
   { }

@@ -766,6 +769,7 @@
     {
       intersector_m = model.intersector_m;
       domain_m = model.domain_m;
+      stencilExtent_m = model.stencilExtent_m;
     }
     return *this;
   }
@@ -807,14 +811,19 @@

   template<class Engine, int Dim2>
   inline
-  bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
+  bool intersect(const Engine &engine, const GuardLayers<Dim2> &g,
+                 GuardLayers<Dim> &usedGuards)
   {
     intersect(engine);
+    // FIXME: accumulate used guards from intersect above and
+    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
+    usedGuards = stencilExtent_m;
     return true;
   }

 private:
   Interval<Dim> domain_m;
+  GuardLayers<Dim> stencilExtent_m;
   Intersect     intersector_m;
 };

@@ -833,8 +842,14 @@
               const ExpressionApply<IntersectorTag<Intersect> > &tag)
   {
     typedef StencilIntersector<D, Intersect> NewIntersector_t;
+    GuardLayers<D> stencilExtent;
+    for (int i=0; i<D; ++i) {
+      stencilExtent.lower(i) = engine.function().lowerExtent(i);
+      stencilExtent.upper(i) = engine.function().upperExtent(i);
+    }
     NewIntersector_t newIntersector(engine.intersectDomain(),
-                                   tag.tag().intersector_m);
+                                   tag.tag().intersector_m,
+                                   stencilExtent);

     expressionApply(engine.expression(),
                    IntersectorTag<NewIntersector_t>(newIntersector));
--- cvs/r2/src/Field/DiffOps/FieldStencil.h     2004-01-14 20:08:09.000000000 
+0100
+++ pooma-mpi3/r2/src/Field/DiffOps/FieldStencil.h      2004-01-14 
20:13:32.000000000 +0100
@@ -614,11 +617,13 @@
   // Constructors

   FieldStencilIntersector(const This_t &model)
-    : domain_m(model.domain_m), intersector_m(model.intersector_m)
+    : domain_m(model.domain_m), stencilExtent_m(model.stencilExtent_m),
+      intersector_m(model.intersector_m)
   { }

-  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect)
-    : domain_m(dom), intersector_m(intersect)
+  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect,
+                 const GuardLayers<Dim> &stencilExtent)
+    : domain_m(dom), stencilExtent_m(stencilExtent), intersector_m(intersect)
   { }

   This_t &operator=(const This_t &model)
@@ -626,6 +631,7 @@
     if (this != &model)
     {
       domain_m = model.domain_m;
+      stencilExtent_m = model.stencilExtent_m;
       intersector_m = model.intersector_m;
     }
     return *this;
@@ -662,9 +668,13 @@
   }

   template<class Engine, int Dim2>
-  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &)
+  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &,
+                       GuardLayers<Dim> &usedGuards)
   {
     intersect(engine);
+    // FIXME: accumulate used guards from intersect above and
+    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
+    usedGuards = stencilExtent_m;
     return true;
   }

@@ -672,6 +682,7 @@


   Interval<Dim> domain_m;
+  GuardLayers<Dim> stencilExtent_m;
   Intersect     intersector_m;
 };

@@ -699,8 +710,14 @@
     // cells results in an error in the multipatch inode view.)

     typedef FieldStencilIntersector<Dim, Intersect> NewIntersector_t;
+    GuardLayers<Dim> stencilExtent;
+    for (int i=0; i<Dim; ++i) {
+      stencilExtent.lower(i) = engine.functor().lowerExtent(i);
+      stencilExtent.upper(i) = engine.functor().upperExtent(i);
+    }
     NewIntersector_t newIntersector(engine.intersectDomain(),
-                                   tag.tag().intersector_m);
+                                   tag.tag().intersector_m,
+                                   stencilExtent);

     expressionApply(engine.field(),
                    IntersectorTag<NewIntersector_t>(newIntersector));
--- cvs/r2/src/Layout/GridLayout.cpp    2004-01-14 20:08:10.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/GridLayout.cpp     2004-01-14 20:13:32.000000000 
+0100
@@ -429,7 +436,7 @@

                      // Now, push IDs and source into cache...

-                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID));
+                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID, d*2));
                    }
                }
            }
@@ -481,7 +488,7 @@

                      // Now, push IDs and source into cache...

-                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID));
+                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID, d*2+1));
                    }
                }
            }
--- cvs/r2/src/Layout/LayoutBase.h      2004-01-14 20:08:12.000000000 +0100
+++ pooma-mpi3/r2/src/Layout/LayoutBase.h       2004-01-14 20:13:32.000000000 
+0100
@@ -119,8 +121,8 @@

   struct GCFillInfo
   {
-    GCFillInfo(const Domain_t &dom, int ownedID, int guardID)
-    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID) { }
+    GCFillInfo(const Domain_t &dom, int ownedID, int guardID, int face=-1)
+    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID), face_m(face) { }

     // Get a CW warning about this not having a default constructor
     // when we instantiate the vector<GCFillInfo> below. This never
@@ -131,6 +133,7 @@
     Domain_t domain_m;    // guard layer domain
     int      ownedID_m;   // node ID for which domain_m is owned
     int      guardID_m;   // node ID for which domain_m is in the guards
+    int      face_m;      // destination face of the guard layer (or -1, if 
unknown)

     Domain_t & domain() { return domain_m;}
     int & ownedID() { return ownedID_m;}
--- cvs/r2/src/Layout/UniformGridLayout.cpp     2004-01-14 20:08:13.000000000 
+0100
+++ pooma-mpi3/r2/src/Layout/UniformGridLayout.cpp      2004-01-14 
20:13:32.000000000 +0100
@@ -279,7 +279,7 @@
 //-----------------------------------------------------------------------------
 //
 // template <int Dim>
-// void UniformGridLayout<Dim>::calcGCFillList()
+// void UniformGridLayoutData<Dim>::calcGCFillList()
 //
 // Calculates the cached information needed by MultiPatch Engine to
 // fill the guard cells.
@@ -370,7 +370,7 @@
                    this->all_m[sourceID]->context() == Pooma::context() ||
                    this->all_m[destID]->context() == Pooma::context()
                    )
-                
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+                
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2));
               }
           }

@@ -417,7 +417,7 @@
                    this->all_m[sourceID]->context() == Pooma::context() ||
                    this->all_m[destID]->context() == Pooma::context()
                    )
-                 
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+                 
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2+1));
               }
           }
       }
--- cvs/r2/src/Engine/MultiPatchEngine.cpp      2004-01-14 20:11:34.000000000 
+0100
+++ pooma-mpi3/r2/src/Engine/MultiPatchEngine.cpp       2004-01-14 
20:23:23.000000000 +0100
@@ -34,6 +34,7 @@
 #include "Engine/CompressedFraction.h"
 #include "Array/Array.h"
 #include "Tulip/ReduceOverContexts.h"
+#include "Tulip/SendReceive.h"
 #include "Threads/PoomaCSem.h"
 #include "Domain/IteratorPairDomain.h"

@@ -77,16 +78,18 @@
 Engine(const Layout_t &layout)
   : layout_m(layout),
     data_m(layout.sizeGlobal()),
-    pDirty_m(new bool(true))
+    pDirty_m(new int)
 {
   typedef typename Layout_t::Value_t Node_t;

+  setDirty();
+
   // check for correct match of PatchTag and the mapper used to make the
   // layout.
   // THIS IS A HACK! we test on the context of the first patch, and if it
   // is -1, we have a Layout made with the LocalMapper.

-#if POOMA_CHEETAH
+#if POOMA_MESSAGING

   if( layout_m.nodeListGlobal().size() > 0)
   {
@@ -247,7 +250,7 @@
   PAssert(data_m.isValid());
   if (data_m.isShared()) {
     data_m.makeOwnCopy();
-    pDirty_m = new bool(*pDirty_m);
+    pDirty_m = new int(*pDirty_m);
   }

   return *this;
@@ -261,45 +264,88 @@
 //
 //-----------------------------------------------------------------------------

+/// Guard layer assign between non-remote engines, just use the
+/// ET mechanisms
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Tag>& lhs,
+                 const Array<Dim, T, Tag>& rhs,
+                 const Interval<Dim>& domain)
+{
+  lhs(domain) = rhs(domain);
+}
+
+/// Guard layer assign between remote engines, use Send/Receive directly
+/// to avoid one extra copy of the data.
+
+template <int Dim, class T, class Tag>
+static inline
+void simpleAssign(const Array<Dim, T, Remote<Tag> >& lhs,
+                 const Array<Dim, T, Remote<Tag> >& rhs,
+                 const Interval<Dim>& domain)
+{
+  if (lhs.engine().owningContext() == rhs.engine().owningContext())
+    lhs(domain) = rhs(domain);
+  else {
+    typedef typename NewEngine<Engine<Dim, T, Tag>, Interval<Dim> >::Type_t 
ViewEngine_t;
+    if (lhs.engine().engineIsLocal())
+      Receive<ViewEngine_t>::receive(ViewEngine_t(lhs.engine().localEngine(), 
domain),
+                                    rhs.engine().owningContext());
+    else if (rhs.engine().engineIsLocal())
+      SendReceive::send(ViewEngine_t(rhs.engine().localEngine(), domain),
+                       lhs.engine().owningContext());
+  }
+}
+
 template <int Dim, class T, class LayoutTag, class PatchTag>
 void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
-fillGuardsHandler(const WrappedInt<true> &) const
+fillGuardsHandler(const GuardLayers<Dim>& g, const WrappedInt<true> &) const
 {
   if (!isDirty()) return;
-
-#if POOMA_PURIFY
-
-  // This is here to remove spurious UMRs that result when un-initialized
-  // guards are copied in the following loop. All of the unitialized data
-  // is ultimately overwritten with good data, so I don't see why purify
-  // calls these UMRs in stead of unitialized memory copies, but it does.
-  // I don't do this in general since it would be slow and since T(0) is
-  // not generally valid. This does mean that fillGuards() will fail
-  // with purify for types that do not know what to do with T(0).
-
-  setGuards(T(0));
-
-#endif

+  int updated = 0;
   typename Layout_t::FillIterator_t p = layout_m.beginFillList();
-
+
   while (p != layout_m.endFillList())
     {
       int src  = p->ownedID_m;
       int dest = p->guardID_m;

-      // Create patch arrays that see the entire patch:
+      // Skip face, if not dirty.
+
+      if (isDirty(p->face_m)) {
+
+        // Check, if the p->domain_m is a guard which matches the
+        // needed guard g.
+
+       int d = p->face_m/2;
+       int guardSizeNeeded = p->face_m & 1 ? g.upper(d) : g.lower(d);
+        if (!(p->face_m != -1
+             && guardSizeNeeded == 0)) {
+
+          // Create patch arrays that see the entire patch:

-      Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
+          Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);

-      // Now do assignment from the subdomains.
+          // Now do assignment from the subdomains.
+#if POOMA_MPI
+          simpleAssign(lhs, rhs, p->domain_m);
+#else
+          lhs(p->domain_m) = rhs(p->domain_m);
+#endif
+
+         // Mark up-to-date.
+         updated |= 1<<p->face_m;
+
+       }
+
+      }

-      lhs(p->domain_m) = rhs(p->domain_m);
-
       ++p;
     }
-
-  *pDirty_m = false;
+
+  *pDirty_m &= ~updated;
 }


@@ -331,7 +377,7 @@
       ++p;
     }

-  *pDirty_m = true;
+  setDirty();
 }


@@ -366,7 +412,7 @@
       ++p;
     }

-  *pDirty_m = true;
+  setDirty();
 }


reply via email to

[Prev in Thread] Current Thread [Next in Thread]