freepooma-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Track up-to-date faces


From: Richard Guenther
Subject: [PATCH] Track up-to-date faces
Date: Sun, 21 Dec 2003 15:53:15 +0100 (CET)

Hi!

This patch moves away from a single bool tracking the dirtiness of the
internal guards and instead tracks the individual faces.  This allows
updating only the needed internal guards and vastly improves the performance
of (my) CFD codes, as you can see from the top parts of a flat profile:

before patch (the MultiArgKernels are the actual CFD):

Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total
 time   seconds   seconds    calls   s/call   s/call  name
 26.19     13.59    13.59                             select
  4.99     16.18     2.59     2653     0.00     0.00  int 
EngineBlockSerialize::apply<EngineElemSerialize, Engine<(int)3, double, 
BrickView>, Interval<(int)3> >(EngineElemSerialize&, Engine<(int)3, double, 
BrickView> const&, Interval<(int)3> const&)
  4.82     18.68     2.50                             read
  3.97     20.74     2.06     2653     0.00     0.00  int 
EngineBlockSerialize::apply<EngineElemDeSerialize, Engine<(int)3, double, 
BrickView>, Interval<(int)3> >(EngineElemDeSerialize&, Engine<(int)3, double, 
BrickView> const&, Interval<(int)3> const&)
  2.58     22.08     1.34                             write
  2.51     23.38     1.30                             memcpy
  2.08     24.46     1.08      762     0.00     0.00  int 
EngineBlockSerialize::apply<EngineElemSerialize, Engine<(int)3, Vector<(int)3, 
double, Full>, BrickView>, Interval<(int)3> >(EngineElemSerialize&, 
Engine<(int)3, Vector<(int)3, double, Full>, BrickView> const&, 
Interval<(int)3> const&)
  1.75     25.37     0.91       10     0.09     0.09  
MultiArgKernel<MultiArg4<Field<UniformRectilinearMesh<MeshTraits<(int)3, 
double, UniformRectilinearTag, CartesianTag, (int)3> >, double, 
CompFwd<Engine<(int)3, Vector<(int)3, double, Full>, BrickView>, Loc<(int)1> > 
>, Field<UniformRectilinearMesh<MeshTraits<(int)3, double, 
UniformRectilinearTag, CartesianTag, (int)3> >, double, BrickView>, BrickView, 
BrickView>, EvaluateLocLoop<Adv5::X::MomentumfluxY<(int)3>, (int)3> >::run()
  1.73     26.27     0.90       10     0.09     0.09  
MultiArgKernel<MultiArg4<Field<UniformRectilinearMesh<MeshTraits<(int)3, 
double, UniformRectilinearTag, CartesianTag, (int)3> >, double, 
CompFwd<Engine<(int)3, Vector<(int)3, double, Full>, BrickView>, Loc<(int)1> > 
>, Field<UniformRectilinearMesh<MeshTraits<(int)3, double, 
UniformRectilinearTag, CartesianTag, (int)3> >, double, BrickView>, BrickView, 
BrickView>, EvaluateLocLoop<Adv5::X::MomentumfluxZ<(int)3>, (int)3> >::run()
  1.73     27.17     0.90       10     0.09     0.09  
MultiArgKernel<MultiArg4<Field<UniformRectilinearMesh<MeshTraits<(int)3, 
double, UniformRectilinearTag, CartesianTag, (int)3> >, double, 
CompFwd<Engine<(int)3, Vector<(int)3, double, Full>, BrickView>, Loc<(int)1> > 
>, Field<UniformRectilinearMesh<MeshTraits<(int)3, double, 
UniformRectilinearTag, CartesianTag, (int)3> >, double, BrickView>, BrickView, 
BrickView>, EvaluateLocLoop<Adv5::Y::MomentumfluxZ<(int)3>, (int)3> >::run()

after patch:

Each sample counts as 0.01 seconds.
  %   cumulative   self              self     total
 time   seconds   seconds    calls   s/call   s/call  name
 15.75      4.77     4.77                             select
  2.97      5.67     0.90       10     0.09     0.09  
MultiArgKernel<MultiArg4<Field<UniformRectilinearMesh<MeshTraits<(int)3, 
double, UniformRectilinearTag, CartesianTag, (int)3> >, double, 
CompFwd<Engine<(int)3, Vector<(int)3, double, Full>, BrickView>, Loc<(int)1> > 
>, Field<UniformRectilinearMesh<MeshTraits<(int)3, double, 
UniformRectilinearTag, CartesianTag, (int)3> >, double, BrickView>, BrickView, 
BrickView>, EvaluateLocLoop<Adv5::X::MomentumfluxY<(int)3>, (int)3> >::run()
  2.97      6.57     0.90       10     0.09     0.09  
MultiArgKernel<MultiArg4<Field<UniformRectilinearMesh<MeshTraits<(int)3, 
double, UniformRectilinearTag, CartesianTag, (int)3> >, double, 
CompFwd<Engine<(int)3, Vector<(int)3, double, Full>, BrickView>, Loc<(int)1> > 
>, Field<UniformRectilinearMesh<MeshTraits<(int)3, double, 
UniformRectilinearTag, CartesianTag, (int)3> >, double, BrickView>, BrickView, 
BrickView>, EvaluateLocLoop<Adv5::X::MomentumfluxZ<(int)3>, (int)3> >::run()
  2.97      7.47     0.90       10     0.09     0.09  
MultiArgKernel<MultiArg4<Field<UniformRectilinearMesh<MeshTraits<(int)3, 
double, UniformRectilinearTag, CartesianTag, (int)3> >, double, 
CompFwd<Engine<(int)3, Vector<(int)3, double, Full>, BrickView>, Loc<(int)1> > 
>, Field<UniformRectilinearMesh<MeshTraits<(int)3, double, 
UniformRectilinearTag, CartesianTag, (int)3> >, double, BrickView>, BrickView, 
BrickView>, EvaluateLocLoop<Adv5::Y::MomentumfluxZ<(int)3>, (int)3> >::run()
  2.91      8.35     0.88       10     0.09     0.09  
MultiArgKernel<MultiArg4<Field<UniformRectilinearMesh<MeshTraits<(int)3, 
double, UniformRectilinearTag, CartesianTag, (int)3> >, double, 
CompFwd<Engine<(int)3, Vector<(int)3, double, Full>, BrickView>, Loc<(int)1> > 
>, Field<UniformRectilinearMesh<MeshTraits<(int)3, double, 
UniformRectilinearTag, CartesianTag, (int)3> >, double, BrickView>, BrickView, 
BrickView>, EvaluateLocLoop<Adv5::Y::MomentumfluxX<(int)3>, (int)3> >::run()
  ...
  ...
  1.65     20.66     0.50      453     0.00     0.00  int 
EngineBlockSerialize::apply<EngineElemSerialize, Engine<(int)3, double, 
BrickView>, Interval<(int)3> >(EngineElemSerialize&, Engine<(int)3, double, 
BrickView> const&, Interval<(int)3> const&)
  1.16     26.07     0.35      371     0.00     0.00  int 
EngineBlockSerialize::apply<EngineElemDeSerialize, Engine<(int)3, double, 
BrickView>, Interval<(int)3> >(EngineElemDeSerialize&, Engine<(int)3, double, 
BrickView> const&, Interval<(int)3> const&)
  0.46     28.58     0.14       80     0.00     0.00  int 
EngineBlockSerialize::apply<EngineElemSerialize, Engine<(int)3, Vector<(int)3, 
double, Full>, BrickView>, Interval<(int)3> >(EngineElemSerialize&, 
Engine<(int)3, Vector<(int)3, double, Full>, BrickView> const&, 
Interval<(int)3> const&)
  0.23     29.20     0.07       70     0.00     0.00  int 
EngineBlockSerialize::apply<EngineElemDeSerialize, Engine<(int)3, 
Vector<(int)3, double, Full>, BrickView>, Interval<(int)3> 
>(EngineElemDeSerialize&, Engine<(int)3, Vector<(int)3, double, Full>, 
BrickView> const&, Interval<(int)3> const&)
                0.50    0.00     453/453         int 
EngineBlockSerialize::apply<EngineElemSerialize, Engine<(int)3, double, 
BrickView>, Interval<(int)3> >(EngineElemSerialize&, Engine<(int)3, double, 
BrickView> const&, Interval<(int)3> const&) [36]

where the engine serializers are way down the profile (I grepped for them
and appended the first five).  Notice the drop in the number of
communications from 2653 down to 453!  Timewise this is an improvement of
more than 50%.

It passes without regressions (but these codepaths are only lightly tested
in the testsuite) and looks like it produces identical results for my CFD
application.  But I notice an asymmetry between the numbers of
Serialization and Deserialization calls after the patch and need to find out
where this comes from.

But still, is the underlying idea of changing bool *pDirty to int *pDirty
and using it as a bitfield OK?  I could even go further and track partial
updates, but this would cost memory.

Any comments? Further ideas?

Richard.


Too lazy to do a ChangeLog at the moment.

===== r2/src/Array/tests/makefile 1.4 vs edited =====
--- 1.4/r2/src/Array/tests/makefile     Thu Jan 30 22:35:28 2003
+++ edited/r2/src/Array/tests/makefile  Sun Dec 21 15:14:56 2003
@@ -39,7 +39,7 @@
        array_test12 array_test13 array_test14 array_test15 array_test16 \
        array_test17 array_test18 array_test19 array_test20 array_test21 \
        array_test22 array_test23 array_test24 array_test25 array_test26 \
-       array_test27 array_test28
+       array_test27 array_test28 array_test29

 default:: build

===== r2/src/Engine/Intersector.h 1.3 vs edited =====
--- 1.3/r2/src/Engine/Intersector.h     Thu Oct 23 14:41:01 2003
+++ edited/r2/src/Engine/Intersector.h  Sun Dec 21 15:14:56 2003
@@ -145,9 +145,47 @@
       // If we've seen this ID before, we're done.

       if (ids_m[i] == layout.ID())
-      {
        return false;
+
+      // If we've seen the base ID before and the base domain is the same
+      // we're done.
+
+      if (baseIDs_m[i] == layout.baseID()
+         && sameBaseDomain(i, layout.baseDomain(), guard))
+      {
+       shared(layout.ID(),ids_m[i]);
+
+       return (!sameBaseDomain(i,layout.baseDomain()));
       }
+    }
+
+    // current touches operation works on the owned region, so we don't
+    // use the guard cells.  If we start using touchesAlloc, then you
+    // need to return true here, and the bypass calculation above
+    // becomes somewhat more complicated.
+
+    touches(layout);
+    return false;
+  }
+
+  template<class Engine, int Dim2>
+  bool intersect(const Engine &engine, const GuardLayers<Dim2> &guard, 
GuardLayers<Dim2> &usedGuards)
+  {
+    CTAssert(Engine::dimensions == Dim);
+
+    // First, we need to check through our list of layout IDs and see if we've
+    // either seen this layout or another layout with the same baseID before.
+
+    typedef typename Engine::Layout_t Layout_t;
+    const Layout_t &layout(engine.layout());
+
+    int n = ids_m.size();
+    for (int i = 0; i < n; ++i)
+    {
+      // If we've seen this ID before, we're done.
+
+      if (ids_m[i] == layout.ID())
+       return false;

       // If we've seen the base ID before and the base domain is the same
       // we're done.
@@ -157,10 +195,27 @@
       {
        shared(layout.ID(),ids_m[i]);

-       // In this case we are using the guard cells unless this domain
-       // is exactly the same as one we've seen before.
+       // was: return (!sameBaseDomain(i,layout.baseDomain()));

-       return (!sameBaseDomain(i,layout.baseDomain()));
+        // We should be able to find out the actual shape of the
+       // used internal guards here, rather than just returning bool.
+       // Something like:
+
+       // But what do, if Dim2 > baseDims_m[i]!?
+       if (baseDims_m[i] < Dim2)
+         return true;
+
+       bool used = false;
+       for (int j = 0; j < Dim2; j++)
+       {
+         usedGuards.lower(j) = std::max(0, baseDomains_m[i][j].first() - 
layout.baseDomain()[j].first());
+         if (usedGuards.lower(j) != 0)
+           used = true;
+         usedGuards.upper(j) = std::max(0, layout.baseDomain()[j].last() - 
baseDomains_m[i][j].last());
+         if (usedGuards.upper(j) != 0)
+           used = true;
+       }
+       return used;
       }
     }

@@ -440,6 +495,13 @@
   bool intersect(const Engine &l, const GuardLayers<Dim2> &guard)
   {
     return (data()->intersect(l,guard));
+  }
+
+  template<class Engine, int Dim2>
+  inline
+  bool intersect(const Engine &l, const GuardLayers<Dim2> &guard, 
GuardLayers<Dim2> &usedGuards)
+  {
+    return (data()->intersect(l,guard,usedGuards));
   }

 private:
===== r2/src/Engine/MultiPatchEngine.cpp 1.3 vs edited =====
--- 1.3/r2/src/Engine/MultiPatchEngine.cpp      Wed May 14 09:48:40 2003
+++ edited/r2/src/Engine/MultiPatchEngine.cpp   Sun Dec 21 15:14:56 2003
@@ -36,6 +36,7 @@
 #include "Tulip/ReduceOverContexts.h"
 #include "Threads/PoomaCSem.h"
 #include "Domain/IteratorPairDomain.h"
+#include "Domain/Shrink.h"

 ///////////////////////////////////////////////////////////////////////////////
 //
@@ -77,10 +78,12 @@
 Engine(const Layout_t &layout)
   : layout_m(layout),
     data_m(layout.sizeGlobal()),
-    pDirty_m(new bool(true))
+    pDirty_m(new int)
 {
   typedef typename Layout_t::Value_t Node_t;

+  setDirty();
+
   // check for correct match of PatchTag and the mapper used to make the
   // layout.
   // THIS IS A HACK! we test on the context of the first patch, and if it
@@ -247,7 +250,7 @@
   PAssert(data_m.isValid());
   if (data_m.isShared()) {
     data_m.makeOwnCopy();
-    pDirty_m = new bool(*pDirty_m);
+    pDirty_m = new int(*pDirty_m);
   }

   return *this;
@@ -288,18 +291,89 @@
       int src  = p->ownedID_m;
       int dest = p->guardID_m;

-      // Create patch arrays that see the entire patch:
+      // Skip face, if not dirty.
+
+      if (isDirty(p->face_m)) {
+
+        // Create patch arrays that see the entire patch:

-      Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
+        Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);

-      // Now do assignment from the subdomains.
+        // Now do assignment from the subdomains.

-      lhs(p->domain_m) = rhs(p->domain_m);
+        lhs(p->domain_m) = rhs(p->domain_m);

+      }
+
       ++p;
     }
-
-  *pDirty_m = false;
+
+  clearDirty();
+}
+
+template <int Dim, class T, class LayoutTag, class PatchTag>
+void Engine<Dim, T, MultiPatch<LayoutTag,PatchTag> >::
+fillGuardsHandler(const GuardLayers<Dim>& g, const WrappedInt<true> &) const
+{
+  if (!isDirty()) return;
+
+  int updated = 0;
+  typename Layout_t::FillIterator_t p = layout_m.beginFillList();
+
+  while (p != layout_m.endFillList())
+    {
+      int src  = p->ownedID_m;
+      int dest = p->guardID_m;
+
+      // Skip face, if not dirty.
+
+      if (isDirty(p->face_m)) {
+
+        // Check, if the p->domain_m is a guard which matches the
+        // needed guard g.
+
+       int d = p->face_m/2;
+       int guardSizeNeeded = p->face_m & 1 ? g.upper(d) : g.lower(d);
+        if (!(p->face_m != -1
+             && guardSizeNeeded == 0)) {
+
+          // Create patch arrays that see the entire patch:
+
+          Array<Dim, T, PatchTag> lhs(data()[dest]), rhs(data()[src]);
+
+         // Shrink domain, if possible.  Maybe not that useful, as
+         // we can't record this update.
+
+         Interval<Dim> domain = p->domain_m;
+#if POOMA_PARTIAL_GUARDS_UPDATE
+         int s = domain[d].size();
+         if (s > guardSizeNeeded) {
+           if (p->face_m & 1)
+             domain[d] = shrinkRight(domain[d], s - guardSizeNeeded);
+           else
+             domain[d] = shrinkLeft(domain[d], s - guardSizeNeeded);
+         }
+#endif
+
+          // Now do assignment from the subdomains.
+
+          lhs(domain) = rhs(domain);
+
+         // Mark up-to-date, if updated completely.
+
+#if POOMA_PARTIAL_GUARDS_UPDATE
+         if (s == guardSizeNeeded)
+#endif
+           updated |= 1<<p->face_m;
+
+       }
+
+      }
+
+      ++p;
+    }
+
+  *pDirty_m &= ~updated;
 }


@@ -331,7 +405,7 @@
       ++p;
     }

-  *pDirty_m = true;
+  setDirty();
 }


@@ -366,7 +440,7 @@
       ++p;
     }

-  *pDirty_m = true;
+  setDirty();
 }


===== r2/src/Engine/MultiPatchEngine.h 1.2 vs edited =====
--- 1.2/r2/src/Engine/MultiPatchEngine.h        Thu Oct 23 14:41:01 2003
+++ edited/r2/src/Engine/MultiPatchEngine.h     Sun Dec 21 15:14:56 2003
@@ -633,9 +633,17 @@
     fillGuardsHandler(WrappedInt<Layout_t::supportsGuards>());
   }

+  inline void fillGuards(const GuardLayers<Dim>& g) const
+  {
+    fillGuardsHandler(g, WrappedInt<Layout_t::supportsGuards>());
+  }
+
   inline void fillGuardsHandler(const WrappedInt<false>&) const { };
   void fillGuardsHandler(const WrappedInt<true>&) const ;

+  inline void fillGuardsHandler(const GuardLayers<Dim>&, const 
WrappedInt<false>&) const { };
+  void fillGuardsHandler(const GuardLayers<Dim>&, const WrappedInt<true>&) 
const ;
+
   //---------------------------------------------------------------------------
   /// Set the internal guard cells to a particular value.

@@ -650,14 +658,34 @@
   /// Set and get the dirty flag (fillGuards is a no-op unless the
   /// dirty flag is true).

-  inline void setDirty() const
+  inline void setDirty(int face = -1) const
   {
-    *pDirty_m = true;
+    if (face == -1)
+      *pDirty_m = (1<<(Dim*2))-1;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      *pDirty_m |= (1<<face);
+    }
   }

-  inline bool isDirty() const
+  inline void clearDirty(int face = -1) const
   {
-    return *pDirty_m;
+    if (face == -1)
+      *pDirty_m = 0;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      *pDirty_m &= ~(1<<face);
+    }
+  }
+
+  inline bool isDirty(int face = -1) const
+  {
+    if (face == -1)
+      return *pDirty_m != 0;
+    else {
+      PAssert(face >= 0 && face <= Dim*2-1);
+      return *pDirty_m & (1<<face);
+    }
   }

   //============================================================
@@ -874,7 +902,7 @@
   /// must share the same flag. We use the reference count in
   /// data_m to decide whether to clean this up.

-  bool *pDirty_m;
+  int *pDirty_m;
 };


@@ -1193,6 +1221,11 @@
     baseEngine_m.fillGuards();
   }

+  inline void fillGuards(const GuardLayers<Dim2>& g) const
+  {
+    baseEngine_m.fillGuards(g);
+  }
+
   //---------------------------------------------------------------------------
   /// Set the internal guard cells to a particular value (default zero)

@@ -1213,14 +1246,19 @@
   /// Set and get the dirty flag (fillGuard is a no-op unless the
   /// dirty flag is true).

-  inline void setDirty() const
+  inline void setDirty(int face=-1) const
+  {
+    baseEngine_m.setDirty(face);
+  }
+
+  inline void clearDirty(int face=-1) const
   {
-    baseEngine_m.setDirty();
+    baseEngine_m.clearDirty(face);
   }

-  inline bool isDirty() const
+  inline bool isDirty(int face=-1) const
   {
-    return baseEngine_m.isDirty();
+    return baseEngine_m.isDirty(face);
   }

   //---------------------------------------------------------------------------
@@ -1694,12 +1732,13 @@
   apply(const Engine<Dim,T,MultiPatch<LayoutTag,PatchTag> > &engine,
        const ExpressionApply<IntersectorTag<Intersect> > &tag)
   {
+    GuardLayers<Dim> usedGuards;
     bool useGuards =
       tag.tag().intersector_m.intersect(engine,
-                                 engine.layout().internalGuards());
+                                 engine.layout().internalGuards(), usedGuards);

     if (useGuards)
-      engine.fillGuards();
+      engine.fillGuards(usedGuards);

     return 0;
   }
@@ -1725,13 +1764,14 @@
               const ExpressionApply<IntersectorTag<Intersect> > &tag,
               const WrappedInt<true> &)
   {
+    GuardLayers<BD> usedGuards;
     bool useGuards =
       tag.tag().intersector_m.
       intersect(engine,
-               engine.layout().baseLayout().internalGuards());
+               engine.layout().baseLayout().internalGuards(), usedGuards);

     if (useGuards)
-      engine.fillGuards();
+      engine.fillGuards(usedGuards);

     return 0;
   }
===== r2/src/Engine/Stencil.h 1.5 vs edited =====
--- 1.5/r2/src/Engine/Stencil.h Thu Oct 23 14:41:01 2003
+++ edited/r2/src/Engine/Stencil.h      Sun Dec 21 15:14:56 2003
@@ -752,11 +752,14 @@

   StencilIntersector(const This_t &model)
     : domain_m(model.domain_m),
+      stencilExtent_m(model.stencilExtent_m),
       intersector_m(model.intersector_m)
   { }

-  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect)
+  StencilIntersector(const Interval<Dim> &domain, const Intersect &intersect,
+                 const GuardLayers<Dim> &stencilExtent)
     : domain_m(domain),
+      stencilExtent_m(stencilExtent),
       intersector_m(intersect)
   { }

@@ -766,6 +769,7 @@
     {
       intersector_m = model.intersector_m;
       domain_m = model.domain_m;
+      stencilExtent_m = model.stencilExtent_m;
     }
     return *this;
   }
@@ -813,8 +817,21 @@
     return true;
   }

+  template<class Engine, int Dim2>
+  inline
+  bool intersect(const Engine &engine, const GuardLayers<Dim2> &g,
+                 GuardLayers<Dim> &usedGuards)
+  {
+    intersect(engine);
+    // FIXME: accumulate used guards from intersect above and
+    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
+    usedGuards = stencilExtent_m;
+    return true;
+  }
+
 private:
   Interval<Dim> domain_m;
+  GuardLayers<Dim> stencilExtent_m;
   Intersect     intersector_m;
 };

@@ -833,8 +850,14 @@
               const ExpressionApply<IntersectorTag<Intersect> > &tag)
   {
     typedef StencilIntersector<D, Intersect> NewIntersector_t;
+    GuardLayers<D> stencilExtent;
+    for (int i=0; i<D; ++i) {
+      stencilExtent.lower(i) = engine.function().lowerExtent(i);
+      stencilExtent.upper(i) = engine.function().upperExtent(i);
+    }
     NewIntersector_t newIntersector(engine.intersectDomain(),
-                                   tag.tag().intersector_m);
+                                   tag.tag().intersector_m,
+                                   stencilExtent);

     expressionApply(engine.expression(),
                    IntersectorTag<NewIntersector_t>(newIntersector));
===== r2/src/Evaluator/MultiArgEvaluator.h 1.5 vs edited =====
--- 1.5/r2/src/Evaluator/MultiArgEvaluator.h    Tue Nov 25 16:39:02 2003
+++ edited/r2/src/Evaluator/MultiArgEvaluator.h Sun Dec 21 15:19:16 2003
@@ -111,19 +111,16 @@
   }

   template<class A>
-  void operator()(const A &a, bool f) const
+  void operator()(const A &a) const
   {
-    if (f)
-    {
-      // This isn't quite what we want here, because we may want to
-      // write to a field containing multiple centering engines.
-      // Need to rewrite notifyEngineWrite as an ExpressionApply,
-      // and create a version of ExpressionApply that goes through
-      // all the engines in a field.
+    // This isn't quite what we want here, because we may want to
+    // write to a field containing multiple centering engines.
+    // Need to rewrite notifyEngineWrite as an ExpressionApply,
+    // and create a version of ExpressionApply that goes through
+    // all the engines in a field.

-      notifyEngineWrite(a.engine());
-      dirtyRelations(a, WrappedInt<A::hasRelations>());
-    }
+    notifyEngineWrite(a.engine());
+    dirtyRelations(a, WrappedInt<A::hasRelations>());
   }
 };

@@ -172,7 +169,7 @@
     MultiArgEvaluator<Evaluator_t>::evaluate(multiArg, function,
                                             domain, info, kernel);

-    applyMultiArg(multiArg, EngineWriteNotifier(), info.writers());
+    applyMultiArgIf(multiArg, EngineWriteNotifier(), info.writers());

     Pooma::endExpression();
   }
@@ -265,7 +262,12 @@
           const Kernel &kernel)
   {
     typedef SimpleIntersector<Dim> Inter_t;
-    Inter_t inter(domain);
+    GuardLayers<Dim> extent;
+    for (int i=0; i<Dim; ++i) {
+      extent.lower(i) = info.lowerExtent(i);
+      extent.upper(i) = info.upperExtent(i);
+    }
+    Inter_t inter(domain, extent);

     applyMultiArg(multiArg, inter, info.useGuards());

@@ -368,7 +370,12 @@
           const Kernel &kernel)
   {
     typedef SimpleIntersector<Dim> Inter_t;
-    Inter_t inter(domain);
+    GuardLayers<Dim> extent;
+    for (int i=0; i<Dim; ++i) {
+      extent.lower(i) = info.lowerExtent(i);
+      extent.upper(i) = info.upperExtent(i);
+    }
+    Inter_t inter(domain, extent);

     applyMultiArg(multiArg, inter, info.useGuards());

===== r2/src/Evaluator/SimpleIntersector.h 1.4 vs edited =====
--- 1.4/r2/src/Evaluator/SimpleIntersector.h    Thu Oct 23 14:41:03 2003
+++ edited/r2/src/Evaluator/SimpleIntersector.h Sun Dec 21 15:14:36 2003
@@ -91,8 +91,8 @@

   // Default constructor is trival.

-  inline SimpleIntersectorData(const Interval<Dim> &domain)
-    : seenFirst_m(false), domain_m(domain)
+  inline SimpleIntersectorData(const Interval<Dim> &domain, const 
GuardLayers<Dim> &extent)
+    : seenFirst_m(false), domain_m(domain), extent_m(extent)
   {
   }

@@ -149,6 +149,7 @@
   INodeContainer_t inodes_m;
   GlobalIDDataBase gidStore_m;
   Interval<Dim> domain_m;
+  GuardLayers<Dim> extent_m;
 };

 /**
@@ -179,8 +180,8 @@

   enum { dimensions = Dim };

-  SimpleIntersector(const Interval<Dim> &domain)
-    : pdata_m(new SimpleIntersectorData_t(domain)), useGuards_m(true)
+  SimpleIntersector(const Interval<Dim> &domain, const GuardLayers<Dim> 
&extent)
+    : pdata_m(new SimpleIntersectorData_t(domain, extent)), useGuards_m(true)
   { }

   SimpleIntersector(const This_t &model)
@@ -297,7 +298,7 @@
     apply.tag().intersect(engine);

     if (apply.tag().useGuards())
-      engine.fillGuards();
+      engine.fillGuards(apply.tag().data()->extent_m);

     return 0;
   }
@@ -316,7 +317,7 @@
     apply.tag().intersect(engine);

     if (apply.tag().useGuards())
-      engine.fillGuards();
+      engine.fillGuards(apply.tag().data()->extent_m);

     return 0;
   }
===== r2/src/Field/DiffOps/FieldStencil.h 1.3 vs edited =====
--- 1.3/r2/src/Field/DiffOps/FieldStencil.h     Sun Oct 26 14:35:20 2003
+++ edited/r2/src/Field/DiffOps/FieldStencil.h  Sun Dec 21 15:14:57 2003
@@ -614,11 +614,13 @@
   // Constructors

   FieldStencilIntersector(const This_t &model)
-    : domain_m(model.domain_m), intersector_m(model.intersector_m)
+    : domain_m(model.domain_m), stencilExtent_m(model.stencilExtent_m),
+      intersector_m(model.intersector_m)
   { }

-  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect)
-    : domain_m(dom), intersector_m(intersect)
+  FieldStencilIntersector(const Domain_t &dom, const Intersect &intersect,
+                 const GuardLayers<Dim> &stencilExtent)
+    : domain_m(dom), stencilExtent_m(stencilExtent), intersector_m(intersect)
   { }

   This_t &operator=(const This_t &model)
@@ -626,6 +628,7 @@
     if (this != &model)
     {
       domain_m = model.domain_m;
+      stencilExtent_m = model.stencilExtent_m;
       intersector_m = model.intersector_m;
     }
     return *this;
@@ -668,10 +671,22 @@
     return true;
   }

+  template<class Engine, int Dim2>
+  inline bool intersect(const Engine &engine, const GuardLayers<Dim2> &,
+                       GuardLayers<Dim> &usedGuards)
+  {
+    intersect(engine);
+    // FIXME: accumulate used guards from intersect above and
+    // stencil extent? I.e. allow  Stencil<>(a(i-1)+a(i+1))?
+    usedGuards = stencilExtent_m;
+    return true;
+  }
+
 private:


   Interval<Dim> domain_m;
+  GuardLayers<Dim> stencilExtent_m;
   Intersect     intersector_m;
 };

@@ -699,8 +714,14 @@
     // cells results in an error in the multipatch inode view.)

     typedef FieldStencilIntersector<Dim, Intersect> NewIntersector_t;
+    GuardLayers<Dim> stencilExtent;
+    for (int i=0; i<Dim; ++i) {
+      stencilExtent.lower(i) = engine.functor().lowerExtent(i);
+      stencilExtent.upper(i) = engine.functor().upperExtent(i);
+    }
     NewIntersector_t newIntersector(engine.intersectDomain(),
-                                   tag.tag().intersector_m);
+                                   tag.tag().intersector_m,
+                                   stencilExtent);

     expressionApply(engine.field(),
                    IntersectorTag<NewIntersector_t>(newIntersector));
===== r2/src/Layout/GridLayout.cpp 1.4 vs edited =====
--- 1.4/r2/src/Layout/GridLayout.cpp    Wed May 14 09:51:04 2003
+++ edited/r2/src/Layout/GridLayout.cpp Sun Dec 21 15:14:41 2003
@@ -429,7 +429,7 @@

                      // Now, push IDs and source into cache...

-                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID));
+                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID, d*2));
                    }
                }
            }
@@ -481,7 +481,7 @@

                      // Now, push IDs and source into cache...

-                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID));
+                     this->gcFillList_m.push_back(GCFillInfo_t(gcdom, 
sourceID, destID, d*2+1));
                    }
                }
            }
===== r2/src/Layout/LayoutBase.h 1.3 vs edited =====
--- 1.3/r2/src/Layout/LayoutBase.h      Sun Oct 26 14:35:23 2003
+++ edited/r2/src/Layout/LayoutBase.h   Sun Dec 21 15:14:41 2003
@@ -119,8 +119,8 @@

   struct GCFillInfo
   {
-    GCFillInfo(const Domain_t &dom, int ownedID, int guardID)
-    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID) { }
+    GCFillInfo(const Domain_t &dom, int ownedID, int guardID, int face=-1)
+    : domain_m(dom), ownedID_m(ownedID), guardID_m(guardID), face_m(face) { }

     // Get a CW warning about this not having a default constructor
     // when we instantiate the vector<GCFillInfo> below. This never
@@ -131,6 +131,7 @@
     Domain_t domain_m;    // guard layer domain
     int      ownedID_m;   // node ID for which domain_m is owned
     int      guardID_m;   // node ID for which domain_m is in the guards
+    int      face_m;      // destination face of the guard layer (or -1, if 
unknown)

     Domain_t & domain() { return domain_m;}
     int & ownedID() { return ownedID_m;}
===== r2/src/Layout/UniformGridLayout.cpp 1.4 vs edited =====
--- 1.4/r2/src/Layout/UniformGridLayout.cpp     Wed May 14 09:51:04 2003
+++ edited/r2/src/Layout/UniformGridLayout.cpp  Sun Dec 21 15:14:41 2003
@@ -370,7 +370,7 @@
                    this->all_m[sourceID]->context() == Pooma::context() ||
                    this->all_m[destID]->context() == Pooma::context()
                    )
-                
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+                
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2));
               }
           }

@@ -417,7 +417,7 @@
                    this->all_m[sourceID]->context() == Pooma::context() ||
                    this->all_m[destID]->context() == Pooma::context()
                    )
-                 
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID));
+                 
this->gcFillList_m.push_back(GCFillInfo_t(gcdom,sourceID,destID,d*2+1));
               }
           }
       }

reply via email to

[Prev in Thread] Current Thread [Next in Thread]