MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
47#define MUELU_BLACKBOXPFACTORY_DEF_HPP
48
49#include <stdlib.h>
50#include <iomanip>
51
52
53// #include <Teuchos_LAPACK.hpp>
54#include <Teuchos_SerialDenseMatrix.hpp>
55#include <Teuchos_SerialDenseVector.hpp>
56#include <Teuchos_SerialDenseSolver.hpp>
57
58#include <Xpetra_CrsMatrixUtils.hpp>
59#include <Xpetra_CrsMatrixWrap.hpp>
60#include <Xpetra_ImportFactory.hpp>
61#include <Xpetra_Matrix.hpp>
62#include <Xpetra_MapFactory.hpp>
63#include <Xpetra_MultiVectorFactory.hpp>
64#include <Xpetra_VectorFactory.hpp>
65
66#include <Xpetra_IO.hpp>
67
70
71#include "MueLu_MasterList.hpp"
72#include "MueLu_Monitor.hpp"
73
74namespace MueLu {
75
76 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
78 RCP<ParameterList> validParamList = rcp(new ParameterList());
79
80 // Coarsen can come in two forms, either a single char that will be interpreted as an integer
81 // which is used as the coarsening rate in every spatial dimentions,
82 // or it can be a longer string that will then be interpreted as an array of integers.
83 // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
84 // is the default setting!
85 validParamList->set<std::string > ("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
86 validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
87 validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
88 validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
89 validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
90 validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
91 validParamList->set<std::string> ("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
92 validParamList->set<std::string> ("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
93
94 return validParamList;
95 }
96
97 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
99 Level& /* coarseLevel */)
100 const {
101 Input(fineLevel, "A");
102 Input(fineLevel, "Nullspace");
103 Input(fineLevel, "Coordinates");
104 // Request the global number of nodes per dimensions
105 if(fineLevel.GetLevelID() == 0) {
106 if(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
107 fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
108 } else {
109 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
111 "gNodesPerDim was not provided by the user on level0!");
112 }
113 } else {
114 Input(fineLevel, "gNodesPerDim");
115 }
116
117 // Request the local number of nodes per dimensions
118 if(fineLevel.GetLevelID() == 0) {
119 if(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
120 fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
121 } else {
122 TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
124 "lNodesPerDim was not provided by the user on level0!");
125 }
126 } else {
127 Input(fineLevel, "lNodesPerDim");
128 }
129 }
130
131 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
133 Level& coarseLevel) const{
134 return BuildP(fineLevel, coarseLevel);
135
136 }
137
138 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
140 Level& coarseLevel)const{
141 FactoryMonitor m(*this, "Build", coarseLevel);
142
143 // Get parameter list
144 const ParameterList& pL = GetParameterList();
145
146 // obtain general variables
147 RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
148 RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
149 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coordinates =
150 Get< RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > >(fineLevel, "Coordinates");
151 LO numDimensions = coordinates->getNumVectors();
152 LO BlkSize = A->GetFixedBlockSize();
153
154 // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
155 Array<GO> gFineNodesPerDir(3);
156 Array<LO> lFineNodesPerDir(3);
157 // Get the number of points in each direction
158 if(fineLevel.GetLevelID() == 0) {
159 gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
160 lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
161 } else {
162 // Loading global number of nodes per diretions
163 gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
164
165 // Loading local number of nodes per diretions
166 lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
167 }
168 for(LO i = 0; i < 3; ++i) {
169 if(gFineNodesPerDir[i] == 0) {
170 GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
171 << std::endl;
172 gFineNodesPerDir[i] = 1;
173 }
174 if(lFineNodesPerDir[i] == 0) {
175 GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
176 << std::endl;
177 lFineNodesPerDir[i] = 1;
178 }
179 }
180 GO gNumFineNodes = gFineNodesPerDir[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
181 LO lNumFineNodes = lFineNodesPerDir[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0];
182
183 // Get the coarsening rate
184 std::string coarsenRate = pL.get<std::string>("Coarsen");
185 Array<LO> coarseRate(3);
186 {
187 Teuchos::Array<LO> crates;
188 try {
189 crates = Teuchos::fromStringToArray<LO>(coarsenRate);
190 } catch(const Teuchos::InvalidArrayStringRepresentation& e) {
191 GetOStream(Errors,-1) << " *** Coarsen must be a string convertible into an array! *** "
192 << std::endl;
193 throw e;
194 }
195 TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
197 "Coarsen must have at least as many components as the number of"
198 " spatial dimensions in the problem.");
199 for(LO i = 0; i < 3; ++i) {
200 if(i < numDimensions) {
201 if(crates.size() == 1) {
202 coarseRate[i] = crates[0];
203 } else if(i < crates.size()) {
204 coarseRate[i] = crates[i];
205 } else {
206 GetOStream(Errors,-1) << " *** Coarsen must be at least as long as the number of"
207 " spatial dimensions! *** " << std::endl;
208 throw Exceptions::RuntimeError(" *** Coarsen must be at least as long as the number of"
209 " spatial dimensions! *** \n");
210 }
211 } else {
212 coarseRate[i] = 1;
213 }
214 }
215 } // End of scope for crates
216
217 // Get the stencil type used for discretization
218 const std::string stencilType = pL.get<std::string>("stencil type");
219 if(stencilType != "full" && stencilType != "reduced") {
220 GetOStream(Errors,-1) << " *** stencil type must be set to: full or reduced, any other value "
221 "is trated as an error! *** " << std::endl;
222 throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
223 }
224
225 // Get the strategy for PDE systems
226 const std::string blockStrategy = pL.get<std::string>("block strategy");
227 if(blockStrategy != "coupled" && blockStrategy != "uncoupled") {
228 GetOStream(Errors,-1) << " *** block strategy must be set to: coupled or uncoupled, any other "
229 "value is trated as an error! *** " << std::endl;
230 throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
231 }
232
233 GO gNumCoarseNodes = 0;
234 LO lNumCoarseNodes = 0;
235 Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
236 Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
237 Array<bool> ghostInterface(6);
238 Array<int> boundaryFlags(3);
239 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes(numDimensions);
240 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
241 for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim)();}
242
243 // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
244 RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
245
246 GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize,// inputs
247 gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
248 lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
249 gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
250 ghostedCoarseNodes);
251
252 // Create the MultiVector of coarse coordinates
253 Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
254 RCP<const Map> coarseCoordsMap = MapFactory::Build (lib,
255 gNumCoarseNodes,
256 coarseNodesGIDs.view(0, lNumCoarseNodes),
257 coordinates->getMap()->getIndexBase(),
258 coordinates->getMap()->getComm());
259 Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseCoords(numDimensions);
260 for(LO dim = 0; dim < numDimensions; ++dim) {
261 coarseCoords[dim] = coarseNodes[dim]();
262 }
263 RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coarseCoordinates =
264 Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO>::Build(coarseCoordsMap, coarseCoords(),
265 numDimensions);
266
267 // Now create a new matrix: Aghost that contains all the data
268 // locally needed to compute the local part of the prolongator.
269 // Here assuming that all the coarse nodes o and fine nodes +
270 // are local then all the data associated with the coarse
271 // nodes O and the fine nodes * needs to be imported.
272 //
273 // *--*--*--*--*--*--*--*
274 // | | | | | | | |
275 // o--+--+--o--+--+--O--*
276 // | | | | | | | |
277 // +--+--+--+--+--+--*--*
278 // | | | | | | | |
279 // +--+--+--+--+--+--*--*
280 // | | | | | | | |
281 // o--+--+--o--+--+--O--*
282 // | | | | | | | |
283 // +--+--+--+--+--+--*--*
284 // | | | | | | | |
285 // *--*--*--*--*--*--*--*
286 // | | | | | | | |
287 // O--*--*--O--*--*--O--*
288 //
289 // Creating that local matrix is easy enough using proper range
290 // and domain maps to import data from A. Note that with this
291 // approach we reorder the local entries using the domain map and
292 // can subsequently compute the prolongator without reordering.
293 // As usual we need to be careful about any coarsening rate
294 // change at the boundary!
295
296 // The ingredients needed are an importer, a range map and a domain map
297 Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
298 nodeSteps[0] = 1;
299 nodeSteps[1] = gFineNodesPerDir[0];
300 nodeSteps[2] = gFineNodesPerDir[0]*gFineNodesPerDir[1];
301 Array<LO> glFineNodesPerDir(3);
302 GO startingGID = A->getRowMap()->getMinGlobalIndex();
303 for(LO dim = 0; dim < 3; ++dim) {
304 LO numCoarseNodes = 0;
305 if(dim < numDimensions) {
306 startingGID -= myOffset[dim]*nodeSteps[dim];
307 numCoarseNodes = lCoarseNodesPerDir[dim];
308 if(ghostInterface[2*dim]) {++numCoarseNodes;}
309 if(ghostInterface[2*dim+1]) {++numCoarseNodes;}
310 if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
311 glFineNodesPerDir[dim] = (numCoarseNodes - 2)*coarseRate[dim] + endRate[dim] + 1;
312 } else {
313 glFineNodesPerDir[dim] = (numCoarseNodes - 1)*coarseRate[dim] + 1;
314 }
315 } else {
316 glFineNodesPerDir[dim] = 1;
317 }
318 }
319 ghostRowGIDs.resize(glFineNodesPerDir[0]*glFineNodesPerDir[1]*glFineNodesPerDir[2]*BlkSize);
320 for(LO k = 0; k < glFineNodesPerDir[2]; ++k) {
321 for(LO j = 0; j < glFineNodesPerDir[1]; ++j) {
322 for(LO i = 0; i < glFineNodesPerDir[0]; ++i) {
323 for(LO l = 0; l < BlkSize; ++l) {
324 ghostRowGIDs[(k*glFineNodesPerDir[1]*glFineNodesPerDir[0]
325 + j*glFineNodesPerDir[0] + i)*BlkSize + l] = startingGID
326 + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
327 }
328 }
329 }
330 }
331
332 // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
333 Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
334 Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
335 GO colMinGID = 0;
336 Array<LO> colRange(numDimensions);
337 dimStride[0] = 1;
338 for(int dim = 1; dim < numDimensions; ++dim) {
339 dimStride[dim] = dimStride[dim - 1]*gFineNodesPerDir[dim - 1];
340 }
341 {
342 GO tmp = startingGID;
343 for(int dim = numDimensions; dim > 0; --dim) {
344 startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
345 tmp = tmp % dimStride[dim - 1];
346
347 if(startingGlobalIndices[dim - 1] > 0) {
348 startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
349 }
350 if(startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]){
351 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
352 + glFineNodesPerDir[dim - 1];
353 } else {
354 finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
355 + glFineNodesPerDir[dim - 1] - 1;
356 }
357 colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
358 colMinGID += startingColIndices[dim - 1]*dimStride[dim - 1];
359 }
360 }
361 ghostColGIDs.resize(colRange[0]*colRange[1]*colRange[2]*BlkSize);
362 for(LO k = 0; k < colRange[2]; ++k) {
363 for(LO j = 0; j < colRange[1]; ++j) {
364 for(LO i = 0; i < colRange[0]; ++i) {
365 for(LO l = 0; l < BlkSize; ++l) {
366 ghostColGIDs[(k*colRange[1]*colRange[0] + j*colRange[0] + i)*BlkSize + l] = colMinGID
367 + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
368 }
369 }
370 }
371 }
372
373 RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
374 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
375 ghostRowGIDs(),
376 A->getRowMap()->getIndexBase(),
377 A->getRowMap()->getComm());
378 RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
379 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
380 ghostColGIDs(),
381 A->getRowMap()->getIndexBase(),
382 A->getRowMap()->getComm());
383 RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO,GO,NO>::Build(A->getRowMap(),
384 ghostedRowMap);
385 RCP<const Matrix> Aghost = Xpetra::MatrixFactory<SC,LO,GO,NO>::Build(A, *ghostImporter,
386 ghostedRowMap,
387 ghostedRowMap);
388
389 // Create the maps and data structures for the projection matrix
390 RCP<const Map> rowMapP = A->getDomainMap();
391
392 RCP<const Map> domainMapP;
393
394 RCP<const Map> colMapP;
395
396 // At this point we need to create the column map which is a delicate operation.
397 // The entries in that map need to be ordered as follows:
398 // 1) first owned entries ordered by LID
399 // 2) second order the remaining entries by PID
400 // 3) entries with the same remote PID are ordered by GID.
401 // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
402 // is the number of remote nodes. The sorting can be limited to remote nodes
403 // as the owned ones are alreaded ordered by LID!
404
405 LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
406 {
407 std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
408 for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
409 colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
410 if(ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
411 colMapOrdering[ind].PID = -1;
412 } else {
413 colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
414 }
415 colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
416 colMapOrdering[ind].lexiInd = ind;
417 }
418 std::sort(colMapOrdering.begin(), colMapOrdering.end(),
419 [](NodeID a, NodeID b)->bool{
420 return ( (a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)) );
421 });
422
423 colGIDs.resize(BlkSize*lNumGhostedNodes);
424 for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
425 // Save the permutation calculated to go from Lexicographic indexing to column map indexing
426 ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
427 for(LO dof = 0; dof < BlkSize; ++dof) {
428 colGIDs[BlkSize*ind + dof] = BlkSize*colMapOrdering[ind].GID + dof;
429 }
430 }
431 domainMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
432 BlkSize*gNumCoarseNodes,
433 colGIDs.view(0,BlkSize*lNumCoarseNodes),
434 rowMapP->getIndexBase(),
435 rowMapP->getComm());
436 colMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
437 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
438 colGIDs.view(0, colGIDs.size()),
439 rowMapP->getIndexBase(),
440 rowMapP->getComm());
441 } // End of scope for colMapOrdering and colGIDs
442
443 std::vector<size_t> strideInfo(1);
444 strideInfo[0] = BlkSize;
445 RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO,GO,NO>::Build(domainMapP,
446 strideInfo);
447
448 GO gnnzP = 0;
449 LO lnnzP = 0;
450 // coarse points have one nnz per row
451 gnnzP += gNumCoarseNodes;
452 lnnzP += lNumCoarseNodes;
453 // add all other points multiplying by 2^numDimensions
454 gnnzP += (gNumFineNodes - gNumCoarseNodes)*std::pow(2, numDimensions);
455 lnnzP += (lNumFineNodes - lNumCoarseNodes)*std::pow(2, numDimensions);
456 // finally multiply by the number of dofs per node
457 gnnzP = gnnzP*BlkSize;
458 lnnzP = lnnzP*BlkSize;
459
460 // Create the matrix itself using the above maps
461 RCP<Matrix> P;
462 P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
463 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
464
465 ArrayRCP<size_t> iaP;
466 ArrayRCP<LO> jaP;
467 ArrayRCP<SC> valP;
468
469 PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
470
471 ArrayView<size_t> ia = iaP();
472 ArrayView<LO> ja = jaP();
473 ArrayView<SC> val = valP();
474 ia[0] = 0;
475
476
477 LO numCoarseElements = 1;
478 Array<LO> lCoarseElementsPerDir(3);
479 for(LO dim = 0; dim < numDimensions; ++dim) {
480 lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
481 if(ghostInterface[2*dim]) {++lCoarseElementsPerDir[dim];}
482 if(!ghostInterface[2*dim+1]) {--lCoarseElementsPerDir[dim];}
483 numCoarseElements = numCoarseElements*lCoarseElementsPerDir[dim];
484 }
485
486 for(LO dim = numDimensions; dim < 3; ++dim) {
487 lCoarseElementsPerDir[dim] = 1;
488 }
489
490 // Loop over the coarse elements
491 Array<int> elementFlags(3);
492 Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
493 Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
494 const int numCoarseNodesInElement = std::pow(2, numDimensions);
495 const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
496 const int numRowsPerPoint = BlkSize;
497 for(elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
498 for(elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
499 for(elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
500 elementFlags[0] = 0; elementFlags[1] = 0; elementFlags[2] = 0;
501 for(int dim = 0; dim < 3; ++dim) {
502 // Detect boundary conditions on the element and set corresponding flags.
503 if(elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
504 elementFlags[dim] = boundaryFlags[dim];
505 } else if(elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
506 elementFlags[dim] += 1;
507 } else if((elemInds[dim] == lCoarseElementsPerDir[dim] - 1)
508 && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
509 elementFlags[dim] += 2;
510 } else {
511 elementFlags[dim] = 0;
512 }
513
514 // Compute the number of nodes in the current element.
515 if(dim < numDimensions) {
516 if((elemInds[dim] == lCoarseElementsPerDir[dim])
517 && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
518 elementNodesPerDir[dim] = endRate[dim] + 1;
519 } else {
520 elementNodesPerDir[dim] = coarseRate[dim] + 1;
521 }
522 } else {
523 elementNodesPerDir[dim] = 1;
524 }
525
526 // Get the lowest tuple of the element using the ghosted local coordinate system
527 glElementRefTuple[dim] = elemInds[dim]*coarseRate[dim];
528 glElementRefTupleCG[dim] = elemInds[dim];
529 }
530
531 // Now get the column map indices corresponding to the dofs associated with the current
532 // element's coarse nodes.
533 for(typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
534 glElementCoarseNodeCG[elem]
535 = glElementRefTupleCG[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
536 + glElementRefTupleCG[1]*glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
537 }
538 glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
539 glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
540 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
541 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
542
543 glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
544 glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
545 glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
546 glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
547
548 glElementCoarseNodeCG[1] += 1;
549 glElementCoarseNodeCG[3] += 1;
550 glElementCoarseNodeCG[5] += 1;
551 glElementCoarseNodeCG[7] += 1;
552
553 LO numNodesInElement = elementNodesPerDir[0]*elementNodesPerDir[1]*elementNodesPerDir[2];
554 // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
555 // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
556
557 // Compute the element prolongator
558 Teuchos::SerialDenseMatrix<LO,SC> Pi, Pf, Pe;
559 Array<LO> dofType(numNodesInElement*BlkSize), lDofInd(numNodesInElement*BlkSize);
560 ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
561 numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
562 lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
563 blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
564 Pi, Pf, Pe, dofType, lDofInd);
565
566 // Find ghosted LID associated with nodes in the element and eventually which of these
567 // nodes are ghosts, this information is used to fill the local prolongator.
568 Array<LO> lNodeLIDs(numNodesInElement);
569 {
570 Array<LO> lNodeTuple(3), nodeInd(3);
571 for(nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
572 for(nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
573 for(nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
574 int stencilLength = 0;
575 if((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
576 (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
577 (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
578 stencilLength = 1;
579 } else {
580 stencilLength = std::pow(2, numDimensions);
581 }
582 LO nodeElementInd = nodeInd[2]*elementNodesPerDir[1]*elementNodesPerDir[1]
583 + nodeInd[1]*elementNodesPerDir[0] + nodeInd[0];
584 for(int dim = 0; dim < 3; ++dim) {
585 lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
586 }
587 if(lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
588 lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
589 lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
590 lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
591 // This flags the ghosts nodes used for prolongator calculation but for which
592 // we do not own the row, hence we won't fill these values on this rank.
593 lNodeLIDs[nodeElementInd] = -1;
594 } else if ((nodeInd[0] == 0 && elemInds[0] !=0) ||
595 (nodeInd[1] == 0 && elemInds[1] !=0) ||
596 (nodeInd[2] == 0 && elemInds[2] !=0)) {
597 // This flags nodes that are owned but common to two coarse elements and that
598 // were already filled by another element, we don't want to fill them twice so
599 // we skip them
600 lNodeLIDs[nodeElementInd] = -2;
601 } else {
602 // The remaining nodes are locally owned and we need to fill the coresponding
603 // rows of the prolongator
604
605 // First we need to find which row needs to be filled
606 lNodeLIDs[nodeElementInd] = lNodeTuple[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0]
607 + lNodeTuple[1]*lFineNodesPerDir[0] + lNodeTuple[0];
608
609 // We also compute the row offset using arithmetic to ensure that we can loop
610 // easily over the nodes in a macro-element as well as facilitate on-node
611 // parallelization. The node serial code could be rewritten with two loops over
612 // the local part of the mesh to avoid the costly integer divisions...
613 Array<LO> refCoarsePointTuple(3);
614 for(int dim = 2; dim > -1; --dim) {
615 if(dim == 0) {
616 refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
617 if(myOffset[dim] == 0) {
618 ++refCoarsePointTuple[dim];
619 }
620 } else {
621 // Note: no need for magnitudeType here, just use double because these things are LO's
622 refCoarsePointTuple[dim] =
623 std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim])
624 / coarseRate[dim]);
625 }
626 if((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {break;}
627 }
628 const LO numCoarsePoints = refCoarsePointTuple[0]
629 + refCoarsePointTuple[1]*lCoarseNodesPerDir[0]
630 + refCoarsePointTuple[2]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0];
631 const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
632
633 // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
634 // fill row 'n' to 'n+BlkSize'.
635 size_t rowPtr = (numFinePoints - numCoarsePoints)*numRowsPerPoint
636 *numCoarseNodesInElement*nnzPerCoarseNode + numCoarsePoints*numRowsPerPoint;
637 if(dofType[nodeElementInd*BlkSize] == 0) {
638 // Check if current node is a coarse point
639 rowPtr = rowPtr - numRowsPerPoint;
640 } else {
641 rowPtr = rowPtr - numRowsPerPoint*numCoarseNodesInElement*nnzPerCoarseNode;
642 }
643
644 for(int dof = 0; dof < BlkSize; ++dof) {
645
646 // Now we loop over the stencil and fill the column indices and row values
647 int cornerInd = 0;
648 switch(dofType[nodeElementInd*BlkSize + dof]) {
649 case 0: // Corner node
650 if(nodeInd[2] == elementNodesPerDir[2] - 1) {cornerInd += 4;}
651 if(nodeInd[1] == elementNodesPerDir[1] - 1) {cornerInd += 2;}
652 if(nodeInd[0] == elementNodesPerDir[0] - 1) {cornerInd += 1;}
653 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1] = rowPtr + dof + 1;
654 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]]*BlkSize + dof;
655 val[rowPtr + dof] = 1.0;
656 break;
657
658 case 1: // Edge node
659 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
660 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
661 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
662 if(blockStrategy == "coupled") {
663 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
664 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
665 + ind1*BlkSize + ind2;
666 ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
667 val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
668 ind1*BlkSize + ind2);
669 }
670 } else if(blockStrategy == "uncoupled") {
671 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
672 + ind1;
673 ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
674 val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
675 ind1*BlkSize + dof);
676 }
677 }
678 break;
679
680 case 2: // Face node
681 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
682 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
683 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
684 if(blockStrategy == "coupled") {
685 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
686 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
687 + ind1*BlkSize + ind2;
688 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
689 val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
690 ind1*BlkSize + ind2);
691 }
692 } else if(blockStrategy == "uncoupled") {
693 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
694 + ind1;
695 // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
696 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
697 val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
698 ind1*BlkSize + dof);
699 }
700 }
701 break;
702
703 case 3: // Interior node
704 ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
705 = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
706 for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
707 if(blockStrategy == "coupled") {
708 for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
709 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
710 + ind1*BlkSize + ind2;
711 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
712 val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
713 ind1*BlkSize + ind2);
714 }
715 } else if(blockStrategy == "uncoupled") {
716 size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
717 + ind1;
718 ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
719 val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
720 ind1*BlkSize + dof);
721 }
722 }
723 break;
724 }
725 }
726 }
727 }
728 }
729 }
730 } // End of scopt for lNodeTuple and nodeInd
731 }
732 }
733 }
734
735 // Sort all row's column indicies and entries by LID
736 Xpetra::CrsMatrixUtils<SC,LO,GO,NO>::sortCrsEntries(ia, ja, val, rowMapP->lib());
737
738 // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
739 PCrs->setAllValues(iaP, jaP, valP);
740 PCrs->expertStaticFillComplete(domainMapP,rowMapP);
741
742 // set StridingInformation of P
743 if (A->IsView("stridedMaps") == true) {
744 P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
745 } else {
746 P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
747 }
748
749 // store the transfer operator and node coordinates on coarse level
750 Set(coarseLevel, "P", P);
751 Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
752 Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
753 Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
754
755 }
756
757 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
759 GetGeometricData(RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> >& coordinates,
760 const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
761 const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
762 Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
763 Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
764 Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
765 Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
766 ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
767 RCP<NodesIDs> ghostedCoarseNodes) const {
768 // This function is extracting the geometric information from the coordinates
769 // and creates the necessary data/formatting to perform locally the calculation
770 // of the pronlongator.
771 //
772 // inputs:
773
774 RCP<const Map> coordinatesMap = coordinates->getMap();
775 LO numDimensions = coordinates->getNumVectors();
776
777 // Using the coarsening rate and the fine level data,
778 // compute coarse level data
779
780 // Phase 1 //
781 // ------------------------------------------------------------------ //
782 // We first start by finding small informations on the mesh such as //
783 // the number of coarse nodes (local and global) and the number of //
784 // ghost nodes / the end rate of coarsening. //
785 // ------------------------------------------------------------------ //
786 GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
787 {
788 GO tmp;
789 gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
790 tmp = minGlobalIndex % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
791 gIndices[1] = tmp / gFineNodesPerDir[0];
792 gIndices[0] = tmp % gFineNodesPerDir[0];
793
794 myOffset[2] = gIndices[2] % coarseRate[2];
795 myOffset[1] = gIndices[1] % coarseRate[1];
796 myOffset[0] = gIndices[0] % coarseRate[0];
797 }
798
799 for(int dim = 0; dim < 3; ++dim) {
800 if(gIndices[dim] == 0) {
801 boundaryFlags[dim] += 1;
802 }
803 if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
804 boundaryFlags[dim] += 2;
805 }
806 }
807
808 // Check whether ghost nodes are needed in each direction
809 for(LO i=0; i < numDimensions; ++i) {
810 if((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
811 ghostInterface[2*i] = true;
812 }
813 if(((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i])
814 && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
815 ghostInterface[2*i + 1] = true;
816 }
817 }
818
819 for(LO i = 0; i < 3; ++i) {
820 if(i < numDimensions) {
821 lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
822 if(myOffset[i] == 0) { ++lCoarseNodesPerDir[i]; }
823 gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
824 endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
825 if(endRate[i] == 0) {
826 ++gCoarseNodesPerDir[i];
827 endRate[i] = coarseRate[i];
828 }
829 } else {
830 // Most quantities need to be set to 1 for extra dimensions
831 // this is rather logical, an x-y plane is like a single layer
832 // of nodes in the z direction...
833 gCoarseNodesPerDir[i] = 1;
834 lCoarseNodesPerDir[i] = 1;
835 endRate[i] = 1;
836 }
837 }
838
839 gNumCoarseNodes = gCoarseNodesPerDir[0]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[2];
840 lNumCoarseNodes = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];
841
842 // For each direction, determine how many ghost points are required.
843 LO lNumGhostNodes = 0;
844 Array<GO> startGhostedCoarseNode(3);
845 {
846 const int complementaryIndices[3][2] = {{1,2}, {0,2}, {0,1}};
847 LO tmp = 0;
848 for(LO i = 0; i < 3; ++i) {
849 // The first branch of this if-statement will be used if the rank contains only one layer
850 // of nodes in direction i, that layer must also coincide with the boundary of the mesh
851 // and coarseRate[i] == endRate[i]...
852 if((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
853 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
854 } else {
855 startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
856 }
857 // First we load the number of locally owned coarse nodes
858 glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
859
860 // Check whether a face in direction i needs ghost nodes
861 if(ghostInterface[2*i] || ghostInterface[2*i+1]) {
862 ++glCoarseNodesPerDir[i];
863 if(i == 0) {tmp = lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];}
864 if(i == 1) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[2];}
865 if(i == 2) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1];}
866 }
867 // If both faces in direction i need nodes, double the number of ghost nodes
868 if(ghostInterface[2*i] && ghostInterface[2*i+1]) {
869 ++glCoarseNodesPerDir[i];
870 tmp = 2*tmp;
871 }
872 lNumGhostNodes += tmp;
873
874 // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
875 for(LO j = 0; j < 2; ++j) {
876 for(LO k = 0; k < 2; ++k) {
877 // Check if two adjoining faces need ghost nodes and then add their common edge
878 if(ghostInterface[2*complementaryIndices[i][0]+j]
879 && ghostInterface[2*complementaryIndices[i][1]+k]) {
880 lNumGhostNodes += lCoarseNodesPerDir[i];
881 // Add corners if three adjoining faces need ghost nodes,
882 // but add them only once! Hence when i == 0.
883 if(ghostInterface[2*i] && (i == 0)) { lNumGhostNodes += 1; }
884 if(ghostInterface[2*i+1] && (i == 0)) { lNumGhostNodes += 1; }
885 }
886 }
887 }
888 tmp = 0;
889 }
890 } // end of scope for tmp and complementaryIndices
891
892 const LO lNumGhostedNodes = glCoarseNodesPerDir[0]*glCoarseNodesPerDir[1]
893 *glCoarseNodesPerDir[2];
894 ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
895 ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
896 ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
897 ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
898 ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
899
900 // We loop over all ghosted coarse nodes by increasing global lexicographic order
901 Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
902 LO currentIndex = -1;
903 for(ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
904 for(ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
905 for(ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
906 currentIndex = ijk[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
907 + ijk[1]*glCoarseNodesPerDir[0] + ijk[0];
908 coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
909 coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0]*coarseRate[0];
910 if(coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
911 coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
912 }
913 coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
914 coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1]*coarseRate[1];
915 if(coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
916 coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
917 }
918 coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
919 coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2]*coarseRate[2];
920 if(coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
921 coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
922 }
923 GO myGID = 0, myCoarseGID = -1;
924 GO factor[3] = {};
925 factor[2] = gFineNodesPerDir[1]*gFineNodesPerDir[0];
926 factor[1] = gFineNodesPerDir[0];
927 factor[0] = 1;
928 for(int dim = 0; dim < 3; ++dim) {
929 if(dim < numDimensions) {
930 if(gIndices[dim] - myOffset[dim] + ijk[dim]*coarseRate[dim]
931 < gFineNodesPerDir[dim] - 1) {
932 myGID += (gIndices[dim] - myOffset[dim]
933 + ijk[dim]*coarseRate[dim])*factor[dim];
934 } else {
935 myGID += (gIndices[dim] - myOffset[dim]
936 + (ijk[dim] - 1)*coarseRate[dim] + endRate[dim])*factor[dim];
937 }
938 }
939 }
940 myCoarseGID = coarseNodeCoarseIndices[0]
941 + coarseNodeCoarseIndices[1]*gCoarseNodesPerDir[0]
942 + coarseNodeCoarseIndices[2]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
943 ghostedCoarseNodes->GIDs[currentIndex] = myGID;
944 ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
945 }
946 }
947 }
948 coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
949 ghostedCoarseNodes->PIDs(),
950 ghostedCoarseNodes->LIDs());
951
952 // Phase 2 //
953 // ------------------------------------------------------------------ //
954 // Build a list of GIDs to import the required ghost nodes. //
955 // The ordering of the ghosts nodes will be as natural as possible, //
956 // i.e. it should follow the GID ordering of the mesh. //
957 // ------------------------------------------------------------------ //
958
959 // Saddly we have to more or less redo what was just done to figure out the value of
960 // lNumGhostNodes, there might be some optimization possibility here...
961 ghostGIDs.resize(lNumGhostNodes);
962 LO countGhosts = 0;
963 // Get the GID of the first point on the current partition.
964 GO startingGID = minGlobalIndex;
965 Array<GO> startingIndices(3);
966 // We still want ghost nodes even if have with a 0 offset,
967 // except when we are on a boundary
968 if(ghostInterface[4] && (myOffset[2] == 0)) {
969 if(gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
970 startingGID -= endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
971 } else {
972 startingGID -= coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
973 }
974 } else {
975 startingGID -= myOffset[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
976 }
977 if(ghostInterface[2] && (myOffset[1] == 0)) {
978 if(gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
979 startingGID -= endRate[1]*gFineNodesPerDir[0];
980 } else {
981 startingGID -= coarseRate[1]*gFineNodesPerDir[0];
982 }
983 } else {
984 startingGID -= myOffset[1]*gFineNodesPerDir[0];
985 }
986 if(ghostInterface[0] && (myOffset[0] == 0)) {
987 if(gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
988 startingGID -= endRate[0];
989 } else {
990 startingGID -= coarseRate[0];
991 }
992 } else {
993 startingGID -= myOffset[0];
994 }
995
996 { // scope for tmp
997 GO tmp;
998 startingIndices[2] = startingGID / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
999 tmp = startingGID % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
1000 startingIndices[1] = tmp / gFineNodesPerDir[0];
1001 startingIndices[0] = tmp % gFineNodesPerDir[0];
1002 }
1003
1004 GO ghostOffset[3] = {0, 0, 0};
1005 LO lengthZero = lCoarseNodesPerDir[0];
1006 LO lengthOne = lCoarseNodesPerDir[1];
1007 LO lengthTwo = lCoarseNodesPerDir[2];
1008 if(ghostInterface[0]) {++lengthZero;}
1009 if(ghostInterface[1]) {++lengthZero;}
1010 if(ghostInterface[2]) {++lengthOne;}
1011 if(ghostInterface[3]) {++lengthOne;}
1012 if(ghostInterface[4]) {++lengthTwo;}
1013 if(ghostInterface[5]) {++lengthTwo;}
1014
1015 // First check the bottom face as it will have the lowest GIDs
1016 if(ghostInterface[4]) {
1017 ghostOffset[2] = startingGID;
1018 for(LO j = 0; j < lengthOne; ++j) {
1019 if( (j == lengthOne-1)
1020 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1021 ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1022 } else {
1023 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1024 }
1025 for(LO k = 0; k < lengthZero; ++k) {
1026 if( (k == lengthZero-1)
1027 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1028 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1029 } else {
1030 ghostOffset[0] = k*coarseRate[0];
1031 }
1032 // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1033 // carefully.
1034 // This if statement is repeated each time a ghost node is selected.
1035 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1036 ++countGhosts;
1037 }
1038 }
1039 }
1040
1041 // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1042 // nodes located on these layers.
1043 for(LO i = 0; i < lengthTwo; ++i) {
1044 // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1045 // swept seperatly for efficiency.
1046 if( !((i == lengthTwo-1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4]) ) {
1047 // Set the ghostOffset in direction 2 taking into account a possible endRate different
1048 // from the regular coarseRate.
1049 if( (i == lengthTwo-1) && !ghostInterface[5] ) {
1050 ghostOffset[2] = startingGID + ((i-1)*coarseRate[2] + endRate[2])
1051 *gFineNodesPerDir[1]*gFineNodesPerDir[0];
1052 } else {
1053 ghostOffset[2] = startingGID + i*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1054 }
1055 for(LO j = 0; j < lengthOne; ++j) {
1056 if( (j == 0) && ghostInterface[2] ) {
1057 for(LO k = 0; k < lengthZero; ++k) {
1058 if( (k == lengthZero-1)
1059 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1060 if(k == 0) {
1061 ghostOffset[0] = endRate[0];
1062 } else {
1063 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1064 }
1065 } else {
1066 ghostOffset[0] = k*coarseRate[0];
1067 }
1068 // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1069 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1070 ++countGhosts;
1071 }
1072 } else if( (j == lengthOne-1) && ghostInterface[3] ) {
1073 // Set the ghostOffset in direction 1 taking into account a possible endRate different
1074 // from the regular coarseRate.
1075 if( (j == lengthOne-1)
1076 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1077 ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1078 } else {
1079 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1080 }
1081 for(LO k = 0; k < lengthZero; ++k) {
1082 if( (k == lengthZero-1)
1083 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1084 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1085 } else {
1086 ghostOffset[0] = k*coarseRate[0];
1087 }
1088 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1089 ++countGhosts;
1090 }
1091 } else {
1092 // Set ghostOffset[1] for side faces sweep
1093 if( (j == lengthOne-1)
1094 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1095 ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1096 } else {
1097 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1098 }
1099
1100 // Set ghostOffset[0], ghostGIDs and countGhosts
1101 if(ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1102 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1103 ++countGhosts;
1104 }
1105 if(ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1106 if( (startingIndices[0] + (lengthZero-1)*coarseRate[0]) > gFineNodesPerDir[0] - 1 ) {
1107 if(lengthZero > 2) {
1108 ghostOffset[0] = (lengthZero-2)*coarseRate[0] + endRate[0];
1109 } else {
1110 ghostOffset[0] = endRate[0];
1111 }
1112 } else {
1113 ghostOffset[0] = (lengthZero-1)*coarseRate[0];
1114 }
1115 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1116 ++countGhosts;
1117 }
1118 }
1119 }
1120 }
1121 }
1122
1123 // Finally check the top face
1124 if(ghostInterface[5]) {
1125 if( startingIndices[2] + (lengthTwo-1)*coarseRate[2] + 1 > gFineNodesPerDir[2] ) {
1126 ghostOffset[2] = startingGID
1127 + ((lengthTwo-2)*coarseRate[2] + endRate[2])*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1128 } else {
1129 ghostOffset[2] = startingGID
1130 + (lengthTwo-1)*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1131 }
1132 for(LO j = 0; j < lengthOne; ++j) {
1133 if( (j == lengthOne-1)
1134 && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1135 ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1136 } else {
1137 ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1138 }
1139 for(LO k = 0; k < lengthZero; ++k) {
1140 if( (k == lengthZero-1)
1141 && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1142 ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1143 } else {
1144 ghostOffset[0] = k*coarseRate[0];
1145 }
1146 ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1147 ++countGhosts;
1148 }
1149 }
1150 }
1151
1152 // Phase 3 //
1153 // ------------------------------------------------------------------ //
1154 // Final phase of this function, lists are being built to create the //
1155 // column and domain maps of the projection as well as the map and //
1156 // arrays for the coarse coordinates multivector. //
1157 // ------------------------------------------------------------------ //
1158
1159 Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1160 LO currentNode, offset2, offset1, offset0;
1161 // Find the GIDs of the coarse nodes on the partition.
1162 for(LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1163 if(myOffset[2] == 0) {
1164 offset2 = startingIndices[2] + myOffset[2];
1165 } else {
1166 if(startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1167 offset2 = startingIndices[2] + endRate[2];
1168 } else {
1169 offset2 = startingIndices[2] + coarseRate[2];
1170 }
1171 }
1172 if(offset2 + ind2*coarseRate[2] > gFineNodesPerDir[2] - 1) {
1173 offset2 += (ind2 - 1)*coarseRate[2] + endRate[2];
1174 } else {
1175 offset2 += ind2*coarseRate[2];
1176 }
1177 offset2 = offset2*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1178
1179 for(LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1180 if(myOffset[1] == 0) {
1181 offset1 = startingIndices[1] + myOffset[1];
1182 } else {
1183 if(startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1184 offset1 = startingIndices[1] + endRate[1];
1185 } else {
1186 offset1 = startingIndices[1] + coarseRate[1];
1187 }
1188 }
1189 if(offset1 + ind1*coarseRate[1] > gFineNodesPerDir[1] - 1) {
1190 offset1 += (ind1 - 1)*coarseRate[1] + endRate[1];
1191 } else {
1192 offset1 += ind1*coarseRate[1];
1193 }
1194 offset1 = offset1*gFineNodesPerDir[0];
1195 for(LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1196 offset0 = startingIndices[0];
1197 if(myOffset[0] == 0) {
1198 offset0 += ind0*coarseRate[0];
1199 } else {
1200 offset0 += (ind0 + 1)*coarseRate[0];
1201 }
1202 if(offset0 > gFineNodesPerDir[0] - 1) {offset0 += endRate[0] - coarseRate[0];}
1203
1204 currentNode = ind2*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]
1205 + ind1*lCoarseNodesPerDir[0]
1206 + ind0;
1207 gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1208 }
1209 }
1210 }
1211
1212 // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1213 // and the corresponding dofs that will need to be added to colMapP.
1214 colGIDs.resize(BlkSize*(lNumCoarseNodes+lNumGhostNodes));
1215 coarseNodesGIDs.resize(lNumCoarseNodes);
1216 for(LO i = 0; i < numDimensions; ++i) {coarseNodes[i].resize(lNumCoarseNodes);}
1217 GO fineNodesPerCoarseSlab = coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1218 GO fineNodesEndCoarseSlab = endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1219 GO fineNodesPerCoarsePlane = coarseRate[1]*gFineNodesPerDir[0];
1220 GO fineNodesEndCoarsePlane = endRate[1]*gFineNodesPerDir[0];
1221 GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
1222 GO gCoarseNodeOnCoarseGridGID;
1223 LO gInd[3], lCol;
1224 Array<int> ghostPIDs (lNumGhostNodes);
1225 Array<LO> ghostLIDs (lNumGhostNodes);
1226 Array<LO> ghostPermut(lNumGhostNodes);
1227 for(LO k = 0; k < lNumGhostNodes; ++k) {ghostPermut[k] = k;}
1228 coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1229 sh_sort_permute(ghostPIDs.begin(),ghostPIDs.end(), ghostPermut.begin(),ghostPermut.end());
1230
1231 { // scope for tmpInds, tmpVars and tmp.
1232 GO tmpInds[3], tmpVars[2];
1233 LO tmp;
1234 // Loop over the coarse nodes of the partition and add them to colGIDs
1235 // that will be used to construct the column and domain maps of P as well
1236 // as to construct the coarse coordinates map.
1237 // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1238 // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1239 LO col = 0;
1240 LO firstCoarseNodeInds[3], currentCoarseNode;
1241 for(LO dim = 0; dim < 3; ++dim) {
1242 if(myOffset[dim] == 0) {
1243 firstCoarseNodeInds[dim] = 0;
1244 } else {
1245 firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1246 }
1247 }
1248 Array<ArrayRCP<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
1249 for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim);}
1250 for(LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1251 for(LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1252 for(LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1253 col = k*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0] + j*lCoarseNodesPerDir[0] + i;
1254
1255 // Check for endRate
1256 currentCoarseNode = 0;
1257 if(firstCoarseNodeInds[0] + i*coarseRate[0] > lFineNodesPerDir[0] - 1) {
1258 currentCoarseNode += firstCoarseNodeInds[0] + (i-1)*coarseRate[0] + endRate[0];
1259 } else {
1260 currentCoarseNode += firstCoarseNodeInds[0] + i*coarseRate[0];
1261 }
1262 if(firstCoarseNodeInds[1] + j*coarseRate[1] > lFineNodesPerDir[1] - 1) {
1263 currentCoarseNode += (firstCoarseNodeInds[1] + (j-1)*coarseRate[1] + endRate[1])
1264 *lFineNodesPerDir[0];
1265 } else {
1266 currentCoarseNode += (firstCoarseNodeInds[1] + j*coarseRate[1])*lFineNodesPerDir[0];
1267 }
1268 if(firstCoarseNodeInds[2] + k*coarseRate[2] > lFineNodesPerDir[2] - 1) {
1269 currentCoarseNode += (firstCoarseNodeInds[2] + (k-1)*coarseRate[2] + endRate[2])
1270 *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1271 } else {
1272 currentCoarseNode += (firstCoarseNodeInds[2] + k*coarseRate[2])
1273 *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1274 }
1275 // Load coordinates
1276 for(LO dim = 0; dim < numDimensions; ++dim) {
1277 coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1278 }
1279
1280 if((endRate[2] != coarseRate[2])
1281 && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2)*fineNodesPerCoarseSlab
1282 + fineNodesEndCoarseSlab - 1)) {
1283 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1284 tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1)*fineNodesPerCoarseSlab
1285 - fineNodesEndCoarseSlab;
1286 } else {
1287 tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1288 tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1289 }
1290 if((endRate[1] != coarseRate[1])
1291 && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1292 + fineNodesEndCoarsePlane - 1)) {
1293 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1294 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1295 - fineNodesEndCoarsePlane;
1296 } else {
1297 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1298 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1299 }
1300 if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1301 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1302 } else {
1303 tmpInds[0] = tmpVars[1] / coarseRate[0];
1304 }
1305 gInd[2] = col / (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1306 tmp = col % (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1307 gInd[1] = tmp / lCoarseNodesPerDir[0];
1308 gInd[0] = tmp % lCoarseNodesPerDir[0];
1309 lCol = gInd[2]*(lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0])
1310 + gInd[1]*lCoarseNodesPerDir[0] + gInd[0];
1311 gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1312 + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1313 coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1314 for(LO dof = 0; dof < BlkSize; ++dof) {
1315 colGIDs[BlkSize*lCol + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1316 }
1317 }
1318 }
1319 }
1320 // Now loop over the ghost nodes of the partition to add them to colGIDs
1321 // since they will need to be included in the column map of P
1322 for(col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1323 if((endRate[2] != coarseRate[2])
1324 && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2)
1325 *fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1326 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1327 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]]
1328 - (tmpInds[2] - 1)*fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1329 } else {
1330 tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1331 tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1332 }
1333 if((endRate[1] != coarseRate[1])
1334 && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1335 + fineNodesEndCoarsePlane - 1)) {
1336 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1337 tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1338 - fineNodesEndCoarsePlane;
1339 } else {
1340 tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1341 tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1342 }
1343 if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1344 tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1345 } else {
1346 tmpInds[0] = tmpVars[1] / coarseRate[0];
1347 }
1348 gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1349 + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1350 for(LO dof = 0; dof < BlkSize; ++dof) {
1351 colGIDs[BlkSize*col + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1352 }
1353 }
1354 } // End of scope for tmpInds, tmpVars and tmp
1355
1356 } // GetGeometricData()
1357
1358 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1360 ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1361 const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1362 const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1363 const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1364 const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1365 const Array<bool> ghostInterface, const Array<int> elementFlags,
1366 const std::string stencilType, const std::string /* blockStrategy */,
1367 const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1368 const Array<GO> /* colGIDs */,
1369 Teuchos::SerialDenseMatrix<LO,SC>& Pi, Teuchos::SerialDenseMatrix<LO,SC>& Pf,
1370 Teuchos::SerialDenseMatrix<LO,SC>& Pe, Array<LO>& dofType,
1371 Array<LO>& lDofInd) const {
1372
1373 // First extract data from Aghost and move it to the corresponding dense matrix
1374 // This step requires to compute the number of nodes (resp dofs) in the current
1375 // coarse element, then allocate storage for local dense matrices, finally loop
1376 // over the dofs and extract the corresponding row in Aghost before dispactching
1377 // its data to the dense matrices.
1378 // At the same time we want to compute a mapping from the element indexing to
1379 // group indexing. The groups are the following: corner, edge, face and interior
1380 // nodes. We uses these groups to operate on the dense matrices but need to
1381 // store the nodes in their original order after groupd operations are completed.
1382 LO countInterior=0, countFace=0, countEdge=0, countCorner =0;
1383 Array<LO> collapseDir(numNodesInElement*BlkSize);
1384 for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1385 for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1386 for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1387 // Check for corner node
1388 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1389 && (je == 0 || je == elementNodesPerDir[1]-1)
1390 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1391 for(LO dof = 0; dof < BlkSize; ++dof) {
1392 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1393 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1394 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1395 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countCorner + dof;
1396 }
1397 ++countCorner;
1398
1399 // Check for edge node
1400 } else if (((ke == 0 || ke == elementNodesPerDir[2]-1)
1401 && (je == 0 || je == elementNodesPerDir[1]-1))
1402 || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1403 && (ie == 0 || ie == elementNodesPerDir[0]-1))
1404 || ((je == 0 || je == elementNodesPerDir[1]-1)
1405 && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1406 for(LO dof = 0; dof < BlkSize; ++dof) {
1407 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1408 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1409 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1410 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countEdge + dof;
1411 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1412 && (je == 0 || je == elementNodesPerDir[1]-1)) {
1413 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1414 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1415 } else if((ke == 0 || ke == elementNodesPerDir[2]-1)
1416 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1417 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1418 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1419 } else if((je == 0 || je == elementNodesPerDir[1]-1
1420 ) && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1421 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1422 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1423 }
1424 }
1425 ++countEdge;
1426
1427 // Check for face node
1428 } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1429 || (je == 0 || je == elementNodesPerDir[1]-1)
1430 || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1431 for(LO dof = 0; dof < BlkSize; ++dof) {
1432 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1433 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1434 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1435 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countFace + dof;
1436 if(ke == 0 || ke == elementNodesPerDir[2]-1) {
1437 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1438 + je*elementNodesPerDir[0] + ie) + dof] = 2;
1439 } else if(je == 0 || je == elementNodesPerDir[1]-1) {
1440 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1441 + je*elementNodesPerDir[0] + ie) + dof] = 1;
1442 } else if(ie == 0 || ie == elementNodesPerDir[0]-1) {
1443 collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1444 + je*elementNodesPerDir[0] + ie) + dof] = 0;
1445 }
1446 }
1447 ++countFace;
1448
1449 // Otherwise it is an interior node
1450 } else {
1451 for(LO dof = 0; dof < BlkSize; ++dof) {
1452 dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1453 + je*elementNodesPerDir[0] + ie) + dof] = 3;
1454 lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1455 + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countInterior +dof;
1456 }
1457 ++countInterior;
1458 }
1459 }
1460 }
1461 }
1462
1463 LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1464 numInteriorNodes = (elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1465 *(elementNodesPerDir[2] - 2);
1466 numFaceNodes = 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1467 + 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[2] - 2)
1468 + 2*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[2] - 2);
1469 numEdgeNodes = 4*(elementNodesPerDir[0] - 2) + 4*(elementNodesPerDir[1] - 2)
1470 + 4*(elementNodesPerDir[2] - 2);
1471 // Diagonal blocks
1472 Teuchos::SerialDenseMatrix<LO,SC> Aii(BlkSize*numInteriorNodes, BlkSize*numInteriorNodes);
1473 Teuchos::SerialDenseMatrix<LO,SC> Aff(BlkSize*numFaceNodes, BlkSize*numFaceNodes);
1474 Teuchos::SerialDenseMatrix<LO,SC> Aee(BlkSize*numEdgeNodes, BlkSize*numEdgeNodes);
1475 // Upper triangular blocks
1476 Teuchos::SerialDenseMatrix<LO,SC> Aif(BlkSize*numInteriorNodes, BlkSize*numFaceNodes);
1477 Teuchos::SerialDenseMatrix<LO,SC> Aie(BlkSize*numInteriorNodes, BlkSize*numEdgeNodes);
1478 Teuchos::SerialDenseMatrix<LO,SC> Afe(BlkSize*numFaceNodes, BlkSize*numEdgeNodes);
1479 // Coarse nodes "right hand sides" and local prolongators
1480 Teuchos::SerialDenseMatrix<LO,SC> Aic(BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1481 Teuchos::SerialDenseMatrix<LO,SC> Afc(BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1482 Teuchos::SerialDenseMatrix<LO,SC> Aec(BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1483 Pi.shape( BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1484 Pf.shape( BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1485 Pe.shape( BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1486
1487 ArrayView<const LO> rowIndices;
1488 ArrayView<const SC> rowValues;
1489 LO idof, iInd, jInd;
1490 int iType = 0, jType = 0;
1491 int orientation = -1;
1492 int collapseFlags[3] = {};
1493 Array<SC> stencil((std::pow(3,numDimensions))*BlkSize);
1494 // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1495 // we might want to fuse the first three loops and do integer arithmetic
1496 // to have more optimization during compilation...
1497 for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1498 for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1499 for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1500 collapseFlags[0] = 0; collapseFlags[1] = 0; collapseFlags[2] = 0;
1501 if((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1502 collapseFlags[0] += 1;
1503 }
1504 if((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1505 collapseFlags[0] += 2;
1506 }
1507 if((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1508 collapseFlags[1] += 1;
1509 }
1510 if((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1511 collapseFlags[1] += 2;
1512 }
1513 if((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1514 collapseFlags[2] += 1;
1515 }
1516 if((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1517 collapseFlags[2] += 2;
1518 }
1519
1520 // Based on (ie, je, ke) we detect the type of node we are dealing with
1521 GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1522 for(LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1523 // Compute the dof index for each dof at point (ie, je, ke)
1524 idof = ((elemInds[2]*coarseRate[2] + ke)*lFineNodesPerDir[1]*lFineNodesPerDir[0]
1525 + (elemInds[1]*coarseRate[1] + je)*lFineNodesPerDir[0]
1526 + elemInds[0]*coarseRate[0] + ie)*BlkSize + dof0;
1527 Aghost->getLocalRowView(idof, rowIndices, rowValues);
1528 FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1529 elementNodesPerDir, collapseFlags, stencilType, stencil);
1530 LO io, jo, ko;
1531 if(iType == 3) {// interior node, no stencil collapse needed
1532 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1533 // Find (if, jf, kf) the indices associated with the interacting node
1534 ko = ke + (interactingNode / 9 - 1);
1535 {
1536 LO tmp = interactingNode % 9;
1537 jo = je + (tmp / 3 - 1);
1538 io = ie + (tmp % 3 - 1);
1539 int dummy;
1540 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1541 }
1542 if((io > -1 && io < elementNodesPerDir[0]) &&
1543 (jo > -1 && jo < elementNodesPerDir[1]) &&
1544 (ko > -1 && ko < elementNodesPerDir[2])) {
1545 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1546 if (jType == 3) {
1547 Aii(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1548 stencil[interactingNode*BlkSize + dof1];
1549 } else if(jType == 2) {
1550 Aif(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1551 stencil[interactingNode*BlkSize + dof1];
1552 } else if(jType == 1) {
1553 Aie(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1554 stencil[interactingNode*BlkSize + dof1];
1555 } else if(jType == 0) {
1556 Aic(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1557 -stencil[interactingNode*BlkSize + dof1];
1558 }
1559 }
1560 }
1561 }
1562 } else if(iType == 2) {// Face node, collapse stencil along face normal (*orientation)
1563 CollapseStencil(2, orientation, collapseFlags, stencil);
1564 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1565 // Find (if, jf, kf) the indices associated with the interacting node
1566 ko = ke + (interactingNode / 9 - 1);
1567 {
1568 LO tmp = interactingNode % 9;
1569 jo = je + (tmp / 3 - 1);
1570 io = ie + (tmp % 3 - 1);
1571 int dummy;
1572 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1573 }
1574 if((io > -1 && io < elementNodesPerDir[0]) &&
1575 (jo > -1 && jo < elementNodesPerDir[1]) &&
1576 (ko > -1 && ko < elementNodesPerDir[2])) {
1577 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1578 if(jType == 2) {
1579 Aff(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1580 stencil[interactingNode*BlkSize + dof1];
1581 } else if(jType == 1) {
1582 Afe(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1583 stencil[interactingNode*BlkSize + dof1];
1584 } else if(jType == 0) {
1585 Afc(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1586 -stencil[interactingNode*BlkSize + dof1];
1587 }
1588 }
1589 }
1590 }
1591 } else if(iType == 1) {// Edge node, collapse stencil perpendicular to edge direction
1592 CollapseStencil(1, orientation, collapseFlags, stencil);
1593 for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1594 // Find (if, jf, kf) the indices associated with the interacting node
1595 ko = ke + (interactingNode / 9 - 1);
1596 {
1597 LO tmp = interactingNode % 9;
1598 jo = je + (tmp / 3 - 1);
1599 io = ie + (tmp % 3 - 1);
1600 int dummy;
1601 GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1602 }
1603 if((io > -1 && io < elementNodesPerDir[0]) &&
1604 (jo > -1 && jo < elementNodesPerDir[1]) &&
1605 (ko > -1 && ko < elementNodesPerDir[2])) {
1606 for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1607 if(jType == 1) {
1608 Aee(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1609 stencil[interactingNode*BlkSize + dof1];
1610 } else if(jType == 0) {
1611 Aec(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1612 -stencil[interactingNode*BlkSize + dof1];
1613 }
1614 } // Check if interacting node is in element or not
1615 } // Pc is the identity so no need to treat iType == 0
1616 } // Loop over interacting nodes within a row
1617 } // Check for current node type
1618 } // Loop over degrees of freedom associated to a given node
1619 } // Loop over i
1620 } // Loop over j
1621 } // Loop over k
1622
1623 // Compute the projection operators for edge and interior nodes
1624 //
1625 // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1626 // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1627 // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1628 // [P_c] = I_c
1629 //
1630 { // Compute P_e
1631 // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1632 // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1633 Teuchos::SerialDenseSolver<LO,SC> problem;
1634 problem.setMatrix(Teuchos::rcp(&Aee, false));
1635 problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1636 problem.factorWithEquilibration(true);
1637 problem.solve();
1638 problem.unequilibrateLHS();
1639 }
1640
1641 { // Compute P_f
1642 // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1643 // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1644 Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1645 // Step two: P_f = A_{ff}^{-1}*A_{fc}
1646 Teuchos::SerialDenseSolver<LO,SC> problem;
1647 problem.setMatrix(Teuchos::rcp(&Aff, false));
1648 problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1649 problem.factorWithEquilibration(true);
1650 problem.solve();
1651 problem.unequilibrateLHS();
1652 }
1653
1654 { // Compute P_i
1655 // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1656 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1657 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1658 // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1659 Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1660 // Step two: P_i = A_{ii}^{-1}*A_{ic}
1661 Teuchos::SerialDenseSolver<LO,SC> problem;
1662 problem.setMatrix(Teuchos::rcp(&Aii, false));
1663 problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1664 problem.factorWithEquilibration(true);
1665 problem.solve();
1666 problem.unequilibrateLHS();
1667 }
1668
1669 } // ComputeLocalEntries()
1670
1671 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1673 CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1674 Array<SC>& stencil) const {
1675
1676 if(type == 2) {// Face stencil collapse
1677 // Let's hope things vectorize well inside this if statement
1678 if(orientation == 0) {
1679 for(LO i = 0; i < 9; ++i) {
1680 stencil[3*i + 1] = stencil[3*i] + stencil[3*i + 1] + stencil[3*i + 2];
1681 stencil[3*i] = 0;
1682 stencil[3*i + 2] = 0;
1683 }
1684 } else if(orientation == 1) {
1685 for(LO i = 0; i < 3; ++i) {
1686 stencil[9*i + 3] = stencil[9*i + 0] + stencil[9*i + 3] + stencil[9*i + 6];
1687 stencil[9*i + 0] = 0;
1688 stencil[9*i + 6] = 0;
1689 stencil[9*i + 4] = stencil[9*i + 1] + stencil[9*i + 4] + stencil[9*i + 7];
1690 stencil[9*i + 1] = 0;
1691 stencil[9*i + 7] = 0;
1692 stencil[9*i + 5] = stencil[9*i + 2] + stencil[9*i + 5] + stencil[9*i + 8];
1693 stencil[9*i + 2] = 0;
1694 stencil[9*i + 8] = 0;
1695 }
1696 } else if(orientation == 2) {
1697 for(LO i = 0; i < 9; ++i) {
1698 stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1699 stencil[i] = 0;
1700 stencil[i + 18] = 0;
1701 }
1702 }
1703 } else if(type == 1) {// Edge stencil collapse
1704 SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1705 if(orientation == 0) {
1706 for(LO i = 0; i < 9; ++i) {
1707 tmp1 += stencil[0 + 3*i];
1708 stencil[0 + 3*i] = 0;
1709 tmp2 += stencil[1 + 3*i];
1710 stencil[1 + 3*i] = 0;
1711 tmp3 += stencil[2 + 3*i];
1712 stencil[2 + 3*i] = 0;
1713 }
1714 stencil[12] = tmp1;
1715 stencil[13] = tmp2;
1716 stencil[14] = tmp3;
1717 } else if(orientation == 1) {
1718 for(LO i = 0; i < 3; ++i) {
1719 tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1720 stencil[ 0 + i] = 0;
1721 stencil[ 9 + i] = 0;
1722 stencil[18 + i] = 0;
1723 tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1724 stencil[ 3 + i] = 0;
1725 stencil[12 + i] = 0;
1726 stencil[21 + i] = 0;
1727 tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1728 stencil[ 6 + i] = 0;
1729 stencil[15 + i] = 0;
1730 stencil[24 + i] = 0;
1731 }
1732 stencil[10] = tmp1;
1733 stencil[13] = tmp2;
1734 stencil[16] = tmp3;
1735 } else if(orientation == 2) {
1736 for(LO i = 0; i < 9; ++i) {
1737 tmp1 += stencil[i];
1738 stencil[i] = 0;
1739 tmp2 += stencil[i + 9];
1740 stencil[i + 9] = 0;
1741 tmp3 += stencil[i + 18];
1742 stencil[i + 18] = 0;
1743 }
1744 stencil[ 4] = tmp1;
1745 stencil[13] = tmp2;
1746 stencil[22] = tmp3;
1747 }
1748 }
1749 } // CollapseStencil
1750
1751 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1753 FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1754 const LO /* ke */, const ArrayView<const SC> rowValues,const Array<LO> /* elementNodesPerDir */,
1755 const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1756 const {
1757
1758 if(stencilType == "reduced") {
1759 Array<int> fullStencilInds(7);
1760 fullStencilInds[0] = 4; fullStencilInds[1] = 10; fullStencilInds[2] = 12;
1761 fullStencilInds[3] = 13; fullStencilInds[4] = 14; fullStencilInds[5] = 16;
1762 fullStencilInds[6] = 22;
1763
1764 // Create a mask array to automate mesh boundary processing
1765 Array<int> mask(7);
1766 int stencilSize = rowValues.size();
1767 if(collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1768 if(stencilSize == 1) {
1769 // The assumption is made that if only one non-zero exists in the row
1770 // then this represent a Dirichlet BC being enforced.
1771 // One might want to zero out the corresponding row in the prolongator.
1772 mask[0] = 1; mask[1] = 1; mask[2] = 1;
1773 mask[4] = 1; mask[5] = 1; mask[6] = 1;
1774 } else {
1775 // Here we are looking at Neumann like BC where only a flux is prescribed
1776 // and less non-zeros will be present in the corresponding row.
1777 if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1778 mask[0] = 1;
1779 }
1780 if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1781 mask[6] = 1;
1782 }
1783 if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1784 mask[1] = 1;
1785 }
1786 if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1787 mask[5] = 1;
1788 }
1789 if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1790 mask[2] = 1;
1791 }
1792 if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1793 mask[4] = 1;
1794 }
1795 }
1796 }
1797
1798 int offset = 0;
1799 for(int ind = 0; ind < 7; ++ind) {
1800 if(mask[ind] == 1) {
1801 for(int dof = 0; dof < BlkSize; ++dof) {
1802 stencil[BlkSize*fullStencilInds[ind] + dof] = 0.0;
1803 }
1804 ++offset; // The offset is used to stay within rowValues bounds
1805 } else {
1806 for(int dof = 0; dof < BlkSize; ++dof) {
1807 stencil[BlkSize*fullStencilInds[ind] + dof] = rowValues[BlkSize*(ind - offset) + dof];
1808 }
1809 }
1810 }
1811 } else if(stencilType == "full") {
1812 // Create a mask array to automate mesh boundary processing
1813 Array<int> mask(27);
1814 if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1815 for(int ind = 0; ind < 9; ++ind) {
1816 mask[ind] = 1;
1817 }
1818 }
1819 if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1820 for(int ind = 0; ind < 9; ++ind) {
1821 mask[18 + ind] = 1;
1822 }
1823 }
1824 if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1825 for(int ind1 = 0; ind1 < 3; ++ind1) {
1826 for(int ind2 = 0; ind2 < 3; ++ind2) {
1827 mask[ind1*9 + ind2] = 1;
1828 }
1829 }
1830 }
1831 if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1832 for(int ind1 = 0; ind1 < 3; ++ind1) {
1833 for(int ind2 = 0; ind2 < 3; ++ind2) {
1834 mask[ind1*9 + ind2 + 6] = 1;
1835 }
1836 }
1837 }
1838 if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1839 for(int ind = 0; ind < 9; ++ind) {
1840 mask[3*ind] = 1;
1841 }
1842 }
1843 if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1844 for(int ind = 0; ind < 9; ++ind) {
1845 mask[3*ind + 2] = 1;
1846 }
1847 }
1848
1849 // Now the stencil is extracted and formated
1850 int offset = 0;
1851 for(LO ind = 0; ind < 27; ++ind) {
1852 if(mask[ind] == 0) {
1853 for(int dof = 0; dof < BlkSize; ++dof) {
1854 stencil[BlkSize*ind + dof] = 0.0;
1855 }
1856 ++offset; // The offset is used to stay within rowValues bounds
1857 } else {
1858 for(int dof = 0; dof < BlkSize; ++dof) {
1859 stencil[BlkSize*ind + dof] = rowValues[BlkSize*(ind - offset) + dof];
1860 }
1861 }
1862 }
1863 } // stencilTpye
1864 } // FormatStencil()
1865
1866 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1868 const LO ie, const LO je, const LO ke,
1869 const Array<LO> elementNodesPerDir,
1870 int* type, LO& ind, int* orientation) const {
1871
1872 // Initialize flags with -1 to be able to catch issues easily
1873 *type = -1, ind = 0, *orientation = -1;
1874 if((ke == 0 || ke == elementNodesPerDir[2]-1)
1875 && (je == 0 || je == elementNodesPerDir[1]-1)
1876 && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1877 // Corner node
1878 *type = 0;
1879 ind = 4*ke / (elementNodesPerDir[2]-1) + 2*je / (elementNodesPerDir[1]-1)
1880 + ie / (elementNodesPerDir[0]-1);
1881 } else if(((ke == 0 || ke == elementNodesPerDir[2]-1)
1882 && (je == 0 || je == elementNodesPerDir[1]-1))
1883 || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1884 && (ie == 0 || ie == elementNodesPerDir[0]-1))
1885 || ((je == 0 || je == elementNodesPerDir[1]-1)
1886 && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1887 // Edge node
1888 *type = 1;
1889 if(ke > 0) {ind += 2*(elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);}
1890 if(ke == elementNodesPerDir[2] - 1) {ind += 4*(elementNodesPerDir[2] - 2);}
1891 if((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1892 if(je == 0) {// jlo ie edge
1893 *orientation = 0;
1894 ind += ie - 1;
1895 } else if(je == elementNodesPerDir[1] - 1) {// jhi ie edge
1896 *orientation = 0;
1897 ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1898 } else {// ilo or ihi je edge
1899 *orientation = 1;
1900 ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0] - 1);
1901 }
1902 } else {// ke edges
1903 *orientation = 2;
1904 ind += 4*(ke - 1) + 2*(je/(elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1905 }
1906 } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1907 || (je == 0 || je == elementNodesPerDir[1]-1)
1908 || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1909 // Face node
1910 *type = 2;
1911 if(ke == 0) {// current node is on "bottom" face
1912 *orientation = 2;
1913 ind = (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1914 } else {
1915 // add nodes from "bottom" face
1916 ind += (elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2);
1917 // add nodes from side faces
1918 ind += 2*(ke - 1)*(elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1919 if(ke == elementNodesPerDir[2]-1) {// current node is on "top" face
1920 *orientation = 2;
1921 ind += (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1922 } else {// current node is on a side face
1923 if(je == 0) {
1924 *orientation = 1;
1925 ind += ie - 1;
1926 } else if(je == elementNodesPerDir[1] - 1) {
1927 *orientation = 1;
1928 ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1929 } else {
1930 *orientation = 0;
1931 ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0]-1);
1932 }
1933 }
1934 }
1935 } else {
1936 // Interior node
1937 *type = 3;
1938 ind = (ke - 1)*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2)
1939 + (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1940 }
1941 } // GetNodeInfo()
1942
1943 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1945 const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1946 const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1947 const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1948 const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const
1949 {
1950 typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1951 DT n = last1 - first1;
1952 DT m = n / 2;
1953 DT z = Teuchos::OrdinalTraits<DT>::zero();
1954 while (m > z)
1955 {
1956 DT max = n - m;
1957 for (DT j = 0; j < max; j++)
1958 {
1959 for (DT k = j; k >= 0; k-=m)
1960 {
1961 if (first1[first2[k+m]] >= first1[first2[k]])
1962 break;
1963 std::swap(first2[k+m], first2[k]);
1964 }
1965 }
1966 m = m/2;
1967 }
1968 }
1969
1970} //namespace MueLu
1971
1972#define MUELU_BLACKBOXPFACTORY_SHORT
1973#endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static const NoFactory * get()
Namespace for MueLu classes and methods.
@ Runtime0
One-liner description of what is happening.