Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_Container_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
43#ifndef IFPACK2_CONTAINER_DEF_HPP
44#define IFPACK2_CONTAINER_DEF_HPP
45
47#include <Teuchos_Time.hpp>
48
49namespace Ifpack2 {
50
51//Implementation of Ifpack2::Container
52
53template<class MatrixType>
55 const Teuchos::RCP<const row_matrix_type>& matrix,
56 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
57 bool pointIndexed) :
58 inputMatrix_ (matrix),
59 inputCrsMatrix_ (Teuchos::rcp_dynamic_cast<const crs_matrix_type>(inputMatrix_)),
60 inputBlockMatrix_ (Teuchos::rcp_dynamic_cast<const block_crs_matrix_type>(inputMatrix_)),
61 pointIndexed_(pointIndexed),
62 IsInitialized_(false),
63 IsComputed_(false)
64{
65 using Teuchos::Ptr;
66 using Teuchos::RCP;
67 using Teuchos::Array;
68 using Teuchos::ArrayView;
69 using Teuchos::Comm;
70 NumLocalRows_ = inputMatrix_->getLocalNumRows();
71 NumGlobalRows_ = inputMatrix_->getGlobalNumRows();
72 NumGlobalNonzeros_ = inputMatrix_->getGlobalNumEntries();
73 IsParallel_ = inputMatrix_->getRowMap()->getComm()->getSize() != 1;
75 if(hasBlockCrs_)
76 bcrsBlockSize_ = inputBlockMatrix_->getBlockSize();
77 else
81 else
83 setBlockSizes(partitions);
84 //Sanity check the partitions
85 #ifdef HAVE_IFPACK2_DEBUG
86 // Check whether the input set of local row indices is correct.
87 const map_type& rowMap = *inputMatrix_->getRowMap();
88 for(int i = 0; i < numBlocks_; i++)
89 {
90 Teuchos::ArrayView<const LO> blockRows = getBlockRows(i);
91 for(LO j = 0; j < blockSizes_[i]; j++)
92 {
93 LO row = blockRows[j];
94 if(pointIndexed)
95 {
96 //convert the point row to the corresponding block row
97 row /= bcrsBlockSize_;
98 }
99 TEUCHOS_TEST_FOR_EXCEPTION(
100 !rowMap.isNodeLocalElement(row),
101 std::invalid_argument, "Ifpack2::Container: "
102 "On process " << rowMap.getComm()->getRank() << " of "
103 << rowMap.getComm()->getSize() << ", in the given set of local row "
104 "indices blockRows = " << Teuchos::toString(blockRows) << ", the following "
105 "entries is not valid local row index on the calling process: "
106 << row << ".");
107 }
108 }
109 #endif
110}
111
112template<class MatrixType>
115
116template<class MatrixType>
117Teuchos::ArrayView<const typename MatrixType::local_ordinal_type>
119{
120 return Teuchos::ArrayView<const LO>
121 (&blockRows_[blockOffsets_[blockIndex]], blockSizes_[blockIndex]);
122}
123
124template<class MatrixType>
125void Container<MatrixType>::setBlockSizes(const Teuchos::Array<Teuchos::Array<LO> >& partitions)
126{
127 //First, create a grand total of all the rows in all the blocks
128 //Note: If partitioner allowed overlap, this could be greater than the # of local rows
129 LO totalBlockRows = 0;
130 numBlocks_ = partitions.size();
131 blockSizes_.resize(numBlocks_);
132 blockOffsets_.resize(numBlocks_);
133 maxBlockSize_ = 0;
134 for(int i = 0; i < numBlocks_; i++)
135 {
136 LO rowsInBlock = partitions[i].size();
137 blockSizes_[i] = rowsInBlock;
138 blockOffsets_[i] = totalBlockRows;
139 totalBlockRows += rowsInBlock;
140 maxBlockSize_ = std::max(maxBlockSize_, rowsInBlock * scalarsPerRow_);
141 }
142 blockRows_.resize(totalBlockRows);
143 //set blockRows_: each entry is the partition/block of the row
144 LO iter = 0;
145 for(int i = 0; i < numBlocks_; i++)
146 {
147 for(int j = 0; j < blockSizes_[i]; j++)
148 {
149 blockRows_[iter++] = partitions[i][j];
150 }
151 }
152}
153
154template<class MatrixType>
156{
157 if(Diag_.is_null())
158 {
159 Diag_ = rcp(new vector_type(inputMatrix_->getDomainMap()));
160 inputMatrix_->getLocalDiagCopy(*Diag_);
161 }
162}
163
164template<class MatrixType>
166 return IsInitialized_;
167}
168
169template<class MatrixType>
171 return IsComputed_;
172}
173
174template<class MatrixType>
176applyMV(const mv_type& X, mv_type& Y) const
177{
178 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
179}
180
181template<class MatrixType>
183weightedApplyMV(const mv_type& X, mv_type& Y, vector_type& W) const
184{
185 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
186}
187
188template<class MatrixType>
190getName()
191{
192 return "Generic";
193}
194
195template<class MatrixType>
197 SC dampingFactor, LO i) const
198{
199 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
200}
201
202template <class MatrixType>
203void Container<MatrixType>::DoJacobi(ConstHostView X, HostView Y, SC dampingFactor) const
204{
205 using STS = Teuchos::ScalarTraits<ISC>;
206 const ISC one = STS::one();
207 // use blockRows_ and blockSizes_
208 size_t numVecs = X.extent(1);
209 // Non-overlapping Jacobi
210 for (LO i = 0; i < numBlocks_; i++)
211 {
212 // may happen that a partition is empty
213 if(blockSizes_[i] != 1 || hasBlockCrs_)
214 {
215 if(blockSizes_[i] == 0 )
216 continue;
217 apply(X, Y, i, Teuchos::NO_TRANS, dampingFactor, one);
218 }
219 else // singleton, can't access Containers_[i] as it was never filled and may be null.
220 {
221 LO LRID = blockRows_[blockOffsets_[i]];
222 getMatDiag();
223 auto diagView = Diag_->getLocalViewHost(Tpetra::Access::ReadOnly);
224 ISC d = one * (static_cast<ISC> (dampingFactor)) / diagView(LRID, 0);
225 for(size_t nv = 0; nv < numVecs; nv++)
226 {
227 ISC x = X(LRID, nv);
228 Y(LRID, nv) += x * d;
229 }
230 }
231 }
232}
233
234template <class MatrixType>
235void Container<MatrixType>::DoOverlappingJacobi(ConstHostView X, HostView Y, ConstHostView W, SC dampingFactor, bool nonsymScaling) const
236{
237 using STS = Teuchos::ScalarTraits<SC>;
238 // Overlapping Jacobi
239 for(LO i = 0; i < numBlocks_; i++)
240 {
241 // may happen that a partition is empty
242 if(blockSizes_[i] == 0)
243 continue;
244 if(blockSizes_[i] != 1) {
245 if (!nonsymScaling)
246 weightedApply(X, Y, W, i, Teuchos::NO_TRANS, dampingFactor, STS::one());
247 else {
248 // A crummy way of doing nonsymmetric scaling. We effectively
249 // first reverse scale x, which later gets scaled inside weightedApply
250 // so the net effect is that x is not scaled.
251 // This was done to keep using weightedApply() that is defined in
252 // many spots in the code.
253 HostView tempo("", X.extent(0), X.extent(1));
254 size_t numVecs = X.extent(1);
255 LO bOffset = blockOffsets_[i];
256 for (LO ii = 0; ii < blockSizes_[i]; ii++) {
257 LO LRID = blockRows_[bOffset++];
258 for (size_t jj = 0; jj < numVecs; jj++) tempo(LRID,jj)=X(LRID,jj)/ W(LRID,0);
259 }
260 weightedApply(tempo, Y, W, i, Teuchos::NO_TRANS, dampingFactor, STS::one());
261 }
262 }
263 else // singleton, can't access Containers_[i] as it was never filled and may be null.
264 {
265 const ISC one = STS::one();
266 size_t numVecs = X.extent(1);
267 LO LRID = blockRows_[blockOffsets_[i]];
268 getMatDiag();
269 auto diagView = Diag_->getLocalViewHost(Tpetra::Access::ReadOnly);
270 ISC recip = one / diagView(LRID, 0);
271 ISC wval = W(LRID,0);
272 ISC combo = wval*recip;
273 ISC d = combo*(static_cast<ISC> (dampingFactor));
274 for(size_t nv = 0; nv < numVecs; nv++)
275 {
276 ISC x = X(LRID, nv);
277 Y(LRID, nv) = x * d + Y(LRID, nv);
278 }
279 }
280 }
281}
282
283//Do Gauss-Seidel with just block i
284//This is used 3 times: once in DoGaussSeidel and twice in DoSGS
285template<class MatrixType, typename LocalScalarType>
287 ConstHostView X, HostView Y, HostView Y2, HostView Resid,
288 SC dampingFactor, LO i) const
289{
290 using Teuchos::ArrayView;
291 using STS = Teuchos::ScalarTraits<ISC>;
292 size_t numVecs = X.extent(1);
293 const ISC one = STS::one();
294 if(this->blockSizes_[i] == 0)
295 return; // Skip empty partitions
296 if(this->hasBlockCrs_ && !this->pointIndexed_)
297 {
298 //Use efficient blocked version
299 ArrayView<const LO> blockRows = this->getBlockRows(i);
300 const size_t localNumRows = this->blockSizes_[i];
301 using inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
302 using vals_type = typename block_crs_matrix_type::values_host_view_type;
303 for(size_t j = 0; j < localNumRows; j++)
304 {
305 LO row = blockRows[j]; // Containers_[i]->ID (j);
306 vals_type values;
307 inds_type colinds;
308 this->inputBlockMatrix_->getLocalRowView(row, colinds, values);
309 LO numEntries = (LO) colinds.size();
310 for(size_t m = 0; m < numVecs; m++)
311 {
312 for (int localR = 0; localR < this->bcrsBlockSize_; localR++)
313 Resid(row * this->bcrsBlockSize_ + localR, m) = X(row * this->bcrsBlockSize_ + localR, m);
314 for (LO k = 0; k < numEntries; ++k)
315 {
316 const LO col = colinds[k];
317 for(int localR = 0; localR < this->bcrsBlockSize_; localR++)
318 {
319 for(int localC = 0; localC < this->bcrsBlockSize_; localC++)
320 {
321 Resid(row * this->bcrsBlockSize_ + localR, m) -=
322 values[k * this->bcrsBlockSize_ * this->bcrsBlockSize_ + localR + localC * this->bcrsBlockSize_]
323 * Y2(col * this->bcrsBlockSize_ + localC, m); }
324 }
325 }
326 }
327 }
328 // solve with this block
329 //
330 // Note: I'm abusing the ordering information, knowing that X/Y
331 // and Y2 have the same ordering for on-proc unknowns.
332 //
333 // Note: Add flop counts for inverse apply
334 apply(Resid, Y2, i, Teuchos::NO_TRANS, dampingFactor, one);
335 }
336 else if(!this->hasBlockCrs_ && this->blockSizes_[i] == 1)
337 {
338 // singleton, can't access Containers_[i] as it was never filled and may be null.
339 // a singleton calculation (just using matrix diagonal) is exact, all residuals should be zero.
340 LO LRID = this->blockOffsets_[i]; // by definition, a singleton 1 row in block.
341 //Use the KokkosSparse internal matrix for low-overhead values/indices access
342 //But, can only do this if the matrix is accessible directly from host, since it's not a DualView
344 container_exec_space().fence();
345 auto localA = this->inputCrsMatrix_->getLocalMatrixHost();
346 using size_type = typename crs_matrix_type::local_matrix_host_type::size_type;
347 const auto& rowmap = localA.graph.row_map;
348 const auto& entries = localA.graph.entries;
349 const auto& values = localA.values;
350 this->getMatDiag();
351 auto diagView = this->Diag_->getLocalViewHost(Tpetra::Access::ReadOnly);
352 ISC d = (static_cast<ISC> (dampingFactor)) / diagView(LRID, 0);
353 for(size_t m = 0; m < numVecs; m++)
354 {
355 // ISC x = X(LRID, m);
356 ISC r = X(LRID, m);
357 for(size_type k = rowmap(LRID); k < rowmap(LRID + 1); k++) {
358 const LO col = entries(k);
359 r -= values(k) * Y2(col, m);
360 }
361
362 ISC newy = r * d;
363 Y2(LRID, m) += newy;
364 }
365 }
366 else if(!this->inputCrsMatrix_.is_null() &&
367 std::is_same<typename crs_matrix_type::device_type::memory_space, Kokkos::HostSpace>::value)
368 {
369 //Use the KokkosSparse internal matrix for low-overhead values/indices access
370 //But, can only do this if the matrix is accessible directly from host, since it's not a DualView
372 container_exec_space().fence();
373 auto localA = this->inputCrsMatrix_->getLocalMatrixHost();
374 using size_type = typename crs_matrix_type::local_matrix_host_type::size_type;
375 const auto& rowmap = localA.graph.row_map;
376 const auto& entries = localA.graph.entries;
377 const auto& values = localA.values;
378 ArrayView<const LO> blockRows = this->getBlockRows(i);
379 for(size_t j = 0; j < size_t(blockRows.size()); j++)
380 {
381 const LO row = blockRows[j];
382 for(size_t m = 0; m < numVecs; m++)
383 {
384 ISC r = X(row, m);
385 for(size_type k = rowmap(row); k < rowmap(row + 1); k++)
386 {
387 const LO col = entries(k);
388 r -= values(k) * Y2(col, m);
389 }
390 Resid(row, m) = r;
391 }
392 }
393 // solve with this block
394 //
395 // Note: I'm abusing the ordering information, knowing that X/Y
396 // and Y2 have the same ordering for on-proc unknowns.
397 //
398 // Note: Add flop counts for inverse apply
399 apply(Resid, Y2, i, Teuchos::NO_TRANS, dampingFactor, one);
400 }
401 else
402 {
403 //Either a point-indexed block matrix, or a normal row matrix
404 //that doesn't support getLocalMatrix
405 ArrayView<const LO> blockRows = this->getBlockRows(i);
406 for(size_t j = 0; j < size_t(blockRows.size()); j++)
407 {
408 const LO row = blockRows[j];
409 auto rowView = getInputRowView(row);
410 for(size_t m = 0; m < numVecs; m++)
411 {
412 Resid(row, m) = X(row, m);
413 for (size_t k = 0; k < rowView.size(); ++k)
414 {
415 const LO col = rowView.ind(k);
416 Resid(row, m) -= rowView.val(k) * Y2(col, m);
417 }
418 }
419 }
420 // solve with this block
421 //
422 // Note: I'm abusing the ordering information, knowing that X/Y
423 // and Y2 have the same ordering for on-proc unknowns.
424 //
425 // Note: Add flop counts for inverse apply
426 apply(Resid, Y2, i, Teuchos::NO_TRANS, dampingFactor, one);
427 }
428}
429
430template<class MatrixType>
431void Container<MatrixType>::
432DoGaussSeidel(ConstHostView X, HostView Y, HostView Y2, SC dampingFactor) const
433{
434 using Teuchos::Array;
435 using Teuchos::ArrayRCP;
436 using Teuchos::ArrayView;
437 using Teuchos::Ptr;
438 using Teuchos::RCP;
439 using Teuchos::rcp;
440 using Teuchos::rcpFromRef;
441 //This function just extracts the diagonal if it hasn't already.
442 getMatDiag();
443 auto numVecs = X.extent(1);
444 // X = RHS, Y = initial guess
445 HostView Resid("", X.extent(0), X.extent(1));
446 for(LO i = 0; i < numBlocks_; i++)
447 {
448 DoGSBlock(X, Y, Y2, Resid, dampingFactor, i);
449 }
450 if(IsParallel_)
451 {
452 auto numMyRows = inputMatrix_->getLocalNumRows();
453 for (size_t m = 0; m < numVecs; ++m)
454 {
455 for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i)
456 {
457 Y(i, m) = Y2(i, m);
458 }
459 }
460 }
461}
462
463template<class MatrixType>
464void Container<MatrixType>::
465DoSGS(ConstHostView X, HostView Y, HostView Y2, SC dampingFactor) const
467 // X = RHS, Y = initial guess
468 using Teuchos::Array;
469 using Teuchos::ArrayRCP;
470 using Teuchos::ArrayView;
471 using Teuchos::Ptr;
472 using Teuchos::ptr;
473 using Teuchos::RCP;
474 using Teuchos::rcp;
475 using Teuchos::rcpFromRef;
476 auto numVecs = X.extent(1);
477 HostView Resid("", X.extent(0), X.extent(1));
478 // Forward Sweep
479 for(LO i = 0; i < numBlocks_; i++)
480 {
481 DoGSBlock(X, Y, Y2, Resid, dampingFactor, i);
482 }
483 static_assert(std::is_signed<LO>::value,
484 "Local ordinal must be signed (unsigned breaks reverse iteration to 0)");
485 // Reverse Sweep
486 for(LO i = numBlocks_ - 1; i >= 0; --i)
487 {
488 DoGSBlock(X, Y, Y2, Resid, dampingFactor, i);
489 }
490 if(IsParallel_)
491 {
492 auto numMyRows = inputMatrix_->getLocalNumRows();
493 for (size_t m = 0; m < numVecs; ++m)
494 {
495 for (size_t i = 0; i < numMyRows * bcrsBlockSize_; ++i)
496 {
497 Y(i, m) = Y2(i, m);
498 }
500 }
501}
502
503template<class MatrixType>
504void Container<MatrixType>::
505clearBlocks()
506{
507 numBlocks_ = 0;
508 blockRows_.clear();
509 blockSizes_.clear();
510 blockOffsets_.clear();
511 Diag_ = Teuchos::null; //Diag_ will be recreated if needed
512}
513
514//Implementation of Ifpack2::ContainerImpl
515
516template<class MatrixType, class LocalScalarType>
517ContainerImpl<MatrixType, LocalScalarType>::
518ContainerImpl(
519 const Teuchos::RCP<const row_matrix_type>& matrix,
520 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
521 bool pointIndexed)
522 : Container<MatrixType>(matrix, partitions, pointIndexed) {}
523
524template<class MatrixType, class LocalScalarType>
527
528template<class MatrixType, class LocalScalarType>
530setParameters (const Teuchos::ParameterList& List) {}
531
532template<class MatrixType, class LocalScalarType>
534applyInverseJacobi (const mv_type& /* X */, mv_type& /* Y */,
535 SC dampingFactor,
536 bool /* zeroStartingSolution = false */,
537 int /* numSweeps = 1 */) const
538{
539 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
540}
541
542template<class MatrixType, class LocalScalarType>
544applyMV (const mv_type& X, mv_type& Y) const
545{
546 ConstHostView XView = X.getLocalViewHost(Tpetra::Access::ReadOnly);
547 HostView YView = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
548 this->apply (XView, YView, 0);
549}
550
551template<class MatrixType, class LocalScalarType>
553weightedApplyMV (const mv_type& X,
554 mv_type& Y,
555 vector_type& W) const
556{
557 ConstHostView XView = X.getLocalViewHost(Tpetra::Access::ReadOnly);
558 HostView YView = Y.getLocalViewHost(Tpetra::Access::ReadWrite);
559 ConstHostView WView = W.getLocalViewHost(Tpetra::Access::ReadOnly);
560 weightedApply (XView, YView, WView, 0);
561}
562
563template<class MatrixType, class LocalScalarType>
565getName()
566{
567 return "Generic";
568}
569
570template<class MatrixType, class LocalScalarType>
572solveBlock(ConstHostSubviewLocal X,
573 HostSubviewLocal Y,
574 int blockIndex,
575 Teuchos::ETransp mode,
576 const LSC alpha,
577 const LSC beta) const
578{
579 TEUCHOS_TEST_FOR_EXCEPT_MSG(true, "Not implemented.");
580}
581
582template<class MatrixType, class LocalScalarType>
583typename ContainerImpl<MatrixType, LocalScalarType>::LO
585translateRowToCol(LO row)
586{
587 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
588 GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
589 const map_type& globalRowMap = *(this->inputMatrix_->getRowMap());
590 const map_type& globalColMap = *(this->inputMatrix_->getColMap());
591 LO rowLID = row;
592 LO dofOffset = 0;
593 if(this->pointIndexed_)
594 {
595 rowLID = row / this->bcrsBlockSize_;
596 dofOffset = row % this->bcrsBlockSize_;
597 }
598 GO diagGID = globalRowMap.getGlobalElement(rowLID);
599 TEUCHOS_TEST_FOR_EXCEPTION(
600 diagGID == GO_INVALID,
601 std::runtime_error, "Ifpack2::Container::translateRowToCol: "
602 "On process " << this->inputMatrix_->getRowMap()->getComm()->getRank() <<
603 ", at least one row index in the set of local "
604 "row indices given to the constructor is not a valid local row index in "
605 "the input matrix's row Map on this process. This should be impossible "
606 "because the constructor checks for this case. Here is the complete set "
607 "of invalid local row indices: " << rowLID << ". "
608 "Please report this bug to the Ifpack2 developers.");
609 //now, can translate diagGID (both a global row AND global col ID) to local column
610 LO colLID = globalColMap.getLocalElement(diagGID);
611 TEUCHOS_TEST_FOR_EXCEPTION(
612 colLID == LO_INVALID,
613 std::runtime_error, "Ifpack2::Container::translateRowToCol: "
614 "On process " << this->inputMatrix_->getRowMap()->getComm()->getRank() << ", "
615 "at least one row index in the set of row indices given to the constructor "
616 "does not have a corresponding column index in the input matrix's column "
617 "Map. This probably means that the column(s) in question is/are empty on "
618 "this process, which would make the submatrix to extract structurally "
619 "singular. The invalid global column index is " << diagGID << ".");
620 //colLID could identify a block column - translate to split column if needed
621 if(this->pointIndexed_)
622 return colLID * this->bcrsBlockSize_ + dofOffset;
623 return colLID;
624}
625
626template<class MatrixType, class LocalScalarType>
628apply (ConstHostView X,
629 HostView Y,
630 int blockIndex,
631 Teuchos::ETransp mode,
632 SC alpha,
633 SC beta) const
634{
635 using Teuchos::ArrayView;
636 using Teuchos::as;
637 using Teuchos::RCP;
638 using Teuchos::rcp;
639
640 // The local operator might have a different Scalar type than
641 // MatrixType. This means that we might have to convert X and Y to
642 // the Tpetra::MultiVector specialization that the local operator
643 // wants. This class' X_ and Y_ internal fields are of the right
644 // type for the local operator, so we can use those as targets.
645
647
648 TEUCHOS_TEST_FOR_EXCEPTION(
649 ! this->IsComputed_, std::runtime_error, "Ifpack2::Container::apply: "
650 "You must have called the compute() method before you may call apply(). "
651 "You may call the apply() method as many times as you want after calling "
652 "compute() once, but you must have called compute() at least once.");
653
654 const size_t numVecs = X.extent(1);
655
656 if(numVecs == 0) {
657 return; // done! nothing to do
658 }
659
660 // The local operator works on a permuted subset of the local parts
661 // of X and Y. The subset and permutation are defined by the index
662 // array returned by getBlockRows(). If the permutation is trivial
663 // and the subset is exactly equal to the local indices, then we
664 // could use the local parts of X and Y exactly, without needing to
665 // permute. Otherwise, we have to use temporary storage to permute
666 // X and Y. For now, we always use temporary storage.
667 //
668 // Create temporary permuted versions of the input and output.
669 // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
670 // store the permuted versions of X resp. Y. Note that X_local has
671 // the domain Map of the operator, which may be a permuted subset of
672 // the local Map corresponding to X.getMap(). Similarly, Y_local
673 // has the range Map of the operator, which may be a permuted subset
674 // of the local Map corresponding to Y.getMap(). numRows_ here
675 // gives the number of rows in the row Map of the local Inverse_
676 // operator.
677 //
678 // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
679 // here that the row Map and the range Map of that operator are
680 // the same.
681 //
682 // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
683 // really belongs in Tpetra.
684
685 if(X_localBlocks_.size() == 0 || X.extent(1) != X_local_.extent(1))
686 {
687 //need to resize (or create for the first time) the three scratch arrays
688 X_localBlocks_.clear();
689 Y_localBlocks_.clear();
690 X_localBlocks_.reserve(this->numBlocks_);
691 Y_localBlocks_.reserve(this->numBlocks_);
692
693 X_local_ = HostViewLocal("X_local", this->blockRows_.size() * this->scalarsPerRow_, numVecs);
694 Y_local_ = HostViewLocal("Y_local", this->blockRows_.size() * this->scalarsPerRow_, numVecs);
695
696 //create all X_local and Y_local managed Views at once, are
697 //reused in subsequent apply() calls
698 for(int i = 0; i < this->numBlocks_; i++)
699 {
700 auto blockBounds = std::make_pair(this->blockOffsets_[i] * this->scalarsPerRow_,
701 (this->blockOffsets_[i] + this->blockSizes_[i]) * this->scalarsPerRow_);
702 X_localBlocks_.emplace_back(X_local_, blockBounds, Kokkos::ALL());
703 Y_localBlocks_.emplace_back(Y_local_, blockBounds, Kokkos::ALL());
704 }
705 }
706
707 const ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
708
709 if(this->scalarsPerRow_ == 1)
710 mvgs.gatherViewToView (X_localBlocks_[blockIndex], X, blockRows);
711 else
712 mvgs.gatherViewToViewBlock (X_localBlocks_[blockIndex], X, blockRows, this->scalarsPerRow_);
713
714 // We must gather the contents of the output multivector Y even on
715 // input to solveBlock(), since the inverse operator might use it as
716 // an initial guess for a linear solve. We have no way of knowing
717 // whether it does or does not.
718
719 if(this->scalarsPerRow_ == 1)
720 mvgs.gatherViewToView (Y_localBlocks_[blockIndex], Y, blockRows);
721 else
722 mvgs.gatherViewToViewBlock (Y_localBlocks_[blockIndex], Y, blockRows, this->scalarsPerRow_);
723
724 // Apply the local operator:
725 // Y_local := beta*Y_local + alpha*M^{-1}*X_local
726 this->solveBlock (X_localBlocks_[blockIndex], Y_localBlocks_[blockIndex], blockIndex, mode,
727 LSC(alpha), LSC(beta));
728
729 // Scatter the permuted subset output vector Y_local back into the
730 // original output multivector Y.
731 if(this->scalarsPerRow_ == 1)
732 mvgs.scatterViewToView (Y, Y_localBlocks_[blockIndex], blockRows);
733 else
734 mvgs.scatterViewToViewBlock (Y, Y_localBlocks_[blockIndex], blockRows, this->scalarsPerRow_);
735}
736
737template<class MatrixType, class LocalScalarType>
739weightedApply(ConstHostView X,
740 HostView Y,
741 ConstHostView D,
742 int blockIndex,
743 Teuchos::ETransp mode,
744 SC alpha,
745 SC beta) const
746{
747 using Teuchos::ArrayRCP;
748 using Teuchos::ArrayView;
749 using Teuchos::Range1D;
750 using Teuchos::Ptr;
751 using Teuchos::ptr;
752 using Teuchos::RCP;
753 using Teuchos::rcp;
754 using Teuchos::rcp_const_cast;
755 using std::endl;
756 using STS = Teuchos::ScalarTraits<SC>;
757
758 // The local operator template parameter might have a different
759 // Scalar type than MatrixType. This means that we might have to
760 // convert X and Y to the Tpetra::MultiVector specialization that
761 // the local operator wants. This class' X_ and Y_ internal fields
762 // are of the right type for the local operator, so we can use those
763 // as targets.
764
765 const char prefix[] = "Ifpack2::Container::weightedApply: ";
766 TEUCHOS_TEST_FOR_EXCEPTION(
767 ! this->IsComputed_, std::runtime_error, prefix << "You must have called the "
768 "compute() method before you may call this method. You may call "
769 "weightedApply() as many times as you want after calling compute() once, "
770 "but you must have called compute() at least once first.");
771
772 //bmk 7-2019: BlockRelaxation already checked this, but if that changes...
773 TEUCHOS_TEST_FOR_EXCEPTION(
774 this->scalarsPerRow_ > 1, std::logic_error, prefix << "Use of block rows isn't allowed "
775 "in overlapping Jacobi (the only method that uses weightedApply");
776
777 const size_t numVecs = X.extent(1);
778
779 TEUCHOS_TEST_FOR_EXCEPTION(
780 X.extent(1) != Y.extent(1), std::runtime_error,
781 prefix << "X and Y have different numbers of vectors (columns). X has "
782 << X.extent(1) << ", but Y has " << Y.extent(1) << ".");
783
784 if(numVecs == 0) {
785 return; // done! nothing to do
786 }
787
788 const size_t numRows = this->blockSizes_[blockIndex];
789
790 // The local operator works on a permuted subset of the local parts
791 // of X and Y. The subset and permutation are defined by the index
792 // array returned by getBlockRows(). If the permutation is trivial
793 // and the subset is exactly equal to the local indices, then we
794 // could use the local parts of X and Y exactly, without needing to
795 // permute. Otherwise, we have to use temporary storage to permute
796 // X and Y. For now, we always use temporary storage.
797 //
798 // Create temporary permuted versions of the input and output.
799 // (Re)allocate X_ and/or Y_ only if necessary. We'll use them to
800 // store the permuted versions of X resp. Y. Note that X_local has
801 // the domain Map of the operator, which may be a permuted subset of
802 // the local Map corresponding to X.getMap(). Similarly, Y_local
803 // has the range Map of the operator, which may be a permuted subset
804 // of the local Map corresponding to Y.getMap(). numRows_ here
805 // gives the number of rows in the row Map of the local operator.
806 //
807 // FIXME (mfh 20 Aug 2013) There might be an implicit assumption
808 // here that the row Map and the range Map of that operator are
809 // the same.
810 //
811 // FIXME (mfh 20 Aug 2013) This "local permutation" functionality
812 // really belongs in Tpetra.
813 if(X_localBlocks_.size() == 0 || X.extent(1) != X_local_.extent(1))
814 {
815 //need to resize (or create for the first time) the three scratch arrays
816 X_localBlocks_.clear();
817 Y_localBlocks_.clear();
818 X_localBlocks_.reserve(this->numBlocks_);
819 Y_localBlocks_.reserve(this->numBlocks_);
820
821 X_local_ = HostViewLocal("X_local", this->blockRows_.size() * this->scalarsPerRow_, numVecs);
822 Y_local_ = HostViewLocal("Y_local", this->blockRows_.size() * this->scalarsPerRow_, numVecs);
823
824 //create all X_local and Y_local managed Views at once, are
825 //reused in subsequent apply() calls
826 for(int i = 0; i < this->numBlocks_; i++)
827 {
828 auto blockBounds = std::make_pair(this->blockOffsets_[i] * this->scalarsPerRow_,
829 (this->blockOffsets_[i] + this->blockSizes_[i]) * this->scalarsPerRow_);
830 X_localBlocks_.emplace_back(X_local_, blockBounds, Kokkos::ALL());
831 Y_localBlocks_.emplace_back(Y_local_, blockBounds, Kokkos::ALL());
832 }
833 }
834 if(int(weightedApplyScratch_.extent(0)) != 3 * this->maxBlockSize_ ||
835 weightedApplyScratch_.extent(1) != numVecs)
836 {
837 weightedApplyScratch_ = HostViewLocal("weightedApply scratch", 3 * this->maxBlockSize_, numVecs);
838 }
839
840 ArrayView<const LO> blockRows = this->getBlockRows(blockIndex);
841
843
844 //note: BlockCrs w/ weighted Jacobi isn't allowed, so no need to use block gather/scatter
845 mvgs.gatherViewToView (X_localBlocks_[blockIndex], X, blockRows);
846 // We must gather the output multivector Y even on input to
847 // solveBlock(), since the local operator might use it as an initial
848 // guess for a linear solve. We have no way of knowing whether it
849 // does or does not.
850
851 mvgs.gatherViewToView (Y_localBlocks_[blockIndex], Y, blockRows);
852
853 // Apply the diagonal scaling D to the input X. It's our choice
854 // whether the result has the original input Map of X, or the
855 // permuted subset Map of X_local. If the latter, we also need to
856 // gather D into the permuted subset Map. We choose the latter, to
857 // save memory and computation. Thus, we do the following:
858 //
859 // 1. Gather D into a temporary vector D_local.
860 // 2. Create a temporary X_scaled to hold diag(D_local) * X_local.
861 // 3. Compute X_scaled := diag(D_loca) * X_local.
862 auto maxBS = this->maxBlockSize_;
863 auto bs = this->blockSizes_[blockIndex] * this->scalarsPerRow_;
864
865 HostSubviewLocal D_local(weightedApplyScratch_, std::make_pair(0, bs), std::make_pair(0, 1));
866 mvgs.gatherViewToView (D_local, D, blockRows);
867 HostSubviewLocal X_scaled(weightedApplyScratch_, std::make_pair(maxBS, maxBS + bs), Kokkos::ALL());
868 for(size_t j = 0; j < numVecs; j++)
869 for(size_t i = 0; i < numRows; i++)
870 X_scaled(i, j) = X_localBlocks_[blockIndex](i, j) * D_local(i, 0);
871
872 HostSubviewLocal Y_temp(weightedApplyScratch_, std::make_pair(maxBS * 2, maxBS * 2 + bs), Kokkos::ALL());
873 // Apply the local operator: Y_temp := M^{-1} * X_scaled
874 this->solveBlock (X_scaled, Y_temp, blockIndex, mode, STS::one(), STS::zero());
875 // Y_local := beta * Y_local + alpha * diag(D_local) * Y_temp.
876 //
877 // Note that we still use the permuted subset scaling D_local here,
878 // because Y_temp has the same permuted subset Map. That's good, in
879 // fact, because it's a subset: less data to read and multiply.
880 LISC a = alpha;
881 LISC b = beta;
882 for(size_t j = 0; j < numVecs; j++)
883 for(size_t i = 0; i < numRows; i++)
884 Y_localBlocks_[blockIndex](i, j) = b * Y_localBlocks_[blockIndex](i, j) + a * Y_temp(i, j) * D_local(i, 0);
885
886 // Copy the permuted subset output vector Y_local into the original
887 // output multivector Y.
888 mvgs.scatterViewToView (Y, Y_localBlocks_[blockIndex], blockRows);
889}
890
891template<class MatrixType, class LocalScalarType>
893 typename ContainerImpl<MatrixType, LocalScalarType>::SC,
894 typename ContainerImpl<MatrixType, LocalScalarType>::LO,
895 typename ContainerImpl<MatrixType, LocalScalarType>::GO,
896 typename ContainerImpl<MatrixType, LocalScalarType>::NO>
898getInputRowView(LO row) const
899{
900
901 typedef typename MatrixType::nonconst_local_inds_host_view_type nonconst_local_inds_host_view_type;
902 typedef typename MatrixType::nonconst_values_host_view_type nonconst_values_host_view_type;
903
904 typedef typename MatrixType::local_inds_host_view_type local_inds_host_view_type;
905 typedef typename MatrixType::values_host_view_type values_host_view_type;
906 using IST = typename row_matrix_type::impl_scalar_type;
907
908 if(this->hasBlockCrs_)
909 {
910 using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
911 using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
912 h_inds_type colinds;
913 h_vals_type values;
914 this->inputBlockMatrix_->getLocalRowView(row / this->bcrsBlockSize_, colinds, values);
915 size_t numEntries = colinds.size();
916 // CMS: Can't say I understand what this really does
917 //return StridedRowView(values + row % this->bcrsBlockSize_, colinds, this->bcrsBlockSize_, numEntries * this->bcrsBlockSize_);
918 h_vals_type subvals = Kokkos::subview(values,std::pair<size_t,size_t>(row % this->bcrsBlockSize_,values.size()));
919 return StridedRowView(subvals, colinds, this->bcrsBlockSize_, numEntries * this->bcrsBlockSize_);
920 }
921 else if(!this->inputMatrix_->supportsRowViews())
922 {
923 size_t maxEntries = this->inputMatrix_->getLocalMaxNumRowEntries();
924 Teuchos::Array<LO> inds(maxEntries);
925 Teuchos::Array<SC> vals(maxEntries);
926 nonconst_local_inds_host_view_type inds_v(inds.data(),maxEntries);
927 nonconst_values_host_view_type vals_v(reinterpret_cast<IST*>(vals.data()),maxEntries);
928 size_t numEntries;
929 this->inputMatrix_->getLocalRowCopy(row, inds_v, vals_v, numEntries);
930 vals.resize(numEntries); inds.resize(numEntries);
931 return StridedRowView(vals, inds);
932 }
933 else
934 {
935 // CMS - This is dangerous and might not work.
936 local_inds_host_view_type colinds;
937 values_host_view_type values;
938 this->inputMatrix_->getLocalRowView(row, colinds, values);
939 return StridedRowView(values, colinds, 1, colinds.size());
940 }
941}
942
943template<class MatrixType, class LocalScalarType>
946{
947 X_localBlocks_.clear();
948 Y_localBlocks_.clear();
949 X_local_ = HostViewLocal();
950 Y_local_ = HostViewLocal();
952}
953
954namespace Details {
955
956//Implementation of Ifpack2::Details::StridedRowView
957template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
959StridedRowView(h_vals_type vals_, h_inds_type inds_, int blockSize_, size_t nnz_)
960 : vals(vals_), inds(inds_), blockSize(blockSize_), nnz(nnz_)
961{}
962
963template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
965StridedRowView(Teuchos::Array<SC>& vals_, Teuchos::Array<LO>& inds_)
966 : vals(), inds(), blockSize(1), nnz(vals_.size())
967{
968 valsCopy.swap(vals_);
969 indsCopy.swap(inds_);
970}
971
972template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
974val(size_t i) const
975{
976 #ifdef HAVE_IFPACK2_DEBUG
977 TEUCHOS_TEST_FOR_EXCEPTION(i >= nnz, std::runtime_error,
978 "Out-of-bounds access into Ifpack2::Container::StridedRowView");
979 #endif
980 if(vals.size() > 0)
981 {
982 if(blockSize == 1)
983 return vals[i];
984 else
985 return vals[i * blockSize];
986 }
987 else
988 return valsCopy[i];
989}
990
991template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
992LocalOrdinal StridedRowView<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
993ind(size_t i) const
994{
995 #ifdef HAVE_IFPACK2_DEBUG
996 TEUCHOS_TEST_FOR_EXCEPTION(i >= nnz, std::runtime_error,
997 "Out-of-bounds access into Ifpack2::Container::StridedRowView");
998 #endif
999 //inds is smaller than vals by a factor of the block size (dofs/node)
1000 if(inds.size() > 0)
1001 {
1002 if(blockSize == 1)
1003 return inds[i];
1004 else
1005 return inds[i / blockSize] * blockSize + i % blockSize;
1006 }
1007 else
1008 return indsCopy[i];
1009}
1010
1011template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1012size_t StridedRowView<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1013size() const
1014{
1015 return nnz;
1016}
1017}
1018
1019}
1020
1021template <class MatrixType>
1022std::ostream& operator<<(std::ostream& os, const Ifpack2::Container<MatrixType>& obj)
1023{
1024 return obj.print(os);
1025}
1026
1027#define IFPACK2_CONTAINER_INSTANT(S,LO,GO,N) \
1028 template class Ifpack2::Container<Tpetra::RowMatrix<S, LO, GO, N>>; \
1029 template class Ifpack2::ContainerImpl<Tpetra::RowMatrix<S, LO, GO, N>, S>; \
1030 template class Ifpack2::Details::StridedRowView<S, LO, GO, N>; \
1031 template std::ostream& operator<< <Tpetra::RowMatrix<S, LO, GO, N>>( \
1032 std::ostream& os, const Ifpack2::Container<Tpetra::RowMatrix<S, LO, GO, N>>& obj);
1033
1034#endif
1035
Declaration and definition of the Ifpack2::Details::MultiVectorLocalGatherScatter class.
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition Ifpack2_Container_decl.hpp:344
LocalScalarType LSC
The internal representation of LocalScalarType in Kokkos::View.
Definition Ifpack2_Container_decl.hpp:363
void DoGSBlock(ConstHostView X, HostView Y, HostView Y2, HostView Resid, SC dampingFactor, LO i) const
Do one step of Gauss-Seidel on block i (used by DoGaussSeidel and DoSGS)
Definition Ifpack2_Container_def.hpp:286
Interface for creating and solving a set of local linear problems.
Definition Ifpack2_Container_decl.hpp:113
virtual ~Container()
Destructor.
Definition Ifpack2_Container_def.hpp:114
bool hasBlockCrs_
Whether the input matrix is a BlockCRS matrix.
Definition Ifpack2_Container_decl.hpp:310
bool IsParallel_
Whether the problem is distributed across multiple MPI processes.
Definition Ifpack2_Container_decl.hpp:302
int numBlocks_
The number of blocks (partitions) in the container.
Definition Ifpack2_Container_decl.hpp:292
Teuchos::ArrayView< const LO > getBlockRows(int blockIndex) const
Local indices of the rows of the input matrix that belong to this block.
Definition Ifpack2_Container_def.hpp:118
static std::string getName()
Definition Ifpack2_Container_def.hpp:190
int bcrsBlockSize_
If hasBlockCrs_, the number of DOFs per vertex. Otherwise 1.
Definition Ifpack2_Container_decl.hpp:312
Teuchos::RCP< const block_crs_matrix_type > inputBlockMatrix_
The input matrix, dynamic cast to BlockCrsMatrix. May be null.
Definition Ifpack2_Container_decl.hpp:289
typename Kokkos::Details::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition Ifpack2_Container_decl.hpp:135
virtual void DoGSBlock(ConstHostView X, HostView Y, HostView Y2, HostView Resid, SC dampingFactor, LO i) const
Do one step of Gauss-Seidel on block i (used by DoGaussSeidel and DoSGS)
Definition Ifpack2_Container_def.hpp:196
virtual void applyMV(const mv_type &X, mv_type &Y) const
Wrapper for apply with MultiVector.
Definition Ifpack2_Container_def.hpp:176
LO NumLocalRows_
Number of local rows in input matrix.
Definition Ifpack2_Container_decl.hpp:304
bool isInitialized() const
Whether the container has been successfully initialized.
Definition Ifpack2_Container_def.hpp:165
void setBlockSizes(const Teuchos::Array< Teuchos::Array< LO > > &partitions)
Initialize arrays with information about block sizes.
Definition Ifpack2_Container_def.hpp:125
LO scalarsPerRow_
Definition Ifpack2_Container_decl.hpp:317
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition Ifpack2_Container_decl.hpp:296
GO NumGlobalNonzeros_
Number of nonzeros in input matrix.
Definition Ifpack2_Container_decl.hpp:308
Container(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, bool pointIndexed)
Constructor.
Definition Ifpack2_Container_def.hpp:54
virtual std::ostream & print(std::ostream &os) const =0
Print basic information about the container to os.
virtual void weightedApplyMV(const mv_type &X, mv_type &Y, vector_type &W) const
Wrapper for weightedApply with MultiVector.
Definition Ifpack2_Container_def.hpp:183
Teuchos::RCP< const row_matrix_type > inputMatrix_
The input matrix to the constructor.
Definition Ifpack2_Container_decl.hpp:283
bool pointIndexed_
(If hasBlockCrs_) Whether the blocks are described using sub-block row indices instead of full block ...
Definition Ifpack2_Container_decl.hpp:314
GO NumGlobalRows_
Number of global rows in input matrix.
Definition Ifpack2_Container_decl.hpp:306
bool isComputed() const
Whether the container has been successfully computed.
Definition Ifpack2_Container_def.hpp:170
typename mv_type::dual_view_type::t_host HostView
Definition Ifpack2_Container_decl.hpp:139
Implementation detail of Ifpack2::Container subclasses.
Definition Ifpack2_Details_MultiVectorLocalGatherScatter.hpp:85
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition Ifpack2_AdditiveSchwarz_decl.hpp:74
Structure for read-only views of general matrix rows.
Definition Ifpack2_Container_decl.hpp:537
StridedRowView(h_vals_type vals_, h_inds_type inds_, int blockSize_, size_t nnz_)
Constructor for row views (preferred)
Definition Ifpack2_Container_def.hpp:959