Amesos Package Browser (Single Doxygen Collection) Development
Loading...
Searching...
No Matches
Amesos_Scalapack.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Amesos: Direct Sparse Solver Package
5// Copyright (2004) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
25//
26// ***********************************************************************
27// @HEADER
28
29#include "Amesos_Scalapack.h"
30#include "Epetra_Map.h"
31#include "Epetra_Import.h"
32#include "Epetra_Export.h"
33#include "Epetra_CrsMatrix.h"
34#include "Epetra_Vector.h"
35#include "Epetra_Util.h"
36#include "CrsMatrixTranspose.h"
38// #include "Epetra_LAPACK_wrappers.h"
39
40
41//
42// pcolnum computes the process column which a given column belongs to
43// in the ScaLAPACK 2D grid.
44//
45inline int pcolnum( int j, int nb, int npcol ) {
46 return ((j/nb)%npcol) ; }
47
48
49//=============================================================================
50Amesos_Scalapack::Amesos_Scalapack(const Epetra_LinearProblem &prob ):
51 ictxt_(-1313),
52 ScaLAPACK1DMap_(0),
53 ScaLAPACK1DMatrix_(0),
54 VectorMap_(0),
55 UseTranspose_(false), // Overwritten by call to SetParameters below
56 Problem_(&prob),
57 ConTime_(0.0),
58 SymTime_(0.0),
59 NumTime_(0.0),
60 SolTime_(0.0),
61 VecTime_(0.0),
62 MatTime_(0.0),
63 FatOut_(0),
64 Time_(0)
65{
66 Teuchos::ParameterList ParamList ;
67 SetParameters( ParamList ) ;
68}
69
70//=============================================================================
72
73 if ( ScaLAPACK1DMap_ ) delete ScaLAPACK1DMap_ ;
75 // print out some information if required by the user
76 if( (verbose_ && PrintTiming_) || verbose_ == 2 ) PrintTiming();
77 if( (verbose_ && PrintStatus_) || verbose_ == 2 ) PrintStatus();
78
79 if( Time_ ) delete Time_;
80 if( FatOut_ ) delete FatOut_;
81 if( VectorMap_ ) delete VectorMap_;
82}
83// See pre and post conditions in Amesos_Scalapack.h
84
85//
86//
87// Distribution of the matrix:
88// Amesos_Scalapack uses one of two data distributions:
89// 1) A 1D blocked data distribution in which each row is assigned
90// to a single process. The first (n/p) rows are assigned to
91// the first process and the i^th (n/p) rows are assigned to
92// process i. A^T X = B is computed.
93// 2) A 2D data distribution (A X = B is computed)
94//
95// The 1D data distribution should be reasonably efficient for the matrices
96// that we expect to deal with, i.e. n on the order of 4,000
97// with up to 16 processes. For n >= 10,000 we should switch to
98// a 2D block-cyclis data distribution.
99//
100// Distribution of the vector(s)
101// 1) 1D data distribution
102// ScaLAPACK requires that the vectors be distributed in the same manner
103// as the matrices. Since PDGETRF factors the transpose of the matrix,
104// using NPROW=1, the vectors must also be distributed with NPROW=1, i.e.
105// each vector fully owned by a particular process. And, the first nb
106// vectors belong on the first process.
107//
108// The easiest way to deal with this is to limit ourselves to nb right hand
109// sides at a time (this is not a significant limitation as nb >= n/p )
110// and using our basic heuristic for the number of processes to use
111// nb >= min(200,n) as well.
112//
113// Limiting the number of right hand sides to <= nb means that all right hand
114// sides are stored on process 0. Hence, they can be treated as a serial
115// matrix of vectors.
116//
117// 2) 2D data distribution
118// If we restrict ourselves to nb (typically 32) right hand sides,
119// we can use a simple epetra exporter to export the vector a single
120// ScaLAPACK process column (i.e. to nprow processes)
121//
122
123//
125
126 if( debug_ == 1 ) std::cout << "Entering `RedistributeA()'" << std::endl;
127
128 Time_->ResetStartTime();
129
130 Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
131 EPETRA_CHK_ERR( RowMatrixA == 0 ) ;
132
133 const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ;
134 int NumberOfProcesses = Comm().NumProc() ;
135
136 //
137 // Compute a uniform distribution as ScaLAPACK would want it
138 // MyFirstElement - The first element which this processor would have
139 // NumExpectedElemetns - The number of elements which this processor would have
140 //
141
142 int NumRows_ = RowMatrixA->NumGlobalRows() ;
143 int NumColumns_ = RowMatrixA->NumGlobalCols() ;
144 if ( MaxProcesses_ > 0 ) {
145 NumberOfProcesses = EPETRA_MIN( NumberOfProcesses, MaxProcesses_ ) ;
146 }
147 else {
148 int ProcessNumHeuristic = (1+NumRows_/200)*(1+NumRows_/200);
149 NumberOfProcesses = EPETRA_MIN( NumberOfProcesses, ProcessNumHeuristic );
150 }
151
152 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:171" << std::endl;
153 //
154 // Create the ScaLAPACK data distribution.
155 // The TwoD data distribution is created in a completely different
156 // manner and is not transposed (whereas the SaLAPACK 1D data
157 // distribution was transposed)
158 //
159 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:163" << std::endl;
160 Comm().Barrier();
161 if ( TwoD_distribution_ ) {
162 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:166" << std::endl;
163 Comm().Barrier();
164 npcol_ = EPETRA_MIN( NumberOfProcesses,
165 EPETRA_MAX ( 2, (int) sqrt( NumberOfProcesses * 0.5 ) ) ) ;
166 nprow_ = NumberOfProcesses / npcol_ ;
167
168 //
169 // Create the map for FatA - our first intermediate matrix
170 //
171 int NumMyElements = RowMatrixA->RowMatrixRowMap().NumMyElements() ;
172 std::vector<int> MyGlobalElements( NumMyElements );
173 RowMatrixA->RowMatrixRowMap().MyGlobalElements( &MyGlobalElements[0] ) ;
174
175 int NumMyColumns = RowMatrixA->RowMatrixColMap().NumMyElements() ;
176 std::vector<int> MyGlobalColumns( NumMyColumns );
177 RowMatrixA->RowMatrixColMap().MyGlobalElements( &MyGlobalColumns[0] ) ;
178
179 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:194" << std::endl;
180
181 std::vector<int> MyFatElements( NumMyElements * npcol_ );
182
183 for( int LocalRow=0; LocalRow<NumMyElements; LocalRow++ ) {
184 for (int i = 0 ; i < npcol_; i++ ){
185 MyFatElements[LocalRow*npcol_+i] = MyGlobalElements[LocalRow]*npcol_+i;
186 }
187 }
188
189 Epetra_Map FatInMap( npcol_*NumRows_, NumMyElements*npcol_,
190 &MyFatElements[0], 0, Comm() );
191
192 //
193 // Create FatIn, our first intermediate matrix
194 //
195 Epetra_CrsMatrix FatIn( Copy, FatInMap, 0 );
196
197
198 std::vector<std::vector<int> > FatColumnIndices(npcol_,std::vector<int>(1));
199 std::vector<std::vector<double> > FatMatrixValues(npcol_,std::vector<double>(1));
200 std::vector<int> FatRowPtrs(npcol_); // A FatRowPtrs[i] = the number
201 // of entries in local row LocalRow*npcol_ + i
202
203 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:219" << std::endl;
204 //
207 if ( iam_ >= nprow_ * npcol_ ) {
208 myprow_ = nprow_;
209 mypcol_ = npcol_;
210 }
211 // Each row is split into npcol_ rows, with each of the
212 // new rows containing only those elements belonging to
213 // its process column (in the ScaLAPACK 2D process grid)
214 //
215 int MaxNumIndices = RowMatrixA->MaxNumEntries();
216 int NumIndices;
217 std::vector<int> ColumnIndices(MaxNumIndices);
218 std::vector<double> MatrixValues(MaxNumIndices);
219
220 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:232 NumMyElements = "
221 << NumMyElements
222 << std::endl;
223
224 nb_ = grid_nb_;
225
226 for( int LocalRow=0; LocalRow<NumMyElements; ++LocalRow ) {
227
228 RowMatrixA->ExtractMyRowCopy( LocalRow,
229 MaxNumIndices,
230 NumIndices,
231 &MatrixValues[0],
232 &ColumnIndices[0] );
233
234 for (int i=0; i<npcol_; i++ ) FatRowPtrs[i] = 0 ;
235
236 //
237 // Deal the individual matrix entries out to the row owned by
238 // the process to which this matrix entry will belong.
239 //
240 for( int i=0 ; i<NumIndices ; ++i ) {
241 int GlobalCol = MyGlobalColumns[ ColumnIndices[i] ];
242 int pcol_i = pcolnum( GlobalCol, nb_, npcol_ ) ;
243 if ( FatRowPtrs[ pcol_i ]+1 >= FatColumnIndices[ pcol_i ].size() ) {
244 FatColumnIndices[ pcol_i ]. resize( 2 * FatRowPtrs[ pcol_i ]+1 );
245 FatMatrixValues[ pcol_i ]. resize( 2 * FatRowPtrs[ pcol_i ]+1 );
246 }
247 FatColumnIndices[pcol_i][FatRowPtrs[pcol_i]] = GlobalCol ;
248 FatMatrixValues[pcol_i][FatRowPtrs[pcol_i]] = MatrixValues[i];
249
250 FatRowPtrs[ pcol_i ]++;
251 }
252
253 //
254 // Insert each of the npcol_ rows individually
255 //
256 for ( int pcol_i = 0 ; pcol_i < npcol_ ; pcol_i++ ) {
257 FatIn.InsertGlobalValues( MyGlobalElements[LocalRow]*npcol_ + pcol_i,
258 FatRowPtrs[ pcol_i ],
259 &FatMatrixValues[ pcol_i ][0],
260 &FatColumnIndices[ pcol_i ][0] );
261 }
262 }
263 FatIn.FillComplete( false );
264
265 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:260" << std::endl;
266 if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:265B"
267 << " iam_ = " << iam_
268 << " nb_ = " << nb_
269 << " nprow_ = " << nprow_
270 << " npcol_ = " << npcol_
271 << std::endl;
272
273 //
274 // Compute the map for our second intermediate matrix, FatOut
275 //
276 // Compute directly
277 int UniformRows = ( NumRows_ / ( nprow_ * nb_ ) ) * nb_ ;
278 int AllExcessRows = NumRows_ - UniformRows * nprow_ ;
279 int OurExcessRows = EPETRA_MIN( nb_, AllExcessRows - ( myprow_ * nb_ ) ) ;
280 OurExcessRows = EPETRA_MAX( 0, OurExcessRows );
281 NumOurRows_ = UniformRows + OurExcessRows ;
282
283 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:277" << std::endl;
284 int UniformColumns = ( NumColumns_ / ( npcol_ * nb_ ) ) * nb_ ;
285 int AllExcessColumns = NumColumns_ - UniformColumns * npcol_ ;
286 int OurExcessColumns = EPETRA_MIN( nb_, AllExcessColumns - ( mypcol_ * nb_ ) ) ;
287 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:281" << std::endl;
288 OurExcessColumns = EPETRA_MAX( 0, OurExcessColumns );
289 NumOurColumns_ = UniformColumns + OurExcessColumns ;
290
291 if ( iam_ >= nprow_ * npcol_ ) {
292 UniformRows = 0;
293 NumOurRows_ = 0;
294 NumOurColumns_ = 0;
295 }
296
297 Comm().Barrier();
298
299 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:295" << std::endl;
300#if 0
301 // Compute using ScaLAPACK's numroc routine, assert agreement
302 int izero = 0; // All matrices start at process 0
303 int NumRocSays = numroc_( &NumRows_, &nb_, &myprow_, &izero, &nprow_ );
304 assert( NumOurRows_ == NumRocSays );
305#endif
306 //
307 // Compute the rows which this process row owns in the ScaLAPACK 2D
308 // process grid.
309 //
310 std::vector<int> AllOurRows(NumOurRows_);
311
312 int RowIndex = 0 ;
313 int BlockRow = 0 ;
314 for ( ; BlockRow < UniformRows / nb_ ; BlockRow++ ) {
315 for ( int RowOffset = 0; RowOffset < nb_ ; RowOffset++ ) {
316 AllOurRows[RowIndex++] = BlockRow*nb_*nprow_ + myprow_*nb_ + RowOffset ;
317 }
318 }
319 Comm().Barrier();
320 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:315" << std::endl;
321 assert ( BlockRow == UniformRows / nb_ ) ;
322 for ( int RowOffset = 0; RowOffset < OurExcessRows ; RowOffset++ ) {
323 AllOurRows[RowIndex++] = BlockRow*nb_*nprow_ + myprow_*nb_ + RowOffset ;
324 }
325 assert( RowIndex == NumOurRows_ );
326 //
327 // Distribute those rows amongst all the processes in that process row
328 // This is an artificial distribution with the following properties:
329 // 1) It is a 1D data distribution (each row belogs entirely to
330 // a single process
331 // 2) All data which will eventually belong to a given process row,
332 // is entirely contained within the processes in that row.
333 //
334
335 Comm().Barrier();
336 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:312" << std::endl;
337 //
338 // Compute MyRows directly
339 //
340 std::vector<int>MyRows(NumOurRows_);
341 RowIndex = 0 ;
342 BlockRow = 0 ;
343 for ( ; BlockRow < UniformRows / nb_ ; BlockRow++ ) {
344 for ( int RowOffset = 0; RowOffset < nb_ ; RowOffset++ ) {
345 MyRows[RowIndex++] = BlockRow*nb_*nprow_*npcol_ +
346 myprow_*nb_*npcol_ + RowOffset*npcol_ + mypcol_ ;
347 }
348 }
349
350 Comm().Barrier();
351 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:326" << std::endl;
352
353 assert ( BlockRow == UniformRows / nb_ ) ;
354 for ( int RowOffset = 0; RowOffset < OurExcessRows ; RowOffset++ ) {
355 MyRows[RowIndex++] = BlockRow*nb_*nprow_*npcol_ +
356 myprow_*nb_*npcol_ + RowOffset*npcol_ + mypcol_ ;
357 }
358
359 Comm().Barrier();
360 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:334" << std::endl;
361 Comm().Barrier();
362
363 for (int i=0; i < NumOurRows_; i++ ) {
364 assert( MyRows[i] == AllOurRows[i]*npcol_+mypcol_ );
365 }
366
367 Comm().Barrier();
368 if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:340"
369 << " iam_ = " << iam_
370 << " myprow_ = " << myprow_
371 << " mypcol_ = " << mypcol_
372 << " NumRows_ = " << NumRows_
373 << " NumOurRows_ = " << NumOurRows_
374 << std::endl;
375
376 Comm().Barrier();
377 Epetra_Map FatOutMap( npcol_*NumRows_, NumOurRows_, &MyRows[0], 0, Comm() );
378
379 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:344" << std::endl;
380 Comm().Barrier();
381
382 if ( FatOut_ ) delete FatOut_ ;
383 FatOut_ = new Epetra_CrsMatrix( Copy, FatOutMap, 0 ) ;
384
385 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:348" << std::endl;
386
387 Epetra_Export ExportToFatOut( FatInMap, FatOutMap ) ;
388
389 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:360" << std::endl;
390
391 FatOut_->Export( FatIn, ExportToFatOut, Add );
392 FatOut_->FillComplete( false );
393
394 //
395 // Create a map to allow us to redistribute the vectors X and B
396 //
397 Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
398 const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ;
399 assert( NumGlobalElements_ == OriginalMap.NumGlobalElements() ) ;
400 int NumMyVecElements = 0 ;
401 if ( mypcol_ == 0 ) {
402 NumMyVecElements = NumOurRows_;
403 }
404
405 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:385" << std::endl;
406
407 if (VectorMap_) { delete VectorMap_ ; VectorMap_ = 0 ; }
408 VectorMap_ = new Epetra_Map( NumGlobalElements_,
409 NumMyVecElements,
410 &AllOurRows[0],
411 0,
412 Comm() );
413 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:393 debug_ = "
414 << debug_ << std::endl;
415
416 } else {
417 nprow_ = 1 ;
418 npcol_ = NumberOfProcesses / nprow_ ;
419 assert ( nprow_ * npcol_ == NumberOfProcesses ) ;
420
421 m_per_p_ = ( NumRows_ + NumberOfProcesses - 1 ) / NumberOfProcesses ;
422 int MyFirstElement = EPETRA_MIN( iam_ * m_per_p_, NumRows_ ) ;
423 int MyFirstNonElement = EPETRA_MIN( (iam_+1) * m_per_p_, NumRows_ ) ;
424 int NumExpectedElements = MyFirstNonElement - MyFirstElement ;
425
426 assert( NumRows_ == RowMatrixA->NumGlobalRows() ) ;
427 if ( ScaLAPACK1DMap_ ) delete( ScaLAPACK1DMap_ ) ;
428 ScaLAPACK1DMap_ = new Epetra_Map( NumRows_, NumExpectedElements, 0, Comm() );
429 if ( ScaLAPACK1DMatrix_ ) delete( ScaLAPACK1DMatrix_ ) ;
430 ScaLAPACK1DMatrix_ = new Epetra_CrsMatrix(Copy, *ScaLAPACK1DMap_, 0);
431 Epetra_Export ExportToScaLAPACK1D_( OriginalMap, *ScaLAPACK1DMap_);
432
433 ScaLAPACK1DMatrix_->Export( *RowMatrixA, ExportToScaLAPACK1D_, Add );
434
435 ScaLAPACK1DMatrix_->FillComplete( false ) ;
436 }
437 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:417 debug_ = "
438 << debug_ << std::endl;
439 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:402"
440 << " nprow_ = " << nprow_
441 << " npcol_ = " << npcol_ << std::endl ;
442 int info;
443 const int zero = 0 ;
444 if ( ictxt_ == -1313 ) {
445 ictxt_ = 0 ;
446 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:408" << std::endl;
448 }
449 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:410A" << std::endl;
450
451 int nprow;
452 int npcol;
453 int myrow;
454 int mycol;
455 BLACS_GRIDINFO_F77(&ictxt_, &nprow, &npcol, &myrow, &mycol) ;
456 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "iam_ = " << iam_ << " Amesos_Scalapack.cpp:410" << std::endl;
457 if ( iam_ < nprow_ * npcol_ ) {
458 assert( nprow == nprow_ ) ;
459 if ( npcol != npcol_ ) std::cout << "Amesos_Scalapack.cpp:430 npcol = " <<
460 npcol << " npcol_ = " << npcol_ << std::endl ;
461 assert( npcol == npcol_ ) ;
462 if ( TwoD_distribution_ ) {
463 assert( myrow == myprow_ ) ;
464 assert( mycol == mypcol_ ) ;
465 lda_ = EPETRA_MAX(1,NumOurRows_) ;
466 } else {
467 assert( myrow == 0 ) ;
468 assert( mycol == iam_ ) ;
469 nb_ = m_per_p_;
470 lda_ = EPETRA_MAX(1,NumGlobalElements_);
471 }
472 if ( debug_ == 1) std::cout << "iam_ = " << iam_
473 << "Amesos_Scalapack.cpp: " << __LINE__
474 << " TwoD_distribution_ = " << TwoD_distribution_
475 << " NumGlobalElements_ = " << NumGlobalElements_
476 << " debug_ = " << debug_
477 << " nb_ = " << nb_
478 << " lda_ = " << lda_
479 << " nprow_ = " << nprow_
480 << " npcol_ = " << npcol_
481 << " myprow_ = " << myprow_
482 << " mypcol_ = " << mypcol_
483 << " iam_ = " << iam_ << std::endl ;
488 &nb_,
489 &nb_,
490 &zero,
491 &zero,
492 &ictxt_,
493 &lda_,
494 &info) ;
495 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:441" << std::endl;
496 assert( info == 0 ) ;
497 } else {
498 DescA_[0] = -13;
499 if ( debug_ == 1) std::cout << "iam_ = " << iam_ << "Amesos_Scalapack.cpp:458 nprow = " << nprow << std::endl;
500 assert( nprow == -1 ) ;
501 }
502
503 if ( debug_ == 1) std::cout << "Amesos_Scalapack.cpp:446" << std::endl;
504 MatTime_ += Time_->ElapsedTime();
505
506 return 0;
507}
508
509
511
512 //
513 // Convert matrix and vector to the form that Scalapack expects
514 // ScaLAPACK accepts the matrix to be in any 2D block-cyclic form
515 //
516 // Amesos_ScaLAPACK uses one of two 2D data distributions:
517 // a simple 1D non-cyclic data distribution with npcol= 1, or a
518 // full 2D block-cyclic data distribution.
519 //
520 // 2D data distribvution:
521 // Because the Epetra export operation is oriented toward a 1D
522 // data distribution in which each row is entirely stored on
523 // a single process, we create two intermediate matrices: FatIn and
524 // FatOut, both of which have dimension:
525 // NumGlobalElements * nprow by NumGlobalElements
526 // This allows each row of FatOut to be owned by a single process.
527 // The larger dimension does not significantly increase the
528 // storage requirements and allows the export operation to be
529 // efficient.
530 //
531 // 1D data distribution:
532 // We have chosen the simplest 2D block-cyclic form, a 1D blocked (not-cyclic)
533 // data distribution, for the matrix A.
534 // We use the same distribution for the multivectors X and B. However,
535 // except for very large numbers of right hand sides, this places all of X and B
536 // on process 0, making it effectively a serial matrix.
537 //
538 // For now, we simply treat X and B as serial matrices (as viewed from epetra)
539 // though ScaLAPACK treats them as distributed matrices.
540 //
541
542 if( debug_ == 1 ) std::cout << "Entering `ConvertToScalapack()'" << std::endl;
543
544 Time_->ResetStartTime();
545
546 if ( iam_ < nprow_ * npcol_ ) {
547 if ( TwoD_distribution_ ) {
548
550 for ( int i = 0 ; i < (int)DenseA_.size() ; i++ ) DenseA_[i] = 0 ;
551 assert( lda_ == EPETRA_MAX(1,NumOurRows_) ) ;
552 assert( DescA_[8] == lda_ ) ;
553
554 int NzThisRow ;
555 int MyRow;
556
557 double *RowValues;
558 int *ColIndices;
559 int MaxNumEntries = FatOut_->MaxNumEntries();
560
561 std::vector<int>ColIndicesV(MaxNumEntries);
562 std::vector<double>RowValuesV(MaxNumEntries);
563
564 int NumMyElements = FatOut_->NumMyRows() ;
565 for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) {
567 ExtractMyRowView( MyRow, NzThisRow, RowValues,
568 ColIndices ) != 0 ) ;
569 //
570 // The following eight lines are just a sanity check on MyRow:
571 //
572 int MyGlobalRow = FatOut_->GRID( MyRow );
573 assert( MyGlobalRow%npcol_ == mypcol_ ) ; // I should only own rows belonging to my processor column
574 int MyTrueRow = MyGlobalRow/npcol_ ; // This is the original row
575 int UniformRows = ( MyTrueRow / ( nprow_ * nb_ ) ) * nb_ ;
576 int AllExcessRows = MyTrueRow - UniformRows * nprow_ ;
577 int OurExcessRows = AllExcessRows - ( myprow_ * nb_ ) ;
578
579 if ( MyRow != UniformRows + OurExcessRows ) {
580 std::cout << " iam _ = " << iam_
581 << " MyGlobalRow = " << MyGlobalRow
582 << " MyTrueRow = " << MyTrueRow
583 << " UniformRows = " << UniformRows
584 << " AllExcessRows = " << AllExcessRows
585 << " OurExcessRows = " << OurExcessRows
586 << " MyRow = " << MyRow << std::endl ;
587 }
588
589 assert( OurExcessRows >= 0 && OurExcessRows < nb_ );
590 assert( MyRow == UniformRows + OurExcessRows ) ;
591
592 for ( int j = 0; j < NzThisRow; j++ ) {
593 assert( FatOut_->RowMatrixColMap().GID( ColIndices[j] ) ==
594 FatOut_->GCID( ColIndices[j] ) );
595
596 int MyGlobalCol = FatOut_->GCID( ColIndices[j] );
597 assert( (MyGlobalCol/nb_)%npcol_ == mypcol_ ) ;
598 int UniformCols = ( MyGlobalCol / ( npcol_ * nb_ ) ) * nb_ ;
599 int AllExcessCols = MyGlobalCol - UniformCols * npcol_ ;
600 int OurExcessCols = AllExcessCols - ( mypcol_ * nb_ ) ;
601 assert( OurExcessCols >= 0 && OurExcessCols < nb_ );
602 int MyCol = UniformCols + OurExcessCols ;
603
604 DenseA_[ MyCol * lda_ + MyRow ] = RowValues[j] ;
605 }
606 }
607
608 } else {
609
610 int NumMyElements = ScaLAPACK1DMatrix_->NumMyRows() ;
611
612 assert( NumGlobalElements_ ==ScaLAPACK1DMatrix_->NumGlobalRows());
613 assert( NumGlobalElements_ ==ScaLAPACK1DMatrix_->NumGlobalCols());
614 DenseA_.resize( NumGlobalElements_ * NumMyElements ) ;
615 for ( int i = 0 ; i < (int)DenseA_.size() ; i++ ) DenseA_[i] = 0 ;
616
617 int NzThisRow ;
618 int MyRow;
619
620 double *RowValues;
621 int *ColIndices;
622 int MaxNumEntries = ScaLAPACK1DMatrix_->MaxNumEntries();
623
624 assert( DescA_[8] == lda_ ) ; // Double check Lda
625 std::vector<int>ColIndicesV(MaxNumEntries);
626 std::vector<double>RowValuesV(MaxNumEntries);
627
628 for ( MyRow = 0; MyRow < NumMyElements ; MyRow++ ) {
630 ExtractMyRowView( MyRow, NzThisRow, RowValues,
631 ColIndices ) != 0 ) ;
632
633 for ( int j = 0; j < NzThisRow; j++ ) {
634 DenseA_[ ( ScaLAPACK1DMatrix_->RowMatrixColMap().GID( ColIndices[j] ) )
635 + MyRow * NumGlobalElements_ ] = RowValues[j] ;
636 }
637 }
638 //
639 // Create a map to allow us to redistribute the vectors X and B
640 //
641 Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
642 const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ;
643 assert( NumGlobalElements_ == OriginalMap.NumGlobalElements() ) ;
644 int NumMyElements_ = 0 ;
645 if (iam_==0) NumMyElements_ = NumGlobalElements_;
646
647 if (VectorMap_) { delete VectorMap_ ; VectorMap_ = 0 ; }
648 VectorMap_ = new Epetra_Map( NumGlobalElements_, NumMyElements_, 0, Comm() );
649 }
650 }
651 ConTime_ += Time_->ElapsedTime();
652
653 return 0;
654}
655
656
657int Amesos_Scalapack::SetParameters( Teuchos::ParameterList &ParameterList ) {
658
659 if( debug_ == 1 ) std::cout << "Entering `SetParameters()'" << std::endl;
660
661 // retrive general parameters
662 SetStatusParameters( ParameterList );
663
664 SetControlParameters( ParameterList );
665
666 //
667 // We have to set these to their defaults here because user codes
668 // are not guaranteed to have a "Scalapack" parameter list.
669 //
670 TwoD_distribution_ = true;
671 grid_nb_ = 32;
672
673 // Some compilers reject the following cast:
674 // if( &ParameterList == 0 ) return 0;
675
676 // ========================================= //
677 // retrive ScaLAPACK's parameters from list. //
678 // ========================================= //
679
680 // retrive general parameters
681 // check to see if they exist before trying to retrieve them
682 if (ParameterList.isSublist("Scalapack") ) {
683 const Teuchos::ParameterList ScalapackParams = ParameterList.sublist("Scalapack") ;
684 // Fix Bug #3251
685 if ( ScalapackParams.isParameter("2D distribution") )
686 TwoD_distribution_ = ScalapackParams.get<bool>("2D distribution");
687 if ( ScalapackParams.isParameter("grid_nb") )
688 grid_nb_ = ScalapackParams.get<int>("grid_nb");
689 }
690
691 return 0;
692}
693
695
696 if( debug_ == 1 ) std::cout << "Entering `PerformNumericFactorization()'" << std::endl;
697
698 Time_->ResetStartTime();
699
700 Ipiv_.resize(NumGlobalElements_) ;
701 for (int i=0; i <NumGlobalElements_ ; i++)
702 Ipiv_[i] = 0 ; // kludge - just to see if this makes valgrind happy
703
704 if ( false) std::cout << " Amesos_Scalapack.cpp: 711 iam_ = " << iam_ << " DescA = "
705 << DescA_[0] << " "
706 << DescA_[1] << " "
707 << DescA_[2] << " "
708 << DescA_[3] << " "
709 << DescA_[4] << " "
710 << DescA_[5] << " "
711 << DescA_[6] << " "
712 << DescA_[7] << " "
713 << DescA_[8] << " "
714 << std::endl ;
715
716#if 1
717 if( NumGlobalElements_ < 10 && nprow_ == 1 && npcol_ == 1 && debug_ == 1 ) {
718 assert( lda_ == NumGlobalElements_ ) ;
719 std::cout << " DenseA = " << std::endl ;
720 for (int i=0 ; i < NumGlobalElements_; i++ ) {
721 for (int j=0 ; j < NumGlobalElements_; j++ ) {
722 if ( DenseA_[ i+j*lda_ ] < 0 ) {
723 DenseA_[ i+j*lda_ ] *= (1+1e-5) ; // kludge fixme debugxx - just to let vaglrind check to be sure that DenseA is initialized
724 }
725 // std::cout << DenseA_[ i+j*lda_ ] << "\t";
726 }
727 // std::cout << std::endl ;
728 }
729 }
730#endif
731
732 int Ierr[1] ;
733 Ierr[0] = 0 ;
734 const int one = 1 ;
735 if ( iam_ < nprow_ * npcol_ ) {
736 if ( nprow_ * npcol_ == 1 ) {
737 DGETRF_F77(&NumGlobalElements_,
739 &DenseA_[0],
740 &lda_,
741 &Ipiv_[0],
742 Ierr) ;
743 } else {
746 &DenseA_[0],
747 &one,
748 &one,
749 DescA_,
750 &Ipiv_[0],
751 Ierr) ;
752 }
753 }
754
755 if ( debug_ == 1 && Ierr[0] != 0 )
756 std::cout << " Amesos_Scalapack.cpp:738 iam_ = " << iam_
757 << " Ierr = " << Ierr[0] << std::endl ;
758
759 // All processes should return the same error code
760 if ( nprow_ * npcol_ < Comm().NumProc() )
761 Comm().Broadcast( Ierr, 1, 0 ) ;
762
763 NumTime_ += Time_->ElapsedTime();
764
765 return Ierr[0];
766}
767
768
770 bool OK ;
771
772 if ( GetProblem()->GetOperator()->OperatorRangeMap().NumGlobalPoints() !=
773 GetProblem()->GetOperator()->OperatorDomainMap().NumGlobalPoints() ) OK = false;
774 return OK;
775}
776
777
779
780 if( debug_ == 1 ) std::cout << "Entering `PerformSymbolicFactorization()'" << std::endl;
781
782 if( Time_ == 0 ) Time_ = new Epetra_Time( Comm() );
783
785
786 return 0;
787}
788
790
791 if( debug_ == 1 ) std::cout << "Entering `NumericFactorization()'" << std::endl;
792
794
795 iam_ = Comm().MyPID();
796
797 Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
798 const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap() ;
799 NumGlobalElements_ = OriginalMap.NumGlobalElements();
800
801 NumGlobalNonzeros_ = RowMatrixA->NumGlobalNonzeros();
802
805
807}
808
809
811
812 if( debug_ == 1 ) std::cout << "Entering `Solve()'" << std::endl;
813
814 NumSolve_++;
815
816 Epetra_MultiVector *vecX = Problem_->GetLHS() ;
817 Epetra_MultiVector *vecB = Problem_->GetRHS() ;
818
819 //
820 // Compute the number of right hands sides
821 // (and check that X and B have the same shape)
822 //
823 int nrhs;
824 if ( vecX == 0 ) {
825 nrhs = 0 ;
826 EPETRA_CHK_ERR( vecB != 0 ) ;
827 } else {
828 nrhs = vecX->NumVectors() ;
829 EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ;
830 }
831
832 Epetra_MultiVector *ScalapackB =0;
833 Epetra_MultiVector *ScalapackX =0;
834 //
835 // Extract Scalapack versions of X and B
836 //
837 double *ScalapackXvalues ;
838
839 Epetra_RowMatrix *RowMatrixA = dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
840 Time_->ResetStartTime(); // track time to broadcast vectors
841 //
842 // Copy B to the scalapack version of B
843 //
844 const Epetra_Map &OriginalMap = RowMatrixA->RowMatrixRowMap();
845 Epetra_MultiVector *ScalapackXextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ;
846 Epetra_MultiVector *ScalapackBextract = new Epetra_MultiVector( *VectorMap_, nrhs ) ;
847
848 Epetra_Import ImportToScalapack( *VectorMap_, OriginalMap );
849 ScalapackBextract->Import( *vecB, ImportToScalapack, Insert ) ;
850 ScalapackB = ScalapackBextract ;
851 ScalapackX = ScalapackXextract ;
852
853 VecTime_ += Time_->ElapsedTime();
854
855 //
856 // Call SCALAPACKs PDGETRS to perform the solve
857 //
858
859 int DescX[10];
860
861 ScalapackX->Scale(1.0, *ScalapackB) ;
862
863 int ScalapackXlda ;
864
865 Time_->ResetStartTime(); // tract time to solve
866
867 //
868 // Setup DescX
869 //
870
871 if( nrhs > nb_ ) {
872 EPETRA_CHK_ERR( -2 );
873 }
874
875 int Ierr[1] ;
876 Ierr[0] = 0 ;
877 const int zero = 0 ;
878 const int one = 1 ;
879 if ( iam_ < nprow_ * npcol_ ) {
880 EPETRA_CHK_ERR( ScalapackX->ExtractView( &ScalapackXvalues, &ScalapackXlda ) ) ;
881
882 if ( false ) std::cout << "Amesos_Scalapack.cpp: " << __LINE__ << " ScalapackXlda = " << ScalapackXlda
883 << " lda_ = " << lda_
884 << " nprow_ = " << nprow_
885 << " npcol_ = " << npcol_
886 << " myprow_ = " << myprow_
887 << " mypcol_ = " << mypcol_
888 << " iam_ = " << iam_ << std::endl ;
889 if ( TwoD_distribution_ ) assert( mypcol_ >0 || EPETRA_MAX(ScalapackXlda,1) == lda_ ) ;
890
891 DESCINIT_F77(DescX,
893 &nrhs,
894 &nb_,
895 &nb_,
896 &zero,
897 &zero,
898 &ictxt_,
899 &lda_,
900 Ierr ) ;
901 assert( Ierr[0] == 0 ) ;
902
903 //
904 // For the 1D data distribution, we factor the transposed
905 // matrix, hence we must invert the sense of the transposition
906 //
907 char trans = 'N';
908 if ( TwoD_distribution_ ) {
909 if ( UseTranspose() ) trans = 'T' ;
910 } else {
911
912 if ( ! UseTranspose() ) trans = 'T' ;
913 }
914
915 if ( nprow_ * npcol_ == 1 ) {
916 DGETRS_F77(&trans,
918 &nrhs,
919 &DenseA_[0],
920 &lda_,
921 &Ipiv_[0],
922 ScalapackXvalues,
923 &lda_,
924 Ierr ) ;
925 } else {
926 PDGETRS_F77(&trans,
928 &nrhs,
929 &DenseA_[0],
930 &one,
931 &one,
932 DescA_,
933 &Ipiv_[0],
934 ScalapackXvalues,
935 &one,
936 &one,
937 DescX,
938 Ierr ) ;
939 }
940 }
941
942 SolTime_ += Time_->ElapsedTime();
943
944 Time_->ResetStartTime(); // track time to broadcast vectors
945 //
946 // Copy X back to the original vector
947 //
948 Epetra_Import ImportFromScalapack( OriginalMap, *VectorMap_ );
949 vecX->Import( *ScalapackX, ImportFromScalapack, Insert ) ;
950 delete ScalapackBextract ;
951 delete ScalapackXextract ;
952
953 VecTime_ += Time_->ElapsedTime();
954
955 // All processes should return the same error code
956 if ( nprow_ * npcol_ < Comm().NumProc() )
957 Comm().Broadcast( Ierr, 1, 0 ) ;
958
959 // MS // compute vector norms
960 if( ComputeVectorNorms_ == true || verbose_ == 2 ) {
961 double NormLHS, NormRHS;
962 for( int i=0 ; i<nrhs ; ++i ) {
963 EPETRA_CHK_ERR((*vecX)(i)->Norm2(&NormLHS));
964 EPETRA_CHK_ERR((*vecB)(i)->Norm2(&NormRHS));
965 if( verbose_ && Comm().MyPID() == 0 ) {
966 std::cout << "Amesos_Scalapack : vector " << i << ", ||x|| = " << NormLHS
967 << ", ||b|| = " << NormRHS << std::endl;
968 }
969 }
970 }
971
972 // MS // compute true residual
973 if( ComputeTrueResidual_ == true || verbose_ == 2 ) {
974 double Norm;
975 Epetra_MultiVector Ax(vecB->Map(),nrhs);
976 for( int i=0 ; i<nrhs ; ++i ) {
977 (Problem_->GetMatrix()->Multiply(UseTranspose(), *((*vecX)(i)), Ax));
978 (Ax.Update(1.0, *((*vecB)(i)), -1.0));
979 (Ax.Norm2(&Norm));
980
981 if( verbose_ && Comm().MyPID() == 0 ) {
982 std::cout << "Amesos_Scalapack : vector " << i << ", ||Ax - b|| = " << Norm << std::endl;
983 }
984 }
985 }
986
987 return Ierr[0];
988
989}
990
991
992// ================================================ ====== ==== ==== == =
993
995{
996
997 if( iam_ != 0 ) return;
998
999 std::cout << "----------------------------------------------------------------------------" << std::endl;
1000 std::cout << "Amesos_Scalapack : Matrix has " << NumGlobalElements_ << " rows"
1001 << " and " << NumGlobalNonzeros_ << " nonzeros" << std::endl;
1002 std::cout << "Amesos_Scalapack : Nonzero elements per row = "
1003 << 1.0*NumGlobalNonzeros_/NumGlobalElements_ << std::endl;
1004 std::cout << "Amesos_Scalapack : Percentage of nonzero elements = "
1005 << 100.0*NumGlobalNonzeros_/(pow(NumGlobalElements_,2.0)) << std::endl;
1006 std::cout << "Amesos_Scalapack : Use transpose = " << UseTranspose_ << std::endl;
1007 std::cout << "----------------------------------------------------------------------------" << std::endl;
1008
1009 return;
1010
1011}
1012
1013// ================================================ ====== ==== ==== == =
1014
1016{
1017 if( iam_ ) return;
1018
1019 std::cout << "----------------------------------------------------------------------------" << std::endl;
1020 std::cout << "Amesos_Scalapack : Time to convert matrix to ScaLAPACK format = "
1021 << ConTime_ << " (s)" << std::endl;
1022 std::cout << "Amesos_Scalapack : Time to redistribute matrix = "
1023 << MatTime_ << " (s)" << std::endl;
1024 std::cout << "Amesos_Scalapack : Time to redistribute vectors = "
1025 << VecTime_ << " (s)" << std::endl;
1026 std::cout << "Amesos_Scalapack : Number of symbolic factorizations = "
1027 << NumSymbolicFact_ << std::endl;
1028 std::cout << "Amesos_Scalapack : Time for sym fact = "
1029 << SymTime_ << " (s), avg = " << SymTime_/NumSymbolicFact_
1030 << " (s)" << std::endl;
1031 std::cout << "Amesos_Scalapack : Number of numeric factorizations = "
1032 << NumNumericFact_ << std::endl;
1033 std::cout << "Amesos_Scalapack : Time for num fact = "
1034 << NumTime_ << " (s), avg = " << NumTime_/NumNumericFact_
1035 << " (s)" << std::endl;
1036 std::cout << "Amesos_Scalapack : Number of solve phases = "
1037 << NumSolve_ << std::endl;
1038 std::cout << "Amesos_Scalapack : Time for solve = "
1039 << SolTime_ << " (s), avg = " << SolTime_/NumSolve_
1040 << " (s)" << std::endl;
1041 std::cout << "----------------------------------------------------------------------------" << std::endl;
1042
1043 return;
1044}
#define AMESOS_PRINT(variable)
#define DESCINIT_F77
#define PDGETRF_F77
#define BLACS_GRIDINFO_F77
#define SL_INIT_F77
#define PDGETRS_F77
int pcolnum(int j, int nb, int npcol)
#define EPETRA_CHK_ERR(xxx)
void SetControlParameters(const Teuchos::ParameterList &ParameterList)
bool MatrixShapeOK() const
Returns true if SCALAPACK can handle this matrix shape.
Epetra_Time * Time_
void PrintTiming() const
Print timing information.
~Amesos_Scalapack(void)
Amesos_Scalapack Destructor.
int Solve()
Solves A X = B (or AT X = B)
Epetra_Map * ScaLAPACK1DMap_
const Epetra_Comm & Comm() const
Returns a pointer to the Epetra_Comm communicator associated with this matrix.
Epetra_CrsMatrix * FatOut_
std::vector< double > DenseA_
int NumericFactorization()
Performs NumericFactorization on the matrix A.
const Epetra_LinearProblem * Problem_
Epetra_Map * VectorMap_
bool UseTranspose() const
Returns the current UseTranspose setting.
Amesos_Scalapack(const Epetra_LinearProblem &LinearProblem)
Amesos_Scalapack Constructor.
std::vector< int > Ipiv_
const Epetra_LinearProblem * GetProblem() const
Get a pointer to the Problem.
int SetParameters(Teuchos::ParameterList &ParameterList)
Updates internal variables.
int SymbolicFactorization()
Performs SymbolicFactorization on the matrix A.
Epetra_CrsMatrix * ScaLAPACK1DMatrix_
void PrintStatus() const
Print information about the factorization and solution phases.
int debug_
Sets the level of debug_ output.
bool PrintTiming_
If true, prints timing information in the destructor.
bool ComputeVectorNorms_
If true, prints the norms of X and B in Solve().
int verbose_
Toggles the output level.
int NumSymbolicFact_
Number of symbolic factorization phases.
int NumSolve_
Number of solves.
bool ComputeTrueResidual_
If true, computes the true residual in Solve().
bool PrintStatus_
If true, print additional information in the destructor.
void SetStatusParameters(const Teuchos::ParameterList &ParameterList)
int NumNumericFact_
Number of numeric factorization phases.