Intrepid2
Intrepid2_Kernels.hpp
Go to the documentation of this file.
1// @HEADER
2// ************************************************************************
3//
4// Intrepid2 Package
5// Copyright (2007) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Kyungjoo Kim (kyukim@sandia.gov), or
38// Mauro Perego (mperego@sandia.gov)
39//
40// ************************************************************************
41// @HEADER
42
48#ifndef __INTREPID2_KERNELS_HPP__
49#define __INTREPID2_KERNELS_HPP__
50
51#include "Intrepid2_ConfigDefs.hpp"
52
53#include "Intrepid2_Types.hpp"
54#include "Intrepid2_Utils.hpp"
55
56#include "Kokkos_Core.hpp"
57
58namespace Intrepid2 {
59
60 namespace Kernels {
61
62 struct Serial {
63 template<typename ScalarType,
64 typename AViewType,
65 typename BViewType,
66 typename CViewType>
67 KOKKOS_INLINE_FUNCTION
68 static void
69 gemm_trans_notrans(const ScalarType alpha,
70 const AViewType &A,
71 const BViewType &B,
72 const ScalarType beta,
73 const CViewType &C) {
74 //C = beta*C + alpha * A'*B
75 const ordinal_type
76 m = C.extent(0),
77 n = C.extent(1),
78 k = B.extent(0);
79
80 for (ordinal_type i=0;i<m;++i)
81 for (ordinal_type j=0;j<n;++j) {
82 C(i,j) *= beta;
83 for (ordinal_type l=0;l<k;++l)
84 C(i,j) += alpha*A(l,i)*B(l,j);
85 }
86 }
87
88 template<typename ScalarType,
89 typename AViewType,
90 typename BViewType,
91 typename CViewType>
92 KOKKOS_INLINE_FUNCTION
93 static void
94 gemm_notrans_trans(const ScalarType alpha,
95 const AViewType &A,
96 const BViewType &B,
97 const ScalarType beta,
98 const CViewType &C) {
99 //C = beta*C + alpha * A*B'
100 const ordinal_type
101 m = C.extent(0),
102 n = C.extent(1),
103 k = A.extent(1);
104
105 for (ordinal_type i=0;i<m;++i)
106 for (ordinal_type j=0;j<n;++j) {
107 C(i,j) *= beta;
108 for (ordinal_type l=0;l<k;++l)
109 C(i,j) += alpha*A(i,l)*B(j,l);
110 }
111 }
112
113 template<typename ScalarType,
114 typename AViewType,
115 typename xViewType,
116 typename yViewType>
117 KOKKOS_INLINE_FUNCTION
118 static void
119 gemv_trans(const ScalarType alpha,
120 const AViewType &A,
121 const xViewType &x,
122 const ScalarType beta,
123 const yViewType &y) {
124 //y = beta*y + alpha * A'*x
125 const ordinal_type
126 m = y.extent(0),
127 n = x.extent(0);
128
129 for (ordinal_type i=0;i<m;++i) {
130 y(i) *= beta;
131 for (ordinal_type j=0;j<n;++j)
132 y(i) += alpha*A(j,i)*x(j);
133 }
134 }
135
136 template<typename ScalarType,
137 typename AViewType,
138 typename xViewType,
139 typename yViewType>
140 KOKKOS_INLINE_FUNCTION
141 static void
142 gemv_notrans(const ScalarType alpha,
143 const AViewType &A,
144 const xViewType &x,
145 const ScalarType beta,
146 const yViewType &y) {
147 //y = beta*y + alpha * A*x
148 const ordinal_type
149 m = y.extent(0),
150 n = x.extent(0);
151
152 for (ordinal_type i=0;i<m;++i) {
153 y(i) *= beta;
154 for (ordinal_type j=0;j<n;++j)
155 y(i) += alpha*A(i,j)*x(j);
156 }
157 }
158
159 template<typename matViewType>
160 KOKKOS_INLINE_FUNCTION
161 static typename matViewType::non_const_value_type
162 determinant(const matViewType &mat) {
163 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != mat.extent(1), "mat should be a square matrix.");
164 INTREPID2_TEST_FOR_ABORT(mat.extent(0) > 3, "Higher dimensions (> 3) are not supported.");
165
166 typename matViewType::non_const_value_type r_val(0);
167 const int m = mat.extent(0);
168 switch (m) {
169 case 1:
170 r_val = mat(0,0);
171 break;
172 case 2:
173 r_val = ( mat(0,0) * mat(1,1) -
174 mat(0,1) * mat(1,0) );
175 break;
176 case 3:
177 r_val = ( mat(0,0) * mat(1,1) * mat(2,2) +
178 mat(1,0) * mat(2,1) * mat(0,2) +
179 mat(2,0) * mat(0,1) * mat(1,2) -
180 mat(2,0) * mat(1,1) * mat(0,2) -
181 mat(0,0) * mat(2,1) * mat(1,2) -
182 mat(1,0) * mat(0,1) * mat(2,2) );
183 break;
184 }
185 return r_val;
186 }
187
188 template<typename matViewType,
189 typename invViewType>
190 KOKKOS_INLINE_FUNCTION
191 static void
192 inverse(const invViewType &inv,
193 const matViewType &mat) {
194 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != mat.extent(1), "mat should be a square matrix.");
195 INTREPID2_TEST_FOR_ABORT(inv.extent(0) != inv.extent(1), "inv should be a square matrix.");
196 INTREPID2_TEST_FOR_ABORT(mat.extent(0) != inv.extent(0), "mat and inv must have the same dimension.");
197 INTREPID2_TEST_FOR_ABORT(mat.extent(0) > 3, "Higher dimensions (> 3) are not supported.");
198 INTREPID2_TEST_FOR_ABORT(mat.data() == inv.data(), "mat and inv must have different data pointer.");
199
200 const auto val = determinant(mat);
201 const int m = mat.extent(0);
202 switch (m) {
203 case 1: {
204 inv(0,0) = 1.0/mat(0,0);
205 break;
206 }
207 case 2: {
208 inv(0,0) = mat(1,1)/val;
209 inv(1,1) = mat(0,0)/val;
210
211 inv(1,0) = - mat(1,0)/val;
212 inv(0,1) = - mat(0,1)/val;
213 break;
214 }
215 case 3: {
216 {
217 const auto val0 = mat(1,1)*mat(2,2) - mat(2,1)*mat(1,2);
218 const auto val1 = - mat(1,0)*mat(2,2) + mat(2,0)*mat(1,2);
219 const auto val2 = mat(1,0)*mat(2,1) - mat(2,0)*mat(1,1);
220
221 inv(0,0) = val0/val;
222 inv(1,0) = val1/val;
223 inv(2,0) = val2/val;
224 }
225 {
226 const auto val0 = mat(2,1)*mat(0,2) - mat(0,1)*mat(2,2);
227 const auto val1 = mat(0,0)*mat(2,2) - mat(2,0)*mat(0,2);
228 const auto val2 = - mat(0,0)*mat(2,1) + mat(2,0)*mat(0,1);
229
230 inv(0,1) = val0/val;
231 inv(1,1) = val1/val;
232 inv(2,1) = val2/val;
233 }
234 {
235 const auto val0 = mat(0,1)*mat(1,2) - mat(1,1)*mat(0,2);
236 const auto val1 = - mat(0,0)*mat(1,2) + mat(1,0)*mat(0,2);
237 const auto val2 = mat(0,0)*mat(1,1) - mat(1,0)*mat(0,1);
238
239 inv(0,2) = val0/val;
240 inv(1,2) = val1/val;
241 inv(2,2) = val2/val;
242 }
243 break;
244 }
245 }
246 }
247
248 template<typename ScalarType,
249 typename xViewType,
250 typename yViewType,
251 typename zViewType>
252 KOKKOS_INLINE_FUNCTION
253 static void
254 z_is_axby(const zViewType &z,
255 const ScalarType alpha,
256 const xViewType &x,
257 const ScalarType beta,
258 const yViewType &y) {
259 //y = beta*y + alpha*x
260 const ordinal_type
261 m = z.extent(0);
262
263 for (ordinal_type i=0;i<m;++i)
264 z(i) = alpha*x(i) + beta*y(i);
265 }
266
267 template<typename AViewType>
268 KOKKOS_INLINE_FUNCTION
269 static double
270 norm(const AViewType &A, const ENorm normType) {
271 typedef typename AViewType::non_const_value_type value_type;
272 const ordinal_type m = A.extent(0), n = A.extent(1);
273 double r_val = 0;
274 switch(normType) {
275 case NORM_TWO:{
276 for (ordinal_type i=0;i<m;++i)
277 for (ordinal_type j=0;j<n;++j)
278 r_val += A.access(i,j)*A.access(i,j);
279 r_val = sqrt(r_val);
280 break;
281 }
282 case NORM_INF:{
283 for (ordinal_type i=0;i<m;++i)
284 for (ordinal_type j=0;j<n;++j) {
285 const value_type current = Util<value_type>::abs(A.access(i,j));
286 r_val = (r_val < current ? current : r_val);
287 }
288 break;
289 }
290 case NORM_ONE:{
291 for (ordinal_type i=0;i<m;++i)
292 for (ordinal_type j=0;j<n;++j)
293 r_val += Util<value_type>::abs(A.access(i,j));
294 break;
295 }
296 default: {
297 Kokkos::abort("norm type is not supported");
298 break;
299 }
300 }
301 return r_val;
302 }
303
304 template<typename dstViewType,
305 typename srcViewType>
306 KOKKOS_INLINE_FUNCTION
307 static void
308 copy(const dstViewType &dst, const srcViewType &src) {
309 if (dst.data() != src.data()) {
310 const ordinal_type m = dst.extent(0), n = dst.extent(1);
311 for (ordinal_type i=0;i<m;++i)
312 for (ordinal_type j=0;j<n;++j)
313 dst.access(i,j) = src.access(i,j);
314 }
315 }
316
317 // y = Ax
318 template<typename yViewType,
319 typename AViewType,
320 typename xViewType>
321 KOKKOS_FORCEINLINE_FUNCTION
322 static void
323 matvec_trans_product_d2( const yViewType &y,
324 const AViewType &A,
325 const xViewType &x ) {
326 y(0) = A(0,0)*x(0) + A(1,0)*x(1);
327 y(1) = A(0,1)*x(0) + A(1,1)*x(1);
328 }
329
330 template<typename yViewType,
331 typename AViewType,
332 typename xViewType>
333 KOKKOS_FORCEINLINE_FUNCTION
334 static void
335 matvec_trans_product_d3( const yViewType &y,
336 const AViewType &A,
337 const xViewType &x ) {
338 y(0) = A(0,0)*x(0) + A(1,0)*x(1) + A(2,0)*x(2);
339 y(1) = A(0,1)*x(0) + A(1,1)*x(1) + A(2,1)*x(2);
340 y(2) = A(0,2)*x(0) + A(1,2)*x(1) + A(2,2)*x(2);
341 }
342
343 // y = Ax
344 template<typename yViewType,
345 typename AViewType,
346 typename xViewType>
347 KOKKOS_FORCEINLINE_FUNCTION
348 static void
349 matvec_product_d2( const yViewType &y,
350 const AViewType &A,
351 const xViewType &x ) {
352 y(0) = A(0,0)*x(0) + A(0,1)*x(1);
353 y(1) = A(1,0)*x(0) + A(1,1)*x(1);
354 }
355
356 template<typename yViewType,
357 typename AViewType,
358 typename xViewType>
359 KOKKOS_FORCEINLINE_FUNCTION
360 static void
361 matvec_product_d3( const yViewType &y,
362 const AViewType &A,
363 const xViewType &x ) {
364 y(0) = A(0,0)*x(0) + A(0,1)*x(1) + A(0,2)*x(2);
365 y(1) = A(1,0)*x(0) + A(1,1)*x(1) + A(1,2)*x(2);
366 y(2) = A(2,0)*x(0) + A(2,1)*x(1) + A(2,2)*x(2);
367 }
368
369 template<typename yViewType,
370 typename AViewType,
371 typename xViewType>
372 KOKKOS_FORCEINLINE_FUNCTION
373 static void
374 matvec_product( const yViewType &y,
375 const AViewType &A,
376 const xViewType &x ) {
377 switch (y.extent(0)) {
378 case 2: matvec_product_d2(y, A, x); break;
379 case 3: matvec_product_d3(y, A, x); break;
380 default: {
381 INTREPID2_TEST_FOR_ABORT(true, "matvec only support dimension 2 and 3 (consider to use gemv interface).");
382 break;
383 }
384 }
385 }
386
387 template<typename zViewType,
388 typename xViewType,
389 typename yViewType>
390 KOKKOS_FORCEINLINE_FUNCTION
391 static void
392 vector_product_d2( const zViewType &z,
393 const xViewType &x,
394 const yViewType &y ) {
395 z(0) = x(0)*y(1) - x(1)*y(0);
396 }
397
398 template<typename zViewType,
399 typename xViewType,
400 typename yViewType>
401 KOKKOS_FORCEINLINE_FUNCTION
402 static void
403 vector_product_d3( const zViewType &z,
404 const xViewType &x,
405 const yViewType &y ) {
406 z(0) = x(1)*y(2) - x(2)*y(1);
407 z(1) = x(2)*y(0) - x(0)*y(2);
408 z(2) = x(0)*y(1) - x(1)*y(0);
409 }
410
411
412 };
413
414
415
416 template<typename xViewType,
417 typename yViewType>
418 KOKKOS_FORCEINLINE_FUNCTION
419 static typename xViewType::value_type
420 dot( const xViewType x,
421 const yViewType y ) {
422 typename xViewType::value_type r_val(0);
423 ordinal_type i = 0, iend = x.extent(0);
424 for (;i<iend;i+=4)
425 r_val += ( x(i )*y(i ) +
426 x(i+1)*y(i+1) +
427 x(i+2)*y(i+2) +
428 x(i+3)*y(i+3) );
429 for (;i<iend;++i)
430 r_val += x(i)*y(i);
431
432 return r_val;
433 }
434
435 template<typename xViewType,
436 typename yViewType>
437 KOKKOS_FORCEINLINE_FUNCTION
438 static typename xViewType::value_type
439 dot_d2( const xViewType x,
440 const yViewType y ) {
441 return ( x(0)*y(0) + x(1)*y(1) );
442 }
443
444 template<typename xViewType,
445 typename yViewType>
446 KOKKOS_FORCEINLINE_FUNCTION
447 static typename xViewType::value_type
448 dot_d3( const xViewType x,
449 const yViewType y ) {
450 return ( x(0)*y(0) + x(1)*y(1) + x(2)*y(2) );
451 }
452
453 template<typename AViewType,
454 typename alphaScalarType>
455 KOKKOS_FORCEINLINE_FUNCTION
456 static void
457 scale_mat( AViewType &A,
458 const alphaScalarType alpha ) {
459 const ordinal_type
460 iend = A.extent(0),
461 jend = A.extent(1);
462
463 for (ordinal_type i=0;i<iend;++i)
464 for (ordinal_type j=0;j<jend;++j)
465 A(i,j) *= alpha;
466 }
467
468 template<typename AViewType,
469 typename alphaScalarType>
470 KOKKOS_FORCEINLINE_FUNCTION
471 static void
472 inv_scale_mat( AViewType &A,
473 const alphaScalarType alpha ) {
474 const ordinal_type
475 iend = A.extent(0),
476 jend = A.extent(1);
477
478 for (ordinal_type i=0;i<iend;++i)
479 for (ordinal_type j=0;j<jend;++j)
480 A(i,j) /= alpha;
481 }
482
483 template<typename AViewType,
484 typename alphaScalarType,
485 typename BViewType>
486 KOKKOS_FORCEINLINE_FUNCTION
487 static void
488 scalar_mult_mat( AViewType &A,
489 const alphaScalarType alpha,
490 const BViewType &B ) {
491 const ordinal_type
492 iend = A.extent(0),
493 jend = A.extent(1);
494
495 for (ordinal_type i=0;i<iend;++i)
496 for (ordinal_type j=0;j<jend;++j)
497 A(i,j) = alpha*B(i,j);
498 }
499
500 template<typename AViewType,
501 typename alphaScalarType,
502 typename BViewType>
503 KOKKOS_FORCEINLINE_FUNCTION
504 static void
505 inv_scalar_mult_mat( AViewType &A,
506 const alphaScalarType alpha,
507 const BViewType &B ) {
508 const ordinal_type
509 iend = A.extent(0),
510 jend = A.extent(1);
511
512 for (ordinal_type i=0;i<iend;++i)
513 for (ordinal_type j=0;j<jend;++j)
514 A(i,j) = B(i,j)/alpha;
515 }
516
517 }
518}
519
520#endif
Contains definitions of custom data types in Intrepid2.
Header function for Intrepid2::Util class and other utility functions.
small utility functions