Intrepid2
|
Implementation of a general sum factorization algorithm, using a novel approach developed by Roberts, for integration. Uses hierarchical parallelism. More...
#include <Intrepid2_IntegrationToolsDef.hpp>
Public Member Functions | |
F_IntegratePointValueCache (Data< Scalar, DeviceType > integralData, TensorData< Scalar, DeviceType > leftComponent, Data< Scalar, DeviceType > composedTransform, TensorData< Scalar, DeviceType > rightComponent, TensorData< Scalar, DeviceType > cellMeasures, int a_offset, int b_offset, int leftFieldOrdinalOffset, int rightFieldOrdinalOffset) | |
template<size_t maxComponents, size_t numComponents = maxComponents> | |
KOKKOS_INLINE_FUNCTION int | incrementArgument (Kokkos::Array< int, maxComponents > &arguments, const Kokkos::Array< int, maxComponents > &bounds) const |
KOKKOS_INLINE_FUNCTION int | incrementArgument (Kokkos::Array< int, Parameters::MaxTensorComponents > &arguments, const Kokkos::Array< int, Parameters::MaxTensorComponents > &bounds, const int &numComponents) const |
Runtime-sized variant of incrementArgument; used by the approximate flop count computation. | |
template<size_t maxComponents, size_t numComponents = maxComponents> | |
KOKKOS_INLINE_FUNCTION int | nextIncrementResult (const Kokkos::Array< int, maxComponents > &arguments, const Kokkos::Array< int, maxComponents > &bounds) const |
KOKKOS_INLINE_FUNCTION int | nextIncrementResult (const Kokkos::Array< int, Parameters::MaxTensorComponents > &arguments, const Kokkos::Array< int, Parameters::MaxTensorComponents > &bounds, const int &numComponents) const |
Runtime-sized variant of nextIncrementResult; used by the approximate flop count computation. | |
template<size_t maxComponents, size_t numComponents = maxComponents> | |
KOKKOS_INLINE_FUNCTION int | relativeEnumerationIndex (const Kokkos::Array< int, maxComponents > &arguments, const Kokkos::Array< int, maxComponents > &bounds, const int startIndex) const |
template<int rank> | |
KOKKOS_INLINE_FUNCTION enable_if_t< rank==3 &&rank==integralViewRank, Scalar & > | integralViewEntry (const IntegralViewType &integralView, const int &cellDataOrdinal, const int &i, const int &j) const |
template<int rank> | |
KOKKOS_INLINE_FUNCTION enable_if_t< rank==2 &&rank==integralViewRank, Scalar & > | integralViewEntry (const IntegralViewType &integralView, const int &cellDataOrdinal, const int &i, const int &j) const |
KOKKOS_INLINE_FUNCTION void | runSpecialized3 (const TeamMember &teamMember) const |
Hand-coded 3-component version. | |
template<size_t numTensorComponents> | |
KOKKOS_INLINE_FUNCTION void | run (const TeamMember &teamMember) const |
KOKKOS_INLINE_FUNCTION void | operator() (const TeamMember &teamMember) const |
long | approximateFlopCountPerCell () const |
Returns an estimate of the number of floating-point operations per cell (sums, subtractions, multiplications, and divisions each count as one operation). | |
int | teamSize (const int &maxTeamSizeFromKokkos) const |
Returns the team size that should be provided to the policy constructor, based on the Kokkos maximum and the amount of thread parallelism we have available. | |
size_t | team_shmem_size (int numThreads) const |
Provide the shared memory capacity. | |
Private Types | |
using | ExecutionSpace = typename DeviceType::execution_space |
using | TeamPolicy = Kokkos::TeamPolicy<DeviceType> |
using | TeamMember = typename TeamPolicy::member_type |
using | IntegralViewType = Kokkos::View<typename RankExpander<Scalar, integralViewRank>::value_type, DeviceType> |
Private Attributes | |
IntegralViewType | integralView_ |
TensorData< Scalar, DeviceType > | leftComponent_ |
Data< Scalar, DeviceType > | composedTransform_ |
TensorData< Scalar, DeviceType > | rightComponent_ |
TensorData< Scalar, DeviceType > | cellMeasures_ |
int | a_offset_ |
int | b_offset_ |
int | leftComponentSpan_ |
int | rightComponentSpan_ |
int | numTensorComponents_ |
int | leftFieldOrdinalOffset_ |
int | rightFieldOrdinalOffset_ |
size_t | fad_size_output_ = 0 |
Kokkos::Array< int, Parameters::MaxTensorComponents > | leftFieldBounds_ |
Kokkos::Array< int, Parameters::MaxTensorComponents > | rightFieldBounds_ |
Kokkos::Array< int, Parameters::MaxTensorComponents > | pointBounds_ |
int | maxFieldsLeft_ |
int | maxFieldsRight_ |
int | maxPointCount_ |
Implementation of a general sum factorization algorithm, using a novel approach developed by Roberts, for integration. Uses hierarchical parallelism.
F_Integrate, Mora and Demkowicz, and all other approaches we are aware of cache partial sums at intermediate component levels, so their cached values are indexed by component basis ordinals. In contrast, we integrate the first component in its dimension(s) and store values for the integration points in the remaining dimensions, so that our caches are indexed by point ordinals. If there are L_x, L_y, and L_z quadrature points in dimensions x, y, and z, we require a cache of size L_y * L_z + 1 for a 3D, 3-component integral. The standard approach requires a cache of size (p_x + 1) * (p_y + 1). So long as one is not over-integrating by too much, these sizes are about the same. The real advantage of our approach is (we expect) that it improves data locality.
Definition at line 993 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 995 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 999 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 997 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 996 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Definition at line 1025 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Returns an estimate of the number of floating-point operations per cell (sums, subtractions, multiplications, and divisions each count as one operation).
Definition at line 1587 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Definition at line 1077 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Runtime-sized variant of incrementArgument; used by the approximate flop count computation.
Definition at line 1094 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Definition at line 1165 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Definition at line 1173 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Definition at line 1112 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Runtime-sized variant of nextIncrementResult; used by the approximate flop count computation.
Definition at line 1127 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Definition at line 1568 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Definition at line 1143 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Definition at line 1429 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Hand-coded 3-component version.
Definition at line 1180 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Provide the shared memory capacity.
Definition at line 1743 of file Intrepid2_IntegrationToolsDef.hpp.
|
inline |
Returns the team size that should be provided to the policy constructor, based on the Kokkos maximum and the amount of thread parallelism we have available.
Definition at line 1734 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1005 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1006 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1004 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1002 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1013 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1000 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1001 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1007 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1017 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1010 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1021 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1022 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1023 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1009 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1019 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1003 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1008 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1018 of file Intrepid2_IntegrationToolsDef.hpp.
|
private |
Definition at line 1011 of file Intrepid2_IntegrationToolsDef.hpp.