17 #ifndef dealii_cuda_matrix_free_h 18 #define dealii_cuda_matrix_free_h 22 #ifdef DEAL_II_COMPILER_CUDA_AWARE 48 template <
int dim,
typename Number>
79 template <
int dim,
typename Number =
double>
110 const bool use_coloring =
false,
111 const bool overlap_communication_computation =
false)
112 : parallelization_scheme(parallelization_scheme)
113 , mapping_update_flags(mapping_update_flags)
114 , use_coloring(use_coloring)
115 , overlap_communication_computation(overlap_communication_computation)
117 # ifndef DEAL_II_MPI_WITH_CUDA_SUPPORT 119 overlap_communication_computation ==
false,
121 "Overlapping communication and computation requires CUDA-aware MPI."));
123 if (overlap_communication_computation ==
true)
125 use_coloring ==
false || overlap_communication_computation ==
false,
127 "Overlapping communication and coloring are incompatible options. Only one of them can be enabled."));
222 get_padding_length()
const;
252 get_data(
unsigned int color)
const;
273 template <
typename Functor,
typename VectorType>
275 cell_loop(
const Functor & func,
294 template <
typename Functor>
296 evaluate_coefficients(Functor func)
const;
302 template <
typename VectorType>
311 template <
typename VectorType>
313 set_constrained_values(
const Number val,
VectorType &dst)
const;
320 initialize_dof_vector(
329 initialize_dof_vector(
353 std::shared_ptr<const MPI_Comm> comm,
360 template <
typename Functor,
typename VectorType>
362 serial_cell_loop(
const Functor & func,
370 template <
typename Functor>
372 distributed_cell_loop(
373 const Functor & func,
382 template <
typename Functor>
384 distributed_cell_loop(
385 const Functor & func,
393 template <
typename VectorType>
395 serial_copy_constrained_values(
const VectorType &src,
403 distributed_copy_constrained_values(
414 distributed_copy_constrained_values(
422 template <
typename VectorType>
424 serial_set_constrained_values(
const Number val,
VectorType &dst)
const;
431 distributed_set_constrained_values(
442 distributed_set_constrained_values(
522 std::vector<Number *>
JxW;
580 friend class internal::ReinitHelper<dim, Number>;
590 template <
int dim,
typename Number>
600 for (
int d = 0;
d < dim; ++
d)
601 gradients[
d] = gq[
d];
614 Number *gradients[dim];
622 __host__ __device__ constexpr
unsigned int 649 __device__
inline unsigned int 653 threadIdx.x % n_q_points_1d :
655 threadIdx.x % n_q_points_1d + n_q_points_1d * threadIdx.y :
656 threadIdx.x % n_q_points_1d +
657 n_q_points_1d * (threadIdx.y + n_q_points_1d * threadIdx.z));
668 template <
int dim,
typename Number>
669 __device__
inline unsigned int 671 const unsigned int cell,
673 const unsigned int n_q_points_1d,
674 const unsigned int n_q_points)
677 q_point_id_in_cell<dim>(n_q_points_1d);
687 template <
int dim,
typename Number>
690 const unsigned int cell,
692 const unsigned int n_q_points_1d)
695 q_point_id_in_cell<dim>(n_q_points_1d));
Transformed quadrature weights.
ParallelizationScheme parallelization_scheme
std::shared_ptr< const Utilities::MPI::Partitioner > partitioner
void reinit(MatrixBlock< MatrixType > &v, const BlockSparsityPattern &p)
bool overlap_communication_computation
unsigned int padding_length
CUDAWrappers::MatrixFree< dim, Number >::point_type & get_quadrature_point(const unsigned int cell, const typename CUDAWrappers::MatrixFree< dim, Number >::Data *data, const unsigned int n_q_points_1d)
__host__ constexpr unsigned int cells_per_block_shmem(int dim, int fe_degree)
unsigned int cells_per_block
Transformed quadrature points.
types::global_dof_index * constrained_dofs
#define AssertThrow(cond, exc)
bool overlap_communication_computation
types::global_dof_index n_dofs
unsigned int q_points_per_cell
dim3 constraint_block_dim
AdditionalData(const ParallelizationScheme parallelization_scheme=parallel_in_elem, const UpdateFlags mapping_update_flags=update_gradients|update_JxW_values|update_quadrature_points, const bool use_coloring=false, const bool overlap_communication_computation=false)
static ::ExceptionBase & ExcMessage(std::string arg1)
std::vector< point_type * > q_points
unsigned int * constraint_mask
unsigned int q_point_id_in_cell(const unsigned int n_q_points_1d)
SharedData(Number *vd, Number *gq[dim])
std::vector< unsigned int > row_start
unsigned int dofs_per_cell
std::vector< Number * > inv_jacobian
#define DEAL_II_NAMESPACE_CLOSE
unsigned int local_q_point_id(const unsigned int cell, const typename CUDAWrappers::MatrixFree< dim, Number >::Data *data, const unsigned int n_q_points_1d, const unsigned int n_q_points)
std::vector< dim3 > block_dim
std::vector< unsigned int * > constraint_mask
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
UpdateFlags mapping_update_flags
ParallelizationScheme parallelization_scheme
std::vector< dim3 > grid_dim
std::vector< types::global_dof_index * > local_to_global
#define DEAL_II_NAMESPACE_OPEN
Shape function gradients.
std::vector< Number * > JxW
std::vector< unsigned int > n_cells
types::global_dof_index * local_to_global
std::enable_if< std::is_fundamental< T >::value, std::size_t >::type memory_consumption(const T &t)
unsigned int padding_length
unsigned int n_constrained_dofs