17 #ifndef dealii_vector_operations_internal_h 18 #define dealii_vector_operations_internal_h 29 #include <deal.II/lac/cuda_kernels.templates.h> 39 namespace VectorOperations
65 template <
typename T,
typename U>
72 template <
typename T,
typename U>
75 const std::complex<T> *
end,
76 std::complex<U> * dest)
81 template <
typename T,
typename U>
83 copy(
const std::complex<T> *,
const std::complex<T> *,
U *)
86 ExcMessage(
"Can't convert a vector of complex numbers " 87 "into a vector of reals/doubles"));
92 #ifdef DEAL_II_WITH_THREADS 101 template <
typename Functor>
113 const unsigned int gs =
147 template <
typename Functor>
153 const std::shared_ptr<::parallel::internal::TBBPartitioner>
156 #ifdef DEAL_II_WITH_THREADS 164 Assert(partitioner.get() !=
nullptr,
166 "Unexpected initialization of Vector that does " 167 "not set the TBB partitioner to a usable state."));
168 std::shared_ptr<tbb::affinity_partitioner> tbb_partitioner =
169 partitioner->acquire_one_partitioner();
180 static_cast<size_type>(0),
181 static_cast<size_type>(generic_functor.
n_chunks),
185 partitioner->release_one_partitioner(tbb_partitioner);
187 else if (vec_size > 0)
199 template <
typename Number>
214 if (
value == Number())
216 #ifdef DEAL_II_WITH_CXX17 222 std::memset(dst + begin, 0,
sizeof(Number) * (end - begin));
226 std::fill(dst + begin, dst + end,
value);
233 template <
typename Number,
typename OtherNumber>
249 #if __GNUG__ && __GNUC__ < 5 250 if (__has_trivial_copy(Number) &&
253 # ifdef DEAL_II_WITH_CXX17 254 if constexpr (std::is_trivially_copyable<Number>() &&
257 if (std::is_trivially_copyable<Number>() &&
261 std::memcpy(dst + begin, src + begin, (end - begin) *
sizeof(Number));
270 const OtherNumber *
const src;
274 template <
typename Number>
302 template <
typename Number>
306 const Number *
const v_val,
320 val[i] += factor * v_val[i];
325 val[i] += factor * v_val[i];
334 template <
typename Number>
338 const Number *
const v_val,
354 val[i] = x * val[i] + a * v_val[i];
359 val[i] = x * val[i] + a * v_val[i];
369 template <
typename Number>
397 template <
typename Number>
425 template <
typename Number>
453 template <
typename Number>
457 const Number *
const v_val,
458 const Number *
const w_val,
475 val[i] = val[i] + a * v_val[i] +
b * w_val[i];
480 val[i] = val[i] + a * v_val[i] +
b * w_val[i];
491 template <
typename Number>
495 const Number *
const v_val,
509 val[i] = x * val[i] + v_val[i];
514 val[i] = x * val[i] + v_val[i];
523 template <
typename Number>
547 val[i] = x * val[i] + a * v_val[i] +
b * w_val[i];
552 val[i] = x * val[i] + a * v_val[i] +
b * w_val[i];
564 template <
typename Number>
592 template <
typename Number>
596 const Number *
const u_val,
610 val[i] = a * u_val[i];
615 val[i] = a * u_val[i];
624 template <
typename Number>
628 const Number *
const u_val,
629 const Number *
const v_val,
646 val[i] = a * u_val[i] +
b * v_val[i];
651 val[i] = a * u_val[i] +
b * v_val[i];
662 template <
typename Number>
688 val[i] = a * u_val[i] +
b * v_val[i] + c * w_val[i];
693 val[i] = a * u_val[i] +
b * v_val[i] + c * w_val[i];
706 template <
typename Number>
722 val[i] = a_val[i] / b_val[i];
727 val[i] = a_val[i] / b_val[i];
743 template <
typename Number,
typename Number2>
749 Dot(
const Number *
const X,
const Number2 *
const Y)
775 "This operation is not correctly implemented for " 776 "complex-valued objects.");
780 const Number *
const X;
781 const Number2 *
const Y;
784 template <
typename Number,
typename RealType>
807 const Number *
const X;
810 template <
typename Number,
typename RealType>
836 template <
typename Number,
typename RealType>
857 return std::pow(std::abs(x), p);
864 template <
typename Number>
890 template <
typename Number>
896 const Number *
const V,
897 const Number *
const W,
932 "This operation is not correctly implemented for " 933 "complex-valued objects.");
938 const Number *
const V;
939 const Number *
const W;
987 template <
typename Operation,
typename ResultType>
995 if (vec_size <= vector_accumulation_recursion_threshold * 32)
1005 outer_results[0] = ResultType();
1013 const size_type remainder = vec_size % 32;
1015 n_chunks < vector_accumulation_recursion_threshold,
1027 std::integral_constant<bool, Operation::vectorizes>());
1038 vector_accumulation_recursion_threshold + 1);
1043 const size_type inner_chunks = remainder / 8;
1045 const size_type remainder_inner = remainder % 8;
1046 ResultType r0 = ResultType(), r1 = ResultType(),
1048 switch (inner_chunks)
1067 for (
size_type j = 0; j < remainder_inner; ++j)
1071 if (n_chunks == vector_accumulation_recursion_threshold)
1072 outer_results[vector_accumulation_recursion_threshold -
1087 while (n_chunks > 1)
1089 if (n_chunks % 2 == 1)
1090 outer_results[n_chunks++] = ResultType();
1092 outer_results[i / 2] = outer_results[i] + outer_results[i + 1];
1095 result = outer_results[0];
1103 (vec_size / (vector_accumulation_recursion_threshold * 32)) *
1104 vector_accumulation_recursion_threshold * 8;
1106 ResultType r0, r1, r2, r3;
1110 first + 2 * new_size,
1111 first + 3 * new_size,
1125 template <
typename Operation,
typename ResultType>
1128 const Operation &op,
1131 ResultType (&outer_results)[vector_accumulation_recursion_threshold],
1132 std::integral_constant<bool, false>)
1138 ResultType r0 = op(index);
1139 ResultType r1 = op(index + 1);
1140 ResultType r2 = op(index + 2);
1141 ResultType r3 = op(index + 3);
1143 for (
size_type j = 1; j < 8; ++j, index += 4)
1146 r1 += op(index + 1);
1147 r2 += op(index + 2);
1148 r3 += op(index + 3);
1152 outer_results[i] = r0 + r2;
1163 template <
typename Operation,
typename Number>
1166 const Operation &op,
1169 Number (&outer_results)[vector_accumulation_recursion_threshold],
1170 std::integral_constant<bool, true>)
1179 const size_type regular_chunks = n_chunks / nvecs;
1180 for (
size_type i = 0; i < regular_chunks; ++i)
1187 for (
size_type j = 1; j < 8; ++j, index += nvecs * 4)
1189 r0 += op.do_vectorized(index);
1190 r1 += op.do_vectorized(index + nvecs);
1191 r2 += op.do_vectorized(index + 2 * nvecs);
1192 r3 += op.do_vectorized(index + 3 * nvecs);
1197 r0.
store(&outer_results[i * nvecs]);
1209 "VectorizedArray::size() must be a power of 2 and not more than 16");
1211 if (n_chunks % nvecs != 0)
1215 const size_type start_irreg = regular_chunks * nvecs;
1217 for (
size_type j = 0; j < 32; j += 2 * nvecs, index += 2 * nvecs)
1219 r0 += op.do_vectorized(index);
1220 r1 += op.do_vectorized(index + nvecs);
1223 r0.
store(&outer_results[start_irreg]);
1232 #ifdef DEAL_II_WITH_THREADS 1261 template <
typename Operation,
typename ResultType>
1264 static const unsigned int threshold_array_allocate = 512;
1275 const unsigned int gs =
1292 if (n_chunks > threshold_array_allocate)
1296 large_array.resize(2 * ((n_chunks + 1) / 2));
1297 array_ptr = large_array.data();
1300 array_ptr = &small_array[0];
1310 for (
size_type i = range.begin(); i < range.end(); ++i)
1320 while (n_chunks > 1)
1322 if (n_chunks % 2 == 1)
1323 array_ptr[n_chunks++] = ResultType();
1325 array_ptr[i / 2] = array_ptr[i] + array_ptr[i + 1];
1328 return array_ptr[0];
1337 ResultType small_array[threshold_array_allocate];
1351 template <
typename Operation,
typename ResultType>
1354 const Operation &op,
1357 ResultType & result,
1358 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1361 #ifdef DEAL_II_WITH_THREADS 1369 Assert(partitioner.get() !=
nullptr,
1371 "Unexpected initialization of Vector that does " 1372 "not set the TBB partitioner to a usable state."));
1373 std::shared_ptr<tbb::affinity_partitioner> tbb_partitioner =
1374 partitioner->acquire_one_partitioner();
1387 static_cast<size_type>(0),
1388 static_cast<size_type>(generic_functor.
n_chunks),
1392 partitioner->release_one_partitioner(tbb_partitioner);
1393 result = generic_functor.
do_sum();
1404 template <
typename Number,
typename Number2,
typename MemorySpace>
1409 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1412 const ::MemorySpace::MemorySpaceData<Number2, MemorySpace>
1419 "For the CUDA MemorySpace Number and Number2 should be the same type");
1424 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1433 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1436 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1443 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1446 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1453 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1462 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1466 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1473 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1478 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1480 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1487 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1491 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1498 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1503 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1510 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1516 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1518 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1525 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1534 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1537 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1544 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1548 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1555 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1560 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1562 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1569 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1572 const ::MemorySpace::MemorySpaceData<Number2, MemorySpace>
1579 template <
typename real_type>
1582 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1586 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1593 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1596 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1602 template <
typename real_type>
1605 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1613 template <
typename real_type>
1616 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1626 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1630 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1632 const ::MemorySpace::MemorySpaceData<Number, MemorySpace>
1639 template <
typename MemorySpace2>
1642 const std::shared_ptr<::parallel::internal::TBBPartitioner> &
1646 const ::MemorySpace::MemorySpaceData<Number, MemorySpace2>
1654 template <
typename Number,
typename Number2>
1658 copy(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1659 & thread_loop_partitioner,
1661 const ::MemorySpace::
1662 MemorySpaceData<Number2, ::MemorySpace::Host> &v_data,
1669 parallel_for(copier, 0, size, thread_loop_partitioner);
1673 set(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1674 & thread_loop_partitioner,
1682 parallel_for(setter, 0, size, thread_loop_partitioner);
1687 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1688 & thread_loop_partitioner,
1690 const ::MemorySpace::
1691 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1697 v_data.values.get());
1698 parallel_for(vector_add, 0, size, thread_loop_partitioner);
1703 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1704 & thread_loop_partitioner,
1706 const ::MemorySpace::
1707 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1713 v_data.values.get());
1714 parallel_for(vector_subtract, 0, size, thread_loop_partitioner);
1719 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1720 & thread_loop_partitioner,
1728 parallel_for(vector_add, 0, size, thread_loop_partitioner);
1732 add_av(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1733 & thread_loop_partitioner,
1736 const ::MemorySpace::
1737 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1743 v_data.values.get(),
1745 parallel_for(vector_add, 0, size, thread_loop_partitioner);
1750 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1751 & thread_loop_partitioner,
1755 const ::MemorySpace::
1756 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1757 const ::MemorySpace::
1758 MemorySpaceData<Number, ::MemorySpace::Host> &w_data,
1764 data.values.get(), v_data.values.get(), w_data.values.get(), a,
b);
1765 parallel_for(vector_add, 0, size, thread_loop_partitioner);
1770 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1771 & thread_loop_partitioner,
1774 const ::MemorySpace::
1775 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1781 v_data.values.get(),
1783 parallel_for(vector_sadd, 0, size, thread_loop_partitioner);
1788 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1789 & thread_loop_partitioner,
1793 const ::MemorySpace::
1794 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1800 v_data.values.get(),
1803 parallel_for(vector_sadd, 0, size, thread_loop_partitioner);
1808 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1809 & thread_loop_partitioner,
1814 const ::MemorySpace::
1815 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1816 const ::MemorySpace::
1817 MemorySpaceData<Number, ::MemorySpace::Host> &w_data,
1823 data.values.get(), v_data.values.get(), w_data.values.get(), x, a,
b);
1824 parallel_for(vector_sadd, 0, size, thread_loop_partitioner);
1829 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1830 & thread_loop_partitioner,
1832 const Number factor,
1839 parallel_for(vector_multiply, 0, size, thread_loop_partitioner);
1843 scale(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1844 & thread_loop_partitioner,
1846 const ::MemorySpace::
1847 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1853 v_data.values.get());
1854 parallel_for(vector_scale, 0, size, thread_loop_partitioner);
1858 equ_au(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1859 & thread_loop_partitioner,
1862 const ::MemorySpace::
1863 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1869 v_data.values.get(),
1871 parallel_for(vector_equ, 0, size, thread_loop_partitioner);
1876 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1877 & thread_loop_partitioner,
1881 const ::MemorySpace::
1882 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1883 const ::MemorySpace::
1884 MemorySpaceData<Number, ::MemorySpace::Host> &w_data,
1890 data.values.get(), v_data.values.get(), w_data.values.get(), a,
b);
1891 parallel_for(vector_equ, 0, size, thread_loop_partitioner);
1895 dot(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1896 & thread_loop_partitioner,
1898 const ::MemorySpace::
1899 MemorySpaceData<Number2, ::MemorySpace::Host> &v_data,
1906 data.values.get(), v_data.values.get());
1908 dot, 0, size, sum, thread_loop_partitioner);
1914 template <
typename real_type>
1916 norm_2(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1917 & thread_loop_partitioner,
1930 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1931 & thread_loop_partitioner,
1933 const ::MemorySpace::
1934 MemorySpaceData<Number, ::MemorySpace::Host> &data)
1943 template <
typename real_type>
1945 norm_1(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1946 & thread_loop_partitioner,
1957 template <
typename real_type>
1959 norm_p(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1960 & thread_loop_partitioner,
1974 const std::shared_ptr<::parallel::internal::TBBPartitioner>
1975 & thread_loop_partitioner,
1978 const ::MemorySpace::
1979 MemorySpaceData<Number, ::MemorySpace::Host> &v_data,
1980 const ::MemorySpace::
1981 MemorySpaceData<Number, ::MemorySpace::Host> &w_data,
1988 v_data.values.get(),
1989 w_data.values.get(),
1996 template <
typename MemorySpace2>
1998 import(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
1999 & thread_loop_partitioner,
2002 const ::MemorySpace::MemorySpaceData<Number, MemorySpace2>
2007 typename std::enable_if<
2013 copy(thread_loop_partitioner, size, v_data, data);
2017 add_vector(thread_loop_partitioner, size, v_data, data);
2025 #ifdef DEAL_II_COMPILER_CUDA_AWARE 2026 template <
typename MemorySpace2>
2028 import(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
2032 const ::MemorySpace::MemorySpaceData<Number, MemorySpace2>
2037 typename std::enable_if<
2043 cudaError_t cuda_error_code = cudaMemcpy(data.values.get(),
2044 v_data.values_dev.get(),
2045 size *
sizeof(Number),
2046 cudaMemcpyDeviceToHost);
2059 #ifdef DEAL_II_COMPILER_CUDA_AWARE 2060 template <
typename Number>
2070 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2072 const ::MemorySpace::
2073 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2078 cudaError_t cuda_error_code = cudaMemcpy(data.values_dev.get(),
2079 v_data.values_dev.get(),
2080 size *
sizeof(Number),
2081 cudaMemcpyDeviceToDevice);
2086 set(
const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2094 ::LinearAlgebra::CUDAWrappers::kernel::set<Number>
2095 <<<
n_blocks, block_size>>>(data.values_dev.get(), s, size);
2101 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2103 const ::MemorySpace::
2104 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2110 ::LinearAlgebra::CUDAWrappers::kernel::add_aV<Number>
2111 <<<
n_blocks, block_size>>>(data.values_dev.get(),
2113 v_data.values_dev.get(),
2120 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2122 const ::MemorySpace::
2123 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2129 ::LinearAlgebra::CUDAWrappers::kernel::add_aV<Number>
2130 <<<
n_blocks, block_size>>>(data.values_dev.get(),
2132 v_data.values_dev.get(),
2139 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2147 ::LinearAlgebra::CUDAWrappers::kernel::vec_add<Number>
2148 <<<
n_blocks, block_size>>>(data.values_dev.get(), a, size);
2154 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2157 const ::MemorySpace::
2158 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2164 ::LinearAlgebra::CUDAWrappers::kernel::add_aV<Number>
2165 <<<
n_blocks, block_size>>>(data.values_dev.get(),
2167 v_data.values_dev.get(),
2174 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2178 const ::MemorySpace::
2179 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2180 const ::MemorySpace::
2181 MemorySpaceData<Number, ::MemorySpace::CUDA> &w_data,
2187 ::LinearAlgebra::CUDAWrappers::kernel::add_aVbW<Number>
2188 <<<dim3(n_blocks, 1), dim3(block_size)>>>(data.values_dev.get(),
2190 v_data.values_dev.get(),
2192 w_data.values_dev.get(),
2199 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2202 const ::MemorySpace::
2203 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2209 ::LinearAlgebra::CUDAWrappers::kernel::sadd<Number>
2210 <<<dim3(n_blocks, 1), dim3(block_size)>>>(
2211 x, data.values_dev.get(), 1., v_data.values_dev.get(), size);
2217 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2221 const ::MemorySpace::
2222 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2228 ::LinearAlgebra::CUDAWrappers::kernel::sadd<Number>
2229 <<<dim3(n_blocks, 1), dim3(block_size)>>>(
2230 x, data.values_dev.get(), a, v_data.values_dev.get(), size);
2236 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2241 const ::MemorySpace::
2242 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2243 const ::MemorySpace::
2244 MemorySpaceData<Number, ::MemorySpace::CUDA> &w_data,
2250 ::LinearAlgebra::CUDAWrappers::kernel::sadd<Number>
2251 <<<dim3(n_blocks, 1), dim3(block_size)>>>(x,
2252 data.values_dev.get(),
2254 v_data.values_dev.get(),
2256 w_data.values_dev.get(),
2263 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2265 const Number factor,
2271 ::LinearAlgebra::CUDAWrappers::kernel::vec_scale<Number>
2272 <<<
n_blocks, block_size>>>(data.values_dev.get(), factor, size);
2278 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2280 const ::MemorySpace::
2281 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2287 ::LinearAlgebra::CUDAWrappers::kernel::scale<Number>
2288 <<<dim3(n_blocks, 1), dim3(block_size)>>>(data.values_dev.get(),
2289 v_data.values_dev.get(),
2296 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2299 const ::MemorySpace::
2300 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2306 ::LinearAlgebra::CUDAWrappers::kernel::equ<Number>
2307 <<<dim3(n_blocks, 1), dim3(block_size)>>>(data.values_dev.get(),
2309 v_data.values_dev.get(),
2316 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2320 const ::MemorySpace::
2321 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2322 const ::MemorySpace::
2323 MemorySpaceData<Number, ::MemorySpace::CUDA> &w_data,
2329 ::LinearAlgebra::CUDAWrappers::kernel::equ<Number>
2330 <<<dim3(n_blocks, 1), dim3(block_size)>>>(data.values_dev.get(),
2332 v_data.values_dev.get(),
2334 w_data.values_dev.get(),
2340 dot(
const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2342 const ::MemorySpace::
2343 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2348 Number * result_device;
2349 cudaError_t error_code = cudaMalloc(&result_device,
sizeof(Number));
2351 error_code = cudaMemset(result_device, 0,
sizeof(Number));
2358 <<<dim3(n_blocks, 1), dim3(block_size)>>>(result_device,
2359 data.values_dev.get(),
2360 v_data.values_dev.get(),
2361 static_cast<unsigned int>(
2367 error_code = cudaMemcpy(&result,
2370 cudaMemcpyDeviceToHost);
2373 error_code = cudaFree(result_device);
2381 template <
typename real_type>
2383 norm_2(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
2384 & thread_loop_partitioner,
2391 sum = dot(thread_loop_partitioner, size, data, data);
2396 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2398 const ::MemorySpace::
2399 MemorySpaceData<Number, ::MemorySpace::CUDA> &data)
2401 Number * result_device;
2402 cudaError_t error_code = cudaMalloc(&result_device,
sizeof(Number));
2404 error_code = cudaMemset(result_device, 0,
sizeof(Number));
2410 <<<dim3(n_blocks, 1), dim3(block_size)>>>(result_device,
2411 data.values_dev.get(),
2416 error_code = cudaMemcpy(&result,
2419 cudaMemcpyDeviceToHost);
2422 error_code = cudaFree(result_device);
2428 template <
typename real_type>
2431 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2438 Number * result_device;
2439 cudaError_t error_code = cudaMalloc(&result_device,
sizeof(Number));
2441 error_code = cudaMemset(result_device, 0,
sizeof(Number));
2447 <<<dim3(n_blocks, 1), dim3(block_size)>>>(result_device,
2448 data.values_dev.get(),
2452 error_code = cudaMemcpy(&sum,
2455 cudaMemcpyDeviceToHost);
2458 error_code = cudaFree(result_device);
2462 template <
typename real_type>
2465 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2477 const std::shared_ptr<::parallel::internal::TBBPartitioner> &,
2480 const ::MemorySpace::
2481 MemorySpaceData<Number, ::MemorySpace::CUDA> &v_data,
2482 const ::MemorySpace::
2483 MemorySpaceData<Number, ::MemorySpace::CUDA> &w_data,
2489 cudaError_t error_code = cudaMalloc(&res_d,
sizeof(Number));
2491 error_code = cudaMemset(res_d, 0,
sizeof(Number));
2495 ::LinearAlgebra::CUDAWrappers::kernel::add_and_dot<Number>
2496 <<<dim3(n_blocks, 1), dim3(block_size)>>>(res_d,
2497 data.values_dev.get(),
2498 v_data.values_dev.get(),
2499 w_data.values_dev.get(),
2505 cudaMemcpy(&res, res_d,
sizeof(Number), cudaMemcpyDeviceToHost);
2507 error_code = cudaFree(res_d);
2512 template <
typename MemorySpace2>
2514 import(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
2515 & thread_loop_partitioner,
2518 const ::MemorySpace::MemorySpaceData<Number, MemorySpace2>
2523 typename std::enable_if<
2529 copy(thread_loop_partitioner, size, v_data, data);
2533 add_vector(thread_loop_partitioner, size, v_data, data);
2541 template <
typename MemorySpace2>
2543 import(
const std::shared_ptr<::parallel::internal::TBBPartitioner>
2547 const ::MemorySpace::MemorySpaceData<Number, MemorySpace2>
2552 typename std::enable_if<
2558 cudaError_t cuda_error_code = cudaMemcpy(data.values_dev.get(),
2559 v_data.values.get(),
2560 size *
sizeof(Number),
2561 cudaMemcpyHostToDevice);
static void equ_au(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
static Number dot(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
static void norm_2(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, real_type &, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
static void norm_2(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, real_type &sum, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
__global__ void reduction(Number *result, const Number *v, const size_type N)
#define AssertDimension(dim1, dim2)
RealType operator()(const size_type i) const
void operator()(const tbb::blocked_range< size_type > &range) const
const Number *const v_val
const Number *const v_val
__global__ void double_vector_reduction(Number *result, const Number *v1, const Number *v2, const size_type N)
Vectorization_multiply_factor(Number *const val, const Number factor)
void operator()(const size_type begin, const size_type end) const
static void add_vector(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
TBBReduceFunctor(const Operation &op, const size_type start, const size_type end)
const Number *const v_val
TBBForFunctor(Functor &functor, const size_type start, const size_type end)
void operator()(const size_type begin, const size_type end) const
static void norm_p(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, real_type &, real_type, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
static void multiply_factor(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number factor, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
static void add_vector(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
VectorizedArray< Number > do_vectorized(const size_type i) const
static constexpr std::enable_if< std::is_same< Dummy, number >::value &&is_cuda_compatible< Dummy >::value, real_type >::type abs_square(const number &x)
static void norm_p(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, real_type &, real_type, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &)
#define AssertIndexRange(index, range)
static void add_avpbw(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, const Number, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
static void add_av(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number a, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
Vectorization_add_factor(Number *const val, const Number factor)
static void norm_p(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, real_type &sum, const real_type p, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static Number dot(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const ::MemorySpace::MemorySpaceData< Number2, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
static void sadd_xavbw(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number x, const Number a, const Number b, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &w_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
static void scale(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
static void sadd_xav(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number x, const Number a, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
Number operator()(const size_type i) const
static void sadd_xavbw(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number x, const Number a, const Number b, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &w_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static void norm_2(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, real_type &sum, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
void operator()(const size_type begin, const size_type end) const
MeanValue(const Number *X)
static void sadd_xv(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number x, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
#define AssertThrow(cond, exc)
static real_type abs(const number &x)
VectorizedArray< Number > do_vectorized(const size_type i) const
VectorizedArray< Number > do_vectorized(const size_type i) const
static void equ_au(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number a, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
const Number *const v_val
#define AssertCudaKernel()
const OtherNumber *const src
static Number dot(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const ::MemorySpace::MemorySpaceData< Number2, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
void operator()(const size_type begin, const size_type end) const
void store(Number *ptr) const
const Number *const v_val
std::vector< ResultType > large_array
Vectorization_equ_aubvcw(Number *val, const Number *u_val, const Number *v_val, const Number *w_val, const Number a, const Number b, const Number c)
void operator()(const size_type begin, const size_type end) const
static void norm_1(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, real_type &, Number *, Number *)
void operator()(const size_type begin, const size_type end) const
Vectorization_add_v(Number *const val, const Number *const v_val)
Number operator()(const size_type i) const
static Number add_and_dot(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number a, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &w_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
Vectorization_sadd_xv(Number *const val, const Number *const v_val, const Number x)
Vector_copy(const OtherNumber *const src, Number *const dst)
static void norm_1(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, real_type &sum, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
const Number *const a_val
static ::ExceptionBase & ExcMessage(std::string arg1)
void operator()(const size_type begin, const size_type end) const
RealType operator()(const size_type i) const
static void equ_aubv(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number a, const Number b, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &w_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
void operator()(const size_type begin, const size_type end) const
const Number *const u_val
static Number add_and_dot(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
const Number *const v_val
T sum(const T &t, const MPI_Comm &mpi_communicator)
VectorizedArray< Number > do_vectorized(const size_type i) const
void accumulate_recursive(const Operation &op, const size_type first, const size_type last, ResultType &result)
const Number *const v_val
#define Assert(cond, exc)
Vectorization_scale(Number *const val, const Number *const v_val)
void operator()(const size_type begin, const size_type end) const
const Number *const v_val
types::global_dof_index size_type
Vectorization_subtract_v(Number *val, const Number *const v_val)
static void sadd_xavbw(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, const Number, const Number, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
static void add_av(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
#define AssertCuda(error_code)
static void sadd_xv(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
static void add_avpbw(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number a, const Number b, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &w_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static void add_factor(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, Number a, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
void operator()(const size_type begin, const size_type end) const
#define DEAL_II_NAMESPACE_CLOSE
void load(const Number *ptr)
bool is_non_negative(const T &t)
VectorType::value_type * end(VectorType &V)
static void equ_aubv(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number a, const Number b, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &w_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
void parallel_reduce(const Operation &op, const size_type start, const size_type end, ResultType &result, const std::shared_ptr<::parallel::internal::TBBPartitioner > &partitioner)
std::enable_if< IsBlockVector< VectorType >::value, unsigned int >::type n_blocks(const VectorType &vector)
const Number *const w_val
void operator()(const size_type begin, const size_type end) const
static void subtract_vector(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
static void sadd_xv(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number x, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static constexpr std::size_t size()
Norm2(const Number *const X)
static void add_factor(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, Number a, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
const Number *const v_val
Vectorization_sadd_xav(Number *val, const Number *const v_val, const Number a, const Number x)
Vectorization_equ_au(Number *const val, const Number *const u_val, const Number a)
const Number *const b_val
static void copy(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const ::MemorySpace::MemorySpaceData< Number2, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
VectorizedArray< Number > do_vectorized(const size_type i) const
SymmetricTensor< 2, dim, Number > b(const Tensor< 2, dim, Number > &F)
static void add_avpbw(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number a, const Number b, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &w_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
const unsigned int vector_accumulation_recursion_threshold
static void equ_aubv(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, const Number, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
void operator()(const size_type begin, const size_type end) const
Tensor< 2, dim, Number > w(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
unsigned int global_dof_index
static void subtract_vector(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static void copy(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
static void equ_au(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const Number a, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
const Number *const w_val
ResultType do_sum() const
void accumulate_regular(const Operation &op, const size_type &n_chunks, size_type &index, ResultType(&outer_results)[vector_accumulation_recursion_threshold], std::integral_constant< bool, false >)
RealType operator()(const size_type i) const
const Number *const v_val
#define DEAL_II_NAMESPACE_OPEN
VectorType::value_type * begin(VectorType &V)
T min(const T &t, const MPI_Comm &mpi_communicator)
static void multiply_factor(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
Number operator()(const size_type i) const
static void copy(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const ::MemorySpace::MemorySpaceData< Number2, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
void operator()(const size_type begin, const size_type end) const
unsigned int minimum_parallel_grain_size
AddAndDot(Number *const X, const Number *const V, const Number *const W, const Number a)
Dot(const Number *const X, const Number2 *const Y)
static void scale(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
static Number mean_value(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static void add_vector(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
Vectorization_equ_aubv(Number *const val, const Number *const u_val, const Number *const v_val, const Number a, const Number b)
static ::ExceptionBase & ExcNotImplemented()
static Number add_and_dot(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number a, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &w_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static void sadd_xav(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number x, const Number a, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static unsigned int n_threads()
Vectorization_add_avpbw(Number *const val, const Number *const v_val, const Number *const w_val, const Number a, const Number b)
VectorizedArray< Number > do_vectorized(const size_type i) const
static void subtract_vector(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
void operator()(const size_type begin, const size_type end) const
static void norm_1(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, real_type &sum, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
void operator()(const tbb::blocked_range< size_type > &range) const
void operator()(const size_type begin, const size_type end) const
Vectorization_add_av(Number *const val, const Number *const v_val, const Number factor)
#define DEAL_II_FALLTHROUGH
static Number mean_value(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::CUDA > &data)
static void add_factor(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, Number, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
const Number *const u_val
void copy(const std::complex< T > *, const std::complex< T > *, U *)
static void sadd_xav(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const Number, const Number, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &, ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)
NormP(const Number *X, RealType p)
static void multiply_factor(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number factor, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static void add_av(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const Number a, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
void parallel_for(Functor &functor, const size_type start, const size_type end, const std::shared_ptr<::parallel::internal::TBBPartitioner > &partitioner)
void copy(const T *begin, const T *end, U *dest)
#define AssertIsFinite(number)
const Number *const u_val
Vectorization_ratio(Number *val, const Number *a_val, const Number *b_val)
Vectorization_sadd_xavbw(Number *val, const Number *v_val, const Number *w_val, Number x, Number a, Number b)
#define DEAL_II_OPENMP_SIMD_PRAGMA
Vector_set(const Number value, Number *const dst)
const Number *const w_val
void operator()(const size_type begin, const size_type end) const
static ::ExceptionBase & ExcInternalError()
static void scale(const std::shared_ptr<::parallel::internal::TBBPartitioner > &thread_loop_partitioner, const size_type size, const ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &v_data, ::MemorySpace::MemorySpaceData< Number, ::MemorySpace::Host > &data)
static Number mean_value(const std::shared_ptr<::parallel::internal::TBBPartitioner > &, const size_type, const ::MemorySpace::MemorySpaceData< Number, MemorySpace > &)