1309 *
const unsigned int )
const 1315 *
template <
int dim>
1316 *
void RightHandSide<dim>::vector_value(
const Point<dim> &p,
1319 *
for (
unsigned int c = 0; c < this->
n_components; ++c)
1327 * <a name=
"Linearsolversandpreconditioners"></a>
1328 * <h3>Linear solvers and preconditioners</h3>
1332 * The linear solvers and preconditioners are discussed extensively in the
1333 * introduction. Here, we create the respective objects that will be used.
1338 * <a name=
"ThecodeInverseMatrixcodeclasstemplate"></a>
1339 * <h4>The <code>InverseMatrix</code>
class template</h4>
1340 * The <code>InverseMatrix</code>
class represents the data structure for an
1341 * inverse
matrix. Unlike @ref step_20
"step-20", we implement
this with a
class instead of
1342 * the helper
function inverse_linear_operator() we will
apply this class to
1343 * different kinds of matrices that will require different preconditioners
1344 * (in @ref step_20 "step-20" we only used a non-
identity preconditioner for the mass
1346 * via template parameters, and
matrix and preconditioner objects of these
1347 *
types will then be passed to the constructor when an
1348 * <code>InverseMatrix</code>
object is created. The member function
1349 * <code>vmult</code> is obtained by solving a linear system:
1352 * template <class MatrixType, class PreconditionerType>
1356 * InverseMatrix(
const MatrixType & m,
1357 *
const PreconditionerType &preconditioner);
1367 *
template <
class MatrixType,
class PreconditionerType>
1368 * InverseMatrix<MatrixType, PreconditionerType>::InverseMatrix(
1369 *
const MatrixType & m,
1370 *
const PreconditionerType &preconditioner)
1372 * , preconditioner(&preconditioner)
1378 * This is the implementation of the <code>vmult</code>
function.
1382 * In
this class we use a rather large tolerance for the solver control. The
1383 * reason
for this is that the
function is used very frequently, and hence,
1384 * any additional effort to make the residual in the CG solve smaller makes
1385 * the solution more expensive. Note that we
do not only use
this class as a
1386 * preconditioner
for the Schur complement, but also when forming the
1387 * inverse of the Laplace
matrix – which is hence directly responsible
1388 *
for the accuracy of the solution itself, so we can
't choose a too large 1389 * tolerance, either. 1392 * template <class MatrixType, class PreconditionerType> 1393 * void InverseMatrix<MatrixType, PreconditionerType>::vmult( 1394 * Vector<double> & dst, 1395 * const Vector<double> &src) const 1397 * SolverControl solver_control(src.size(), 1e-6 * src.l2_norm()); 1398 * SolverCG<Vector<double>> cg(solver_control); 1402 * cg.solve(*matrix, dst, src, *preconditioner); 1409 * <a name="ThecodeSchurComplementcodeclasstemplate"></a> 1410 * <h4>The <code>SchurComplement</code> class template</h4> 1414 * This class implements the Schur complement discussed in the introduction. 1415 * It is in analogy to @ref step_20 "step-20". Though, we now call it with a template 1416 * parameter <code>PreconditionerType</code> in order to access that when 1417 * specifying the respective type of the inverse matrix class. As a 1418 * consequence of the definition above, the declaration 1419 * <code>InverseMatrix</code> now contains the second template parameter for 1420 * a preconditioner class as above, which affects the 1421 * <code>SmartPointer</code> object <code>m_inverse</code> as well. 1424 * template <class PreconditionerType> 1425 * class SchurComplement : public Subscriptor 1429 * const BlockSparseMatrix<double> &system_matrix, 1430 * const InverseMatrix<SparseMatrix<double>, PreconditionerType> &A_inverse); 1432 * void vmult(Vector<double> &dst, const Vector<double> &src) const; 1435 * const SmartPointer<const BlockSparseMatrix<double>> system_matrix; 1436 * const SmartPointer< 1437 * const InverseMatrix<SparseMatrix<double>, PreconditionerType>> 1440 * mutable Vector<double> tmp1, tmp2; 1445 * template <class PreconditionerType> 1446 * SchurComplement<PreconditionerType>::SchurComplement( 1447 * const BlockSparseMatrix<double> &system_matrix, 1448 * const InverseMatrix<SparseMatrix<double>, PreconditionerType> &A_inverse) 1449 * : system_matrix(&system_matrix) 1450 * , A_inverse(&A_inverse) 1451 * , tmp1(system_matrix.block(0, 0).m()) 1452 * , tmp2(system_matrix.block(0, 0).m()) 1456 * template <class PreconditionerType> 1458 * SchurComplement<PreconditionerType>::vmult(Vector<double> & dst, 1459 * const Vector<double> &src) const 1461 * system_matrix->block(0, 1).vmult(tmp1, src); 1462 * A_inverse->vmult(tmp2, tmp1); 1463 * system_matrix->block(1, 0).vmult(dst, tmp2); 1470 * <a name="StokesProblemclassimplementation"></a> 1471 * <h3>StokesProblem class implementation</h3> 1476 * <a name="StokesProblemStokesProblem"></a> 1477 * <h4>StokesProblem::StokesProblem</h4> 1481 * The constructor of this class looks very similar to the one of 1482 * @ref step_20 "step-20". The constructor initializes the variables for the polynomial 1483 * degree, triangulation, finite element system and the dof handler. The 1484 * underlying polynomial functions are of order <code>degree+1</code> for 1485 * the vector-valued velocity components and of order <code>degree</code> 1486 * for the pressure. This gives the LBB-stable element pair 1487 * @f$Q_{degree+1}^d\times Q_{degree}@f$, often referred to as the Taylor-Hood 1492 * Note that we initialize the triangulation with a MeshSmoothing argument, 1493 * which ensures that the refinement of cells is done in a way that the 1494 * approximation of the PDE solution remains well-behaved (problems arise if 1495 * grids are too unstructured), see the documentation of 1496 * <code>Triangulation::MeshSmoothing</code> for details. 1499 * template <int dim> 1500 * StokesProblem<dim>::StokesProblem(const unsigned int degree) 1502 * , triangulation(Triangulation<dim>::maximum_smoothing) 1503 * , fe(FE_Q<dim>(degree + 1), dim, FE_Q<dim>(degree), 1) 1504 * , dof_handler(triangulation) 1511 * <a name="StokesProblemsetup_dofs"></a> 1512 * <h4>StokesProblem::setup_dofs</h4> 1516 * Given a mesh, this function associates the degrees of freedom with it and 1517 * creates the corresponding matrices and vectors. At the beginning it also 1518 * releases the pointer to the preconditioner object (if the shared pointer 1519 * pointed at anything at all at this point) since it will definitely not be 1520 * needed any more after this point and will have to be re-computed after 1521 * assembling the matrix, and unties the sparse matrices from their sparsity 1526 * We then proceed with distributing degrees of freedom and renumbering 1527 * them: In order to make the ILU preconditioner (in 3D) work efficiently, 1528 * it is important to enumerate the degrees of freedom in such a way that it 1529 * reduces the bandwidth of the matrix, or maybe more importantly: in such a 1530 * way that the ILU is as close as possible to a real LU decomposition. On 1531 * the other hand, we need to preserve the block structure of velocity and 1532 * pressure already seen in @ref step_20 "step-20" and @ref step_21 "step-21". This is done in two 1533 * steps: First, all dofs are renumbered to improve the ILU and then we 1534 * renumber once again by components. Since 1535 * <code>DoFRenumbering::component_wise</code> does not touch the 1536 * renumbering within the individual blocks, the basic renumbering from the 1537 * first step remains. As for how the renumber degrees of freedom to improve 1538 * the ILU: deal.II has a number of algorithms that attempt to find 1539 * orderings to improve ILUs, or reduce the bandwidth of matrices, or 1540 * optimize some other aspect. The DoFRenumbering namespace shows a 1541 * comparison of the results we obtain with several of these algorithms 1542 * based on the testcase discussed here in this tutorial program. Here, we 1543 * will use the traditional Cuthill-McKee algorithm already used in some of 1544 * the previous tutorial programs. In the <a href="#improved-ilu">section 1545 * on improved ILU</a> we're going to discuss
this issue in more detail.
1549 * There is
one more change compared to previous tutorial programs: There is
1550 * no reason in sorting the <code>dim</code> velocity components
1551 * individually. In fact, rather than
first enumerating all @f$x@f$-velocities,
1552 * then all @f$y@f$-velocities, etc, we would like to keep all velocities at the
1553 * same location together and only separate between velocities (all
1554 * components) and pressures. By
default,
this is not what the
1556 * component separately; what we have to
do is group several components into
1557 *
"blocks" and pass
this block structure to that
function. Consequently, we
1558 * allocate a vector <code>block_component</code> with as many elements as
1559 * there are components and describe all velocity components to correspond
1560 * to block 0,
while the pressure component will form block 1:
1563 *
template <
int dim>
1564 *
void StokesProblem<dim>::setup_dofs()
1566 * A_preconditioner.reset();
1567 * system_matrix.clear();
1568 * preconditioner_matrix.clear();
1570 * dof_handler.distribute_dofs(fe);
1573 * std::vector<unsigned int> block_component(dim + 1, 0);
1574 * block_component[dim] = 1;
1579 * Now comes the implementation of Dirichlet boundary conditions, which
1580 * should be evident after the discussion in the introduction. All that
1581 * changed is that the
function already appears in the setup
functions,
1582 * whereas we were used to see it in some assembly routine. Further down
1583 * below where we
set up the mesh, we will associate the top boundary
1584 * where we impose Dirichlet boundary conditions with boundary indicator
1585 * 1. We will have to pass
this boundary indicator as
second argument to
1586 * the
function below interpolating boundary values. There is
one more
1587 * thing, though. The
function describing the Dirichlet conditions was
1588 * defined
for all components, both velocity and pressure. However, the
1589 * Dirichlet conditions are to be
set for the velocity only. To
this end,
1590 * we use a
ComponentMask that only selects the velocity components. The
1591 * component mask is obtained from the finite element by specifying the
1592 * particular components we want. Since we use adaptively refined grids,
1593 * the
affine constraints
object needs to be
first filled with hanging node
1594 * constraints generated from the DoF handler. Note the order of the two
1595 * functions — we
first compute the hanging node constraints, and
1596 * then insert the boundary values into the constraints
object. This makes
1597 * sure that we respect H<sup>1</sup> conformity on boundaries with
1598 * hanging nodes (in three space dimensions), where the hanging node needs
1599 * to dominate the Dirichlet boundary values.
1603 * constraints.
clear();
1609 * BoundaryValues<dim>(),
1611 * fe.component_mask(velocities));
1614 * constraints.close();
1618 * In analogy to @ref step_20
"step-20", we count the dofs in the individual components.
1619 * We could
do this in the same way as there, but we want to operate on
1620 * the block structure we used already
for the renumbering: The
function 1623 * velocity and pressure block via <code>block_component</code>.
1626 *
const std::vector<types::global_dof_index> dofs_per_block =
1628 *
const unsigned int n_u = dofs_per_block[0];
1629 *
const unsigned int n_p = dofs_per_block[1];
1631 * std::cout <<
" Number of active cells: " <<
triangulation.n_active_cells()
1633 * <<
" Number of degrees of freedom: " << dof_handler.n_dofs()
1634 * <<
" (" << n_u <<
'+' << n_p <<
')' << std::endl;
1638 * The next task is to allocate a sparsity pattern
for the system
matrix we
1639 * will create and
one for the preconditioner
matrix. We could
do this in
1640 * the same way as in @ref step_20
"step-20", i.e. directly build an
object of type
1642 * is a major reason not to
do so:
1643 * In 3D, the
function DoFTools::max_couplings_between_dofs yields a
1644 * conservative but rather large number
for the coupling between the
1645 * individual dofs, so that the memory initially provided
for the creation
1646 * of the sparsity pattern of the
matrix is far too much -- so much actually
1647 * that the
initial sparsity pattern won
't even fit into the physical memory 1648 * of most systems already for moderately-sized 3D problems, see also the 1649 * discussion in @ref step_18 "step-18". Instead, we first build temporary objects that use 1650 * a different data structure that doesn't require allocating more memory
1651 * than necessary but isn
't suitable for use as a basis of SparseMatrix or 1652 * BlockSparseMatrix objects; in a second step we then copy these objects 1653 * into objects of type BlockSparsityPattern. This is entirely analogous to 1654 * what we already did in @ref step_11 "step-11" and @ref step_18 "step-18". In particular, we make use of 1655 * the fact that we will never write into the @f$(1,1)@f$ block of the system 1656 * matrix and that this is the only block to be filled for the 1657 * preconditioner matrix. 1661 * All this is done inside new scopes, which means that the memory of 1662 * <code>dsp</code> will be released once the information has been copied to 1663 * <code>sparsity_pattern</code>. 1667 * BlockDynamicSparsityPattern dsp(2, 2); 1669 * dsp.block(0, 0).reinit(n_u, n_u); 1670 * dsp.block(1, 0).reinit(n_p, n_u); 1671 * dsp.block(0, 1).reinit(n_u, n_p); 1672 * dsp.block(1, 1).reinit(n_p, n_p); 1674 * dsp.collect_sizes(); 1676 * Table<2, DoFTools::Coupling> coupling(dim + 1, dim + 1); 1678 * for (unsigned int c = 0; c < dim + 1; ++c) 1679 * for (unsigned int d = 0; d < dim + 1; ++d) 1680 * if (!((c == dim) && (d == dim))) 1681 * coupling[c][d] = DoFTools::always; 1683 * coupling[c][d] = DoFTools::none; 1685 * DoFTools::make_sparsity_pattern( 1686 * dof_handler, coupling, dsp, constraints, false); 1688 * sparsity_pattern.copy_from(dsp); 1692 * BlockDynamicSparsityPattern preconditioner_dsp(2, 2); 1694 * preconditioner_dsp.block(0, 0).reinit(n_u, n_u); 1695 * preconditioner_dsp.block(1, 0).reinit(n_p, n_u); 1696 * preconditioner_dsp.block(0, 1).reinit(n_u, n_p); 1697 * preconditioner_dsp.block(1, 1).reinit(n_p, n_p); 1699 * preconditioner_dsp.collect_sizes(); 1701 * Table<2, DoFTools::Coupling> preconditioner_coupling(dim + 1, dim + 1); 1703 * for (unsigned int c = 0; c < dim + 1; ++c) 1704 * for (unsigned int d = 0; d < dim + 1; ++d) 1705 * if (((c == dim) && (d == dim))) 1706 * preconditioner_coupling[c][d] = DoFTools::always; 1708 * preconditioner_coupling[c][d] = DoFTools::none; 1710 * DoFTools::make_sparsity_pattern(dof_handler, 1711 * preconditioner_coupling, 1712 * preconditioner_dsp, 1716 * preconditioner_sparsity_pattern.copy_from(preconditioner_dsp); 1721 * Finally, the system matrix, the preconsitioner matrix, the solution and 1722 * the right hand side vector are created from the block structure similar 1723 * to the approach in @ref step_20 "step-20": 1726 * system_matrix.reinit(sparsity_pattern); 1727 * preconditioner_matrix.reinit(preconditioner_sparsity_pattern); 1729 * solution.reinit(2); 1730 * solution.block(0).reinit(n_u); 1731 * solution.block(1).reinit(n_p); 1732 * solution.collect_sizes(); 1734 * system_rhs.reinit(2); 1735 * system_rhs.block(0).reinit(n_u); 1736 * system_rhs.block(1).reinit(n_p); 1737 * system_rhs.collect_sizes(); 1744 * <a name="StokesProblemassemble_system"></a> 1745 * <h4>StokesProblem::assemble_system</h4> 1749 * The assembly process follows the discussion in @ref step_20 "step-20" and in the 1750 * introduction. We use the well-known abbreviations for the data structures 1751 * that hold the local matrices, right hand side, and global numbering of the 1752 * degrees of freedom for the present cell. 1755 * template <int dim> 1756 * void StokesProblem<dim>::assemble_system() 1758 * system_matrix = 0; 1760 * preconditioner_matrix = 0; 1762 * QGauss<dim> quadrature_formula(degree + 2); 1764 * FEValues<dim> fe_values(fe, 1765 * quadrature_formula, 1766 * update_values | update_quadrature_points | 1767 * update_JxW_values | update_gradients); 1769 * const unsigned int dofs_per_cell = fe.dofs_per_cell; 1771 * const unsigned int n_q_points = quadrature_formula.size(); 1773 * FullMatrix<double> local_matrix(dofs_per_cell, dofs_per_cell); 1774 * FullMatrix<double> local_preconditioner_matrix(dofs_per_cell, 1776 * Vector<double> local_rhs(dofs_per_cell); 1778 * std::vector<types::global_dof_index> local_dof_indices(dofs_per_cell); 1780 * const RightHandSide<dim> right_hand_side; 1781 * std::vector<Vector<double>> rhs_values(n_q_points, Vector<double>(dim + 1)); 1785 * Next, we need two objects that work as extractors for the FEValues 1786 * object. Their use is explained in detail in the report on @ref 1790 * const FEValuesExtractors::Vector velocities(0); 1791 * const FEValuesExtractors::Scalar pressure(dim); 1795 * As an extension over @ref step_20 "step-20" and @ref step_21 "step-21", we include a few optimizations 1796 * that make assembly much faster for this particular problem. The 1797 * improvements are based on the observation that we do a few calculations 1798 * too many times when we do as in @ref step_20 "step-20": The symmetric gradient actually 1799 * has <code>dofs_per_cell</code> different values per quadrature point, but 1800 * we extract it <code>dofs_per_cell*dofs_per_cell</code> times from the 1801 * FEValues object - for both the loop over <code>i</code> and the inner 1802 * loop over <code>j</code>. In 3d, that means evaluating it @f$89^2=7921@f$ 1803 * instead of @f$89@f$ times, a not insignificant difference. 1807 * So what we're going to
do here is to avoid such repeated calculations
1808 * by getting a vector of rank-2 tensors (and similarly
for the divergence
1809 * and the basis
function value on pressure) at the quadrature
point prior
1810 * to starting the
loop over the dofs on the cell. First, we create the
1811 * respective objects that will hold these values. Then, we start the
loop 1812 * over all cells and the
loop over the quadrature points, where we
first 1813 *
extract these values. There is
one more optimization we implement here:
1815 * since all the operations involved are symmetric with respect to @f$i@f$ and
1816 * @f$j@f$. This is implemented by simply running the inner
loop not to
1817 * <code>dofs_per_cell</code>, but only up to <code>i</code>, the index of
1821 * std::vector<SymmetricTensor<2, dim>> symgrad_phi_u(dofs_per_cell);
1822 * std::vector<double> div_phi_u(dofs_per_cell);
1823 * std::vector<double> phi_p(dofs_per_cell);
1825 *
for (
const auto &cell : dof_handler.active_cell_iterators())
1827 * fe_values.reinit(cell);
1829 * local_preconditioner_matrix = 0;
1832 * right_hand_side.vector_value_list(fe_values.get_quadrature_points(),
1835 *
for (
unsigned int q = 0; q < n_q_points; ++q)
1837 *
for (
unsigned int k = 0; k < dofs_per_cell; ++k)
1839 * symgrad_phi_u[k] =
1840 * fe_values[velocities].symmetric_gradient(k, q);
1841 * div_phi_u[k] = fe_values[velocities].divergence(k, q);
1842 * phi_p[k] = fe_values[pressure].value(k, q);
1847 * Now
finally for the bilinear forms of both the system
matrix and
1848 * the
matrix we use
for the preconditioner. Recall that the
1849 * formulas
for these two are
1851 * A_{ij} &= a(\varphi_i,\varphi_j)
1852 * \\ &= \underbrace{2(\varepsilon(\varphi_{i,\textbf{u}}),
1853 * \varepsilon(\varphi_{j,\textbf{u}}))_{\Omega}}
1856 * \underbrace{- (\textrm{div}\; \varphi_{i,\textbf{u}},
1857 * \varphi_{j,p})_{\Omega}}
1860 * \underbrace{- (\varphi_{i,p},
1862 * \varphi_{j,\textbf{u}})_{\Omega}}
1867 * M_{ij} &= \underbrace{(\varphi_{i,p},
1868 * \varphi_{j,p})_{\Omega}}
1871 * respectively, where @f$\varphi_{i,\textbf{u}}@f$ and @f$\varphi_{i,p}@f$
1872 * are the velocity and pressure components of the @f$i@f$th shape
1873 *
function. The various terms above are then easily recognized in
1874 * the following implementation:
1877 *
for (
unsigned int i = 0; i < dofs_per_cell; ++i)
1879 *
for (
unsigned int j = 0; j <= i; ++j)
1881 * local_matrix(i, j) +=
1882 * (2 * (symgrad_phi_u[i] * symgrad_phi_u[j])
1883 * - div_phi_u[i] * phi_p[j]
1884 * - phi_p[i] * div_phi_u[j])
1885 * * fe_values.JxW(q);
1887 * local_preconditioner_matrix(i, j) +=
1888 * (phi_p[i] * phi_p[j])
1889 * * fe_values.JxW(q);
1893 * Note that in the implementation of (1) above, `operator*`
1894 * is overloaded for symmetric tensors, yielding the scalar
1895 * product between the two tensors.
1899 * For the right-hand side we use the fact that the shape
1900 * functions are only non-
zero in
one component (because our
1901 * elements are primitive). Instead of multiplying the tensor
1902 * representing the dim+1 values of shape function i with the
1903 * whole right-hand side vector, we only look at the only
1904 * non-
zero component. The function
1906 * which component this shape function lives in (0=x velocity,
1907 * 1=y velocity, 2=pressure in 2
d), which we use to pick out
1908 * the correct component of the right-hand side vector to
1912 * const
unsigned int component_i =
1913 * fe.system_to_component_index(i).
first;
1914 * local_rhs(i) += (fe_values.shape_value(i, q)
1915 * * rhs_values[q](component_i))
1916 * * fe_values.JxW(q);
1922 * Before we can write the local data into the global
matrix (and
1924 * Dirichlet boundary conditions and eliminate hanging node constraints,
1925 * as we discussed in the introduction), we have to be careful about
one 1926 * thing, though. We have only built half of the local matrices
1927 * because of symmetry, but we're going to save the full matrices
1928 * in order to use the standard functions for solving. This is done
1929 * by flipping the indices in case we are pointing into the empty part
1930 * of the local matrices.
1933 * for (
unsigned int i = 0; i < dofs_per_cell; ++i)
1934 * for (
unsigned int j = i + 1; j < dofs_per_cell; ++j)
1936 * local_matrix(i, j) = local_matrix(j, i);
1937 * local_preconditioner_matrix(i, j) =
1938 * local_preconditioner_matrix(j, i);
1941 * cell->get_dof_indices(local_dof_indices);
1942 * constraints.distribute_local_to_global(local_matrix,
1944 * local_dof_indices,
1947 * constraints.distribute_local_to_global(local_preconditioner_matrix,
1948 * local_dof_indices,
1949 * preconditioner_matrix);
1954 * Before we
're going to solve this linear system, we generate a 1955 * preconditioner for the velocity-velocity matrix, i.e., 1956 * <code>block(0,0)</code> in the system matrix. As mentioned above, this 1957 * depends on the spatial dimension. Since the two classes described by 1958 * the <code>InnerPreconditioner::type</code> alias have the same 1959 * interface, we do not have to do anything different whether we want to 1960 * use a sparse direct solver or an ILU: 1963 * std::cout << " Computing preconditioner..." << std::endl << std::flush; 1965 * A_preconditioner = 1966 * std::make_shared<typename InnerPreconditioner<dim>::type>(); 1967 * A_preconditioner->initialize( 1968 * system_matrix.block(0, 0), 1969 * typename InnerPreconditioner<dim>::type::AdditionalData()); 1977 * <a name="StokesProblemsolve"></a> 1978 * <h4>StokesProblem::solve</h4> 1982 * After the discussion in the introduction and the definition of the 1983 * respective classes above, the implementation of the <code>solve</code> 1984 * function is rather straight-forward and done in a similar way as in 1985 * @ref step_20 "step-20". To start with, we need an object of the 1986 * <code>InverseMatrix</code> class that represents the inverse of the 1987 * matrix A. As described in the introduction, the inverse is generated with 1988 * the help of an inner preconditioner of type 1989 * <code>InnerPreconditioner::type</code>. 1992 * template <int dim> 1993 * void StokesProblem<dim>::solve() 1995 * const InverseMatrix<SparseMatrix<double>, 1996 * typename InnerPreconditioner<dim>::type> 1997 * A_inverse(system_matrix.block(0, 0), *A_preconditioner); 1998 * Vector<double> tmp(solution.block(0).size()); 2002 * This is as in @ref step_20 "step-20". We generate the right hand side @f$B A^{-1} F - G@f$ 2003 * for the Schur complement and an object that represents the respective 2004 * linear operation @f$B A^{-1} B^T@f$, now with a template parameter 2005 * indicating the preconditioner - in accordance with the definition of 2010 * Vector<double> schur_rhs(solution.block(1).size()); 2011 * A_inverse.vmult(tmp, system_rhs.block(0)); 2012 * system_matrix.block(1, 0).vmult(schur_rhs, tmp); 2013 * schur_rhs -= system_rhs.block(1); 2015 * SchurComplement<typename InnerPreconditioner<dim>::type> schur_complement( 2016 * system_matrix, A_inverse); 2020 * The usual control structures for the solver call are created... 2023 * SolverControl solver_control(solution.block(1).size(), 2024 * 1e-6 * schur_rhs.l2_norm()); 2025 * SolverCG<Vector<double>> cg(solver_control); 2029 * Now to the preconditioner to the Schur complement. As explained in 2030 * the introduction, the preconditioning is done by a mass matrix in the 2031 * pressure variable. 2035 * Actually, the solver needs to have the preconditioner in the form 2036 * @f$P^{-1}@f$, so we need to create an inverse operation. Once again, we 2037 * use an object of the class <code>InverseMatrix</code>, which 2038 * implements the <code>vmult</code> operation that is needed by the 2039 * solver. In this case, we have to invert the pressure mass matrix. As 2040 * it already turned out in earlier tutorial programs, the inversion of 2041 * a mass matrix is a rather cheap and straight-forward operation 2042 * (compared to, e.g., a Laplace matrix). The CG method with ILU 2043 * preconditioning converges in 5-10 steps, independently on the mesh 2044 * size. This is precisely what we do here: We choose another ILU 2045 * preconditioner and take it along to the InverseMatrix object via the 2046 * corresponding template parameter. A CG solver is then called within 2047 * the vmult operation of the inverse matrix. 2051 * An alternative that is cheaper to build, but needs more iterations 2052 * afterwards, would be to choose a SSOR preconditioner with factor 2053 * 1.2. It needs about twice the number of iterations, but the costs for 2054 * its generation are almost negligible. 2057 * SparseILU<double> preconditioner; 2058 * preconditioner.initialize(preconditioner_matrix.block(1, 1), 2059 * SparseILU<double>::AdditionalData()); 2061 * InverseMatrix<SparseMatrix<double>, SparseILU<double>> m_inverse( 2062 * preconditioner_matrix.block(1, 1), preconditioner); 2066 * With the Schur complement and an efficient preconditioner at hand, we 2067 * can solve the respective equation for the pressure (i.e. block 0 in 2068 * the solution vector) in the usual way: 2071 * cg.solve(schur_complement, solution.block(1), schur_rhs, m_inverse); 2075 * After this first solution step, the hanging node constraints have to 2076 * be distributed to the solution in order to achieve a consistent 2080 * constraints.distribute(solution); 2082 * std::cout << " " << solver_control.last_step() 2083 * << " outer CG Schur complement iterations for pressure" 2089 * As in @ref step_20 "step-20", we finally need to solve for the velocity equation where 2090 * we plug in the solution to the pressure equation. This involves only 2091 * objects we already know - so we simply multiply @f$p@f$ by @f$B^T@f$, subtract 2092 * the right hand side and multiply by the inverse of @f$A@f$. At the end, we 2093 * need to distribute the constraints from hanging nodes in order to 2094 * obtain a consistent flow field: 2098 * system_matrix.block(0, 1).vmult(tmp, solution.block(1)); 2100 * tmp += system_rhs.block(0); 2102 * A_inverse.vmult(solution.block(0), tmp); 2104 * constraints.distribute(solution); 2112 * <a name="StokesProblemoutput_results"></a> 2113 * <h4>StokesProblem::output_results</h4> 2117 * The next function generates graphical output. In this example, we are 2118 * going to use the VTK file format. We attach names to the individual 2119 * variables in the problem: <code>velocity</code> to the <code>dim</code> 2120 * components of velocity and <code>pressure</code> to the pressure. 2124 * Not all visualization programs have the ability to group individual 2125 * vector components into a vector to provide vector plots; in particular, 2126 * this holds for some VTK-based visualization programs. In this case, the 2127 * logical grouping of components into vectors should already be described 2128 * in the file containing the data. In other words, what we need to do is 2129 * provide our output writers with a way to know which of the components of 2130 * the finite element logically form a vector (with @f$d@f$ components in @f$d@f$ 2131 * space dimensions) rather than letting them assume that we simply have a 2132 * bunch of scalar fields. This is achieved using the members of the 2133 * <code>DataComponentInterpretation</code> namespace: as with the filename, 2134 * we create a vector in which the first <code>dim</code> components refer 2135 * to the velocities and are given the tag 2136 * DataComponentInterpretation::component_is_part_of_vector; we 2137 * finally push one tag 2138 * DataComponentInterpretation::component_is_scalar to describe 2139 * the grouping of the pressure variable. 2143 * The rest of the function is then the same as in @ref step_20 "step-20". 2146 * template <int dim> 2148 * StokesProblem<dim>::output_results(const unsigned int refinement_cycle) const 2150 * std::vector<std::string> solution_names(dim, "velocity"); 2151 * solution_names.emplace_back("pressure"); 2153 * std::vector<DataComponentInterpretation::DataComponentInterpretation> 2154 * data_component_interpretation( 2155 * dim, DataComponentInterpretation::component_is_part_of_vector); 2156 * data_component_interpretation.push_back( 2157 * DataComponentInterpretation::component_is_scalar); 2159 * DataOut<dim> data_out; 2160 * data_out.attach_dof_handler(dof_handler); 2161 * data_out.add_data_vector(solution, 2163 * DataOut<dim>::type_dof_data, 2164 * data_component_interpretation); 2165 * data_out.build_patches(); 2167 * std::ofstream output( 2168 * "solution-" + Utilities::int_to_string(refinement_cycle, 2) + ".vtk"); 2169 * data_out.write_vtk(output); 2176 * <a name="StokesProblemrefine_mesh"></a> 2177 * <h4>StokesProblem::refine_mesh</h4> 2181 * This is the last interesting function of the <code>StokesProblem</code> 2182 * class. As indicated by its name, it takes the solution to the problem 2183 * and refines the mesh where this is needed. The procedure is the same as 2184 * in the respective step in @ref step_6 "step-6", with the exception that we base the 2185 * refinement only on the change in pressure, i.e., we call the Kelly error 2186 * estimator with a mask object of type ComponentMask that selects the 2187 * single scalar component for the pressure that we are interested in (we 2188 * get such a mask from the finite element class by specifying the component 2189 * we want). Additionally, we do not coarsen the grid again: 2192 * template <int dim> 2193 * void StokesProblem<dim>::refine_mesh() 2195 * Vector<float> estimated_error_per_cell(triangulation.n_active_cells()); 2197 * FEValuesExtractors::Scalar pressure(dim); 2198 * KellyErrorEstimator<dim>::estimate( 2200 * QGauss<dim - 1>(degree + 1), 2201 * std::map<types::boundary_id, const Function<dim> *>(), 2203 * estimated_error_per_cell, 2204 * fe.component_mask(pressure)); 2206 * GridRefinement::refine_and_coarsen_fixed_number(triangulation, 2207 * estimated_error_per_cell, 2210 * triangulation.execute_coarsening_and_refinement(); 2217 * <a name="StokesProblemrun"></a> 2218 * <h4>StokesProblem::run</h4> 2222 * The last step in the Stokes class is, as usual, the function that 2223 * generates the initial grid and calls the other functions in the 2228 * We start off with a rectangle of size @f$4 \times 1@f$ (in 2d) or @f$4 \times 1 2229 * \times 1@f$ (in 3d), placed in @f$R^2/R^3@f$ as @f$(-2,2)\times(-1,0)@f$ or 2230 * @f$(-2,2)\times(0,1)\times(-1,0)@f$, respectively. It is natural to start 2231 * with equal mesh size in each direction, so we subdivide the initial 2232 * rectangle four times in the first coordinate direction. To limit the 2233 * scope of the variables involved in the creation of the mesh to the range 2234 * where we actually need them, we put the entire block between a pair of 2238 * template <int dim> 2239 * void StokesProblem<dim>::run() 2242 * std::vector<unsigned int> subdivisions(dim, 1); 2243 * subdivisions[0] = 4; 2245 * const Point<dim> bottom_left = (dim == 2 ? 2246 * Point<dim>(-2, -1) : // 2d case 2247 * Point<dim>(-2, 0, -1)); // 3d case 2249 * const Point<dim> top_right = (dim == 2 ? 2250 * Point<dim>(2, 0) : // 2d case 2251 * Point<dim>(2, 1, 0)); // 3d case 2253 * GridGenerator::subdivided_hyper_rectangle(triangulation, 2261 * A boundary indicator of 1 is set to all boundaries that are subject to 2262 * Dirichlet boundary conditions, i.e. to faces that are located at 0 in 2263 * the last coordinate direction. See the example description above for 2267 * for (const auto &cell : triangulation.active_cell_iterators()) 2268 * for (const auto &face : cell->face_iterators()) 2269 * if (face->center()[dim - 1] == 0) 2270 * face->set_all_boundary_ids(1); 2275 * We then apply an initial refinement before solving for the first 2276 * time. In 3D, there are going to be more degrees of freedom, so we 2277 * refine less there: 2280 * triangulation.refine_global(4 - dim); 2284 * As first seen in @ref step_6 "step-6", we cycle over the different refinement levels 2285 * and refine (except for the first cycle), setup the degrees of freedom 2286 * and matrices, assemble, solve and create output: 2289 * for (unsigned int refinement_cycle = 0; refinement_cycle < 6; 2290 * ++refinement_cycle) 2292 * std::cout << "Refinement cycle " << refinement_cycle << std::endl; 2294 * if (refinement_cycle > 0) 2299 * std::cout << " Assembling..." << std::endl << std::flush; 2300 * assemble_system(); 2302 * std::cout << " Solving..." << std::flush; 2305 * output_results(refinement_cycle); 2307 * std::cout << std::endl; 2310 * } // namespace Step22 2316 * <a name="Thecodemaincodefunction"></a> 2317 * <h3>The <code>main</code> function</h3> 2321 * The main function is the same as in @ref step_20 "step-20". We pass the element degree as 2322 * a parameter and choose the space dimension at the well-known template slot. 2329 * using namespace Step22; 2331 * StokesProblem<2> flow_problem(1); 2332 * flow_problem.run(); 2334 * catch (std::exception &exc) 2336 * std::cerr << std::endl 2338 * << "----------------------------------------------------" 2340 * std::cerr << "Exception on processing: " << std::endl 2341 * << exc.what() << std::endl 2342 * << "Aborting!" << std::endl 2343 * << "----------------------------------------------------" 2350 * std::cerr << std::endl 2352 * << "----------------------------------------------------" 2354 * std::cerr << "Unknown exception!" << std::endl 2355 * << "Aborting!" << std::endl 2356 * << "----------------------------------------------------" 2364 <a name="Results"></a> 2365 <a name="Results"></a><h1>Results</h1> 2368 <a name="Outputoftheprogramandgraphicalvisualization"></a><h3>Output of the program and graphical visualization</h3> 2371 <a name="2Dcalculations"></a><h4>2D calculations</h4> 2374 Running the program with the space dimension set to 2 in the <code>main</code> 2375 function yields the following output (in "release mode", 2376 See also <a href="http://www.math.colostate.edu/~bangerth/videos.676.18.html">video lecture 18</a>.): 2378 examples/step-22> make run 2380 Number of active cells: 64 2381 Number of degrees of freedom: 679 (594+85) 2383 Computing preconditioner... 2384 Solving... 11 outer CG Schur complement iterations for pressure 2387 Number of active cells: 160 2388 Number of degrees of freedom: 1683 (1482+201) 2390 Computing preconditioner... 2391 Solving... 11 outer CG Schur complement iterations for pressure 2394 Number of active cells: 376 2395 Number of degrees of freedom: 3813 (3370+443) 2397 Computing preconditioner... 2398 Solving... 11 outer CG Schur complement iterations for pressure 2401 Number of active cells: 880 2402 Number of degrees of freedom: 8723 (7722+1001) 2404 Computing preconditioner... 2405 Solving... 11 outer CG Schur complement iterations for pressure 2408 Number of active cells: 2008 2409 Number of degrees of freedom: 19383 (17186+2197) 2411 Computing preconditioner... 2412 Solving... 11 outer CG Schur complement iterations for pressure 2415 Number of active cells: 4288 2416 Number of degrees of freedom: 40855 (36250+4605) 2418 Computing preconditioner... 2419 Solving... 11 outer CG Schur complement iterations for pressure 2422 The entire computation above takes about 2 seconds on a reasonably 2423 quick (for 2015 standards) machine. 2425 What we see immediately from this is that the number of (outer) 2426 iterations does not increase as we refine the mesh. This confirms the 2427 statement in the introduction that preconditioning the Schur 2428 complement with the mass matrix indeed yields a matrix spectrally 2429 equivalent to the identity matrix (i.e. with eigenvalues bounded above 2430 and below independently of the mesh size or the relative sizes of 2431 cells). In other words, the mass matrix and the Schur complement are 2432 spectrally equivalent. 2434 In the images below, we show the grids for the first six refinement 2435 steps in the program. Observe how the grid is refined in regions 2436 where the solution rapidly changes: On the upper boundary, we have 2437 Dirichlet boundary conditions that are -1 in the left half of the line 2438 and 1 in the right one, so there is an abrupt change at @f$x=0@f$. Likewise, 2439 there are changes from Dirichlet to Neumann data in the two upper 2440 corners, so there is need for refinement there as well: 2442 <table width="60%" align="center"> 2445 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-0.png" alt=""> 2448 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-1.png" alt=""> 2453 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-2.png" alt=""> 2456 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-3.png" alt=""> 2461 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-4.png" alt=""> 2464 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.mesh-5.png" alt=""> 2469 Finally, following is a plot of the flow field. It shows fluid 2470 transported along with the moving upper boundary and being replaced by 2471 material coming from below: 2473 <img src="https://www.dealii.org/images/steps/developer/step-22.2d.solution.png" alt=""> 2475 This plot uses the capability of VTK-based visualization programs (in 2476 this case of VisIt) to show vector data; this is the result of us 2477 declaring the velocity components of the finite element in use to be a 2478 set of vector components, rather than independent scalar components in 2479 the <code>StokesProblem@<dim@>::%output_results</code> function of this 2484 <a name="3Dcalculations"></a><h4>3D calculations</h4> 2487 In 3d, the screen output of the program looks like this: 2491 Number of active cells: 32 2492 Number of degrees of freedom: 1356 (1275+81) 2494 Computing preconditioner... 2495 Solving... 13 outer CG Schur complement iterations for pressure. 2498 Number of active cells: 144 2499 Number of degrees of freedom: 5088 (4827+261) 2501 Computing preconditioner... 2502 Solving... 14 outer CG Schur complement iterations for pressure. 2505 Number of active cells: 704 2506 Number of degrees of freedom: 22406 (21351+1055) 2508 Computing preconditioner... 2509 Solving... 14 outer CG Schur complement iterations for pressure. 2512 Number of active cells: 3168 2513 Number of degrees of freedom: 93176 (89043+4133) 2515 Computing preconditioner... 2516 Solving... 15 outer CG Schur complement iterations for pressure. 2519 Number of active cells: 11456 2520 Number of degrees of freedom: 327808 (313659+14149) 2522 Computing preconditioner... 2523 Solving... 15 outer CG Schur complement iterations for pressure. 2526 Number of active cells: 45056 2527 Number of degrees of freedom: 1254464 (1201371+53093) 2529 Computing preconditioner... 2530 Solving... 14 outer CG Schur complement iterations for pressure. 2533 Again, we see that the number of outer iterations does not increase as 2534 we refine the mesh. Nevertheless, the compute time increases 2535 significantly: for each of the iterations above separately, it takes about 2536 0.14 seconds, 0.63 seconds, 4.8 seconds, 35 seconds, 2 minutes and 33 seconds, 2537 and 13 minutes and 12 seconds. This overall superlinear (in the number of 2538 unknowns) increase in runtime is due to the fact that our inner solver is not 2539 @f${\cal O}(N)@f$: a simple experiment shows that as we keep refining the mesh, the 2540 average number of ILU-preconditioned CG iterations to invert the 2541 velocity-velocity block @f$A@f$ increases. 2543 We will address the question of how possibly to improve our solver <a 2544 href="#improved-solver">below</a>. 2546 As for the graphical output, the grids generated during the solution 2549 <table width="60%" align="center"> 2552 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-0.png" alt=""> 2555 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-1.png" alt=""> 2560 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-2.png" alt=""> 2563 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-3.png" alt=""> 2568 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-4.png" alt=""> 2571 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.mesh-5.png" alt=""> 2576 Again, they show essentially the location of singularities introduced 2577 by boundary conditions. The vector field computed makes for an 2580 <img src="https://www.dealii.org/images/steps/developer/step-22.3d.solution.png" alt=""> 2582 The isocontours shown here as well are those of the pressure 2583 variable, showing the singularity at the point of discontinuous 2584 velocity boundary conditions. 2588 <a name="Sparsitypattern"></a><h3>Sparsity pattern</h3> 2591 As explained during the generation of the sparsity pattern, it is 2592 important to have the numbering of degrees of freedom in mind when 2593 using preconditioners like incomplete LU decompositions. This is most 2594 conveniently visualized using the distribution of nonzero elements in 2595 the stiffness matrix. 2597 If we don't
do anything special to renumber degrees of freedom (i.e.,
2600 appropriately sorted into their corresponding blocks of the
matrix and
2601 vector), then we
get the following image after the
first adaptive
2602 refinement in two dimensions:
2604 <img src=
"https://www.dealii.org/images/steps/developer/step-22.2d.sparsity-nor.png" alt=
"">
2606 In order to generate such a graph, you have to insert a piece of
2607 code like the following to the end of the setup step.
2610 std::ofstream out (
"sparsity_pattern.gpl");
2611 sparsity_pattern.print_gnuplot(out);
2615 It is clearly visible that the
nonzero entries are spread over almost the
2616 whole
matrix. This makes preconditioning by ILU inefficient: ILU generates a
2617 Gaussian elimination (LU decomposition) without fill-in elements, which means
2618 that more tentative fill-ins left out will result in a worse approximation of
2619 the complete decomposition.
2621 In
this program, we have thus chosen a more advanced renumbering of
2623 the components into velocity and pressure yields the following output:
2625 <img src=
"https://www.dealii.org/images/steps/developer/step-22.2d.sparsity-ren.png" alt=
"">
2627 It is apparent that the situation has improved a lot. Most of the elements are
2628 now concentrated around the
diagonal in the (0,0) block in the
matrix. Similar
2629 effects are also visible for the other blocks. In this case, the ILU
2630 decomposition will be much closer to the full LU decomposition, which improves
2631 the quality of the preconditioner. (It may be interesting to note that the
2632 sparse direct solver UMFPACK does some %
internal renumbering of the equations
2633 before actually generating a sparse LU decomposition; that procedure leads to
2634 a very similar pattern to the
one we got from the Cuthill-McKee algorithm.)
2636 Finally, we want to have a closer
2637 look at a sparsity pattern in 3D. We show only the (0,0) block of the
2638 matrix, again after
one adaptive refinement. Apart from the fact that the matrix
2639 size has increased, it is also visible that there are many more entries
2640 in the matrix. Moreover, even for the optimized renumbering, there will be a
2641 considerable amount of tentative fill-in elements. This illustrates why UMFPACK
2642 is not a good choice in 3D - a full decomposition needs many new entries that
2643 eventually won't fit into the physical memory (RAM):
2649 <a name="PossibleExtensions"></a><h3>Possible Extensions</h3>
2652 <a name="improved-solver">
2653 <a name="Improvedlinearsolverin3D"></a><h4>Improved linear solver in 3D</h4>
2657 We have seen in the section of computational results that the number of outer
2658 iterations does not depend on the mesh size, which is optimal in a sense of
2659 scalability. This does, however, not
apply to the solver as a whole, as
2661 We did not look at the number of inner iterations when generating the inverse of
2662 the matrix @f$A@f$ and the mass matrix @f$M_p@f$. Of course, this is unproblematic in
2663 the 2D case where we precondition @f$A@f$ with a direct solver and the
2664 <code>vmult</code> operation of the inverse matrix structure will converge in
2665 one single CG step, but this changes in 3D where we only use an ILU
2666 preconditioner. There, the number of required preconditioned CG steps to
2667 invert @f$A@f$ increases as the mesh is refined, and each <code>vmult</code>
2668 operation involves on average approximately 14, 23, 36, 59, 75 and 101 inner
2669 CG iterations in the refinement steps shown above. (On the other hand,
2670 the number of iterations for applying the inverse pressure mass matrix is
2671 always around five, both in two and three dimensions.) To summarize, most work
2672 is spent on solving linear systems with the same matrix @f$A@f$ over and over again.
2673 What makes this look even worse is the fact that we
2674 actually
invert a matrix that is about 95 percent the size of the total system
2675 matrix and stands for 85 percent of the non-
zero entries in the sparsity
2676 pattern. Hence, the natural question is whether it is reasonable to solve a
2677 linear system with matrix @f$A@f$ for about 15 times when calculating the solution
2678 to the block system.
2680 The answer is, of course, that we can do that in a few other (most of the time
2682 Nevertheless, it has to be remarked that an indefinite system as the
one 2683 at hand puts indeed much higher
2684 demands on the linear algebra than standard elliptic problems as we have seen
2685 in the early tutorial programs. The improvements are still rather
2686 unsatisfactory, if
one compares with an elliptic problem of similar
2687 size. Either way, we will introduce below a number of improvements to the
2688 linear solver, a discussion that we will re-consider again with additional
2689 options in the @ref step_31 "step-31" program.
2691 <a name="improved-ilu">
2692 <a name="BetterILUdecompositionbysmartreordering"></a><h5>Better ILU decomposition by smart reordering</h5>
2695 A first attempt to improve the speed of the linear solution process is to choose
2696 a dof reordering that makes the ILU being closer to a full LU decomposition, as
2697 already mentioned in the in-code comments. The
DoFRenumbering namespace compares
2698 several choices for the renumbering of dofs for the Stokes equations. The best
2699 result regarding the computing time was found for the King ordering, which is
2701 program, the inner solver needs considerably less operations,
e.g. about 62
2702 inner CG iterations for the inversion of @f$A@f$ at cycle 4 compared to about 75
2703 iterations with the standard Cuthill-McKee-algorithm. Also, the computing time
2704 at cycle 4 decreased from about 17 to 11 minutes for the <code>solve()</code>
2705 call. However, the King ordering (and the orderings provided by the
2707 much more memory than the in-build deal versions, since it acts on abstract
2708 graphs rather than the geometry provided by the
triangulation. In the present
2709 case, the renumbering takes about 5 times as much memory, which yields an
2710 infeasible algorithm for the last cycle in 3D with 1.2 million
2713 <a name="BetterpreconditionerfortheinnerCGsolver"></a><h5>Better preconditioner for the inner CG solver</h5>
2715 Another idea to improve the situation even more would be to choose a
2716 preconditioner that makes CG for the (0,0) matrix @f$A@f$ converge in a
2717 mesh-independent number of iterations, say 10 to 30. We have seen such a
2718 candidate in @ref step_16 "step-16": multigrid.
2720 <a name="BlockSchurcomplementpreconditioner"></a><h5>Block Schur complement preconditioner</h5>
2722 Even with a good preconditioner for @f$A@f$, we still
2723 need to solve of the same linear system repeatedly (with different
2724 right hand sides, though) in order to make the Schur complement solve
2725 converge. The approach we are going to discuss here is how inner iteration
2726 and outer iteration can be combined. If we persist in calculating the Schur
2727 complement, there is no other possibility.
2729 The alternative is to attack the block system at once and use an
approximate 2730 Schur complement as efficient preconditioner. The idea is as
2731 follows: If we find a block preconditioner @f$P@f$ such that the matrix
2733 P^{-1}\left(\
begin{array}{cc}
2737 is simple, then an iterative solver with that preconditioner will converge in a
2738 few iterations. Using the Schur complement @f$S = B
A^{-1} B^
T@f$,
one finds that
2742 \left(\
begin{array}{cc}
2743 A^{-1} & 0 \\ S^{-1} B
A^{-1} & -S^{-1}
2746 would appear to be a good choice since
2748 P^{-1}\left(\
begin{array}{cc}
2752 \left(\
begin{array}{cc}
2753 A^{-1} & 0 \\ S^{-1} B
A^{-1} & -S^{-1}
2754 \end{array}\right)\cdot \left(\
begin{array}{cc}
2758 \left(\
begin{array}{cc}
2759 I &
A^{-1} B^
T \\ 0 &
I 2762 This is the approach taken by the paper by Silvester and Wathen referenced
2763 to in the introduction (with the exception that Silvester and Wathen use
2764 right preconditioning). In
this case, a Krylov-based iterative method would
2765 converge in
one step only
if exact inverses of @f$A@f$ and @f$S@f$ were applied,
2766 since all the
eigenvalues are
one (and the number of iterations in such a
2767 method is bounded by the number of distinct
eigenvalues). Below, we will
2768 discuss the choice of an adequate solver
for this problem. First, we are
2769 going to have a closer look at the implementation of the preconditioner.
2771 Since @f$P@f$ is aimed to be a preconditioner only, we shall use approximations to
2772 the inverse of the Schur complement @f$S@f$ and the
matrix @f$A@f$. Hence, the Schur
2773 complement will be approximated by the pressure mass
matrix @f$M_p@f$, and we use
2774 a preconditioner to @f$A@f$ (without an InverseMatrix
class around it) for
2775 approximating @f$A^{-1}@f$.
2777 Here comes the
class that
implements the block Schur
2778 complement preconditioner. The <code>vmult</code> operation for block vectors
2779 according to the derivation above can be specified by three successive
2782 template <
class PreconditionerA,
class PreconditionerMp>
2783 class BlockSchurPreconditioner :
public Subscriptor 2788 const PreconditionerA &Apreconditioner);
2796 PreconditionerMp > > m_inverse;
2797 const PreconditionerA &a_preconditioner;
2803 template <
class PreconditionerA,
class PreconditionerMp>
2804 BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::BlockSchurPreconditioner(
2807 const PreconditionerA &Apreconditioner
2812 a_preconditioner (Apreconditioner),
2813 tmp (S.block(1,1).m())
2818 template <
class PreconditionerA,
class PreconditionerMp>
2819 void BlockSchurPreconditioner<PreconditionerA, PreconditionerMp>::vmult (
2824 a_preconditioner.vmult (dst.
block(0), src.
block(0));
2828 system_matrix->block(1,0).residual(tmp, dst.
block(0), src.
block(1));
2833 m_inverse->vmult (dst.
block(1), tmp);
2837 Since we act on the whole block system now, we have to live with
one 2838 disadvantage: we need to perform the solver iterations on
2839 the full block system instead of the smaller pressure space.
2841 Now we turn to the question which solver we should use
for the block
2842 system. The
first observation is that the resulting preconditioned
matrix cannot
2843 be solved with CG since it is neither positive definite nor symmetric.
2845 The deal.II libraries implement several solvers that are appropriate
for the
2846 problem at hand. One choice is the solver @ref
SolverBicgstab "BiCGStab", which
2847 was used
for the
solution of the unsymmetric advection problem in @ref step_9
"step-9". The
2849 (generalized minimum
residual). Both methods have their pros and cons - there
2850 are problems where
one of the two candidates clearly outperforms the other, and
2852 <a href=
"http://en.wikipedia.org/wiki/GMRES#Comparison_with_other_solvers">Wikipedia</a>
's 2853 article on the GMRES method gives a comparative presentation. 2854 A more comprehensive and well-founded comparison can be read e.g. in the book by 2855 J.W. Demmel (Applied Numerical Linear Algebra, SIAM, 1997, section 6.6.6). 2857 For our specific problem with the ILU preconditioner for @f$A@f$, we certainly need 2858 to perform hundreds of iterations on the block system for large problem sizes 2859 (we won't beat CG!). Actually,
this disfavors GMRES: During the GMRES
2860 iterations, a basis of Krylov vectors is successively built up and some
2861 operations are performed on these vectors. The more vectors are in
this basis,
2862 the more operations and memory will be needed. The number of operations scales
2863 as @f${\cal O}(n + k^2)@f$ and memory as @f${\cal O}(kn)@f$, where @f$k@f$ is the number of
2864 vectors in the Krylov basis and @f$n@f$ the size of the (block)
matrix.
2865 To not let these demands grow excessively, deal.II limits the size @f$k@f$ of the
2866 basis to 30 vectors by
default.
2867 Then, the basis is rebuilt. This implementation of the GMRES method is called
2868 GMRES(k), with
default @f$k=30@f$. What we have gained by
this restriction,
2869 namely a bound on operations and memory requirements, will be compensated by
2870 the fact that we use an incomplete basis -
this will increase the number of
2871 required iterations.
2873 BiCGStab, on the other hand, won
't get slower when many iterations are needed 2874 (one iteration uses only results from one preceding step and 2875 not all the steps as GMRES). Besides the fact the BiCGStab is more expensive per 2876 step since two matrix-vector products are needed (compared to one for 2877 CG or GMRES), there is one main reason which makes BiCGStab not appropriate for 2878 this problem: The preconditioner applies the inverse of the pressure 2879 mass matrix by using the InverseMatrix class. Since the application of the 2880 inverse matrix to a vector is done only in approximative way (an exact inverse 2881 is too expensive), this will also affect the solver. In the case of BiCGStab, 2882 the Krylov vectors will not be orthogonal due to that perturbation. While 2883 this is uncritical for a small number of steps (up to about 50), it ruins the 2884 performance of the solver when these perturbations have grown to a significant 2885 magnitude in the coarse of iterations. 2887 We did some experiments with BiCGStab and found it to 2888 be faster than GMRES up to refinement cycle 3 (in 3D), but it became very slow 2889 for cycles 4 and 5 (even slower than the original Schur complement), so the 2890 solver is useless in this situation. Choosing a sharper tolerance for the 2891 inverse matrix class (<code>1e-10*src.l2_norm()</code> instead of 2892 <code>1e-6*src.l2_norm()</code>) made BiCGStab perform well also for cycle 4, 2893 but did not change the failure on the very large problems. 2895 GMRES is of course also effected by the approximate inverses, but it is not as 2896 sensitive to orthogonality and retains a relatively good performance also for 2897 large sizes, see the results below. 2899 With this said, we turn to the realization of the solver call with GMRES with 2900 @f$k=100@f$ temporary vectors: 2903 const SparseMatrix<double> &pressure_mass_matrix 2904 = preconditioner_matrix.block(1,1); 2905 SparseILU<double> pmass_preconditioner; 2906 pmass_preconditioner.initialize (pressure_mass_matrix, 2907 SparseILU<double>::AdditionalData()); 2909 InverseMatrix<SparseMatrix<double>,SparseILU<double> > 2910 m_inverse (pressure_mass_matrix, pmass_preconditioner); 2912 BlockSchurPreconditioner<typename InnerPreconditioner<dim>::type, 2914 preconditioner (system_matrix, m_inverse, *A_preconditioner); 2916 SolverControl solver_control (system_matrix.m(), 2917 1e-6*system_rhs.l2_norm()); 2918 GrowingVectorMemory<BlockVector<double> > vector_memory; 2919 SolverGMRES<BlockVector<double> >::AdditionalData gmres_data; 2920 gmres_data.max_n_tmp_vectors = 100; 2922 SolverGMRES<BlockVector<double> > gmres(solver_control, vector_memory, 2925 gmres.solve(system_matrix, solution, system_rhs, 2928 constraints.distribute (solution); 2931 << solver_control.last_step() 2932 << " block GMRES iterations"; 2935 Obviously, one needs to add the include file @ref SolverGMRES 2936 "<lac/solver_gmres.h>" in order to make this run. 2937 We call the solver with a BlockVector template in order to enable 2938 GMRES to operate on block vectors and matrices. 2939 Note also that we need to set the (1,1) block in the system 2940 matrix to zero (we saved the pressure mass matrix there which is not part of the 2941 problem) after we copied the information to another matrix. 2943 Using the Timer class, we collect some statistics that compare the runtime 2944 of the block solver with the one from the problem implementation above. 2945 Besides the solution with the two options we also check if the solutions 2946 of the two variants are close to each other (i.e. this solver gives indeed the 2947 same solution as we had before) and calculate the infinity 2948 norm of the vector difference. 2950 Let's
first see the results in 2D:
2953 Number of active cells: 64
2954 Number of degrees of freedom: 679 (594+85) [0.00162792 s]
2955 Assembling... [0.00108981 s]
2956 Computing preconditioner... [0.0025959 s]
2958 Schur complement: 11 outer CG iterations
for p [0.00479603s ]
2959 Block Schur preconditioner: 12 GMRES iterations [0.00441718 s]
2960 l_infinity difference between
solution vectors: 5.38258e-07
2963 Number of active cells: 160
2964 Number of degrees of freedom: 1683 (1482+201) [0.00345707 s]
2965 Assembling... [0.00237417 s]
2966 Computing preconditioner... [0.00605702 s]
2968 Schur complement: 11 outer CG iterations
for p [0.0123992s ]
2969 Block Schur preconditioner: 12 GMRES iterations [0.011909 s]
2970 l_infinity difference between
solution vectors: 1.74658e-05
2973 Number of active cells: 376
2974 Number of degrees of freedom: 3813 (3370+443) [0.00729299 s]
2975 Assembling... [0.00529909 s]
2976 Computing preconditioner... [0.0167508 s]
2978 Schur complement: 11 outer CG iterations
for p [0.031672s ]
2979 Block Schur preconditioner: 12 GMRES iterations [0.029232 s]
2980 l_infinity difference between
solution vectors: 7.81569e-06
2983 Number of active cells: 880
2984 Number of degrees of freedom: 8723 (7722+1001) [0.017709 s]
2985 Assembling... [0.0126002 s]
2986 Computing preconditioner... [0.0435679 s]
2988 Schur complement: 11 outer CG iterations
for p [0.0971651s ]
2989 Block Schur preconditioner: 12 GMRES iterations [0.0992041 s]
2990 l_infinity difference between
solution vectors: 1.87249e-05
2993 Number of active cells: 2008
2994 Number of degrees of freedom: 19383 (17186+2197) [0.039988 s]
2995 Assembling... [0.028281 s]
2996 Computing preconditioner... [0.118314 s]
2998 Schur complement: 11 outer CG iterations
for p [0.252133s ]
2999 Block Schur preconditioner: 13 GMRES iterations [0.269125 s]
3000 l_infinity difference between
solution vectors: 6.38657e-05
3003 Number of active cells: 4288
3004 Number of degrees of freedom: 40855 (36250+4605) [0.0880702 s]
3005 Assembling... [0.0603511 s]
3006 Computing preconditioner... [0.278339 s]
3008 Schur complement: 11 outer CG iterations
for p [0.53846s ]
3009 Block Schur preconditioner: 13 GMRES iterations [0.578667 s]
3010 l_infinity difference between
solution vectors: 0.000173363
3013 We see that there is no huge difference in the
solution time between the
3014 block Schur complement preconditioner solver and the Schur complement
3015 itself. The reason is simple: we used a direct
solve as preconditioner
for 3016 @f$A@f$ - so we cannot expect any gain by avoiding the inner iterations. We see
3017 that the number of iterations has slightly increased
for GMRES, but all in
3018 all the two choices are fairly similar.
3020 The picture of course changes in 3D:
3024 Number of active cells: 32
3025 Number of degrees of freedom: 1356 (1275+81) [0.00845218 s]
3026 Assembling... [0.019372 s]
3027 Computing preconditioner... [0.00712395 s]
3029 Schur complement: 13 outer CG iterations
for p [0.0320101s ]
3030 Block Schur preconditioner: 22 GMRES iterations [0.0048759 s]
3031 l_infinity difference between
solution vectors: 2.15942e-05
3034 Number of active cells: 144
3035 Number of degrees of freedom: 5088 (4827+261) [0.0346942 s]
3036 Assembling... [0.0857739 s]
3037 Computing preconditioner... [0.0465031 s]
3039 Schur complement: 14 outer CG iterations
for p [0.349258s ]
3040 Block Schur preconditioner: 35 GMRES iterations [0.048759 s]
3041 l_infinity difference between
solution vectors: 1.77657e-05
3044 Number of active cells: 704
3045 Number of degrees of freedom: 22406 (21351+1055) [0.175669 s]
3046 Assembling... [0.437447 s]
3047 Computing preconditioner... [0.286435 s]
3049 Schur complement: 14 outer CG iterations
for p [3.65519s ]
3050 Block Schur preconditioner: 63 GMRES iterations [0.497787 s]
3051 l_infinity difference between
solution vectors: 5.08078e-05
3054 Number of active cells: 3168
3055 Number of degrees of freedom: 93176 (89043+4133) [0.790985 s]
3056 Assembling... [1.97598 s]
3057 Computing preconditioner... [1.4325 s]
3059 Schur complement: 15 outer CG iterations
for p [29.9666s ]
3060 Block Schur preconditioner: 128 GMRES iterations [5.02645 s]
3061 l_infinity difference between
solution vectors: 0.000119671
3064 Number of active cells: 11456
3065 Number of degrees of freedom: 327808 (313659+14149) [3.44995 s]
3066 Assembling... [7.54772 s]
3067 Computing preconditioner... [5.46306 s]
3069 Schur complement: 15 outer CG iterations
for p [139.987s ]
3070 Block Schur preconditioner: 255 GMRES iterations [38.0946 s]
3071 l_infinity difference between
solution vectors: 0.00020793
3074 Number of active cells: 45056
3075 Number of degrees of freedom: 1254464 (1201371+53093) [19.6795 s]
3076 Assembling... [28.6586 s]
3077 Computing preconditioner... [22.401 s]
3079 Schur complement: 14 outer CG iterations
for p [796.767s ]
3080 Block Schur preconditioner: 524 GMRES iterations [355.597 s]
3081 l_infinity difference between
solution vectors: 0.000501219
3084 Here, the block preconditioned solver is clearly superior to the Schur
3085 complement, but the advantage gets less
for more mesh points. This is
3086 because GMRES(k) scales worse with the problem size than CG, as we discussed
3087 above. Nonetheless, the improvement by a factor of 3-6
for moderate problem
3088 sizes is quite impressive.
3091 <a name=
"Combiningtheblockpreconditionerandmultigrid"></a><h5>Combining the block preconditioner and multigrid</h5>
3093 An ultimate linear solver
for this problem could be imagined as a
3094 combination of an optimal
3095 preconditioner
for @f$A@f$ (
e.g. multigrid) and the block preconditioner
3096 described above, which is the approach taken in the @ref step_31
"step-31" 3097 and @ref step_32
"step-32" tutorial programs (where we use an algebraic multigrid
3098 method) and @ref step_56
"step-56" (where we use a geometric multigrid method).
3101 <a name=
"Noblockmatricesandvectors"></a><h5>No block matrices and vectors</h5>
3103 Another possibility that can be taken into account is to not
set up a block
3104 system, but rather
solve the system of velocity and pressure all at once. The
3105 options are direct
solve with UMFPACK (2D) or GMRES with ILU
3106 preconditioning (3D). It should be straightforward to
try that.
3110 <a name=
"Moreinterestingtestcases"></a><h4>More interesting testcases</h4>
3113 The program can of course also serve as a basis to compute the flow in more
3114 interesting cases. The original motivation to write
this program was
for it to
3115 be a starting
point for some geophysical flow problems, such as the
3116 movement of magma under places where continental plates drift apart (
for 3117 example mid-ocean ridges). Of course, in such places, the geometry is more
3118 complicated than the examples shown above, but it is not hard to accommodate
3121 For example, by
using the following modification of the boundary values
3127 const unsigned int component)
const 3129 Assert (component < this->n_components,
3132 const double x_offset =
std::atan(p[1]*4)/3;
3135 return (p[0] < x_offset ? -1 : (p[0] > x_offset ? 1 : 0));
3139 and the following way to generate the mesh as the domain
3140 @f$[-2,2]\times[-2,2]\times[-1,0]@f$
3142 std::vector<unsigned int> subdivisions (dim, 1);
3143 subdivisions[0] = 4;
3145 subdivisions[1] = 4;
3159 then we
get images where the fault line is curved:
3160 <table width=
"60%" align=
"center">
3163 <img src=
"https://www.dealii.org/images/steps/developer/step-22.3d-extension.png" alt=
"">
3166 <img src=
"https://www.dealii.org/images/steps/developer/step-22.3d-grid-extension.png" alt=
"">
3173 <a name=
"PlainProg"></a>
3174 <h1> The plain program</h1>
3175 @include
"step-22.cc"
void loop(ITERATOR begin, typename identity< ITERATOR >::type end, DOFINFO &dinfo, INFOBOX &info, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &cell_worker, const std::function< void(DOFINFO &, typename INFOBOX::CellInfo &)> &boundary_worker, const std::function< void(DOFINFO &, DOFINFO &, typename INFOBOX::CellInfo &, typename INFOBOX::CellInfo &)> &face_worker, ASSEMBLER &assembler, const LoopControl &lctrl=LoopControl())
std::function< int(const VectorType &src, VectorType &dst)> residual
Contents is actually a matrix.
SymmetricTensor< 2, dim, Number > e(const Tensor< 2, dim, Number > &F)
void component_wise(DoFHandlerType &dof_handler, const std::vector< unsigned int > &target_component=std::vector< unsigned int >())
static const types::blas_int one
static ::ExceptionBase & ExcIndexRange(int arg1, int arg2, int arg3)
void make_hanging_node_constraints(const DoFHandlerType &dof_handler, AffineConstraints< number > &constraints)
constexpr SymmetricTensor< 2, dim, Number > invert(const SymmetricTensor< 2, dim, Number > &t)
void call(const std::function< RT()> &function, internal::return_value< RT > &ret_val)
constexpr ReturnType< rank, T >::value_type & extract(T &t, const ArrayType &indices)
#define Assert(cond, exc)
auto apply(F &&fn, Tuple &&t) -> decltype(apply_impl(std::forward< F >(fn), std::forward< Tuple >(t), std_cxx14::make_index_sequence< std::tuple_size< typename std::remove_reference< Tuple >::type >::value >()))
VectorType::value_type * end(VectorType &V)
Point< spacedim > point(const gp_Pnt &p, const double tolerance=1e-10)
SymmetricTensor< 2, dim, Number > d(const Tensor< 2, dim, Number > &F, const Tensor< 2, dim, Number > &dF_dt)
void subdivided_hyper_rectangle(Triangulation< dim, spacedim > &tria, const std::vector< unsigned int > &repetitions, const Point< dim > &p1, const Point< dim > &p2, const bool colorize=false)
void king_ordering(DoFHandlerType &dof_handler, const bool reversed_numbering=false, const bool use_constraints=false)
VectorType::value_type * begin(VectorType &V)
void Cuthill_McKee(DoFHandlerType &dof_handler, const bool reversed_numbering=false, const bool use_constraints=false, const std::vector< types::global_dof_index > &starting_indices=std::vector< types::global_dof_index >())
void approximate(SynchronousIterators< std::tuple< TriaActiveIterator< ::DoFCellAccessor< DoFHandlerType< dim, spacedim >, false >>, Vector< float >::iterator >> const &cell, const Mapping< dim, spacedim > &mapping, const DoFHandlerType< dim, spacedim > &dof_handler, const InputVector &solution, const unsigned int component)
static const types::blas_int zero
const ::parallel::distributed::Triangulation< dim, spacedim > * triangulation
std::array< Number, 1 > eigenvalues(const SymmetricTensor< 2, 1, Number > &T)
unsigned int solve(VectorType &initial_guess_and_solution)
unsigned int n_components(const DoFHandler< dim, spacedim > &dh)
void make_sparsity_pattern(const DoFHandlerType &dof_handler, SparsityPatternType &sparsity_pattern, const AffineConstraints< number > &constraints=AffineConstraints< number >(), const bool keep_constrained_dofs=true, const types::subdomain_id subdomain_id=numbers::invalid_subdomain_id)
BlockType & block(const unsigned int i)
int(&) functions(const void *v1, const void *v2)
Expression atan(const Expression &x)