Reference documentation for deal.II version 8.4.2
mpi.cc
1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2005 - 2015 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE at
12 // the top level of the deal.II distribution.
13 //
14 // ---------------------------------------------------------------------
15 
16 
17 #include <deal.II/base/mpi.h>
18 #include <deal.II/base/utilities.h>
19 #include <deal.II/base/exceptions.h>
20 #include <deal.II/lac/vector_memory.h>
21 #include <deal.II/lac/parallel_vector.h>
22 #include <deal.II/lac/parallel_block_vector.h>
23 #include <deal.II/base/multithread_info.h>
24 
25 #include <cstddef>
26 #include <iostream>
27 
28 #ifdef DEAL_II_WITH_TRILINOS
29 # ifdef DEAL_II_WITH_MPI
30 # include <Epetra_MpiComm.h>
31 # include <deal.II/lac/vector_memory.h>
32 # include <deal.II/lac/trilinos_vector.h>
33 # include <deal.II/lac/trilinos_block_vector.h>
34 # endif
35 #endif
36 
37 #ifdef DEAL_II_WITH_PETSC
38 # ifdef DEAL_II_WITH_MPI
39 # include <petscsys.h>
40 # include <deal.II/lac/petsc_block_vector.h>
41 # include <deal.II/lac/petsc_parallel_block_vector.h>
42 # include <deal.II/lac/petsc_vector.h>
43 # include <deal.II/lac/petsc_parallel_vector.h>
44 # endif
45 #endif
46 
47 #ifdef DEAL_II_WITH_SLEPC
48 # ifdef DEAL_II_WITH_MPI
49 # include <slepcsys.h>
50 # endif
51 #endif
52 
53 #ifdef DEAL_II_WITH_P4EST
54 # include <p4est_bits.h>
55 #endif
56 
57 DEAL_II_NAMESPACE_OPEN
58 
59 
60 namespace Utilities
61 {
62 
63  namespace MPI
64  {
65 #ifdef DEAL_II_WITH_MPI
66  // Unfortunately, we have to work
67  // around an oddity in the way PETSc
68  // and some gcc versions interact. If
69  // we use PETSc's MPI dummy
70  // implementation, it expands the
71  // calls to the two MPI functions
72  // basically as ``(n_jobs=1, 0)'',
73  // i.e. it assigns the number one to
74  // the variable holding the number of
75  // jobs, and then uses the comma
76  // operator to let the entire
77  // expression have the value zero. The
78  // latter is important, since
79  // ``MPI_Comm_size'' returns an error
80  // code that we may want to check (we
81  // don't here, but one could in
82  // principle), and the trick with the
83  // comma operator makes sure that both
84  // the number of jobs is correctly
85  // assigned, and the return value is
86  // zero. Unfortunately, if some recent
87  // versions of gcc detect that the
88  // comma expression just stands by
89  // itself, i.e. the result is not
90  // assigned to another variable, then
91  // they warn ``right-hand operand of
92  // comma has no effect''. This
93  // unwanted side effect can be
94  // suppressed by casting the result of
95  // the entire expression to type
96  // ``void'' -- not beautiful, but
97  // helps calming down unwarranted
98  // compiler warnings...
99  unsigned int n_mpi_processes (const MPI_Comm &mpi_communicator)
100  {
101  int n_jobs=1;
102  (void) MPI_Comm_size (mpi_communicator, &n_jobs);
103 
104  return n_jobs;
105  }
106 
107 
108  unsigned int this_mpi_process (const MPI_Comm &mpi_communicator)
109  {
110  int rank=0;
111  (void) MPI_Comm_rank (mpi_communicator, &rank);
112 
113  return rank;
114  }
115 
116 
117  MPI_Comm duplicate_communicator (const MPI_Comm &mpi_communicator)
118  {
119  MPI_Comm new_communicator;
120  MPI_Comm_dup (mpi_communicator, &new_communicator);
121  return new_communicator;
122  }
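 // A minimal usage sketch of the three helpers above, for illustration
 // only (the communicator `comm` and the function name `report_rank`
 // are made-up placeholders; the deal.II and MPI calls are real):
 //
 //   #include <deal.II/base/mpi.h>
 //   #include <iostream>
 //
 //   void report_rank (const MPI_Comm &comm)
 //   {
 //     const unsigned int rank    = dealii::Utilities::MPI::this_mpi_process (comm);
 //     const unsigned int n_ranks = dealii::Utilities::MPI::n_mpi_processes (comm);
 //     if (rank == 0)
 //       std::cout << "running on " << n_ranks << " MPI processes" << std::endl;
 //
 //     // duplicate the communicator so that our own messages cannot
 //     // interfere with the caller's communication
 //     MPI_Comm my_comm = dealii::Utilities::MPI::duplicate_communicator (comm);
 //     // ... communicate on my_comm ...
 //     MPI_Comm_free (&my_comm);
 //   }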
123 
124 
 125  std::vector<unsigned int>
 126  compute_point_to_point_communication_pattern (const MPI_Comm &mpi_comm,
 127  const std::vector<unsigned int> &destinations)
128  {
129  unsigned int myid = Utilities::MPI::this_mpi_process(mpi_comm);
130  unsigned int n_procs = Utilities::MPI::n_mpi_processes(mpi_comm);
131 
132  for (unsigned int i=0; i<destinations.size(); ++i)
133  {
134  Assert (destinations[i] < n_procs,
135  ExcIndexRange (destinations[i], 0, n_procs));
136  Assert (destinations[i] != myid,
137  ExcMessage ("There is no point in communicating with ourselves."));
138  }
139 
140 
141  // let all processors
142  // communicate the maximal
143  // number of destinations they
144  // have
145  const unsigned int max_n_destinations
146  = Utilities::MPI::max (destinations.size(), mpi_comm);
147 
148  if (max_n_destinations==0)
149  // all processes have nothing to send/receive:
150  return std::vector<unsigned int>();
151 
152  // now that we know the number
153  // of data packets every
154  // processor wants to send, set
155  // up a buffer with the maximal
156  // size and copy our
157  // destinations in there,
158  // padded with -1's
 159  std::vector<unsigned int> my_destinations(max_n_destinations,
 160  numbers::invalid_unsigned_int);
 161  std::copy (destinations.begin(), destinations.end(),
162  my_destinations.begin());
163 
164  // now exchange these (we could
165  // communicate less data if we
166  // used MPI_Allgatherv, but
167  // we'd have to communicate
168  // my_n_destinations to all
169  // processors in this case,
170  // which is more expensive than
171  // the reduction operation
172  // above in MPI_Allreduce)
173  std::vector<unsigned int> all_destinations (max_n_destinations * n_procs);
174  MPI_Allgather (&my_destinations[0], max_n_destinations, MPI_UNSIGNED,
175  &all_destinations[0], max_n_destinations, MPI_UNSIGNED,
176  mpi_comm);
177 
178  // now we know who is going to
179  // communicate with
180  // whom. collect who is going
181  // to communicate with us!
182  std::vector<unsigned int> origins;
183  for (unsigned int i=0; i<n_procs; ++i)
184  for (unsigned int j=0; j<max_n_destinations; ++j)
185  if (all_destinations[i*max_n_destinations + j] == myid)
186  origins.push_back (i);
 187  else if (all_destinations[i*max_n_destinations + j] ==
 188  numbers::invalid_unsigned_int)
 189  break;
190 
191  return origins;
192  }
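 // A hedged usage sketch of the function above, for illustration only:
 // every process passes the ranks it wants to send to and learns which
 // ranks will send to it, so it knows how many receives to post. The
 // payloads, the tag, and the function name `exchange` are made up.
 //
 //   #include <deal.II/base/mpi.h>
 //   #include <vector>
 //
 //   void exchange (const MPI_Comm &comm,
 //                  const std::vector<unsigned int> &my_destinations)
 //   {
 //     // my_destinations must not contain our own rank (see the
 //     // assertions at the top of the function above)
 //     const std::vector<unsigned int> origins
 //       = dealii::Utilities::MPI::compute_point_to_point_communication_pattern
 //         (comm, my_destinations);
 //
 //     std::vector<double>      send_buf (my_destinations.size(), 1.);
 //     std::vector<double>      recv_buf (origins.size());
 //     std::vector<MPI_Request> requests (my_destinations.size() + origins.size());
 //
 //     for (unsigned int i=0; i<my_destinations.size(); ++i)
 //       MPI_Isend (&send_buf[i], 1, MPI_DOUBLE, my_destinations[i],
 //                  /*tag*/ 0, comm, &requests[i]);
 //     for (unsigned int i=0; i<origins.size(); ++i)
 //       MPI_Irecv (&recv_buf[i], 1, MPI_DOUBLE, origins[i],
 //                  /*tag*/ 0, comm, &requests[my_destinations.size() + i]);
 //     MPI_Waitall (requests.size(), &requests[0], MPI_STATUSES_IGNORE);
 //   }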
193 
194 
195  namespace
196  {
 197  // custom MPI_Op for min_max_avg()
198  void max_reduce ( const void *in_lhs_,
199  void *inout_rhs_,
200  int *len,
201  MPI_Datatype *)
202  {
203  (void)len;
204  const MinMaxAvg *in_lhs = static_cast<const MinMaxAvg *>(in_lhs_);
205  MinMaxAvg *inout_rhs = static_cast<MinMaxAvg *>(inout_rhs_);
206 
207  Assert(*len==1, ExcInternalError());
208 
209  inout_rhs->sum += in_lhs->sum;
210  if (inout_rhs->min>in_lhs->min)
211  {
212  inout_rhs->min = in_lhs->min;
213  inout_rhs->min_index = in_lhs->min_index;
214  }
215  else if (inout_rhs->min == in_lhs->min)
216  {
217  // choose lower cpu index when tied to make operator commutative
218  if (inout_rhs->min_index > in_lhs->min_index)
219  inout_rhs->min_index = in_lhs->min_index;
220  }
221 
222  if (inout_rhs->max < in_lhs->max)
223  {
224  inout_rhs->max = in_lhs->max;
225  inout_rhs->max_index = in_lhs->max_index;
226  }
227  else if (inout_rhs->max == in_lhs->max)
228  {
229  // choose lower cpu index when tied to make operator commutative
230  if (inout_rhs->max_index > in_lhs->max_index)
231  inout_rhs->max_index = in_lhs->max_index;
232  }
233  }
234  }
235 
236 
237 
238  MinMaxAvg
239  min_max_avg(const double my_value,
240  const MPI_Comm &mpi_communicator)
241  {
242  // If MPI was not started, we have a serial computation and cannot run
243  // the other MPI commands
244  if (job_supports_mpi() == false)
245  {
246  MinMaxAvg result;
247  result.sum = my_value;
248  result.avg = my_value;
249  result.min = my_value;
250  result.max = my_value;
251  result.min_index = 0;
252  result.max_index = 0;
253 
254  return result;
255  }
256 
257  // To avoid uninitialized values on some MPI implementations, provide
258  // result with a default value already...
259  MinMaxAvg result = { 0., std::numeric_limits<double>::max(),
260  -std::numeric_limits<double>::max(), 0, 0, 0.
261  };
262 
263  const unsigned int my_id
264  = ::Utilities::MPI::this_mpi_process(mpi_communicator);
265  const unsigned int numproc
266  = ::Utilities::MPI::n_mpi_processes(mpi_communicator);
267 
268  MPI_Op op;
269  int ierr = MPI_Op_create((MPI_User_function *)&max_reduce, true, &op);
270  AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
271 
272  MinMaxAvg in;
273  in.sum = in.min = in.max = my_value;
274  in.min_index = in.max_index = my_id;
275 
276  MPI_Datatype type;
277  int lengths[]= {3,2};
278  MPI_Aint displacements[]= {0,offsetof(MinMaxAvg, min_index)};
279  MPI_Datatype types[]= {MPI_DOUBLE, MPI_INT};
280 
281  ierr = MPI_Type_struct(2, lengths, displacements, types, &type);
282  AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
283 
284  ierr = MPI_Type_commit(&type);
285  ierr = MPI_Allreduce (&in, &result, 1, type, op, mpi_communicator);
286  AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
287 
288  ierr = MPI_Type_free (&type);
289  AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
290 
291  ierr = MPI_Op_free(&op);
292  AssertThrow(ierr == MPI_SUCCESS, ExcInternalError());
293 
294  result.avg = result.sum / numproc;
295 
296  return result;
297  }
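 // A short usage sketch, for illustration only: min_max_avg() computes
 // sum, min, max, and average in one MPI_Allreduce with the operation
 // registered above, and also reports on which rank the extrema were
 // found. The variable `local_time` is a made-up placeholder.
 //
 //   const double local_time = 1.0;   // e.g. time spent in some phase
 //   const dealii::Utilities::MPI::MinMaxAvg stats
 //     = dealii::Utilities::MPI::min_max_avg (local_time, MPI_COMM_WORLD);
 //
 //   if (dealii::Utilities::MPI::this_mpi_process (MPI_COMM_WORLD) == 0)
 //     std::cout << "avg " << stats.avg
 //               << ", min " << stats.min << " on rank " << stats.min_index
 //               << ", max " << stats.max << " on rank " << stats.max_index
 //               << std::endl;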
298 
299 #else
300 
301  unsigned int n_mpi_processes (const MPI_Comm &)
302  {
303  return 1;
304  }
305 
306 
307 
308  unsigned int this_mpi_process (const MPI_Comm &)
309  {
310  return 0;
311  }
312 
313 
314  MPI_Comm duplicate_communicator (const MPI_Comm &mpi_communicator)
315  {
316  return mpi_communicator;
317  }
318 
319 
320 
321  MinMaxAvg
322  min_max_avg(const double my_value,
323  const MPI_Comm &)
324  {
325  MinMaxAvg result;
326 
327  result.sum = my_value;
328  result.avg = my_value;
329  result.min = my_value;
330  result.max = my_value;
331  result.min_index = 0;
332  result.max_index = 0;
333 
334  return result;
335  }
336 
337 #endif
338 
339 
340 
 341  MPI_InitFinalize::MPI_InitFinalize (int &argc,
 342  char ** &argv,
343  const unsigned int max_num_threads)
344  {
345  static bool constructor_has_already_run = false;
346  (void)constructor_has_already_run;
347  Assert (constructor_has_already_run == false,
348  ExcMessage ("You can only create a single object of this class "
349  "in a program since it initializes the MPI system."));
350 
351 
352 
353 #ifdef DEAL_II_WITH_MPI
 354  // initialize MPI ourselves; if we have PETSc, PetscInitialize will
 355  // later detect that MPI is already running and not initialize it again
356  int MPI_has_been_started = 0;
357  MPI_Initialized(&MPI_has_been_started);
358  AssertThrow (MPI_has_been_started == 0,
359  ExcMessage ("MPI error. You can only start MPI once!"));
360 
361  int mpi_err, provided;
362  // this works like mpi_err = MPI_Init (&argc, &argv); but tells MPI that
363  // we might use several threads but never call two MPI functions at the
 364  // same time. For an explanation of why we do this, see
365  // http://www.open-mpi.org/community/lists/users/2010/03/12244.php
366  int wanted = MPI_THREAD_SERIALIZED;
367  mpi_err = MPI_Init_thread(&argc, &argv, wanted, &provided);
368  AssertThrow (mpi_err == 0,
369  ExcMessage ("MPI could not be initialized."));
370 
371  // disable for now because at least some implementations always return MPI_THREAD_SINGLE.
372  //Assert(max_num_threads==1 || provided != MPI_THREAD_SINGLE,
373  // ExcMessage("MPI reports that we are not allowed to use multiple threads."));
374 #else
375  // make sure the compiler doesn't warn
376  // about these variables
377  (void)argc;
378  (void)argv;
379 #endif
380 
381  // we are allowed to call MPI_Init ourselves and PETScInitialize will
382  // detect this. This allows us to use MPI_Init_thread instead.
383 #ifdef DEAL_II_WITH_PETSC
384 # ifdef DEAL_II_WITH_SLEPC
385  // Initialize SLEPc (with PETSc):
386  SlepcInitialize(&argc, &argv, PETSC_NULL, PETSC_NULL);
387 # else
388  // or just initialize PETSc alone:
389  PetscInitialize(&argc, &argv, PETSC_NULL, PETSC_NULL);
390 # endif
391 #endif
392 
393 #ifdef DEAL_II_WITH_P4EST
394  //Initialize p4est and libsc components
395  sc_init(MPI_COMM_WORLD, 0, 0, NULL, SC_LP_SILENT);
396  p4est_init (0, SC_LP_SILENT);
397 #endif
398 
399  constructor_has_already_run = true;
400 
401 
402  // Now also see how many threads we'd like to run
403  if (max_num_threads != numbers::invalid_unsigned_int)
404  {
405  // set maximum number of threads (also respecting the environment
406  // variable that the called function evaluates) based on what
407  // the user asked
408  MultithreadInfo::set_thread_limit(max_num_threads);
409  }
410  else
411  // user wants automatic choice
412  {
413 #ifdef DEAL_II_WITH_MPI
414  // we need to figure out how many MPI processes there
415  // are on the current node, as well as how many CPU cores
 416  // we have. for the first task, check what get_hostname()
 417  // returns and then do an allgather so each processor
418  // gets the answer
419  //
420  // in calculating the length of the string, don't forget the
421  // terminating \0 on C-style strings
422  const std::string hostname = Utilities::System::get_hostname();
423  const unsigned int max_hostname_size = Utilities::MPI::max (hostname.size()+1,
424  MPI_COMM_WORLD);
425  std::vector<char> hostname_array (max_hostname_size);
426  std::copy (hostname.c_str(), hostname.c_str()+hostname.size()+1,
427  hostname_array.begin());
428 
429  std::vector<char> all_hostnames(max_hostname_size *
430  MPI::n_mpi_processes(MPI_COMM_WORLD));
431  MPI_Allgather (&hostname_array[0], max_hostname_size, MPI_CHAR,
432  &all_hostnames[0], max_hostname_size, MPI_CHAR,
433  MPI_COMM_WORLD);
434 
 435  // count how often our own hostname appears and which of
 436  // these instances the current process is
437  unsigned int n_local_processes=0;
438  unsigned int nth_process_on_host = 0;
439  for (unsigned int i=0; i<MPI::n_mpi_processes(MPI_COMM_WORLD); ++i)
440  if (std::string (&all_hostnames[0] + i*max_hostname_size) == hostname)
441  {
442  ++n_local_processes;
443  if (i <= MPI::this_mpi_process (MPI_COMM_WORLD))
444  ++nth_process_on_host;
445  }
446  Assert (nth_process_on_host > 0, ExcInternalError());
447 
448 
449  // compute how many cores each process gets. if the number does
450  // not divide evenly, then we get one more core if we are
451  // among the first few processes
452  //
453  // if the number would be zero, round up to one since every
454  // process needs to have at least one thread
455  const unsigned int n_threads
456  = std::max(MultithreadInfo::n_cores() / n_local_processes
457  +
458  (nth_process_on_host <= MultithreadInfo::n_cores() % n_local_processes
459  ?
460  1
461  :
462  0),
463  1U);
464 #else
465  const unsigned int n_threads = MultithreadInfo::n_cores();
466 #endif
467 
 468  // finally set this number of threads
 469  MultithreadInfo::set_thread_limit(n_threads);
 470  }
471  }
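 // A minimal usage sketch, for illustration only: the constructor above
 // is meant to be called exactly once, at the top of main(), so that
 // MPI (and PETSc/SLEPc/p4est, if configured) is initialized before
 // anything else and finalized by the destructor on exit.
 //
 //   #include <deal.II/base/mpi.h>
 //
 //   int main (int argc, char **argv)
 //   {
 //     // limit each process to one thread; omit the third argument to
 //     // let deal.II choose a thread count from the number of cores
 //     // and MPI processes per node, as computed above
 //     dealii::Utilities::MPI::MPI_InitFinalize mpi_initialization (argc, argv, 1);
 //
 //     // ... parallel program ...
 //
 //     return 0;   // the destructor below runs on exit and calls MPI_Finalize()
 //   }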
472 
 473 
 474  MPI_InitFinalize::~MPI_InitFinalize()
 475  {
476  // make memory pool release all PETSc/Trilinos/MPI-based vectors that are no
 477  // longer used at this point. this is relevant because the static
 478  // destructors of these vectors would otherwise only run at the end of
 479  // the program, i.e. after MPI_Finalize has been called, leading
 480  // to errors
481 
482 #ifdef DEAL_II_WITH_MPI
483  // Start with the deal.II MPI vectors (need to do this before finalizing
484  // PETSc because it finalizes MPI). Delete vectors from the pools:
 485  GrowingVectorMemory<parallel::distributed::Vector<double> >
 486  ::release_unused_memory ();
 487  GrowingVectorMemory<parallel::distributed::BlockVector<double> >
 488  ::release_unused_memory ();
 489  GrowingVectorMemory<parallel::distributed::Vector<float> >
 490  ::release_unused_memory ();
 491  GrowingVectorMemory<parallel::distributed::BlockVector<float> >
 492  ::release_unused_memory ();
493 
494  // Next with Trilinos:
 495 # if defined(DEAL_II_WITH_TRILINOS)
 496  GrowingVectorMemory<TrilinosWrappers::MPI::Vector>
 497  ::release_unused_memory ();
 498  GrowingVectorMemory<TrilinosWrappers::MPI::BlockVector>
 499  ::release_unused_memory ();
 500 # endif
501 #endif
502 
503 
504  // Now deal with PETSc (with or without MPI). Only delete the vectors if finalize hasn't
505  // been called yet, otherwise this will lead to errors.
506 #ifdef DEAL_II_WITH_PETSC
507  if ((PetscInitializeCalled == PETSC_TRUE)
508  &&
509  (PetscFinalizeCalled == PETSC_FALSE))
 510  {
 511  GrowingVectorMemory<PETScWrappers::MPI::Vector>
 512  ::release_unused_memory ();
 513  GrowingVectorMemory<PETScWrappers::MPI::BlockVector>
 514  ::release_unused_memory ();
 515  GrowingVectorMemory<PETScWrappers::Vector>
 516  ::release_unused_memory ();
 517  GrowingVectorMemory<PETScWrappers::BlockVector>
 518  ::release_unused_memory ();
 519 
520 # ifdef DEAL_II_WITH_SLEPC
521  // and now end SLEPc (with PETSc)
522  SlepcFinalize();
523 # else
524  // or just end PETSc.
525  PetscFinalize();
526 # endif
527  }
528 #endif
529 
530 #ifdef DEAL_II_WITH_P4EST
531  // now end p4est and libsc
532  // Note: p4est has no finalize function
533  sc_finalize ();
534 #endif
535 
536 
537  // only MPI_Finalize if we are running with MPI. We also need to do this
538  // when running PETSc, because we initialize MPI ourselves before calling
539  // PetscInitialize
540 #ifdef DEAL_II_WITH_MPI
541  if (job_supports_mpi() == true)
542  {
543  if (std::uncaught_exception())
544  {
545  std::cerr << "ERROR: Uncaught exception in MPI_InitFinalize on proc "
546  << this_mpi_process(MPI_COMM_WORLD)
547  << ". Skipping MPI_Finalize() to avoid a deadlock."
548  << std::endl;
549  }
550  else
551  {
552  const int mpi_err = MPI_Finalize();
553  AssertThrow (mpi_err == 0,
554  ExcMessage ("An error occurred while calling MPI_Finalize()"));
555  }
556  }
557 #endif
558  }
559 
560 
561  } // end of namespace MPI
562 
563 } // end of namespace Utilities
564 
565 DEAL_II_NAMESPACE_CLOSE