Defines utility functionality for dividing resources and checking for consistency and usefulness.
- Author
- Mark Abraham <mark.j.abraham@gmail.com>
|
static int | nthreads_omp_faster (const gmx::CpuInfo &cpuInfo, gmx_bool bUseGPU) |
| Returns the maximum OpenMP thread count for which using a single MPI rank should be faster than using multiple ranks with the same total thread count.
|
|
static gmx_unused int | nthreads_omp_efficient_max (int gmx_unused nrank, const gmx::CpuInfo &cpuInfo, gmx_bool bUseGPU) |
| Returns the maximum OpenMP thread count that passes the efficiency check.
|
|
static gmx_unused int | get_tmpi_omp_thread_division (const gmx_hw_info_t *hwinfo, const gmx_hw_opt_t &hw_opt, int nthreads_tot, int ngpu) |
| Return the number of thread-MPI ranks to use. This is chosen such that we can always obey our own efficiency checks.
|
|
static bool | gmxSmtIsUsedOnAllCores (const gmx::HardwareTopology &hwTop) |
| Return whether hyper-threading is used on ALL cores.
|
|
int | get_nthreads_mpi (const gmx_hw_info_t *hwinfo, gmx_hw_opt_t *hw_opt, const int numDevicesToUse, bool nonbondedOnGpu, bool pmeOnGpu, const t_inputrec *inputrec, const gmx_mtop_t &mtop, const gmx::MDLogger &mdlog, bool doMembed) |
| Return the number of threads to use for thread-MPI based on how many were requested, which algorithms we're using, and how many particles there are. At this point we have already called check_and_update_hw_opt. Thus all options should be internally consistent and consistent with the hardware, except that ntmpi could be larger than the number of GPUs. If necessary, this function will modify hw_opt->nthreads_omp.
|
|
void | check_resource_division_efficiency (const gmx_hw_info_t *hwinfo, bool willUsePhysicalGpu, t_commrec *cr, const gmx::MDLogger &mdlog) |
| Check if the number of OpenMP threads is within a reasonable range considering the hardware used. This is a crude check, but mainly intended to catch cases where the user starts 1 MPI rank per hardware thread or 1 rank per physical node. With a sub-optimal setup, a note is printed to fplog and stderr. This function should be called after thread-MPI and OpenMP are set up.
|
|
static void | print_hw_opt (FILE *fp, const gmx_hw_opt_t *hw_opt) |
| Dump a hw_opt to fp.
|
|
void | checkAndUpdateHardwareOptions (const gmx::MDLogger &mdlog, gmx_hw_opt_t *hw_opt, const bool isSimulationMainRank, const int nPmeRanks, const t_inputrec *inputrec) |
| Checks what our hardware options are based on how Gromacs was compiled and user-set options. More...
|
|
void | checkAndUpdateRequestedNumOpenmpThreads (gmx_hw_opt_t *hw_opt, const gmx_hw_info_t &hwinfo, const t_commrec *cr, const gmx_multisim_t *ms, int numRanksOnThisNode, PmeRunMode pmeRunMode, const gmx_mtop_t &mtop, const t_inputrec &inputrec) |
| Check, and if necessary update, the number of OpenMP threads requested. More...
|
|
void | gmx::checkHardwareOversubscription (int numThreadsOnThisRank, int rank, const HardwareTopology &hwTop, const PhysicalNodeCommunicator &comm, const MDLogger &mdlog) |
| Warns about oversubscription of the hardware threads, when that is the case.
|
|
|
static constexpr int | min_atoms_per_mpi_thread = 90 |
| The minimum number of atoms per thread-MPI thread when GPUs are present. With fewer atoms than this, the number of thread-MPI ranks will get lowered.
|
|
static constexpr int | min_atoms_per_gpu = 900 |
| The minimum number of atoms per GPU with thread-MPI active. With fewer atoms than this, the number of thread-MPI ranks will get lowered.
|
|
|
constexpr int | nthreads_omp_faster_default = 8 |
| Constants for implementing default divisions of threads.
|
|
constexpr int | nthreads_omp_faster_Nehalem = 12 |
|
constexpr int | nthreads_omp_faster_Intel_AVX = 16 |
|
constexpr int | nthreads_omp_faster_AMD_Ryzen = 16 |
|
constexpr int | nthreads_omp_faster_gpu_fac = 2 |
|
constexpr int | nthreads_omp_mpi_ok_max = 8 |
|
constexpr int | nthreads_omp_mpi_ok_min_cpu = 1 |
|
constexpr int | nthreads_omp_mpi_ok_min_gpu = 2 |
|
constexpr int | nthreads_omp_mpi_target_max = 6 |
|
constexpr int | c_maxAutoTmpiRanksPerGpu = 4 |
|