-
Notifications
You must be signed in to change notification settings - Fork 1
MPI support #7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
MPI support #7
Changes from all commits
e7f623c
673917c
1f113ef
772d226
9ae4c80
b3912c2
59caf70
0cfe527
bea8838
04210fb
89116b4
a7c6543
45d5099
5532369
99d7f10
1d3accf
9fab189
f43800f
41d2029
666130e
5be4fed
e4716ff
c66b510
5ec4072
45b7587
7459d19
f3bbb01
c75de31
8d7011e
9701c70
62c31df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,12 +5,18 @@ | |
| * See the LICENSE.md file in the project root for full license information. | ||
| */ | ||
|
|
||
| #include "hws/version.hpp" // hws::version::version | ||
| #include "hws/version.hpp" // hws::version::version | ||
|
|
||
| #include "pybind11/pybind11.h" // PYBIND11_MODULE, py::module_ | ||
|
|
||
| #include <string_view> // std::string_view | ||
|
|
||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
| #include "mpi4py_communicator.hpp" | ||
| #include <mpi.h> | ||
| #include <mpi4py/mpi4py.h> | ||
| #endif | ||
|
|
||
| #define HWS_IS_DEFINED_HELPER(x) #x | ||
| #define HWS_IS_DEFINED(x) (std::string_view{ #x } != std::string_view{ HWS_IS_DEFINED_HELPER(x) }) | ||
|
|
||
|
|
@@ -32,6 +38,15 @@ PYBIND11_MODULE(HardwareSampling, m) { | |
| m.doc() = "Hardware Sampling for CPUs and GPUs"; | ||
| m.attr("__version__") = hws::version::version; | ||
|
|
||
| // MPI support | ||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
| // Initialize mpi4py C-API so PyMPIComm_* are usable | ||
| if (import_mpi4py() < 0) { | ||
| throw py::error_already_set(); | ||
| } | ||
| #endif | ||
| m.def("has_mpi_support", []() { return HWS_IS_DEFINED(HWS_MPI_SUPPORT_ENABLED); }); | ||
|
|
||
| init_event(m); | ||
| init_sample_category(m); | ||
| init_relative_event(m); | ||
|
|
@@ -64,3 +79,25 @@ PYBIND11_MODULE(HardwareSampling, m) { | |
|
|
||
| init_version(m); | ||
| } | ||
|
|
||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you have a look at pybind11's "type casters"? Maybe they could be used for an automatic conversion between the MPI handles instead of this helper function? 🤔 |
||
| /** | ||
| * Converts a Python-side mpi4py communicator into the native MPI_Comm handle it wraps. | ||
| * This definition must live in the translation unit that calls import_mpi4py(), since that call initializes the mpi4py C-API and makes PyMPIComm_Type available. | ||
| * | ||
| * @param py_comm the Python object to convert; it must be an mpi4py.MPI.Comm instance | ||
| * @throws std::runtime_error if py_comm is not an mpi4py.MPI.Comm or if no MPI_Comm could be obtained from it | ||
| * @return a copy of the MPI_Comm handle held by py_comm | ||
| */ | ||
| MPI_Comm mpi_comm_from_python(py::object py_comm) { | ||
| PyObject *const raw_comm = py_comm.ptr(); | ||
|
|
||
| // anything that is not an mpi4py communicator is rejected up front | ||
| const bool is_mpi4py_comm = PyObject_TypeCheck(raw_comm, &PyMPIComm_Type) != 0; | ||
| if (!is_mpi4py_comm) { | ||
| throw std::runtime_error{"expected mpi4py.MPI.Comm as communicator argument"}; | ||
| } | ||
|
|
||
| // PyMPIComm_Get yields a pointer to the wrapped handle; a null pointer means the extraction failed | ||
| MPI_Comm *const native_comm = PyMPIComm_Get(raw_comm); | ||
| if (native_comm != nullptr) { | ||
| return *native_comm; | ||
| } | ||
|
|
||
| throw std::runtime_error{"could not extract MPI_Comm from mpi4py communicator"}; | ||
| } | ||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| /** | ||
| * @file | ||
| * @author Tim Thüring | ||
| * @copyright 2024-today All Rights Reserved | ||
| * @license This file is released under the MIT license. | ||
| * See the LICENSE.md file in the project root for full license information. | ||
| * | ||
| * @brief Utility functions for transforming mpi4py communicators into C++ MPI communicators | ||
| */ | ||
|
|
||
| #ifndef HWS_BINDINGS_MPI4PY_COMMUNICATOR_HPP | ||
| #define HWS_BINDINGS_MPI4PY_COMMUNICATOR_HPP | ||
| #pragma once | ||
|
|
||
| #include "pybind11/pybind11.h" | ||
|
|
||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
| #include <mpi.h> | ||
| #endif | ||
|
|
||
| namespace py = pybind11; | ||
|
|
||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
| MPI_Comm mpi_comm_from_python(py::object py_comm); | ||
| #endif | ||
|
|
||
| #endif // HWS_BINDINGS_MPI4PY_COMMUNICATOR_HPP |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,17 +19,71 @@ | |
| #include "relative_event.hpp" // hws::detail::relative_event | ||
| #include <string> // std::string | ||
|
|
||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
| #include "mpi4py_communicator.hpp" | ||
| #include <mpi.h> | ||
| #endif | ||
|
|
||
| namespace py = pybind11; | ||
|
|
||
| void init_system_hardware_sampler(py::module_ &m) { | ||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
| // bind mpi sampling mode enum | ||
| py::enum_<hws::detail::mpi_sampling_mode>(m, "MPISamplingMode") | ||
| .value("PER_RANK", hws::detail::mpi_sampling_mode::per_rank) | ||
| .value("WHOLE_NODE", hws::detail::mpi_sampling_mode::whole_node) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The sampling modes define the rules the system hardware sampler follows when creating the hardware samplers and are not related to the output. "PER_RANK" explicitly requests the current behavior where every rank creates a system_hardware_sampler that tracks all devices it can see. This might lead to duplicates if multiple ranks are on one node.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, this way it makes more sense. What is the default behavior? In my head, WHOLE_NODE would make the most sense 🤔 |
||
| .export_values(); | ||
| #endif | ||
| // bind the pure virtual hardware sampler base class | ||
| py::class_<hws::system_hardware_sampler>(m, "SystemHardwareSampler") | ||
| .def(py::init<>(), "construct a new system hardware sampler with the default sampling interval") | ||
| .def(py::init<hws::sample_category>(), "construct a new system hardware sampler with the default sampling interval sampling only the provided sample_category samples") | ||
| .def(py::init<std::chrono::milliseconds>(), "construct a new system hardware sampler for with the specified sampling interval") | ||
| .def(py::init<std::chrono::milliseconds, hws::sample_category>(), "construct a new system hardware sampler for with the specified sampling interval sampling only the provided sample_category samples") | ||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
| // MPI-aware constructors | ||
|
|
||
| // (MPI_Comm, mode, category=all) | ||
| .def(py::init([](py::object py_comm, | ||
| hws::detail::mpi_sampling_mode mode, | ||
| hws::sample_category category) { | ||
| MPI_Comm comm = mpi_comm_from_python(py_comm); | ||
| return std::make_unique<hws::system_hardware_sampler>(comm, mode, category); | ||
| }), | ||
| py::arg("comm"), | ||
| py::arg("mode"), | ||
| py::arg("category") = hws::sample_category::all, | ||
| "construct a new system hardware sampler with the default sampling interval and MPI support using the given mpi4py communicator and sampling mode") | ||
|
|
||
| // (MPI_Comm, mode, sampling_interval, category=all) | ||
| .def(py::init([](py::object py_comm, | ||
| hws::detail::mpi_sampling_mode mode, | ||
| std::chrono::milliseconds sampling_interval, | ||
| hws::sample_category category) { | ||
| MPI_Comm comm = mpi_comm_from_python(py_comm); | ||
| return std::make_unique<hws::system_hardware_sampler>(comm, mode, sampling_interval, category); | ||
| }), | ||
| py::arg("comm"), | ||
| py::arg("mode"), | ||
| py::arg("sampling_interval"), | ||
| py::arg("category") = hws::sample_category::all, | ||
| "construct a new system hardware sampler with the specified sampling interval and MPI support using the given mpi4py communicator and sampling mode") | ||
|
|
||
| // Non-MPI overloads | ||
| .def("start", py::overload_cast<>(&hws::system_hardware_sampler::start_sampling), "start hardware sampling for all available hardware samplers") | ||
| .def("stop", py::overload_cast<>(&hws::system_hardware_sampler::stop_sampling), "stop hardware sampling for all available hardware samplers") | ||
| // MPI-aware overloads | ||
| .def("start", [](hws::system_hardware_sampler &self, py::object py_comm) { | ||
| MPI_Comm comm = mpi_comm_from_python(py_comm); | ||
| self.start_sampling(comm); }, py::arg("comm"), "start hardware sampling for all available hardware samplers; executes an MPI barrier on the given communicator before starting") | ||
| .def("stop", [](hws::system_hardware_sampler &self, py::object py_comm) { | ||
| MPI_Comm comm = mpi_comm_from_python(py_comm); | ||
| self.stop_sampling(comm); }, py::arg("comm"), "stop hardware sampling for all available hardware samplers; executes an MPI barrier on the given communicator after stopping") | ||
| #else | ||
| // No MPI support: only the simple overloads exist, no ambiguity | ||
| .def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers") | ||
| .def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware sampling for all available hardware samplers") | ||
| #endif | ||
| .def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers") | ||
| .def("resume", &hws::system_hardware_sampler::resume_sampling, "resume hardware sampling for all available hardware samplers") | ||
| .def("has_started", &hws::system_hardware_sampler::has_sampling_started, "check whether hardware sampling has already been started for all hardware samplers") | ||
|
|
@@ -67,5 +121,10 @@ void init_system_hardware_sampler(py::module_ &m) { | |
| .def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, "get the i-th hardware sampler available for the whole system") | ||
| .def("dump_yaml", py::overload_cast<const std::string &>(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file") | ||
| .def("as_yaml_string", &hws::system_hardware_sampler::as_yaml_string, "return all hardware samples for all hardware samplers as YAML string") | ||
| #if defined(HWS_MPI_SUPPORT_ENABLED) | ||
| .def("dump_yaml_global", [](const hws::system_hardware_sampler &self, const std::string &filename, py::object py_comm) { | ||
| const MPI_Comm comm = mpi_comm_from_python(py_comm); | ||
| self.dump_yaml_global(filename, comm); }, py::arg("filename"), py::arg("comm"), "Let MPI rank 0 dump the hardware samples of all hardware samplers of all MPI ranks to the given YAML file using the provided mpi4py communicator.") | ||
| #endif | ||
| .def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("<hws.SystemHardwareSampler with {} samples>", self.num_samplers()); }); | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible that mpi4py is installed but broken, e.g., without a usable include path? Should we additionally check for that?