Reputation: 99
I want to use std::for_each
to iterate over vector indexes in range [a, b)
in parallel, calculate the value of the Weierstrass function and write it to the std::vector
:
std::vector<std::array<float, 2>> values(1000);
auto range = /** equivalent of Pyhthon range(0, values.size()) **/;
std::for_each(std::execution::par, range.begin(), range.end(), [&](auto &&i) {
values[i][0] = static_cast<float>(i) / resolution;
values[i][1] = weierstrass(a, b, static_cast<float>(i) / resolution);
});
// a, b, and resolution are some constants defined before
// weierstrass() is the Weierstrass function
I have found some solutions in the internet, but all of them requires to include some third-party libraries or create my own range class. Is there any standard solution for this?
Upvotes: 6
Views: 1797
Reputation: 16747
You can use std::views::iota(), its use is similar (but a bit different) to Python's range()
. With help of std::ranges::for_each(). Both are available in C++20.
#include <algorithm>
#include <ranges>
#include <iostream>
int main() {
std::ranges::for_each(std::views::iota(1, 10), [](int i) {
std::cout << i << ' ';
});
}
Output:
1 2 3 4 5 6 7 8 9
As noted by @Afshin, in code mentioned above std::ranges::for_each()
doesn't support std::execution::par
for multi-threaded execution.
To overcome this issue you may use iota with regular std::for_each()
as following:
#include <algorithm>
#include <ranges>
#include <iostream>
#include <execution>
int main() {
auto range = std::views::iota(1, 10);
std::for_each(std::execution::par, range.begin(), range.end(),
[](int i) {
std::cout << i << ' ';
});
}
Output:
1 2 3 4 5 6 7 8 9
I decided to implement Range
class plus iterator from scratch, according to how it works in Python's range()
.
Similar to Python you can use it three ways: Range(stop)
, Range(start, stop)
, Range(start, stop, step)
. All three support any negative value.
To test correctness of implementation I filled two unordered sets, one containing all generated values, another containing all used thread ids (to show that it actually used multi-core CPU execution).
Although I marked my iterator as random access type, still it is missing some methods like -=
or --
operators, these extra methods are for further improvements. But for usage of std::for_each()
it has enough methods.
If I made some mistakes of implementation please add comments to my answer with explanation.
#include <limits>
#include <execution>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <thread>
#include <unordered_set>
#include <string>
#include <sstream>
#include <mutex>
class Range {
public:
Range(ptrdiff_t start_stop, ptrdiff_t stop =
std::numeric_limits<ptrdiff_t>::max(), ptrdiff_t step = 1)
: step_(step) {
if (stop == std::numeric_limits<ptrdiff_t>::max()) {
start_ = 0;
stop_ = start_stop;
} else {
start_ = start_stop;
stop_ = stop;
}
if (step_ >= 0)
stop_ = std::max(start_, stop_);
else
stop_ = std::min(start_, stop_);
if (step_ >= 0)
stop_ = start_ + (stop_ - start_ + step_ - 1) / step_ * step_;
else
stop_ = start_ - (start_ - stop_ + step_ - 1) / (-step_) * (-step_);
}
class RangeIter {
public:
using iterator_category = std::random_access_iterator_tag;
using value_type = ptrdiff_t;
using difference_type = ptrdiff_t;
using pointer = ptrdiff_t const *;
using reference = ptrdiff_t const &;
RangeIter() {}
RangeIter(ptrdiff_t start, ptrdiff_t stop, ptrdiff_t step)
: cur_(start), stop_(stop), step_(step) {}
RangeIter & operator += (ptrdiff_t steps) {
cur_ += step_ * steps;
if (step_ >= 0)
cur_ = std::min(cur_, stop_);
else
cur_ = std::max(cur_, stop_);
return *this;
}
RangeIter operator + (ptrdiff_t steps) const {
auto it = *this;
it += steps;
return it;
}
ptrdiff_t operator [] (ptrdiff_t steps) const {
auto it = *this;
it += steps;
return *it;
}
ptrdiff_t operator - (RangeIter const & other) const {
return (cur_ - other.cur_) / step_;
}
RangeIter & operator ++ () {
*this += 1;
return *this;
}
ptrdiff_t const & operator * () const {
return cur_;
}
bool operator == (RangeIter const & other) const {
return cur_ == other.cur_;
}
bool operator != (RangeIter const & other) const {
return !(*this == other);
}
ptrdiff_t cur_ = 0, stop_ = 0, step_ = 0;
};
auto begin() const { return RangeIter(start_, stop_, step_); }
auto end() const { return RangeIter(stop_, stop_, step_); }
private:
ptrdiff_t start_ = 0, stop_ = 0, step_ = 0;
};
int main() {
ptrdiff_t start = 1, stop = 1000000, step = 2;
std::mutex mutex;
std::unordered_set<std::string> threads;
std::unordered_set<ptrdiff_t> values;
auto range = Range(start, stop, step);
std::for_each(std::execution::par, range.begin(), range.end(),
[&](int i) {
std::unique_lock<std::mutex> lock(mutex);
std::ostringstream ss;
ss << std::this_thread::get_id();
threads.insert(ss.str());
values.insert(i);
});
std::cout << "Threads:" << std::endl;
for (auto const & s: threads)
std::cout << s << std::endl;
{
bool correct = true;
size_t cnt = 0;
for (ptrdiff_t i = start; i < stop; i += step) {
++cnt;
if (!values.count(i)) {
correct = false;
std::cout << "No value: " << i << std::endl;
break;
}
}
if (values.size() != cnt)
std::cout << "Expected amount of values: " << cnt
<< ", actual " << values.size() << std::endl;
std::cout << "Correct values: " << std::boolalpha
<< (correct && (values.size() == cnt)) << std::endl;
}
}
Output:
Threads:
1628
9628
5408
2136
2168
8636
2880
6492
1100
Correct values: true
Upvotes: 14
Reputation: 9173
You can write your code in another way and drop any need for range at all like this:
std::vector<std::array<float, 2>> values(1000);
std::for_each(std::execution::par, values.begin(), values.end(), [&](std::array<float, 2>& val) {
auto i = std::distance(&values[0], &val);
val[0] = static_cast<float>(i) / resolution;
val[1] = weierstrass(a, b, static_cast<float>(i) / resolution);
});
I should say that this code is valid if and only if you are using std::for_each
, because it is stated that:
Unlike the rest of the parallel algorithms,
std::for_each
is not allowed to make copies of the elements in the sequence even if they are trivially copyable.
Upvotes: 1
Reputation: 117298
Just as an alternative, you could make each work package carry the necessary information by adding the index you need.
Example:
std::vector<std::pair<size_t, std::array<float, 2>>> values(1000);
for(size_t i = 0; i < values.size(); ++i) values[i].first = i;
std::for_each(std::execution::par, values.begin(), values.end(),
[resolution](auto& p) {
p.second[0] = static_cast<float>(p.first) / resolution;
p.second[1] = weierstrass(a, b, static_cast<float>(p.first) / resolution);
});
Not using indexing on values
inside the threaded part like above may prevent false sharing and improve performance. You could also make each work package aligned to prevent false sharing to see if that has an effect on performance.
#include <new>
struct alignas(std::hardware_destructive_interference_size) workpackage {
size_t index;
std::array<float, 2> arr;
};
std::vector<workpackage> values(1000);
for(size_t i = 0; i < values.size(); ++i) values[i].index = i;
std::for_each(std::execution::par, values.begin(), values.end(),
[resolution](auto& wp) {
wp.arr[0] = static_cast<float>(wp.index) / resolution;
wp.arr[1] = weierstrass(a, b, static_cast<float>(wp.index) / resolution);
});
Upvotes: 1
Reputation: 318
If the problem is in creating range similar to python's range()
you can look through https://en.cppreference.com/w/cpp/iterator/iterator and use it's example:
#include <iostream>
#include <algorithm>
template<long FROM, long TO>
class Range {
public:
// member typedefs provided through inheriting from std::iterator
class iterator: public std::iterator<
std::input_iterator_tag, // iterator_category
long, // value_type
long, // difference_type
const long*, // pointer
long // reference
>{
long num = FROM;
public:
explicit iterator(long _num = 0) : num(_num) {}
iterator& operator++() {num = TO >= FROM ? num + 1: num - 1; return *this;}
iterator operator++(int) {iterator retval = *this; ++(*this); return retval;}
bool operator==(iterator other) const {return num == other.num;}
bool operator!=(iterator other) const {return !(*this == other);}
reference operator*() const {return num;}
};
iterator begin() {return iterator(FROM);}
iterator end() {return iterator(TO >= FROM? TO+1 : TO-1);}
};
int main() {
// std::find requires an input iterator
auto range = Range<15, 25>();
auto itr = std::find(range.begin(), range.end(), 18);
std::cout << *itr << '\n'; // 18
// Range::iterator also satisfies range-based for requirements
for(long l : Range<3, 5>()) {
std::cout << l << ' '; // 3 4 5
}
std::cout << '\n';
}
Upvotes: 5