1 # include "rheolef/config.h"
23 #ifdef _RHEOLEF_HAVE_MPI
24 #include "rheolef/dis_macros.h"
25 #include "rheolef/csr.h"
26 #include "rheolef/asr_to_csr.h"
27 #include "rheolef/asr_to_csr_dist_logical.h"
29 #include "rheolef/csr_amux.h"
30 #include "rheolef/csr_cumul_trans_mult.h"
31 #include "rheolef/mpi_scatter_init.h"
32 #include "rheolef/mpi_scatter_begin.h"
33 #include "rheolef/mpi_scatter_end.h"
41 template <
class Pair1,
class Pair2,
class RandomIterator>
42 struct op_ext2glob_t : unary_function<Pair1,Pair2> {
43 Pair2 operator()(
const Pair1& x)
const {
44 return Pair2(t[x.first], x.second); }
45 op_ext2glob_t(RandomIterator t1) : t(t1) {}
48 template <
class Pair1,
class Pair2>
49 struct op_dia_t : unary_function<Pair1,Pair2> {
50 Pair2 operator()(
const Pair1& x)
const {
51 return Pair2(shift + x.first, x.second); }
52 typedef typename Pair1::first_type Size;
53 op_dia_t(Size s) : shift(s) {}
56 template <
class Pair1,
class Pair2,
class RandomIterator>
57 struct op_dis_j2jext_t : unary_function<Pair1,Pair2> {
58 Pair2 operator()(
const Pair1& x)
const {
59 RandomIterator t = std::lower_bound(t1, t2, x.first);
60 assert_macro(*t == x.first,
"problem in ditributed asr_to_csr");
61 return Pair2(distance(t1,t), x.second); }
62 op_dis_j2jext_t(RandomIterator u1, RandomIterator u2) : t1(u1), t2(u2) {}
63 RandomIterator t1, t2;
69 csr_rep<T,distributed>::csr_rep()
75 _scatter_initialized(false),
82 csr_rep<T,distributed>::csr_rep(
const csr_rep<T,distributed>&
a)
85 _jext2dis_j(
a._jext2dis_j),
87 _dis_ext_nnz(
a._dis_ext_nnz),
88 _scatter_initialized(
a._scatter_initialized),
97 csr_rep<T,distributed>::resize (
const distributor& row_ownership,
const distributor& col_ownership,
size_type nnz1)
99 base::resize (row_ownership, col_ownership, nnz1);
100 _ext.resize (row_ownership, col_ownership, 0);
101 _scatter_initialized =
false;
106 csr_rep<T,distributed>::build_from_asr (
const asr<T,distributed,A>&
a)
108 base::resize (
a.row_ownership(),
a.col_ownership(), 0);
109 _ext.resize (
a.row_ownership(),
a.col_ownership(), 0);
111 distributor::tag_type tag = distributor::get_new_tag();
112 typedef typename asr<T>::row_type row_type;
113 typedef typename row_type::value_type const_pair_type;
114 typedef pair<size_type,T> pair_type;
115 is_dia_t<size_type, const_pair_type>
116 is_dia(col_ownership().first_index(), col_ownership().last_index());
120 set<size_type> colext;
128 size_type ncoldia = col_ownership().size();
130 base::resize(
a.row_ownership(),
a.col_ownership(), nnzdia);
131 _ext.resize (
a.row_ownership(),
a.col_ownership(), nnzext);
132 _jext2dis_j.resize (ncolext);
133 copy (colext.begin(), colext.end(), _jext2dis_j.begin());
139 op_dia_t<const_pair_type,pair_type> op_dia(- col_ownership().first_index());
147 base::_data.begin());
150 unary_negate<is_dia_t<size_type, const_pair_type> >
152 op_dis_j2jext_t<const_pair_type, pair_type, typename vector<size_type>::const_iterator>
153 op_dis_j2jext(_jext2dis_j.begin(), _jext2dis_j.end());
155 a.begin(),
a.end(), is_ext, op_dis_j2jext,
156 _ext.begin(), _ext._data.begin());
159 _dis_nnz =
a.dis_nnz();
160 _dis_ext_nnz = mpi::all_reduce (comm(), _ext.nnz(), std::plus<size_type>());
161 _scatter_initialized =
false;
163 _dis_nnz = mpi::all_reduce (comm(), nnz() + _ext.nnz(), std::plus<size_type>());
164 check_macro (_dis_nnz ==
a.dis_nnz(),
"build_from_asr: mistake: asr::dis_nnz="<<
a.dis_nnz()
165 <<
" while _dis_nnz="<<_dis_nnz);
171 csr_rep<T,distributed>::_scatter_init()
const
173 vector<size_type> id (_jext2dis_j.size());
174 for (
size_type i = 0,
n =
id.size(); i <
n; i++)
id[i] = i;
176 distributor::tag_type tag = distributor::get_new_tag();
180 _jext2dis_j.begin().operator->(),
182 id.begin().operator->(),
184 col_ownership().begin().operator->(),
186 row_ownership().comm(),
190 _buffer.resize(_jext2dis_j.size());
200 size_type io_proc = odiststream::io_proc();
202 distributor a_row_ownership (dis_nrow(), comm(), (my_proc == io_proc ? dis_nrow() : 0));
203 distributor a_col_ownership (dis_ncol(), comm(), (my_proc == io_proc ? dis_ncol() : 0));
204 typedef std::allocator<T> A;
205 asr<T,distributed,A>
a (a_row_ownership, a_col_ownership);
206 size_type first_dis_i = row_ownership().first_index();
207 size_type first_dis_j = col_ownership().first_index();
209 const_iterator ia = begin();
211 for (const_data_iterator
p = ia[i];
p < ia[i+1];
p++) {
213 const T& val = (*p).second;
214 a.dis_entry (i+first_dis_i, j+first_dis_j) += val;
218 if (_ext.nnz() != 0) {
219 const_iterator ext_ia = _ext.begin();
221 for (const_data_iterator
p = ext_ia[i];
p < ext_ia[i+1];
p++) {
223 const T& val = (*p).second;
224 a.dis_entry (i+first_dis_i, _jext2dis_j[j]) += val;
228 a.dis_entry_assembly();
229 if (my_proc == io_proc) {
239 std::string filename =
name +
itos(comm().rank());
240 ops.open (filename,
"mtx", comm());
241 check_macro(ops.good(),
"\"" << filename <<
"[.mtx]\" cannot be created.");
242 ops <<
"%%MatrixMarket matrix coordinate real general" << std::endl
243 << dis_nrow() <<
" " << dis_ncol() <<
" " << dis_nnz() << std::endl;
251 csr_rep<T,distributed>::mult(
252 const vec<T,distributed>& x,
253 vec<T,distributed>& y)
256 check_macro (x.size() == ncol(),
"csr*vec: incompatible csr("<<nrow()<<
","<<ncol()<<
") and vec("<<x.size()<<
")");
257 y.resize (row_ownership());
259 distributor::tag_type tag = distributor::get_new_tag();
262 _scatter_init_guard();
266 x.begin().operator->(),
267 _buffer.begin().operator->(),
272 row_ownership().comm());
290 row_ownership().comm());
293 csr_amux (_ext.begin(), _ext.end(), _buffer.begin(), set_add_op<T,T>(), y.begin());
297 csr_rep<T,distributed>::trans_mult(
298 const vec<T,distributed>& x,
299 vec<T,distributed>& y)
302 check_macro (x.size() == nrow(),
"csr.trans_mult(vec): incompatible csr("<<nrow()<<
","<<ncol()<<
") and vec("<<x.size()<<
")");
305 _scatter_init_guard();
307 y.resize (col_ownership());
310 std::fill (y.begin(), y.end(),
T(0));
319 std::fill (_buffer.begin(), _buffer.end(),
T(0));
328 distributor::tag_type tag = distributor::get_new_tag();
330 _buffer.begin().operator->(),
331 y.begin().operator->(),
336 col_ownership().comm());
346 col_ownership().comm());
352 csr_rep<T,distributed>&
365 template<
class T,
class BinaryOp>
374 typedef typename csr_rep<T,distributed>::iterator iterator;
375 typedef typename csr_rep<T,distributed>::const_iterator const_iterator;
376 typedef typename csr_rep<T,distributed>::data_iterator data_iterator;
377 typedef typename csr_rep<T,distributed>::const_data_iterator const_data_iterator;
378 typedef std::pair<size_type,T> pair_type;
383 const size_type infty = std::numeric_limits<size_type>::max();
384 const_iterator ia =
a.begin();
385 const_iterator ib =
b.begin();
386 std::set<size_type> jext_c_set;
388 for (const_data_iterator iter_jva = ia[i], last_jva = ia[i+1],
389 iter_jvb = ib[i], last_jvb = ib[i+1];
390 iter_jva != last_jva || iter_jvb != last_jvb; ) {
392 size_type dis_ja = iter_jva == last_jva ? infty : jext_a2dis_j [(*iter_jva).first];
393 size_type dis_jb = iter_jvb == last_jvb ? infty : jext_b2dis_j [(*iter_jvb).first];
394 if (dis_ja == dis_jb) {
395 jext_c_set.insert (dis_ja);
398 }
else if (dis_ja < dis_jb) {
399 jext_c_set.insert (dis_ja);
402 jext_c_set.insert (dis_jb);
408 c.resize (
a.nrow(),
b.ncol(), nnz_ext_c);
409 jext_c2dis_j.resize (jext_c_set.size());
410 std::copy (jext_c_set.begin(), jext_c_set.end(), jext_c2dis_j.begin());
414 op_dis_j2jext_t<pair_type, pair_type, typename vector<size_type>::const_iterator>
415 op_dis_j2jext_c (jext_c2dis_j.begin(), jext_c2dis_j.end());
416 data_iterator iter_jvc =
c._data.begin().operator->();
417 iterator ic =
c.begin();
420 for (const_data_iterator iter_jva = ia[i], last_jva = ia[i+1],
421 iter_jvb = ib[i], last_jvb = ib[i+1];
422 iter_jva != last_jva || iter_jvb != last_jvb; ) {
424 size_type dis_ja = iter_jva == last_jva ? infty : jext_a2dis_j [(*iter_jva).first];
425 size_type dis_jb = iter_jvb == last_jvb ? infty : jext_b2dis_j [(*iter_jvb).first];
426 if (dis_ja == dis_jb) {
427 *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_ja, binop((*iter_jva).second, (*iter_jvb).second)));
430 }
else if (dis_ja < dis_jb) {
431 *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_ja, binop((*iter_jva).second,
T(0))));
434 *iter_jvc++ = op_dis_j2jext_c (pair_type(dis_jb, binop(
T(0),(*iter_jvb).second)));
442 template<
class BinaryOp>
444 csr_rep<T,distributed>::assign_add (
445 const csr_rep<T,distributed>&
a,
446 const csr_rep<T,distributed>&
b,
449 check_macro (
a.dis_nrow() ==
b.dis_nrow() &&
a.dis_ncol() ==
b.dis_ncol(),
450 "a+b: invalid matrix a("<<
a.dis_nrow()<<
","<<
a.dis_ncol()<<
") and b("
451 <<
b.dis_nrow()<<
","<<
b.dis_ncol()<<
")");
453 "a+b: matrix local distribution mismatch: a("<<
a.nrow()<<
","<<
a.ncol()<<
") and b("
454 <<
b.nrow()<<
","<<
b.ncol()<<
")");
457 base::assign_add (
a,
b, binop);
461 a._ext,
a._jext2dis_j,
462 b._ext,
b._jext2dis_j,
466 _dis_nnz = mpi::all_reduce (comm(), nnz() + _ext.nnz(), std::plus<size_type>());
467 _dis_ext_nnz = mpi::all_reduce (comm(), _ext.nnz(), std::plus<size_type>());
468 _scatter_initialized =
false;
472 vector<size_type> id(_jext2dis_j.size());
473 for (
size_type i = 0; i <
id.size(); i++)
id[i] = i;
474 distributor::tag_type tag = distributor::get_new_tag();
476 _buffer.resize (_jext2dis_j.size());
479 _jext2dis_j.begin().operator->(),
481 id.begin().operator->(),
483 col_ownership().begin().operator->(),
485 row_ownership().comm(),
495 csr_rep<T,distributed>::build_transpose (csr_rep<T,distributed>&
b)
const
500 asr<T> b_ext (col_ownership(), row_ownership());
501 size_type first_i = row_ownership().first_index();
502 const_iterator ext_ia = ext_begin();
505 for (const_data_iterator
p = ext_ia[i], last_p = ext_ia[i+1];
p < last_p;
p++) {
506 size_type dis_j = jext2dis_j ((*p).first);
507 const T& val = (*p).second;
508 b_ext.dis_entry (dis_j, dis_i) += val;
511 b_ext.dis_entry_assembly();
512 b.build_from_asr (b_ext);
516 base::build_transpose (
b);
520 b._dis_nnz = mpi::all_reduce (comm(),
b.nnz() +
b._ext.nnz(), std::plus<size_type>());
521 b._dis_ext_nnz = mpi::all_reduce (comm(),
b._ext.nnz(), std::plus<size_type>());
522 b._scatter_initialized =
false;
529 csr_rep<T,distributed>::set_symmetry_by_check (
const T& tol)
const
531 if (dis_nrow() != dis_ncol()) {
536 csr_rep<T,distributed> at,
d;
537 build_transpose (at);
538 d.assign_add (*
this, at, std::minus<T>());
539 set_symmetry (
d.max_abs() <= tol);
546 csr_rep<T,distributed>::assign_mult (
547 const csr_rep<T,distributed>&
a,
548 const csr_rep<T,distributed>&
b)
554 "incompatible csr([0:"<<
a.nrow()<<
"|"<<
a.dis_nrow()<<
"[x"
555 <<
"[0:"<<
a.ncol()<<
"|"<<
a.dis_ncol()<<
"[)"
556 "*csr([0:"<<
b.nrow()<<
"|"<<
b.dis_nrow()<<
"[x"
557 <<
"[0:"<<
b.ncol()<<
"|"<<
b.dis_ncol()<<
"[)");
561 size_type a_dia_ncol =
a.col_ownership().size();
563 size_type first_a_dis_j =
a.col_ownership().first_index();
564 std::map<size_type,size_type> a_dis_j2a_zip_j;
566 for (
size_type jext = 0; jext < a_ext_ncol &&
a._jext2dis_j [jext] < first_a_dis_j; jext++, a_zip_j++) {
569 a_dis_j2a_zip_j [dis_j] = a_zip_j;
572 for (
size_type j = 0; j < a_dia_ncol; j++, a_zip_j++) {
575 a_dis_j2a_zip_j [dis_j] = a_zip_j;
577 for (
size_type jext = jext_up; jext < a_ext_ncol; jext++, a_zip_j++) {
580 a_dis_j2a_zip_j [dis_j] = a_zip_j;
582 size_type a_zip_ncol = a_dia_ncol + a_ext_ncol;
587 typedef std::allocator<T> A;
590 asr<T,distributed,A> b_asr (
b, alloc);
592 b_asr.set_dis_indexes (
a._jext2dis_j);
594 distributor a_zip_col_ownership (distributor::decide,
b.comm(), a_zip_ncol);
595 distributor b_zip_row_ownership = a_zip_col_ownership;
596 asr<T,sequential,A> b_asr_zip (b_zip_row_ownership,
b.col_ownership());
597 size_type first_dis_j =
b.row_ownership().first_index();
599 for (
typename asr<T,distributed,A>::const_iterator
600 iter = b_asr.begin(),
601 last = b_asr.end(); iter != last; ++iter, ++j) {
602 typedef typename asr<T,distributed,A>::row_type row_type;
604 size_type zip_j = a_dis_j2a_zip_j [dis_j];
605 const row_type& row = *iter;
606 for (
typename row_type::const_iterator
607 row_iter = row.begin(),
608 row_last = row.end(); row_iter != row_last; ++row_iter) {
610 const T& value = (*row_iter).second;
611 b_asr_zip.semi_dis_entry (zip_j, dis_k) = value;
615 typedef typename asr<T,distributed,A>::scatter_map_type ext_row_type;
616 const ext_row_type& b_ext_row_map = b_asr.get_dis_map_entries();
617 for (
typename ext_row_type::const_iterator
618 iter = b_ext_row_map.begin(),
619 last = b_ext_row_map.end(); iter != last; ++iter) {
620 typedef typename asr<T,distributed,A>::row_type row_type;
622 size_type zip_j = a_dis_j2a_zip_j [dis_j];
623 const row_type& row = (*iter).second;
624 for (
typename row_type::const_iterator
625 row_iter = row.begin(),
626 row_last = row.end(); row_iter != row_last; ++row_iter) {
628 const T& value = (*row_iter).second;
629 b_asr_zip.semi_dis_entry (zip_j, dis_k) = value;
632 b_asr_zip.dis_entry_assembly();
634 csr<T,sequential> b_zip (b_asr_zip);
640 asr<T,sequential> a_asr_zip (
a.row_ownership(), a_zip_col_ownership);
641 size_type first_dis_i =
a.row_ownership().first_index();
642 const_iterator dia_ia =
a.begin();
643 const_iterator ext_ia =
a.ext_begin();
646 for (const_data_iterator
p = dia_ia[i], last_p = dia_ia[i+1];
p != last_p; ++
p) {
648 const T& value = (*p).second;
650 size_type zip_j = a_dis_j2a_zip_j [dis_j];
651 a_asr_zip.semi_dis_entry (i, zip_j) += value;
653 if (
a.ext_nnz() == 0)
continue;
654 for (const_data_iterator
p = ext_ia[i], last_p = ext_ia[i+1];
p != last_p; ++
p) {
656 const T& value = (*p).second;
658 size_type zip_j = a_dis_j2a_zip_j [dis_j];
659 a_asr_zip.semi_dis_entry (i, zip_j) += value;
662 a_asr_zip.dis_entry_assembly();
664 csr<T,sequential> a_zip (a_asr_zip);
668 csr<T,sequential> c_zip = a_zip*b_zip;
673 asr<T,distributed> c_asr_zip (
a.row_ownership(),
b.col_ownership());
674 const_iterator ic = c_zip.begin();
675 for (
size_type i = 0,
n = c_zip.nrow(); i <
n; ++i) {
677 for (const_data_iterator
p = ic[i], last_p = ic[i+1];
p != last_p; ++
p) {
679 const T& value = (*p).second;
680 c_asr_zip.semi_dis_entry (i, dis_k) += value;
683 c_asr_zip.dis_entry_assembly();
685 build_from_asr (c_asr_zip);
690 #ifdef _RHEOLEF_DO_NOT_USE_HEAP_ALLOCATOR
691 #define _RHEOLEF_instanciate_class(T) \
692 template void csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,std::allocator<T> >&);
694 #define _RHEOLEF_instanciate_class(T) \
695 template void csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,std::allocator<T> >&); \
696 template void csr_rep<T,distributed>::build_from_asr (const asr<T,distributed,heap_allocator<T> >&);
699 #define _RHEOLEF_istanciate(T) \
700 template class csr_rep<T,distributed>; \
701 template void csr_rep<T,distributed>::assign_add ( \
702 const csr_rep<T,distributed>&, \
703 const csr_rep<T,distributed>&, \
705 template void csr_rep<T,distributed>::assign_add ( \
706 const csr_rep<T,distributed>&, \
707 const csr_rep<T,distributed>&, \
709 _RHEOLEF_instanciate_class(T)
711 _RHEOLEF_istanciate(
Float)
void put(idiststream &in, odiststream &out, bool do_proj, bool do_lumped_mass, bool def_fill_opt, size_type extract_id, const Float &scale_value, const std::pair< Float, Float > &u_range, render_type render)
field::size_type size_type
see the Float page for the full documentation
check_macro(expr1.have_homogeneous_space(Xh1), "dual(expr1,expr2); expr1 should have homogeneous space. HINT: use dual(interpolate(Xh, expr1),expr2)")
verbose clean transpose logscale grid shrink ball stereo iso volume skipvtk deformation fastfieldload lattice reader_on_stdin color format format format format format format format format format format format format format format format format format format dump
This file is part of Rheolef.
std::enable_if< details::is_rheolef_arithmetic< U >::value,ad3_basic< T > & >::type operator*=(ad3_basic< T > &a, const U &b)
void mpi_scatter_init(Size nidx, SizeRandomIterator1 idx, Size nidy, SizeRandomIterator2 idy, Size idy_maxval, SizeRandomIterator3 ownership, Tag tag, const distributor::communicator_type &comm, Message &from, Message &to)
void mpi_scatter_end(InputIterator x, OutputIterator y, Message &from, Message &to, SetOp op, Tag tag, Comm comm)
void csr_cumul_trans_mult(InputIterator1 ia, InputIterator1 last_ia, InputIterator3 x, SetOperator set_op, RandomAccessMutableIterator y)
OutputPtrIterator asr_to_csr(InputPtrIterator iter_ptr_a, InputPtrIterator last_ptr_a, Predicate pred, Operation op, OutputPtrIterator iter_ptr_b, OutputDataIterator iter_data_b)
std::string itos(std::string::size_type i)
itos: see the rheostream page for the full documentation
Set::value_type asr_to_csr_dist_logical(InputPtrIterator iter_ptr_a, InputPtrIterator last_ptr_a, Predicate is_dia, Set &colext)
void mpi_scatter_begin(InputIterator x, OutputIterator y, Message &from, Message &to, SetOp op, Tag tag, Comm comm)
void csr_amux(InputIterator ia, InputIterator last_ia, InputRandomAcessIterator x, SetOperator set_op, OutputIterator y)