| [0b990d] | 1 | // | 
|---|
|  | 2 | // reduce.cc | 
|---|
|  | 3 | // | 
|---|
|  | 4 | // Copyright (C) 1996 Limit Point Systems, Inc. | 
|---|
|  | 5 | // | 
|---|
|  | 6 | // Author: Curtis Janssen <cljanss@limitpt.com> | 
|---|
|  | 7 | // Maintainer: LPS | 
|---|
|  | 8 | // | 
|---|
|  | 9 | // This file is part of the SC Toolkit. | 
|---|
|  | 10 | // | 
|---|
|  | 11 | // The SC Toolkit is free software; you can redistribute it and/or modify | 
|---|
|  | 12 | // it under the terms of the GNU Library General Public License as published by | 
|---|
|  | 13 | // the Free Software Foundation; either version 2, or (at your option) | 
|---|
|  | 14 | // any later version. | 
|---|
|  | 15 | // | 
|---|
|  | 16 | // The SC Toolkit is distributed in the hope that it will be useful, | 
|---|
|  | 17 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
|  | 18 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
|  | 19 | // GNU Library General Public License for more details. | 
|---|
|  | 20 | // | 
|---|
|  | 21 | // You should have received a copy of the GNU Library General Public License | 
|---|
|  | 22 | // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to | 
|---|
|  | 23 | // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | 
|---|
|  | 24 | // | 
|---|
|  | 25 | // The U.S. Government is granted a limited license as per AL 91-7. | 
|---|
|  | 26 | // | 
|---|
|  | 27 |  | 
|---|
|  | 28 | #ifdef HAVE_CONFIG_H | 
|---|
|  | 29 | #include <scconfig.h> | 
|---|
|  | 30 | #endif | 
|---|
|  | 31 | #include <util/group/message.h> | 
|---|
|  | 32 |  | 
|---|
|  | 33 | using namespace sc; | 
|---|
|  | 34 |  | 
|---|
|  | 35 | ///////////////////////////////////////////////////////////////////////// | 
|---|
|  | 36 | // instantiate templates | 
|---|
|  | 37 |  | 
|---|
#ifdef EXPLICIT_TEMPLATE_INSTANTIATION
// Explicit instantiations of the reduction class templates, for compilers
// that are configured (via EXPLICIT_TEMPLATE_INSTANTIATION) to need them.

// Instantiates Reducer for each of the nine arithmetic element types.
#define SC_INSTANTIATE_REDUCER_ALL_TYPES(Reducer) \
  template class Reducer<double>; \
  template class Reducer<unsigned int>; \
  template class Reducer<int>; \
  template class Reducer<long>; \
  template class Reducer<float>; \
  template class Reducer<short>; \
  template class Reducer<char>; \
  template class Reducer<unsigned char>; \
  template class Reducer<signed char>;

// Instantiates Reducer for the integral element types only (no double or
// float); used for the And/Or/XOr reducers.
#define SC_INSTANTIATE_REDUCER_INTEGRAL_TYPES(Reducer) \
  template class Reducer<unsigned int>; \
  template class Reducer<int>; \
  template class Reducer<long>; \
  template class Reducer<short>; \
  template class Reducer<char>; \
  template class Reducer<unsigned char>; \
  template class Reducer<signed char>;

SC_INSTANTIATE_REDUCER_ALL_TYPES(GrpReduce)
SC_INSTANTIATE_REDUCER_ALL_TYPES(GrpFunctionReduce)
SC_INSTANTIATE_REDUCER_ALL_TYPES(GrpMinReduce)
SC_INSTANTIATE_REDUCER_ALL_TYPES(GrpMaxReduce)
SC_INSTANTIATE_REDUCER_ALL_TYPES(GrpSumReduce)
SC_INSTANTIATE_REDUCER_ALL_TYPES(GrpProductReduce)

SC_INSTANTIATE_REDUCER_INTEGRAL_TYPES(GrpArithmeticOrReduce)
SC_INSTANTIATE_REDUCER_INTEGRAL_TYPES(GrpArithmeticAndReduce)
SC_INSTANTIATE_REDUCER_INTEGRAL_TYPES(GrpArithmeticXOrReduce)

#undef SC_INSTANTIATE_REDUCER_INTEGRAL_TYPES
#undef SC_INSTANTIATE_REDUCER_ALL_TYPES
#endif
|---|
|  | 123 |  | 
|---|
|  | 124 | ///////////////////////////////////////////////////////////////////////// | 
|---|
|  | 125 | // sum reduction members | 
|---|
|  | 126 |  | 
|---|
|  | 127 | template <class T> | 
|---|
|  | 128 | void | 
|---|
|  | 129 | do_sum(MessageGrp* grp, T* data, int n, T* tmp, int target) | 
|---|
|  | 130 | { | 
|---|
|  | 131 | GrpSumReduce<T> gred; | 
|---|
|  | 132 | grp->reduce(data, n, gred, tmp, target); | 
|---|
|  | 133 | } | 
|---|
|  | 134 |  | 
|---|
|  | 135 | void | 
|---|
|  | 136 | MessageGrp::sum(double* data, int n, double* tmp, int target) | 
|---|
|  | 137 | { | 
|---|
|  | 138 | do_sum(this, data, n, tmp, target); | 
|---|
|  | 139 | } | 
|---|
|  | 140 |  | 
|---|
|  | 141 | void | 
|---|
|  | 142 | MessageGrp::sum(unsigned int* data, int n, unsigned int* tmp, int target) | 
|---|
|  | 143 | { | 
|---|
|  | 144 | do_sum(this, data, n, tmp, target); | 
|---|
|  | 145 | } | 
|---|
|  | 146 |  | 
|---|
|  | 147 | void | 
|---|
|  | 148 | MessageGrp::sum(int* data, int n, int* tmp, int target) | 
|---|
|  | 149 | { | 
|---|
|  | 150 | do_sum(this, data, n, tmp, target); | 
|---|
|  | 151 | } | 
|---|
|  | 152 |  | 
|---|
|  | 153 | void | 
|---|
|  | 154 | MessageGrp::sum(char* data, int n, char* tmp, int target) | 
|---|
|  | 155 | { | 
|---|
|  | 156 | do_sum(this, data, n, tmp, target); | 
|---|
|  | 157 | } | 
|---|
|  | 158 |  | 
|---|
|  | 159 | void | 
|---|
|  | 160 | MessageGrp::sum(unsigned char* data, int n, unsigned char* tmp, int target) | 
|---|
|  | 161 | { | 
|---|
|  | 162 | do_sum(this, data, n, tmp, target); | 
|---|
|  | 163 | } | 
|---|
|  | 164 |  | 
|---|
|  | 165 | void | 
|---|
|  | 166 | MessageGrp::sum(signed char* data, int n, signed char* tmp, int target) | 
|---|
|  | 167 | { | 
|---|
|  | 168 | do_sum(this, data, n, tmp, target); | 
|---|
|  | 169 | } | 
|---|
|  | 170 |  | 
|---|
|  | 171 | ///////////////////////////////////////////////////////////////////////// | 
|---|
|  | 172 | // max reduction members | 
|---|
|  | 173 |  | 
|---|
|  | 174 | template <class T> | 
|---|
|  | 175 | void | 
|---|
|  | 176 | do_max(MessageGrp* grp, T* data, int n, T* tmp, int target) | 
|---|
|  | 177 | { | 
|---|
|  | 178 | GrpMaxReduce<T> gred; | 
|---|
|  | 179 | grp->reduce(data, n, gred, tmp, target); | 
|---|
|  | 180 | } | 
|---|
|  | 181 |  | 
|---|
|  | 182 | void | 
|---|
|  | 183 | MessageGrp::max(double* data, int n, double* tmp, int target) | 
|---|
|  | 184 | { | 
|---|
|  | 185 | do_max(this, data, n, tmp, target); | 
|---|
|  | 186 | } | 
|---|
|  | 187 |  | 
|---|
|  | 188 | void | 
|---|
|  | 189 | MessageGrp::max(unsigned int* data, int n, unsigned int* tmp, int target) | 
|---|
|  | 190 | { | 
|---|
|  | 191 | do_max(this, data, n, tmp, target); | 
|---|
|  | 192 | } | 
|---|
|  | 193 |  | 
|---|
|  | 194 | void | 
|---|
|  | 195 | MessageGrp::max(int* data, int n, int* tmp, int target) | 
|---|
|  | 196 | { | 
|---|
|  | 197 | do_max(this, data, n, tmp, target); | 
|---|
|  | 198 | } | 
|---|
|  | 199 |  | 
|---|
|  | 200 | void | 
|---|
|  | 201 | MessageGrp::max(char* data, int n, char* tmp, int target) | 
|---|
|  | 202 | { | 
|---|
|  | 203 | do_max(this, data, n, tmp, target); | 
|---|
|  | 204 | } | 
|---|
|  | 205 |  | 
|---|
|  | 206 | void | 
|---|
|  | 207 | MessageGrp::max(unsigned char* data, int n, unsigned char* tmp, int target) | 
|---|
|  | 208 | { | 
|---|
|  | 209 | do_max(this, data, n, tmp, target); | 
|---|
|  | 210 | } | 
|---|
|  | 211 |  | 
|---|
|  | 212 | void | 
|---|
|  | 213 | MessageGrp::max(signed char* data, int n, signed char* tmp, int target) | 
|---|
|  | 214 | { | 
|---|
|  | 215 | do_max(this, data, n, tmp, target); | 
|---|
|  | 216 | } | 
|---|
|  | 217 |  | 
|---|
|  | 218 | ///////////////////////////////////////////////////////////////////////// | 
|---|
|  | 219 | // min reduction members | 
|---|
|  | 220 |  | 
|---|
|  | 221 | template <class T> | 
|---|
|  | 222 | void | 
|---|
|  | 223 | do_min(MessageGrp* grp, T* data, int n, T* tmp, int target) | 
|---|
|  | 224 | { | 
|---|
|  | 225 | GrpMinReduce<T> gred; | 
|---|
|  | 226 | grp->reduce(data, n, gred, tmp, target); | 
|---|
|  | 227 | } | 
|---|
|  | 228 |  | 
|---|
|  | 229 | void | 
|---|
|  | 230 | MessageGrp::min(double* data, int n, double* tmp, int target) | 
|---|
|  | 231 | { | 
|---|
|  | 232 | do_min(this, data, n, tmp, target); | 
|---|
|  | 233 | } | 
|---|
|  | 234 |  | 
|---|
|  | 235 | void | 
|---|
|  | 236 | MessageGrp::min(unsigned int* data, int n, unsigned int* tmp, int target) | 
|---|
|  | 237 | { | 
|---|
|  | 238 | do_min(this, data, n, tmp, target); | 
|---|
|  | 239 | } | 
|---|
|  | 240 |  | 
|---|
|  | 241 | void | 
|---|
|  | 242 | MessageGrp::min(int* data, int n, int* tmp, int target) | 
|---|
|  | 243 | { | 
|---|
|  | 244 | do_min(this, data, n, tmp, target); | 
|---|
|  | 245 | } | 
|---|
|  | 246 |  | 
|---|
|  | 247 | void | 
|---|
|  | 248 | MessageGrp::min(char* data, int n, char* tmp, int target) | 
|---|
|  | 249 | { | 
|---|
|  | 250 | do_min(this, data, n, tmp, target); | 
|---|
|  | 251 | } | 
|---|
|  | 252 |  | 
|---|
|  | 253 | void | 
|---|
|  | 254 | MessageGrp::min(unsigned char* data, int n, unsigned char* tmp, int target) | 
|---|
|  | 255 | { | 
|---|
|  | 256 | do_min(this, data, n, tmp, target); | 
|---|
|  | 257 | } | 
|---|
|  | 258 |  | 
|---|
|  | 259 | void | 
|---|
|  | 260 | MessageGrp::min(signed char* data, int n, signed char* tmp, int target) | 
|---|
|  | 261 | { | 
|---|
|  | 262 | do_min(this, data, n, tmp, target); | 
|---|
|  | 263 | } | 
|---|
|  | 264 |  | 
|---|
|  | 265 | ///////////////////////////////////////////////////////////////////////// | 
|---|
|  | 266 | // generic reduction | 
|---|
|  | 267 |  | 
|---|
|  | 268 | void | 
|---|
|  | 269 | MessageGrp::reduce(double* data, int n, GrpReduce<double>& red, | 
|---|
|  | 270 | double* scratch, int target) | 
|---|
|  | 271 | { | 
|---|
|  | 272 | int tgop_max = gop_max_/sizeof(double); | 
|---|
|  | 273 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 274 |  | 
|---|
|  | 275 | int passed_scratch; | 
|---|
|  | 276 | if (!scratch) { | 
|---|
|  | 277 | scratch = new double[n>tgop_max?tgop_max:n]; | 
|---|
|  | 278 | passed_scratch = 0; | 
|---|
|  | 279 | } | 
|---|
|  | 280 | else passed_scratch = 1; | 
|---|
|  | 281 |  | 
|---|
|  | 282 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 283 | (target== -1?0:target))); | 
|---|
|  | 284 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 285 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 286 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 287 | if (i->send()) { | 
|---|
|  | 288 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 289 | } | 
|---|
|  | 290 | if (i->recv()) { | 
|---|
|  | 291 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 292 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 293 | } | 
|---|
|  | 294 | } | 
|---|
|  | 295 | if (n > tgop_max) sync(); | 
|---|
|  | 296 | } | 
|---|
|  | 297 |  | 
|---|
|  | 298 | if (target == -1) { | 
|---|
|  | 299 | bcast(data, n, 0); | 
|---|
|  | 300 | } | 
|---|
|  | 301 |  | 
|---|
|  | 302 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 303 | } | 
|---|
|  | 304 |  | 
|---|
|  | 305 | void | 
|---|
|  | 306 | MessageGrp::reduce(unsigned int* data, int n, GrpReduce<unsigned int>& red, | 
|---|
|  | 307 | unsigned int* scratch, int target) | 
|---|
|  | 308 | { | 
|---|
|  | 309 | int tgop_max = gop_max_/sizeof(unsigned int); | 
|---|
|  | 310 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 311 |  | 
|---|
|  | 312 | int passed_scratch; | 
|---|
|  | 313 | if (!scratch) { | 
|---|
|  | 314 | scratch = new unsigned int[n>tgop_max?tgop_max:n]; | 
|---|
|  | 315 | passed_scratch = 0; | 
|---|
|  | 316 | } | 
|---|
|  | 317 | else passed_scratch = 1; | 
|---|
|  | 318 |  | 
|---|
|  | 319 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 320 | (target== -1?0:target))); | 
|---|
|  | 321 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 322 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 323 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 324 | if (i->send()) { | 
|---|
|  | 325 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 326 | } | 
|---|
|  | 327 | if (i->recv()) { | 
|---|
|  | 328 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 329 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 330 | } | 
|---|
|  | 331 | } | 
|---|
|  | 332 | if (n > tgop_max) sync(); | 
|---|
|  | 333 | } | 
|---|
|  | 334 |  | 
|---|
|  | 335 | if (target == -1) { | 
|---|
|  | 336 | bcast(data, n, 0); | 
|---|
|  | 337 | } | 
|---|
|  | 338 |  | 
|---|
|  | 339 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 340 | } | 
|---|
|  | 341 |  | 
|---|
|  | 342 | void | 
|---|
|  | 343 | MessageGrp::reduce(int* data, int n, GrpReduce<int>& red, | 
|---|
|  | 344 | int* scratch, int target) | 
|---|
|  | 345 | { | 
|---|
|  | 346 | int tgop_max = gop_max_/sizeof(int); | 
|---|
|  | 347 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 348 |  | 
|---|
|  | 349 | int passed_scratch; | 
|---|
|  | 350 | if (!scratch) { | 
|---|
|  | 351 | scratch = new int[n>tgop_max?tgop_max:n]; | 
|---|
|  | 352 | passed_scratch = 0; | 
|---|
|  | 353 | } | 
|---|
|  | 354 | else passed_scratch = 1; | 
|---|
|  | 355 |  | 
|---|
|  | 356 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 357 | (target== -1?0:target))); | 
|---|
|  | 358 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 359 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 360 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 361 | if (i->send()) { | 
|---|
|  | 362 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 363 | } | 
|---|
|  | 364 | if (i->recv()) { | 
|---|
|  | 365 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 366 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 367 | } | 
|---|
|  | 368 | } | 
|---|
|  | 369 | if (n > tgop_max) sync(); | 
|---|
|  | 370 | } | 
|---|
|  | 371 |  | 
|---|
|  | 372 | if (target == -1) { | 
|---|
|  | 373 | bcast(data, n, 0); | 
|---|
|  | 374 | } | 
|---|
|  | 375 |  | 
|---|
|  | 376 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 377 | } | 
|---|
|  | 378 |  | 
|---|
|  | 379 | void | 
|---|
|  | 380 | MessageGrp::reduce(char* data, int n, GrpReduce<char>& red, | 
|---|
|  | 381 | char* scratch, int target) | 
|---|
|  | 382 | { | 
|---|
|  | 383 | int tgop_max = gop_max_/sizeof(char); | 
|---|
|  | 384 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 385 |  | 
|---|
|  | 386 | int passed_scratch; | 
|---|
|  | 387 | if (!scratch) { | 
|---|
|  | 388 | scratch = new char[n>tgop_max?tgop_max:n]; | 
|---|
|  | 389 | passed_scratch = 0; | 
|---|
|  | 390 | } | 
|---|
|  | 391 | else passed_scratch = 1; | 
|---|
|  | 392 |  | 
|---|
|  | 393 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 394 | (target== -1?0:target))); | 
|---|
|  | 395 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 396 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 397 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 398 | if (i->send()) { | 
|---|
|  | 399 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 400 | } | 
|---|
|  | 401 | if (i->recv()) { | 
|---|
|  | 402 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 403 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 404 | } | 
|---|
|  | 405 | } | 
|---|
|  | 406 | if (n > tgop_max) sync(); | 
|---|
|  | 407 | } | 
|---|
|  | 408 |  | 
|---|
|  | 409 | if (target == -1) { | 
|---|
|  | 410 | bcast(data, n, 0); | 
|---|
|  | 411 | } | 
|---|
|  | 412 |  | 
|---|
|  | 413 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 414 | } | 
|---|
|  | 415 |  | 
|---|
|  | 416 | void | 
|---|
|  | 417 | MessageGrp::reduce(unsigned char* data, int n, GrpReduce<unsigned char>& red, | 
|---|
|  | 418 | unsigned char* scratch, int target) | 
|---|
|  | 419 | { | 
|---|
|  | 420 | int tgop_max = gop_max_/sizeof(unsigned char); | 
|---|
|  | 421 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 422 |  | 
|---|
|  | 423 | int passed_scratch; | 
|---|
|  | 424 | if (!scratch) { | 
|---|
|  | 425 | scratch = new unsigned char[n>tgop_max?tgop_max:n]; | 
|---|
|  | 426 | passed_scratch = 0; | 
|---|
|  | 427 | } | 
|---|
|  | 428 | else passed_scratch = 1; | 
|---|
|  | 429 |  | 
|---|
|  | 430 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 431 | (target== -1?0:target))); | 
|---|
|  | 432 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 433 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 434 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 435 | if (i->send()) { | 
|---|
|  | 436 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 437 | } | 
|---|
|  | 438 | if (i->recv()) { | 
|---|
|  | 439 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 440 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 441 | } | 
|---|
|  | 442 | } | 
|---|
|  | 443 | if (n > tgop_max) sync(); | 
|---|
|  | 444 | } | 
|---|
|  | 445 |  | 
|---|
|  | 446 | if (target == -1) { | 
|---|
|  | 447 | bcast(data, n, 0); | 
|---|
|  | 448 | } | 
|---|
|  | 449 |  | 
|---|
|  | 450 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 451 | } | 
|---|
|  | 452 |  | 
|---|
|  | 453 | void | 
|---|
|  | 454 | MessageGrp::reduce(signed char* data, int n, GrpReduce<signed char>& red, | 
|---|
|  | 455 | signed char* scratch, int target) | 
|---|
|  | 456 | { | 
|---|
|  | 457 | int tgop_max = gop_max_/sizeof(signed char); | 
|---|
|  | 458 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 459 |  | 
|---|
|  | 460 | int passed_scratch; | 
|---|
|  | 461 | if (!scratch) { | 
|---|
|  | 462 | scratch = new signed char[n>tgop_max?tgop_max:n]; | 
|---|
|  | 463 | passed_scratch = 0; | 
|---|
|  | 464 | } | 
|---|
|  | 465 | else passed_scratch = 1; | 
|---|
|  | 466 |  | 
|---|
|  | 467 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 468 | (target== -1?0:target))); | 
|---|
|  | 469 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 470 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 471 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 472 | if (i->send()) { | 
|---|
|  | 473 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 474 | } | 
|---|
|  | 475 | if (i->recv()) { | 
|---|
|  | 476 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 477 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 478 | } | 
|---|
|  | 479 | } | 
|---|
|  | 480 | if (n > tgop_max) sync(); | 
|---|
|  | 481 | } | 
|---|
|  | 482 |  | 
|---|
|  | 483 | if (target == -1) { | 
|---|
|  | 484 | bcast(data, n, 0); | 
|---|
|  | 485 | } | 
|---|
|  | 486 |  | 
|---|
|  | 487 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 488 | } | 
|---|
|  | 489 |  | 
|---|
|  | 490 | void | 
|---|
|  | 491 | MessageGrp::reduce(short* data, int n, GrpReduce<short>& red, | 
|---|
|  | 492 | short* scratch, int target) | 
|---|
|  | 493 | { | 
|---|
|  | 494 | int tgop_max = gop_max_/sizeof(short); | 
|---|
|  | 495 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 496 |  | 
|---|
|  | 497 | int passed_scratch; | 
|---|
|  | 498 | if (!scratch) { | 
|---|
|  | 499 | scratch = new short[n>tgop_max?tgop_max:n]; | 
|---|
|  | 500 | passed_scratch = 0; | 
|---|
|  | 501 | } | 
|---|
|  | 502 | else passed_scratch = 1; | 
|---|
|  | 503 |  | 
|---|
|  | 504 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 505 | (target== -1?0:target))); | 
|---|
|  | 506 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 507 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 508 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 509 | if (i->send()) { | 
|---|
|  | 510 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 511 | } | 
|---|
|  | 512 | if (i->recv()) { | 
|---|
|  | 513 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 514 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 515 | } | 
|---|
|  | 516 | } | 
|---|
|  | 517 | if (n > tgop_max) sync(); | 
|---|
|  | 518 | } | 
|---|
|  | 519 |  | 
|---|
|  | 520 | if (target == -1) { | 
|---|
|  | 521 | bcast(data, n, 0); | 
|---|
|  | 522 | } | 
|---|
|  | 523 |  | 
|---|
|  | 524 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 525 | } | 
|---|
|  | 526 |  | 
|---|
|  | 527 | void | 
|---|
|  | 528 | MessageGrp::reduce(float* data, int n, GrpReduce<float>& red, | 
|---|
|  | 529 | float* scratch, int target) | 
|---|
|  | 530 | { | 
|---|
|  | 531 | int tgop_max = gop_max_/sizeof(float); | 
|---|
|  | 532 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 533 |  | 
|---|
|  | 534 | int passed_scratch; | 
|---|
|  | 535 | if (!scratch) { | 
|---|
|  | 536 | scratch = new float[n>tgop_max?tgop_max:n]; | 
|---|
|  | 537 | passed_scratch = 0; | 
|---|
|  | 538 | } | 
|---|
|  | 539 | else passed_scratch = 1; | 
|---|
|  | 540 |  | 
|---|
|  | 541 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 542 | (target== -1?0:target))); | 
|---|
|  | 543 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 544 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 545 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 546 | if (i->send()) { | 
|---|
|  | 547 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 548 | } | 
|---|
|  | 549 | if (i->recv()) { | 
|---|
|  | 550 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 551 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 552 | } | 
|---|
|  | 553 | } | 
|---|
|  | 554 | if (n > tgop_max) sync(); | 
|---|
|  | 555 | } | 
|---|
|  | 556 |  | 
|---|
|  | 557 | if (target == -1) { | 
|---|
|  | 558 | bcast(data, n, 0); | 
|---|
|  | 559 | } | 
|---|
|  | 560 |  | 
|---|
|  | 561 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 562 | } | 
|---|
|  | 563 |  | 
|---|
|  | 564 | void | 
|---|
|  | 565 | MessageGrp::reduce(long* data, int n, GrpReduce<long>& red, | 
|---|
|  | 566 | long* scratch, int target) | 
|---|
|  | 567 | { | 
|---|
|  | 568 | int tgop_max = gop_max_/sizeof(long); | 
|---|
|  | 569 | if (tgop_max == 0) tgop_max = gop_max_?1:n; | 
|---|
|  | 570 |  | 
|---|
|  | 571 | int passed_scratch; | 
|---|
|  | 572 | if (!scratch) { | 
|---|
|  | 573 | scratch = new long[n>tgop_max?tgop_max:n]; | 
|---|
|  | 574 | passed_scratch = 0; | 
|---|
|  | 575 | } | 
|---|
|  | 576 | else passed_scratch = 1; | 
|---|
|  | 577 |  | 
|---|
|  | 578 | Ref<GlobalMsgIter> i(topology_->global_msg_iter(this, | 
|---|
|  | 579 | (target== -1?0:target))); | 
|---|
|  | 580 | for (i->backwards(); !i->done(); i->next()) { | 
|---|
|  | 581 | for (int idat=0; idat<n; idat+=tgop_max) { | 
|---|
|  | 582 | int ndat = (idat+tgop_max>n)?(n-idat):tgop_max; | 
|---|
|  | 583 | if (i->send()) { | 
|---|
|  | 584 | send(i->sendto(), &data[idat], ndat); | 
|---|
|  | 585 | } | 
|---|
|  | 586 | if (i->recv()) { | 
|---|
|  | 587 | recv(i->recvfrom(), scratch, ndat); | 
|---|
|  | 588 | red.reduce(&data[idat], scratch, ndat); | 
|---|
|  | 589 | } | 
|---|
|  | 590 | } | 
|---|
|  | 591 | if (n > tgop_max) sync(); | 
|---|
|  | 592 | } | 
|---|
|  | 593 |  | 
|---|
|  | 594 | if (target == -1) { | 
|---|
|  | 595 | bcast(data, n, 0); | 
|---|
|  | 596 | } | 
|---|
|  | 597 |  | 
|---|
|  | 598 | if (!passed_scratch) delete[] scratch; | 
|---|
|  | 599 | } | 
|---|
|  | 600 |  | 
|---|
#ifdef EXPLICIT_TEMPLATE_INSTANTIATION
// Explicitly instantiates one of the do_sum/do_max/do_min helper templates
// for a single element type.
#define INSTANTIATE_DO_X(func,type) \
  template void func(MessageGrp*, type *, int, type *, int)

// Instantiates one helper for each element type that has a MessageGrp
// sum/max/min overload in this file.
#define INSTANTIATE_DO_X_FOR_ALL(func) \
  INSTANTIATE_DO_X(func,unsigned int); \
  INSTANTIATE_DO_X(func,int); \
  INSTANTIATE_DO_X(func,double); \
  INSTANTIATE_DO_X(func,char); \
  INSTANTIATE_DO_X(func,unsigned char); \
  INSTANTIATE_DO_X(func,signed char)

INSTANTIATE_DO_X_FOR_ALL(do_sum);
INSTANTIATE_DO_X_FOR_ALL(do_max);
INSTANTIATE_DO_X_FOR_ALL(do_min);

#undef INSTANTIATE_DO_X_FOR_ALL

#endif
|---|
|  | 627 |  | 
|---|
|  | 628 | ///////////////////////////////////////////////////////////////////////////// | 
|---|
|  | 629 |  | 
|---|
|  | 630 | // Local Variables: | 
|---|
|  | 631 | // mode: c++ | 
|---|
|  | 632 | // c-file-style: "CLJ" | 
|---|
|  | 633 | // End: | 
|---|