-: 0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/dataloop_create_indexed.c -: 0:Graph:dataloop_create_indexed.gcno -: 0:Data:dataloop_create_indexed.gcda -: 0:Runs:3086 -: 0:Programs:893 -: 1:/* -*- Mode: C; c-basic-offset:4 ; -*- */ -: 2: -: 3:/* -: 4: * (C) 2001 by Argonne National Laboratory. -: 5: * See COPYRIGHT in top-level directory. -: 6: */ -: 7: -: 8:#include <stdlib.h> -: 9: -: 10:#include "./dataloop.h" -: 11: -: 12:static void DLOOP_Type_indexed_array_copy(DLOOP_Count count, -: 13: DLOOP_Count contig_count, -: 14: int *input_blocklength_array, -: 15: void *input_displacement_array, -: 16: DLOOP_Count *output_blocklength_array, -: 17: DLOOP_Offset *out_disp_array, -: 18: int dispinbytes, -: 19: DLOOP_Offset old_extent); -: 20: -: 21:/*@ -: 22: DLOOP_Dataloop_create_indexed -: 23: -: 24: Arguments: -: 25:+ int icount -: 26:. int *iblocklength_array -: 27:. void *displacement_array (either ints or MPI_Aints) -: 28:. int dispinbytes -: 29:. MPI_Datatype oldtype -: 30:. DLOOP_Dataloop **dlp_p -: 31:. int *dlsz_p -: 32:. int *dldepth_p -: 33:- int flag -: 34: -: 35:.N Errors -: 36:.N Returns 0 on success, -1 on error. -: 37:@*/ -: 38: -: 39:int PREPEND_PREFIX(Dataloop_create_indexed)(int icount, -: 40: int *blocklength_array, -: 41: void *displacement_array, -: 42: int dispinbytes, -: 43: MPI_Datatype oldtype, -: 44: DLOOP_Dataloop **dlp_p, -: 45: int *dlsz_p, -: 46: int *dldepth_p, -: 47: int flag) 72493: 48:{ -: 49: int err, is_builtin; -: 50: int i, new_loop_sz, old_loop_depth, blksz; -: 51: DLOOP_Count first; -: 52: 72493: 53: DLOOP_Count old_type_count = 0, contig_count, count; -: 54: DLOOP_Offset old_extent; -: 55: struct DLOOP_Dataloop *new_dlp; -: 56: 72493: 57: count = (DLOOP_Count) icount; /* avoid subsequent casting */ -: 58: -: 59: -: 60: /* if count is zero, handle with contig code, call it an int */ 72493: 61: if (count == 0) -: 62: { |
#####: 63: err = PREPEND_PREFIX(Dataloop_create_contiguous)(0, -: 64: MPI_INT, -: 65: dlp_p, -: 66: dlsz_p, -: 67: dldepth_p, -: 68: flag); #####: 69: return err; -: 70: } -: 71: -: 72: /* Skip any initial zero-length blocks */ |
78109: 73: for (first = 0; first < count; first++) 77275: 74: if ((DLOOP_Count) blocklength_array[first]) 71659: 75: break; -: 76: -: 77: 72493: 78: is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1; -: 79: 72493: 80: if (is_builtin) -: 81: { 71137: 82: DLOOP_Handle_get_extent_macro(oldtype, old_extent); 71137: 83: old_loop_depth = 0; -: 84: } -: 85: else -: 86: { 1356: 87: DLOOP_Handle_get_extent_macro(oldtype, old_extent); 1356: 88: DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag); -: 89: } -: 90: -: 92: { -: 94: } -: 95: 72493: 96: contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count, -: 97: blocklength_array, -: 98: displacement_array, -: 99: dispinbytes, -: 100: old_extent); -: 101: -: 102: /* if contig_count is zero (no data), handle with contig code */ 72493: 103: if (contig_count == 0) -: 104: { |
#####: 105: err = PREPEND_PREFIX(Dataloop_create_contiguous)(0, -: 106: MPI_INT, -: 107: dlp_p, -: 108: dlsz_p, -: 109: dldepth_p, -: 110: flag); #####: 111: return err; -: 112: } -: 113: -: 114: /* optimization: -: 115: * -: 116: * if contig_count == 1 and block starts at displacement 0, -: 117: * store it as a contiguous rather than an indexed dataloop. -: 118: */ |
72493: 119: if ((contig_count == 1) && -: 120: ((!dispinbytes && ((int *) displacement_array)[first] == 0) || -: 121: (dispinbytes && ((MPI_Aint *) displacement_array)[first] == 0))) -: 122: { 29190: 123: err = PREPEND_PREFIX(Dataloop_create_contiguous)((int) old_type_count, -: 124: oldtype, -: 125: dlp_p, -: 126: dlsz_p, -: 127: dldepth_p, -: 128: flag); 29190: 129: return err; -: 130: } -: 131: -: 132: /* optimization: -: 133: * -: 134: * if contig_count == 1 (and displacement != 0), store this as -: 135: * a single element blockindexed rather than a lot of individual -: 136: * blocks. -: 137: */ 43303: 138: if (contig_count == 1) -: 139: { 5014: 140: err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1, -: 141: (int) old_type_count, -: 142: &(((int *)displacement_array)[first]), -: 143: dispinbytes, -: 144: oldtype, -: 145: dlp_p, -: 146: dlsz_p, -: 147: dldepth_p, -: 148: flag); -: 149: 5014: 150: return err; -: 151: } -: 152: -: 153: /* optimization: -: 154: * -: 155: * if block length is the same for all blocks, store it as a -: 156: * blockindexed rather than an indexed dataloop. -: 157: */ 38289: 158: blksz = blocklength_array[first]; 84194654: 159: for (i = first+1; i < count; i++) -: 160: { 84171621: 161: if (blocklength_array[i] != blksz) -: 162: { 15256: 163: blksz--; 15256: 164: break; -: 165: } -: 166: } 38289: 167: if (blksz == blocklength_array[first]) -: 168: { 23033: 169: err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first, -: 170: blksz, -: 171: &(((int *)displacement_array)[first]), -: 172: dispinbytes, -: 173: oldtype, -: 174: dlp_p, -: 175: dlsz_p, -: 176: dldepth_p, -: 177: flag); -: 178: 23033: 179: return err; -: 180: } -: 181: -: 182: /* note: blockindexed looks for the vector optimization */ -: 183: -: 184: /* TODO: optimization: -: 185: * -: 186: * if an indexed of a contig, absorb the contig into the blocklen array -: 187: * and keep the same overall depth -: 188: */ -: 189: -: 190: /* otherwise storing as an indexed dataloop */ -: 191: 15256: 192: if (is_builtin) -: 193: { 15252: 194: PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED, -: 195: count, -: 196: &new_dlp, -: 197: &new_loop_sz); |
-: 198: /* --BEGIN ERROR HANDLING-- */ 15252: 199: if (!new_dlp) return -1; -: 200: /* --END ERROR HANDLING-- */ -: 201: |
15252: 202: new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK; -: 203: 15252: 204: if (flag == DLOOP_DATALOOP_ALL_BYTES) -: 205: { -: 206: /* blocklengths are modified below */ |
#####: 207: new_dlp->el_size = 1; #####: 208: new_dlp->el_extent = 1; #####: 209: new_dlp->el_type = MPI_BYTE; -: 210: } -: 211: else -: 212: { |
15252: 213: new_dlp->el_size = old_extent; 15252: 214: new_dlp->el_extent = old_extent; 15252: 215: new_dlp->el_type = oldtype; -: 216: } -: 217: } -: 218: else -: 219: { 4: 220: DLOOP_Dataloop *old_loop_ptr = NULL; 4: 221: int old_loop_sz = 0; -: 222: 4: 223: DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag); 4: 224: DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag); -: 225: 4: 226: PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED, -: 227: contig_count, -: 228: old_loop_ptr, -: 229: old_loop_sz, -: 230: &new_dlp, -: 231: &new_loop_sz); |
-: 232: /* --BEGIN ERROR HANDLING-- */ 4: 233: if (!new_dlp) return -1; -: 234: /* --END ERROR HANDLING-- */ -: 235: |
4: 236: new_dlp->kind = DLOOP_KIND_INDEXED; -: 237: 4: 238: DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size); 4: 239: DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent); 4: 240: DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type); -: 241: } -: 242: 15256: 243: new_dlp->loop_params.i_t.count = contig_count; 15256: 244: new_dlp->loop_params.i_t.total_blocks = old_type_count; -: 245: -: 246: /* copy in blocklength and displacement parameters (in that order) -: 247: * -: 248: * regardless of dispinbytes, we store displacements in bytes in loop. -: 249: */ 15256: 250: DLOOP_Type_indexed_array_copy(count, -: 251: contig_count, -: 252: blocklength_array, -: 253: displacement_array, -: 254: new_dlp->loop_params.i_t.blocksize_array, -: 255: new_dlp->loop_params.i_t.offset_array, -: 256: dispinbytes, -: 257: old_extent); -: 258: 15256: 259: if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES)) -: 260: { -: 261: DLOOP_Count *tmp_blklen_array = |
#####: 262: new_dlp->loop_params.i_t.blocksize_array; -: 263: #####: 264: for (i=0; i < contig_count; i++) -: 265: { -: 266: /* increase block lengths so they are in bytes */ #####: 267: tmp_blklen_array[i] *= old_extent; -: 268: } -: 269: #####: 270: new_dlp->loop_params.i_t.total_blocks *= old_extent; -: 271: } -: 272: |
15256: 273: *dlp_p = new_dlp; 15256: 274: *dlsz_p = new_loop_sz; 15256: 275: *dldepth_p = old_loop_depth + 1; -: 276: 15256: 277: return MPI_SUCCESS; -: 278:} -: 279: -: 280:/* DLOOP_Type_indexed_array_copy() -: 281: * -: 282: * Copies arrays into place, combining adjacent contiguous regions and -: 283: * dropping zero-length regions. -: 284: * -: 285: * Extent passed in is for the original type. -: 286: * -: 287: * Output displacements are always output in bytes, while block -: 288: * lengths are always output in terms of the base type. -: 289: */ -: 290:static void DLOOP_Type_indexed_array_copy(DLOOP_Count count, -: 291: DLOOP_Count contig_count, -: 292: int *in_blklen_array, -: 293: void *in_disp_array, -: 294: DLOOP_Count *out_blklen_array, -: 295: DLOOP_Offset *out_disp_array, -: 296: int dispinbytes, -: 297: DLOOP_Offset old_extent) 15256: 298:{ 15256: 299: DLOOP_Count i, first, cur_idx = 0; -: 300: -: 301: /* Skip any initial zero-length blocks */ 15256: 302: for (first = 0; first < count; ++first) 15256: 303: if ((DLOOP_Count) in_blklen_array[first]) 15256: 304: break; -: 305: 15256: 306: out_blklen_array[0] = (DLOOP_Count) in_blklen_array[first]; -: 307: 15256: 308: if (!dispinbytes) -: 309: { 5298: 310: out_disp_array[0] = (DLOOP_Offset) -: 311: ((int *) in_disp_array)[first] * old_extent; -: 312: 2103662: 313: for (i = first+1; i < count; ++i) -: 314: { 2098364: 315: if (in_blklen_array[i] == 0) -: 316: { |
#####: 317: continue; -: 318: } |
2098364: 319: else if (out_disp_array[cur_idx] + -: 320: ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent == -: 321: ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent) -: 322: { -: 323: /* adjacent to current block; add to block */ 2052: 324: out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i]; -: 325: } -: 326: else -: 327: { 2096312: 328: cur_idx++; 2096312: 329: DLOOP_Assert(cur_idx < contig_count); 2096312: 330: out_disp_array[cur_idx] = -: 331: ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent; 2096312: 332: out_blklen_array[cur_idx] = in_blklen_array[i]; -: 333: } -: 334: } -: 335: } -: 336: else /* input displacements already in bytes */ -: 337: { 9958: 338: out_disp_array[0] = (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[first]; -: 339: 110393: 340: for (i = first+1; i < count; ++i) -: 341: { 100435: 342: if (in_blklen_array[i] == 0) -: 343: { 2: 344: continue; -: 345: } 100433: 346: else if (out_disp_array[cur_idx] + -: 347: ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent == -: 348: ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i])) -: 349: { -: 350: /* adjacent to current block; add to block */ 21848: 351: out_blklen_array[cur_idx] += in_blklen_array[i]; -: 352: } -: 353: else -: 354: { 78585: 355: cur_idx++; 78585: 356: DLOOP_Assert(cur_idx < contig_count); 78585: 357: out_disp_array[cur_idx] = -: 358: (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]; 78585: 359: out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i]; -: 360: } -: 361: } -: 362: } -: 363: 15256: 364: DLOOP_Assert(cur_idx == contig_count - 1); -: 365: return; -: 366:} -: 367: -: 368:/* DLOOP_Type_indexed_count_contig() -: 369: * -: 370: * Determines the actual number of contiguous blocks represented by the -: 371: * blocklength/displacement arrays. This might be less than count (as -: 372: * few as 1). -: 373: * -: 374: * Extent passed in is for the original type. -: 375: */ -: 376:DLOOP_Count PREPEND_PREFIX(Type_indexed_count_contig)(DLOOP_Count count, -: 377: int *blocklength_array, -: 378: void *displacement_array, -: 379: int dispinbytes, -: 380: DLOOP_Offset old_extent) 93604: 381:{ 93604: 382: DLOOP_Count i, contig_count = 1; -: 383: DLOOP_Count cur_blklen, first; -: 384: 93604: 385: if (count) -: 386: { -: 387: /* Skip any initial zero-length blocks */ 101322: 388: for (first = 0; first < count; ++first) 100488: 389: if ((DLOOP_Count) blocklength_array[first]) 92770: 390: break; -: 391: 93604: 392: cur_blklen = (DLOOP_Count) blocklength_array[first]; 93604: 393: if (!dispinbytes) -: 394: { -: 395: DLOOP_Offset cur_tdisp = 59145: 396: (DLOOP_Offset) ((int *) displacement_array)[first]; -: 397: -: 399: { -: 401: { 6485: 402: continue; -: 403: } -: 405: (DLOOP_Offset) ((int *) displacement_array)[i]) -: 406: { -: 407: /* adjacent to current block; add to block */ 7908948: 408: cur_blklen += (DLOOP_Count) blocklength_array[i]; -: 409: } -: 410: else -: 411: { -: 415: } -: 416: } -: 417: } -: 418: else -: 419: { -: 420: DLOOP_Offset cur_bdisp = 34459: 421: (DLOOP_Offset) ((MPI_Aint *) displacement_array)[first]; -: 422: 13300604: 423: for (i = first+1; i < count; ++i) -: 424: { 13266145: 425: if (blocklength_array[i] == 0) -: 426: { 3: 427: continue; -: 428: } 13266142: 429: else if (cur_bdisp + (DLOOP_Offset) cur_blklen * old_extent == -: 430: (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i]) -: 431: { -: 432: /* adjacent to current block; add to block */ 13181419: 433: cur_blklen += (DLOOP_Count) blocklength_array[i]; -: 434: } -: 435: else -: 436: { 84723: 437: cur_bdisp = -: 438: (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i]; 84723: 439: cur_blklen = (DLOOP_Count) blocklength_array[i]; 84723: 440: contig_count++; -: 441: } -: 442: } -: 443: } -: 444: } 93604: 445: return contig_count; -: 446:} |