-: 0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/segment.c -: 0:Graph:segment.gcno -: 0:Data:segment.gcda -: 0:Runs:4069 -: 0:Programs:1200 -: 1:/* -*- Mode: C; c-basic-offset:4 ; -*- */ -: 2: -: 3:/* -: 4: * (C) 2001 by Argonne National Laboratory. -: 5: * See COPYRIGHT in top-level directory. -: 6: */ -: 7: -: 8:#include <stdio.h> -: 9:#include <stdlib.h> -: 10: -: 11:#include "./dataloop.h" -: 12: -: 13:#undef DLOOP_DEBUG_MANIPULATE -: 14: -: 15:#ifndef PREPEND_PREFIX -: 16:#error "You must explicitly include a header that sets the PREPEND_PREFIX and includes dataloop_parts.h" -: 17:#endif -: 18: -: 19:/* Notes on functions: -: 20: * -: 21: * There are a few different sets of functions here: -: 22: * - DLOOP_Segment_manipulate() - uses a "piece" function to perform operations -: 23: * using segments (piece functions defined elsewhere) -: 24: * - PREPEND_PREFIX functions - these define the externally visible interface -: 25: * to segment functionality -: 26: */ -: 27: -: 28:static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp); -: 29:static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp); -: 30:static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp, -: 31: struct DLOOP_Dataloop *dlp, -: 32: int branch_flag); -: 33:/* Segment_init -: 34: * -: 35: * buf - datatype buffer location -: 36: * count - number of instances of the datatype in the buffer -: 37: * handle - handle for datatype (could be derived or not) -: 38: * segp - pointer to previously allocated segment structure -: 39: * flag - flag indicating which optimizations are valid -: 40: * should be one of DLOOP_DATALOOP_HOMOGENEOUS, _HETEROGENEOUS, -: 41: * or _ALL_BYTES. -: 42: * -: 43: * Notes: -: 44: * - Assumes that the segment has been allocated. -: 45: * - Older MPICH2 code may pass "0" to indicate HOMOGENEOUS or "1" to -: 46: * indicate HETEROGENEOUS.
-: 47: * -: 48: */ -: 49:int PREPEND_PREFIX(Segment_init)(const DLOOP_Buffer buf, -: 50: DLOOP_Count count, -: 51: DLOOP_Handle handle, -: 52: struct DLOOP_Segment *segp, -: 53: int flag) 1246624: 54:{ 1246624: 55: DLOOP_Offset elmsize = 0; 1246624: 56: int i, depth = 0; 1246624: 57: int branch_detected = 0; -: 58: -: 59: struct DLOOP_Dataloop_stackelm *elmp; 1246624: 60: struct DLOOP_Dataloop *dlp = 0, *sblp = &segp->builtin_loop; -: 61: 1246624: 62: DLOOP_Assert(flag == DLOOP_DATALOOP_HETEROGENEOUS || -: 63: flag == DLOOP_DATALOOP_HOMOGENEOUS || -: 64: flag == DLOOP_DATALOOP_ALL_BYTES); -: 65: -: 66:#ifdef DLOOP_DEBUG_MANIPULATE -: 67: DLOOP_dbg_printf("DLOOP_Segment_init: count = %d, buf = %x\n", -: 68: count, -: 69: buf); -: 70:#endif -: 71: 1246624: 72: if (!DLOOP_Handle_hasloop_macro(handle)) { -: 73: /* simplest case; datatype has no loop (basic) */ -: 74: 511898: 75: DLOOP_Handle_get_size_macro(handle, elmsize); -: 76: 511898: 77: sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK; 511898: 78: sblp->loop_params.c_t.count = count; 511898: 79: sblp->loop_params.c_t.dataloop = 0; 511898: 80: sblp->el_size = elmsize; 511898: 81: DLOOP_Handle_get_basic_type_macro(handle, sblp->el_type); 511898: 82: DLOOP_Handle_get_extent_macro(handle, sblp->el_extent); -: 83: 511898: 84: dlp = sblp; 511898: 85: depth = 1; -: 86: } 734726: 87: else if (count == 0) { -: 88: /* only use the builtin */ |
#####: 89: sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK; #####: 90: sblp->loop_params.c_t.count = 0; #####: 91: sblp->loop_params.c_t.dataloop = 0; #####: 92: sblp->el_size = 0; #####: 93: sblp->el_extent = 0; -: 94: #####: 95: dlp = sblp; #####: 96: depth = 1; -: 97: } |
734726: 98: else if (count == 1) { -: 99: /* don't use the builtin */ 562463: 100: DLOOP_Handle_get_loopptr_macro(handle, dlp, flag); 562463: 101: DLOOP_Handle_get_loopdepth_macro(handle, depth, flag); -: 102: } -: 103: else { -: 104: /* default: need to use builtin to handle contig; must check -: 105: * loop depth first -: 106: */ -: 107: DLOOP_Dataloop *oldloop; /* loop from original type, before new count */ -: 108: DLOOP_Offset type_size, type_extent; -: 109: DLOOP_Type el_type; -: 110: 172263: 111: DLOOP_Handle_get_loopdepth_macro(handle, depth, flag); 172263: 112: if (depth >= DLOOP_MAX_DATATYPE_DEPTH) return -1; -: 113: 172263: 114: DLOOP_Handle_get_loopptr_macro(handle, oldloop, flag); 172263: 115: DLOOP_Assert(oldloop != NULL); 172263: 116: DLOOP_Handle_get_size_macro(handle, type_size); 172263: 117: DLOOP_Handle_get_extent_macro(handle, type_extent); 172263: 118: DLOOP_Handle_get_basic_type_macro(handle, el_type); -: 119: 172263: 120: if (depth == 1 && ((oldloop->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG)) -: 121: { 7123: 122: if (type_size == type_extent) -: 123: { -: 124: /* use a contig */ 4960: 125: sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK; 4960: 126: sblp->loop_params.c_t.count = count * oldloop->loop_params.c_t.count; 4960: 127: sblp->loop_params.c_t.dataloop = NULL; 4960: 128: sblp->el_size = oldloop->el_size; 4960: 129: sblp->el_extent = oldloop->el_extent; 4960: 130: sblp->el_type = oldloop->el_type; -: 131: } -: 132: else -: 133: { -: 134: /* use a vector, with extent of original type becoming the stride */ 2163: 135: sblp->kind = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK; 2163: 136: sblp->loop_params.v_t.count = count; 2163: 137: sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count; 2163: 138: sblp->loop_params.v_t.stride = type_extent; 2163: 139: sblp->loop_params.v_t.dataloop = NULL; 2163: 140: sblp->el_size = oldloop->el_size; 2163: 141: sblp->el_extent = oldloop->el_extent; 2163: 142: sblp->el_type = oldloop->el_type; -: 
143: } -: 144: } -: 145: else -: 146: { -: 147: /* general case */ 165140: 148: sblp->kind = DLOOP_KIND_CONTIG; 165140: 149: sblp->loop_params.c_t.count = count; 165140: 150: sblp->loop_params.c_t.dataloop = oldloop; 165140: 151: sblp->el_size = type_size; 165140: 152: sblp->el_extent = type_extent; 165140: 153: sblp->el_type = el_type; -: 154: 165140: 155: depth++; /* we're adding to the depth with the builtin */ -: 156: } -: 157: 172263: 158: dlp = sblp; -: 159: } -: 160: -: 161: /* initialize the rest of the segment values */ 1246624: 162: segp->handle = handle; 1246624: 163: segp->ptr = (DLOOP_Buffer) buf; 1246624: 164: segp->stream_off = 0; 1246624: 165: segp->cur_sp = 0; 1246624: 166: segp->valid_sp = 0; -: 167: -: 168: /* initialize the first stackelm in its entirety */ 1246624: 169: elmp = &(segp->stackelm[0]); 1246624: 170: DLOOP_Stackelm_load(elmp, dlp, 0); 1246624: 171: branch_detected = elmp->may_require_reloading; -: 172: -: 173: /* Fill in parameters not set by DLOOP_Stackelm_load */ 1246624: 174: elmp->orig_offset = 0; 1246624: 175: elmp->curblock = elmp->orig_block; -: 176: /* DLOOP_Stackelm_offset assumes correct orig_count, curcount, loop_p */ 1246624: 177: elmp->curoffset = /* elmp->orig_offset + */ DLOOP_Stackelm_offset(elmp); -: 178: 1246624: 179: i = 1; 2665403: 180: while(!(dlp->kind & DLOOP_FINAL_MASK)) -: 181: { -: 182: /* get pointer to next dataloop */ 172155: 183: switch (dlp->kind & DLOOP_KIND_MASK) -: 184: { -: 185: case DLOOP_KIND_CONTIG: -: 186: case DLOOP_KIND_VECTOR: -: 187: case DLOOP_KIND_BLOCKINDEXED: -: 188: case DLOOP_KIND_INDEXED: 172155: 189: dlp = dlp->loop_params.cm_t.dataloop; 172155: 190: break; -: 191: case DLOOP_KIND_STRUCT: |
#####: 192: dlp = dlp->loop_params.s_t.dataloop_array[0]; #####: 193: break; -: 194: default: |
-: 195: /* --BEGIN ERROR HANDLING-- */ #####: 196: DLOOP_Assert(0); -: 197: break; -: 198: /* --END ERROR HANDLING-- */ -: 199: } -: 200: -: 201: /* loop_p, orig_count, orig_block, and curcount are all filled by us now. -: 202: * the rest are filled in at processing time. -: 203: */ |
172155: 204: elmp = &(segp->stackelm[i]); -: 205: 172155: 206: DLOOP_Stackelm_load(elmp, dlp, branch_detected); 172155: 207: branch_detected = elmp->may_require_reloading; 172155: 208: i++; -: 209: -: 210: } -: 211: 1246624: 212: segp->valid_sp = depth-1; -: 213: 1246624: 214: return 0; -: 215:} -: 216: -: 217:/* Segment_alloc -: 218: * -: 219: */ -: 220:struct DLOOP_Segment * PREPEND_PREFIX(Segment_alloc)(void) 1117705: 221:{ 1117705: 222: return (struct DLOOP_Segment *) DLOOP_Malloc(sizeof(struct DLOOP_Segment)); -: 223:} -: 224: -: 225:/* Segment_free -: 226: * -: 227: * Input Parameters: -: 228: * segp - pointer to segment -: 229: */ -: 230:void PREPEND_PREFIX(Segment_free)(struct DLOOP_Segment *segp) 1117705: 231:{ 1117705: 232: DLOOP_Free(segp); -: 233: return; -: 234:} -: 235: -: 236:/* DLOOP_Segment_manipulate - do something to a segment -: 237: * -: 238: * If you think of all the data to be manipulated (packed, unpacked, whatever), -: 239: * as a stream of bytes, it's easier to understand how first and last fit in. -: 240: * -: 241: * This function does all the work, calling the piecefn passed in when it -: 242: * encounters a datatype element which falls into the range of first..(last-1). -: 243: * -: 244: * piecefn can be NULL, in which case this function doesn't do anything when it -: 245: * hits a region. This is used internally for repositioning within this stream. -: 246: * -: 247: * last is a byte offset to the byte just past the last byte in the stream -: 248: * to operate on. this makes the calculations all over MUCH cleaner. -: 249: * -: 250: * stream_off, stream_el_size, first, and last are all working in terms of the -: 251: * types and sizes for the stream, which might be different from the local sizes -: 252: * (in the heterogeneous case). -: 253: * -: 254: * This is a horribly long function. Too bad; it's complicated :)! -- Rob -: 255: * -: 256: * NOTE: THIS IMPLEMENTATION CANNOT HANDLE STRUCT DATALOOPS. 
-: 257: */ -: 258:#define DLOOP_SEGMENT_SAVE_LOCAL_VALUES \ -: 259:{ \ -: 260: segp->cur_sp = cur_sp; \ -: 261: segp->valid_sp = valid_sp; \ -: 262: segp->stream_off = stream_off; \ -: 263: *lastp = stream_off; \ -: 264:} -: 265: -: 266:#define DLOOP_SEGMENT_LOAD_LOCAL_VALUES \ -: 267:{ \ -: 268: last = *lastp; \ -: 269: cur_sp = segp->cur_sp; \ -: 270: valid_sp = segp->valid_sp; \ -: 271: stream_off = segp->stream_off; \ -: 272: cur_elmp = &(segp->stackelm[cur_sp]); \ -: 273:} -: 274: -: 275:#define DLOOP_SEGMENT_RESET_VALUES \ -: 276:{ \ -: 277: segp->stream_off = 0; \ -: 278: segp->cur_sp = 0; \ -: 279: cur_elmp = &(segp->stackelm[0]); \ -: 280: cur_elmp->curcount = cur_elmp->orig_count; \ -: 281: cur_elmp->orig_block = DLOOP_Stackelm_blocksize(cur_elmp); \ -: 282: cur_elmp->curblock = cur_elmp->orig_block; \ -: 283: cur_elmp->curoffset = cur_elmp->orig_offset + \ -: 284: DLOOP_Stackelm_offset(cur_elmp); \ -: 285:} -: 286: -: 287:#define DLOOP_SEGMENT_POP_AND_MAYBE_EXIT \ -: 288:{ \ -: 289: cur_sp--; \ -: 290: if (cur_sp >= 0) cur_elmp = &segp->stackelm[cur_sp]; \ -: 291: else { \ -: 292: DLOOP_SEGMENT_SAVE_LOCAL_VALUES; \ -: 293: return; \ -: 294: } \ -: 295:} -: 296: -: 297:#define DLOOP_SEGMENT_PUSH \ -: 298:{ \ -: 299: cur_sp++; \ -: 300: cur_elmp = &segp->stackelm[cur_sp]; \ -: 301:} -: 302: -: 303:#define DLOOP_STACKELM_BLOCKINDEXED_OFFSET(elmp_, curcount_) \ -: 304:(elmp_)->loop_p->loop_params.bi_t.offset_array[(curcount_)] -: 305: -: 306:#define DLOOP_STACKELM_INDEXED_OFFSET(elmp_, curcount_) \ -: 307:(elmp_)->loop_p->loop_params.i_t.offset_array[(curcount_)] -: 308: -: 309:#define DLOOP_STACKELM_INDEXED_BLOCKSIZE(elmp_, curcount_) \ -: 310:(elmp_)->loop_p->loop_params.i_t.blocksize_array[(curcount_)] -: 311: -: 312:#define DLOOP_STACKELM_STRUCT_OFFSET(elmp_, curcount_) \ -: 313:(elmp_)->loop_p->loop_params.s_t.offset_array[(curcount_)] -: 314: -: 315:#define DLOOP_STACKELM_STRUCT_BLOCKSIZE(elmp_, curcount_) \ -: 
316:(elmp_)->loop_p->loop_params.s_t.blocksize_array[(curcount_)] -: 317: -: 318:#define DLOOP_STACKELM_STRUCT_EL_EXTENT(elmp_, curcount_) \ -: 319:(elmp_)->loop_p->loop_params.s_t.el_extent_array[(curcount_)] -: 320: -: 321:#define DLOOP_STACKELM_STRUCT_DATALOOP(elmp_, curcount_) \ -: 322:(elmp_)->loop_p->loop_params.s_t.dataloop_array[(curcount_)] -: 323: -: 324:void PREPEND_PREFIX(Segment_manipulate)(struct DLOOP_Segment *segp, -: 325: DLOOP_Offset first, -: 326: DLOOP_Offset *lastp, -: 327: int (*contigfn) (DLOOP_Offset *blocks_p, -: 328: DLOOP_Type el_type, -: 329: DLOOP_Offset rel_off, -: 330: DLOOP_Buffer bufp, -: 331: void *v_paramp), -: 332: int (*vectorfn) (DLOOP_Offset *blocks_p, -: 333: DLOOP_Count count, -: 334: DLOOP_Count blklen, -: 335: DLOOP_Offset stride, -: 336: DLOOP_Type el_type, -: 337: DLOOP_Offset rel_off, -: 338: DLOOP_Buffer bufp, -: 339: void *v_paramp), -: 340: int (*blkidxfn) (DLOOP_Offset *blocks_p, -: 341: DLOOP_Count count, -: 342: DLOOP_Count blklen, -: 343: DLOOP_Offset *offsetarray, -: 344: DLOOP_Type el_type, -: 345: DLOOP_Offset rel_off, -: 346: DLOOP_Buffer bufp, -: 347: void *v_paramp), -: 348: int (*indexfn) (DLOOP_Offset *blocks_p, -: 349: DLOOP_Count count, -: 350: DLOOP_Count *blockarray, -: 351: DLOOP_Offset *offsetarray, -: 352: DLOOP_Type el_type, -: 353: DLOOP_Offset rel_off, -: 354: DLOOP_Buffer bufp, -: 355: void *v_paramp), -: 356: DLOOP_Offset (*sizefn) (DLOOP_Type el_type), -: 357: void *pieceparams) 1786974: 358:{ -: 359: /* these four are the "local values": cur_sp, valid_sp, last, stream_off */ -: 360: int cur_sp, valid_sp; -: 361: DLOOP_Offset last, stream_off; -: 362: -: 363: struct DLOOP_Dataloop_stackelm *cur_elmp; 1786974: 364: enum { PF_NULL, PF_CONTIG, PF_VECTOR, PF_BLOCKINDEXED, PF_INDEXED } piecefn_type = PF_NULL; -: 365: 1786974: 366: DLOOP_SEGMENT_LOAD_LOCAL_VALUES; -: 367: 1786974: 368: if (first == *lastp) { -: 369: /* nothing to do */ |
#####: 370: DLOOP_dbg_printf("dloop_segment_manipulate: warning: first == last (" MPI_AINT_FMT_DEC_SPEC ")\n", first); #####: 371: return; -: 372: } -: 373: -: 374: /* first we ensure that stream_off and first are in the same spot */ |
1786974: 375: if (first != stream_off) { -: 376:#ifdef DLOOP_DEBUG_MANIPULATE -: 377: DLOOP_dbg_printf("first=" MPI_AINT_FMT_DEC_SPEC "; stream_off=" MPI_AINT_FMT_DEC_SPEC "; resetting.\n", -: 378: first, stream_off); -: 379:#endif -: 380: 64763: 381: if (first < stream_off) { 64763: 382: DLOOP_SEGMENT_RESET_VALUES; 64763: 383: stream_off = 0; -: 384: } -: 385: 64763: 386: if (first != stream_off) { |
#####: 387: DLOOP_Offset tmp_last = first; -: 388: -: 389: /* use manipulate function with a NULL piecefn to advance -: 390: * stream offset -: 391: */ #####: 392: PREPEND_PREFIX(Segment_manipulate)(segp, -: 393: stream_off, -: 394: &tmp_last, -: 395: NULL, /* contig fn */ -: 396: NULL, /* vector fn */ -: 397: NULL, /* blkidx fn */ -: 398: NULL, /* index fn */ -: 399: sizefn, -: 400: NULL); -: 401: |
-: 402: /* --BEGIN ERROR HANDLING-- */ -: 403: /* verify that we're in the right location */ #####: 404: if (tmp_last != first) DLOOP_Assert(0); -: 405: /* --END ERROR HANDLING-- */ -: 406: } -: 407: |
64763: 408: DLOOP_SEGMENT_LOAD_LOCAL_VALUES; -: 409: -: 410:#ifdef DLOOP_DEBUG_MANIPULATE -: 411: DLOOP_dbg_printf("done repositioning stream_off; first=" MPI_AINT_FMT_DEC_SPEC ", stream_off=" MPI_AINT_FMT_DEC_SPEC ", last=" MPI_AINT_FMT_DEC_SPEC "\n", -: 412: first, stream_off, last); -: 413:#endif -: 414: } -: 415: -: 416: for (;;) { -: 417:#ifdef DLOOP_DEBUG_MANIPULATE -: 418:#if 0 -: 419: DLOOP_dbg_printf("looptop; cur_sp=%d, cur_elmp=%x\n", -: 420: cur_sp, (unsigned) cur_elmp); -: 421:#endif -: 422:#endif -: 423: 28565039: 424: if (cur_elmp->loop_p->kind & DLOOP_FINAL_MASK) { 15309657: 425: int piecefn_indicated_exit = -1; -: 426: DLOOP_Offset myblocks, local_el_size, stream_el_size; -: 427: DLOOP_Type el_type; -: 428: -: 429: /* structs are never finals (leaves) */ 15309657: 430: DLOOP_Assert((cur_elmp->loop_p->kind & DLOOP_KIND_MASK) != -: 431: DLOOP_KIND_STRUCT); -: 432: -: 433: /* pop immediately on zero count */ 15309657: 434: if (cur_elmp->curcount == 0) DLOOP_SEGMENT_POP_AND_MAYBE_EXIT; -: 435: -: 436: /* size on this system of the int, double, etc. that is -: 437: * the elementary type. -: 438: */ 15309657: 439: local_el_size = cur_elmp->loop_p->el_size; 15309657: 440: el_type = cur_elmp->loop_p->el_type; 15309657: 441: stream_el_size = (sizefn) ? sizefn(el_type) : local_el_size; -: 442: -: 443: /* calculate number of elem. types to work on and function to use. -: 444: * default is to use the contig piecefn (if there is one). -: 445: */ 15309657: 446: myblocks = cur_elmp->curblock; 15309657: 447: piecefn_type = (contigfn ? 
PF_CONTIG : PF_NULL); -: 448: -: 449: /* check for opportunities to use other piecefns */ 15309657: 450: switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) { -: 451: case DLOOP_KIND_CONTIG: -: 452: break; -: 453: case DLOOP_KIND_BLOCKINDEXED: -: 454: /* only use blkidx piecefn if at start of blkidx type */ 97380: 455: if (blkidxfn && -: 456: cur_elmp->orig_block == cur_elmp->curblock && -: 457: cur_elmp->orig_count == cur_elmp->curcount) -: 458: { -: 459: /* TODO: RELAX CONSTRAINTS */ 4067: 460: myblocks = cur_elmp->curblock * cur_elmp->curcount; 4067: 461: piecefn_type = PF_BLOCKINDEXED; -: 462: } -: 463: break; -: 464: case DLOOP_KIND_INDEXED: -: 465: /* only use index piecefn if at start of the index type. -: 466: * count test checks that we're on first block. -: 467: * block test checks that we haven't made progress on first block. -: 468: */ 13359680: 469: if (indexfn && -: 470: cur_elmp->orig_count == cur_elmp->curcount && -: 471: cur_elmp->curblock == DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, 0)) -: 472: { -: 473: /* TODO: RELAX CONSTRAINT ON COUNT? */ 12546492: 474: myblocks = cur_elmp->loop_p->loop_params.i_t.total_blocks; 12546492: 475: piecefn_type = PF_INDEXED; -: 476: } -: 477: break; -: 478: case DLOOP_KIND_VECTOR: -: 479: /* only use the vector piecefn if at the start of a -: 480: * contiguous block. -: 481: */ 759477: 482: if (vectorfn && cur_elmp->orig_block == cur_elmp->curblock) -: 483: { 759133: 484: myblocks = cur_elmp->curblock * cur_elmp->curcount; 759133: 485: piecefn_type = PF_VECTOR; -: 486: } -: 487: break; -: 488: default: |
-: 489: /* --BEGIN ERROR HANDLING-- */ #####: 490: DLOOP_Assert(0); -: 491: break; -: 492: /* --END ERROR HANDLING-- */ -: 493: } -: 494: -: 495:#ifdef DLOOP_DEBUG_MANIPULATE -: 496: DLOOP_dbg_printf("\thit leaf; cur_sp=%d, elmp=%x, piece_sz=" MPI_AINT_FMT_DEC_SPEC "\n", -: 497: cur_sp, -: 498: (unsigned) cur_elmp, myblocks * local_el_size); -: 499:#endif -: 500: -: 501: /* enforce the last parameter if necessary by reducing myblocks */ |
15309657: 502: if (last != SEGMENT_IGNORE_LAST && -: 503: (stream_off + (myblocks * stream_el_size) > last)) -: 504: { 293715: 505: myblocks = ((last - stream_off) / stream_el_size); -: 506:#ifdef DLOOP_DEBUG_MANIPULATE -: 507: DLOOP_dbg_printf("\tpartial block count=" MPI_AINT_FMT_DEC_SPEC " (" MPI_AINT_FMT_DEC_SPEC " bytes)\n", -: 508: myblocks, -: 509: myblocks * stream_el_size); -: 510:#endif 293715: 511: if (myblocks == 0) { 14628: 512: DLOOP_SEGMENT_SAVE_LOCAL_VALUES; 14628: 513: return; -: 514: } -: 515: } -: 516: -: 517: /* call piecefn to perform data manipulation */ 15295029: 518: switch (piecefn_type) { -: 519: case PF_NULL: |
#####: 520: piecefn_indicated_exit = 0; -: 521:#ifdef DLOOP_DEBUG_MANIPULATE -: 522: DLOOP_dbg_printf("\tNULL piecefn for this piece\n"); -: 523:#endif #####: 524: break; -: 525: case PF_CONTIG: |
1999310: 526: DLOOP_Assert(myblocks <= cur_elmp->curblock); 1999310: 527: piecefn_indicated_exit = -: 528: contigfn(&myblocks, -: 529: el_type, -: 530: cur_elmp->curoffset, /* relative to segp->ptr */ -: 531: segp->ptr, /* start of buffer (from segment) */ -: 532: pieceparams); 1999310: 533: break; -: 534: case PF_VECTOR: 754569: 535: piecefn_indicated_exit = -: 536: vectorfn(&myblocks, -: 537: cur_elmp->curcount, -: 538: cur_elmp->orig_block, -: 539: cur_elmp->loop_p->loop_params.v_t.stride, -: 540: el_type, -: 541: cur_elmp->curoffset, -: 542: segp->ptr, -: 543: pieceparams); 754569: 544: break; -: 545: case PF_BLOCKINDEXED: 4067: 546: piecefn_indicated_exit = -: 547: blkidxfn(&myblocks, -: 548: cur_elmp->curcount, -: 549: cur_elmp->orig_block, -: 550: cur_elmp->loop_p->loop_params.bi_t.offset_array, -: 551: el_type, -: 552: cur_elmp->orig_offset, /* blkidxfn adds offset */ -: 553: segp->ptr, -: 554: pieceparams); 4067: 555: break; -: 556: case PF_INDEXED: 12537083: 557: piecefn_indicated_exit = -: 558: indexfn(&myblocks, -: 559: cur_elmp->curcount, -: 560: cur_elmp->loop_p->loop_params.i_t.blocksize_array, -: 561: cur_elmp->loop_p->loop_params.i_t.offset_array, -: 562: el_type, -: 563: cur_elmp->orig_offset, /* indexfn adds offset value */ -: 564: segp->ptr, -: 565: pieceparams); -: 566: break; -: 567: } -: 568: -: 569: /* update local values based on piecefn returns (myblocks and -: 570: * piecefn_indicated_exit) -: 571: */ 15295029: 572: DLOOP_Assert(piecefn_indicated_exit >= 0); 15295029: 573: DLOOP_Assert(myblocks >= 0); 15295029: 574: stream_off += myblocks * stream_el_size; -: 575: -: 576: /* myblocks of 0 or less than cur_elmp->curblock indicates -: 577: * that we should stop processing and return. 
-: 578: */ 15295029: 579: if (myblocks == 0) { 36118: 580: DLOOP_SEGMENT_SAVE_LOCAL_VALUES; 36118: 581: return; -: 582: } 15258911: 583: else if (myblocks < (DLOOP_Offset)(cur_elmp->curblock)) { 273957: 584: cur_elmp->curoffset += myblocks * local_el_size; 273957: 585: cur_elmp->curblock -= myblocks; -: 586: 273957: 587: DLOOP_SEGMENT_SAVE_LOCAL_VALUES; 273957: 588: return; -: 589: } -: 590: else /* myblocks >= cur_elmp->curblock */ { 14984954: 591: int count_index = 0; -: 592: -: 593: /* this assumes we're either *just* processing the last parts -: 594: * of the current block, or we're processing as many blocks as -: 595: * we like starting at the beginning of one. -: 596: */ -: 597: 14984954: 598: switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) { -: 599: case DLOOP_KIND_INDEXED: -: 604: -: 606: cur_elmp->curcount; -: 608: DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, -: 609: count_index); -: 610: } -: 611: 13312919: 612: if (cur_elmp->curcount == 0) { -: 613: /* don't bother to fill in values; we're popping anyway */ 12618981: 614: DLOOP_Assert(myblocks == 0); 12618981: 615: DLOOP_SEGMENT_POP_AND_MAYBE_EXIT; -: 616: } -: 617: else { 693938: 618: cur_elmp->orig_block = cur_elmp->curblock; 693938: 619: cur_elmp->curoffset = cur_elmp->orig_offset + -: 620: DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp, -: 621: count_index); -: 622: 693938: 623: cur_elmp->curblock -= myblocks; 693938: 624: cur_elmp->curoffset += myblocks * local_el_size; -: 625: } -: 626: break; -: 627: case DLOOP_KIND_VECTOR: -: 628: /* this math relies on assertions at top of code block */ 754641: 629: cur_elmp->curcount -= myblocks / (DLOOP_Offset)(cur_elmp->curblock); 754641: 630: if (cur_elmp->curcount == 0) { 520507: 631: DLOOP_Assert(myblocks % ((DLOOP_Offset)(cur_elmp->curblock)) == 0); 520507: 632: DLOOP_SEGMENT_POP_AND_MAYBE_EXIT; -: 633: } -: 634: else { -: 635: /* this math relies on assertions at top of code -: 636: * block -: 637: */ 234134: 638: cur_elmp->curblock = cur_elmp->orig_block - -: 
639: (myblocks % (DLOOP_Offset)(cur_elmp->curblock)); -: 640: /* new offset = original offset + -: 641: * stride * whole blocks + -: 642: * leftover bytes -: 643: */ 234134: 644: cur_elmp->curoffset = cur_elmp->orig_offset + -: 645: (((DLOOP_Offset)(cur_elmp->orig_count - cur_elmp->curcount)) * -: 646: cur_elmp->loop_p->loop_params.v_t.stride) + -: 647: (((DLOOP_Offset)(cur_elmp->orig_block - cur_elmp->curblock)) * -: 648: local_el_size); -: 649: } -: 650: break; -: 651: case DLOOP_KIND_CONTIG: -: 652: /* contigs that reach this point have always been -: 653: * completely processed -: 654: */ 838127: 655: DLOOP_Assert(myblocks == (DLOOP_Offset)(cur_elmp->curblock) && -: 656: cur_elmp->curcount == 1); 838127: 657: DLOOP_SEGMENT_POP_AND_MAYBE_EXIT; -: 658: break; -: 659: case DLOOP_KIND_BLOCKINDEXED: 370979: 660: while (myblocks > 0 && myblocks >= (DLOOP_Offset)(cur_elmp->curblock)) -: 661: { 291712: 662: myblocks -= (DLOOP_Offset)(cur_elmp->curblock); 291712: 663: cur_elmp->curcount--; 291712: 664: DLOOP_Assert(cur_elmp->curcount >= 0); -: 665: 291712: 666: count_index = cur_elmp->orig_count - -: 667: cur_elmp->curcount; 291712: 668: cur_elmp->curblock = cur_elmp->orig_block; -: 669: } 79267: 670: if (cur_elmp->curcount == 0) { -: 671: /* popping */ 4724: 672: DLOOP_Assert(myblocks == 0); 4724: 673: DLOOP_SEGMENT_POP_AND_MAYBE_EXIT; -: 674: } -: 675: else { -: 676: /* cur_elmp->orig_block = cur_elmp->curblock; */ 74543: 677: cur_elmp->curoffset = cur_elmp->orig_offset + -: 678: DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp, -: 679: count_index); 74543: 680: cur_elmp->curblock -= myblocks; 74543: 681: cur_elmp->curoffset += myblocks * local_el_size; -: 682: } -: 683: break; -: 684: } -: 685: } -: 686: 13913904: 687: if (piecefn_indicated_exit) { -: 688: /* piece function indicated that we should quit processing */ 229427: 689: DLOOP_SEGMENT_SAVE_LOCAL_VALUES; 229427: 690: return; -: 691: } -: 692: } /* end of if leaf */ 13255382: 693: else if (cur_elmp->curblock == 0) 
{ -: 694:#ifdef DLOOP_DEBUG_MANIPULATE -: 695: DLOOP_dbg_printf("\thit end of block; elmp=%x [%d]\n", -: 696: (unsigned) cur_elmp, cur_sp); -: 697:#endif 308719: 698: cur_elmp->curcount--; -: 699: -: 700: /* new block. for indexed and struct reset orig_block. -: 701: * reset curblock for all types -: 702: */ 308719: 703: switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) { -: 704: case DLOOP_KIND_CONTIG: -: 705: case DLOOP_KIND_VECTOR: -: 706: case DLOOP_KIND_BLOCKINDEXED: -: 707: break; -: 708: case DLOOP_KIND_INDEXED: 34: 709: cur_elmp->orig_block = -: 710: DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0); 34: 711: break; -: 712: case DLOOP_KIND_STRUCT: |
#####: 713: cur_elmp->orig_block = -: 714: DLOOP_STACKELM_STRUCT_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0); #####: 715: break; -: 716: default: |
-: 717: /* --BEGIN ERROR HANDLING-- */ #####: 718: DLOOP_Assert(0); -: 719: break; -: 720: /* --END ERROR HANDLING-- */ -: 721: } |
308719: 722: cur_elmp->curblock = cur_elmp->orig_block; -: 723: 308719: 724: if (cur_elmp->curcount == 0) { -: 725:#ifdef DLOOP_DEBUG_MANIPULATE -: 726: DLOOP_dbg_printf("\talso hit end of count; elmp=%x [%d]\n", -: 727: (unsigned) cur_elmp, cur_sp); -: 728:#endif 183317: 729: DLOOP_SEGMENT_POP_AND_MAYBE_EXIT; -: 730: } -: 731: } -: 732: else /* push the stackelm */ { -: 733: DLOOP_Dataloop_stackelm *next_elmp; -: 734: int count_index, block_index; -: 735: 12946663: 736: count_index = cur_elmp->orig_count - cur_elmp->curcount; 12946663: 737: block_index = cur_elmp->orig_block - cur_elmp->curblock; -: 738: -: 739: /* reload the next stackelm if necessary */ 12946663: 740: next_elmp = &(segp->stackelm[cur_sp + 1]); 12946663: 741: if (cur_elmp->may_require_reloading) { |
#####: 742: DLOOP_Dataloop *load_dlp = NULL; #####: 743: switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) { -: 744: case DLOOP_KIND_CONTIG: -: 745: case DLOOP_KIND_VECTOR: -: 746: case DLOOP_KIND_BLOCKINDEXED: -: 747: case DLOOP_KIND_INDEXED: #####: 748: load_dlp = cur_elmp->loop_p->loop_params.cm_t.dataloop; #####: 749: break; -: 750: case DLOOP_KIND_STRUCT: #####: 751: load_dlp = DLOOP_STACKELM_STRUCT_DATALOOP(cur_elmp, -: 752: count_index); #####: 753: break; -: 754: default: |
-: 755: /* --BEGIN ERROR HANDLING-- */ #####: 756: DLOOP_Assert(0); -: 757: break; -: 758: /* --END ERROR HANDLING-- */ -: 759: } -: 760: -: 761:#ifdef DLOOP_DEBUG_MANIPULATE -: 762: DLOOP_dbg_printf("\tloading dlp=%x, elmp=%x [%d]\n", -: 763: (unsigned) load_dlp, -: 764: (unsigned) next_elmp, -: 765: cur_sp+1); -: 766:#endif -: 767: |
#####: 768: DLOOP_Stackelm_load(next_elmp, -: 769: load_dlp, -: 770: 1); -: 771: } -: 772: -: 773:#ifdef DLOOP_DEBUG_MANIPULATE -: 774: DLOOP_dbg_printf("\tpushing type, elmp=%x [%d], count=%d, block=%d\n", -: 775: (unsigned) cur_elmp, cur_sp, count_index, -: 776: block_index); -: 777:#endif -: 778: /* set orig_offset and all cur values for new stackelm. -: 779: * this is done in two steps: first set orig_offset based on -: 780: * current stackelm, then set cur values based on new stackelm. -: 781: */ |
12946663: 782: switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) { -: 783: case DLOOP_KIND_CONTIG: 12790507: 784: next_elmp->orig_offset = cur_elmp->curoffset + -: 785: (DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent; 12790507: 786: break; -: 787: case DLOOP_KIND_VECTOR: -: 788: /* note: stride is in bytes */ 154846: 789: next_elmp->orig_offset = cur_elmp->orig_offset + -: 790: (DLOOP_Offset) count_index * cur_elmp->loop_p->loop_params.v_t.stride + -: 791: (DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent; 154846: 792: break; -: 793: case DLOOP_KIND_BLOCKINDEXED: 1139: 794: next_elmp->orig_offset = cur_elmp->orig_offset + -: 795: (DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent + -: 796: DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp, -: 797: count_index); 1139: 798: break; -: 799: case DLOOP_KIND_INDEXED: 171: 800: next_elmp->orig_offset = cur_elmp->orig_offset + -: 801: (DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent + -: 802: DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp, count_index); 171: 803: break; -: 804: case DLOOP_KIND_STRUCT: |
#####: 805: next_elmp->orig_offset = cur_elmp->orig_offset + -: 806: (DLOOP_Offset) block_index * DLOOP_STACKELM_STRUCT_EL_EXTENT(cur_elmp, count_index) + -: 807: DLOOP_STACKELM_STRUCT_OFFSET(cur_elmp, count_index); #####: 808: break; -: 809: default: |
-: 810: /* --BEGIN ERROR HANDLING-- */ #####: 811: DLOOP_Assert(0); -: 812: break; -: 813: /* --END ERROR HANDLING-- */ -: 814: } -: 815: -: 816:#ifdef DLOOP_DEBUG_MANIPULATE -: 817: DLOOP_dbg_printf("\tstep 1: next orig_offset = " MPI_AINT_FMT_DEC_SPEC " (0x" MPI_AINT_FMT_HEX_SPEC ")\n", -: 818: next_elmp->orig_offset, -: 819: next_elmp->orig_offset); -: 820:#endif -: 821: |
12946663: 822: switch (next_elmp->loop_p->kind & DLOOP_KIND_MASK) { -: 823: case DLOOP_KIND_CONTIG: -: 824: case DLOOP_KIND_VECTOR: 327835: 825: next_elmp->curcount = next_elmp->orig_count; 327835: 826: next_elmp->curblock = next_elmp->orig_block; 327835: 827: next_elmp->curoffset = next_elmp->orig_offset; 327835: 828: break; -: 829: case DLOOP_KIND_BLOCKINDEXED: 560: 830: next_elmp->curcount = next_elmp->orig_count; 560: 831: next_elmp->curblock = next_elmp->orig_block; 560: 832: next_elmp->curoffset = next_elmp->orig_offset + -: 833: DLOOP_STACKELM_BLOCKINDEXED_OFFSET(next_elmp, 0); 560: 834: break; -: 835: case DLOOP_KIND_INDEXED: 12618268: 836: next_elmp->curcount = next_elmp->orig_count; 12618268: 837: next_elmp->curblock = -: 838: DLOOP_STACKELM_INDEXED_BLOCKSIZE(next_elmp, 0); 12618268: 839: next_elmp->curoffset = next_elmp->orig_offset + -: 840: DLOOP_STACKELM_INDEXED_OFFSET(next_elmp, 0); 12618268: 841: break; -: 842: case DLOOP_KIND_STRUCT: |
#####: 843: next_elmp->curcount = next_elmp->orig_count; #####: 844: next_elmp->curblock = -: 845: DLOOP_STACKELM_STRUCT_BLOCKSIZE(next_elmp, 0); #####: 846: next_elmp->curoffset = next_elmp->orig_offset + -: 847: DLOOP_STACKELM_STRUCT_OFFSET(next_elmp, 0); #####: 848: break; -: 849: default: |
-: 850: /* --BEGIN ERROR HANDLING-- */ #####: 851: DLOOP_Assert(0); -: 852: break; -: 853: /* --END ERROR HANDLING-- */ -: 854: } -: 855: -: 856:#ifdef DLOOP_DEBUG_MANIPULATE -: 857: DLOOP_dbg_printf("\tstep 2: next curoffset = " MPI_AINT_FMT_DEC_SPEC " (0x" MPI_AINT_FMT_HEX_SPEC ")\n", -: 858: next_elmp->curoffset, -: 859: next_elmp->curoffset); -: 860:#endif -: 861: |
12946663: 862: cur_elmp->curblock--; 12946663: 863: DLOOP_SEGMENT_PUSH; -: 864: } /* end of else push the stackelm */ -: 865: } /* end of for (;;) */ -: 866: -: 867:#ifdef DLOOP_DEBUG_MANIPULATE -: 868: DLOOP_dbg_printf("hit end of datatype\n"); -: 869:#endif -: 870: -: 871: DLOOP_SEGMENT_SAVE_LOCAL_VALUES; -: 872: return; -: 873:} -: 874: -: 875:/* DLOOP_Stackelm_blocksize - returns block size for stackelm based on current -: 876: * count in stackelm. -: 877: * -: 878: * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct -: 879: * before this is called! -: 880: * -: 881: */ -: 882:static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp) 1483542: 883:{ 1483542: 884: struct DLOOP_Dataloop *dlp = elmp->loop_p; -: 885: 1483542: 886: switch(dlp->kind & DLOOP_KIND_MASK) { -: 887: case DLOOP_KIND_CONTIG: -: 888: /* NOTE: we're dropping the count into the -: 889: * blksize field for contigs, as described -: 890: * in the init call. -: 891: */ 729814: 892: return dlp->loop_params.c_t.count; -: 893: break; -: 894: case DLOOP_KIND_VECTOR: 567777: 895: return dlp->loop_params.v_t.blocksize; -: 896: break; -: 897: case DLOOP_KIND_BLOCKINDEXED: 5370: 898: return dlp->loop_params.bi_t.blocksize; -: 899: break; -: 900: case DLOOP_KIND_INDEXED: 180581: 901: return dlp->loop_params.i_t.blocksize_array[elmp->orig_count - elmp->curcount]; -: 902: break; -: 903: case DLOOP_KIND_STRUCT: |
#####: 904: return dlp->loop_params.s_t.blocksize_array[elmp->orig_count - elmp->curcount]; -: 905: break; -: 906: default: |
-: 907: /* --BEGIN ERROR HANDLING-- */ #####: 908: DLOOP_Assert(0); -: 909: break; -: 910: /* --END ERROR HANDLING-- */ -: 911: } -: 912: return -1; -: 913:} -: 914: -: 915:/* DLOOP_Stackelm_offset - returns starting offset (displacement) for stackelm -: 916: * based on current count in stackelm. -: 917: * -: 918: * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct -: 919: * before this is called! -: 920: * -: 921: * also, this really is only good at init time for vectors and contigs -: 922: * (all the time for indexed) at the moment. -: 923: * -: 924: */ -: 925:static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp) |
1311387: 926:{ 1311387: 927: struct DLOOP_Dataloop *dlp = elmp->loop_p; -: 928: 1311387: 929: switch(dlp->kind & DLOOP_KIND_MASK) { -: 930: case DLOOP_KIND_VECTOR: -: 931: case DLOOP_KIND_CONTIG: 1290602: 932: return 0; -: 933: break; -: 934: case DLOOP_KIND_BLOCKINDEXED: 5314: 935: return dlp->loop_params.bi_t.offset_array[elmp->orig_count - elmp->curcount]; -: 936: break; -: 937: case DLOOP_KIND_INDEXED: 15471: 938: return dlp->loop_params.i_t.offset_array[elmp->orig_count - elmp->curcount]; -: 939: break; -: 940: case DLOOP_KIND_STRUCT: |
#####: 941: return dlp->loop_params.s_t.offset_array[elmp->orig_count - elmp->curcount]; -: 942: break; -: 943: default: |
-: 944: /* --BEGIN ERROR HANDLING-- */ #####: 945: DLOOP_Assert(0); -: 946: break; -: 947: /* --END ERROR HANDLING-- */ -: 948: } -: 949: return -1; -: 950:} -: 951: -: 952:/* DLOOP_Stackelm_load -: 953: * loop_p, orig_count, orig_block, and curcount are all filled by us now. -: 954: * the rest are filled in at processing time. -: 955: */ -: 956:static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp, -: 957: struct DLOOP_Dataloop *dlp, -: 958: int branch_flag) |
1418779: 959:{ 1418779: 960: elmp->loop_p = dlp; -: 961: 1418779: 962: if ((dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG) { 725453: 963: elmp->orig_count = 1; /* put in blocksize instead */ -: 964: } -: 965: else { 693326: 966: elmp->orig_count = dlp->loop_params.count; -: 967: } -: 968: 1418779: 969: if (branch_flag || (dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT) -: 970: { |
#####: 971: elmp->may_require_reloading = 1; -: 972: } -: 973: else { |
1418779: 974: elmp->may_require_reloading = 0; -: 975: } -: 976: -: 977: /* required by DLOOP_Stackelm_blocksize */ 1418779: 978: elmp->curcount = elmp->orig_count; -: 979: 1418779: 980: elmp->orig_block = DLOOP_Stackelm_blocksize(elmp); -: 981: /* TODO: GO AHEAD AND FILL IN CURBLOCK? */ 1418779: 982:} -: 983: -: 984:/* -: 985: * Local variables: -: 986: * c-indent-tabs-mode: nil -: 987: * End: -: 988: */ |