-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/segment.c
        -:    0:Graph:segment.gcno
        -:    0:Data:segment.gcda
        -:    0:Runs:4069
        -:    0:Programs:1200
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:
        -:    3:/*
        -:    4: *  (C) 2001 by Argonne National Laboratory.
        -:    5: *      See COPYRIGHT in top-level directory.
        -:    6: */
        -:    7:
        -:    8:#include <stdio.h>
        -:    9:#include <stdlib.h>
        -:   10:
        -:   11:#include "./dataloop.h"
        -:   12:
        -:   13:#undef DLOOP_DEBUG_MANIPULATE
        -:   14:
        -:   15:#ifndef PREPEND_PREFIX
        -:   16:#error "You must explicitly include a header that sets the PREPEND_PREFIX and includes dataloop_parts.h"
        -:   17:#endif
        -:   18:
        -:   19:/* Notes on functions:
        -:   20: *
        -:   21: * There are a few different sets of functions here:
        -:   22: * - DLOOP_Segment_manipulate() - uses a "piece" function to perform operations
        -:   23: *   using segments (piece functions defined elsewhere)
        -:   24: * - PREPEND_PREFIX functions - these define the externally visible interface
        -:   25: *   to segment functionality
        -:   26: */
        -:   27:
        -:   28:static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp);
        -:   29:static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp);
        -:   30:static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp,
        -:   31:				       struct DLOOP_Dataloop *dlp,
        -:   32:				       int branch_flag);
        -:   33:/* Segment_init
        -:   34: * Fills in a caller-allocated segment (builtin loop, stack elements, cursor) for count instances of a datatype.
        -:   35: * buf    - datatype buffer location
        -:   36: * count  - number of instances of the datatype in the buffer
        -:   37: * handle - handle for datatype (could be derived or not)
        -:   38: * segp   - pointer to previously allocated segment structure
        -:   39: * flag   - flag indicating which optimizations are valid
        -:   40: *          should be one of DLOOP_DATALOOP_HOMOGENEOUS, _HETEROGENEOUS,
        -:   41: *          or _ALL_BYTES.
        -:   42: *
        -:   43: * Notes:
        -:   44: * - Assumes that the segment has been allocated.
        -:   45: * - Older MPICH2 code may pass a literal "0" or "1" instead of the named
        -:   46: *   constants (NOTE(review): this note originally said HETEROGENEOUS for both values; confirm the mapping).
        -:   47: * Returns 0 on success, or -1 when the multi-count path finds a loop depth >= DLOOP_MAX_DATATYPE_DEPTH.
        -:   48: */
        -:   49:int PREPEND_PREFIX(Segment_init)(const DLOOP_Buffer buf,
        -:   50:				 DLOOP_Count count,
        -:   51:				 DLOOP_Handle handle,
        -:   52:				 struct DLOOP_Segment *segp,
        -:   53:				 int flag)
  1246624:   54:{
  1246624:   55:    DLOOP_Offset elmsize = 0;
  1246624:   56:    int i, depth = 0;
  1246624:   57:    int branch_detected = 0;
        -:   58:
        -:   59:    struct DLOOP_Dataloop_stackelm *elmp;
  1246624:   60:    struct DLOOP_Dataloop *dlp = 0, *sblp = &segp->builtin_loop;
        -:   61:    /* flag must be one of the three recognized dataloop representations */
  1246624:   62:    DLOOP_Assert(flag == DLOOP_DATALOOP_HETEROGENEOUS ||
        -:   63:		 flag == DLOOP_DATALOOP_HOMOGENEOUS   ||
        -:   64:		 flag == DLOOP_DATALOOP_ALL_BYTES);
        -:   65:
        -:   66:#ifdef DLOOP_DEBUG_MANIPULATE
        -:   67:    DLOOP_dbg_printf("DLOOP_Segment_init: count = %d, buf = %x\n",
        -:   68:		    count,
        -:   69:		    buf);
        -:   70:#endif
        -:   71:
  1246624:   72:    if (!DLOOP_Handle_hasloop_macro(handle)) {
        -:   73:	/* simplest case; datatype has no loop (basic): describe it as a final contig of count elements in the builtin loop */
        -:   74:
   511898:   75:	DLOOP_Handle_get_size_macro(handle, elmsize);
        -:   76:
   511898:   77:	sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
   511898:   78:	sblp->loop_params.c_t.count = count;
   511898:   79:	sblp->loop_params.c_t.dataloop = 0;
   511898:   80:	sblp->el_size = elmsize;
   511898:   81:        DLOOP_Handle_get_basic_type_macro(handle, sblp->el_type);
   511898:   82:	DLOOP_Handle_get_extent_macro(handle, sblp->el_extent);
        -:   83:
   511898:   84:	dlp = sblp;
   511898:   85:	depth = 1;
        -:   86:    }
   734726:   87:    else if (count == 0) {
        -:   88:	/* zero count: only use the builtin, as an empty final contig (NOTE(review): el_type is not assigned on this path) */
    #####:   89:	sblp->kind = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
    #####:   90:	sblp->loop_params.c_t.count = 0;
    #####:   91:	sblp->loop_params.c_t.dataloop = 0;
    #####:   92:	sblp->el_size = 0;
    #####:   93:	sblp->el_extent = 0;
        -:   94:
    #####:   95:	dlp = sblp;
    #####:   96:	depth = 1;
        -:   97:    }
   734726:   98:    else if (count == 1) {
        -:   99:	/* single instance: don't use the builtin; take the loop pointer and depth straight from the handle */
   562463:  100:	DLOOP_Handle_get_loopptr_macro(handle, dlp, flag);
   562463:  101:	DLOOP_Handle_get_loopdepth_macro(handle, depth, flag);
        -:  102:    }
        -:  103:    else {
        -:  104:	/* default: need to use builtin to handle contig; must check
        -:  105:	 * loop depth first
        -:  106:	 */
        -:  107:	DLOOP_Dataloop *oldloop; /* loop from original type, before new count */
        -:  108:	DLOOP_Offset type_size, type_extent;
        -:  109:	DLOOP_Type el_type;
        -:  110:	
   172263:  111:	DLOOP_Handle_get_loopdepth_macro(handle, depth, flag);
   172263:  112:	if (depth >= DLOOP_MAX_DATATYPE_DEPTH) return -1;
        -:  113:
   172263:  114:	DLOOP_Handle_get_loopptr_macro(handle, oldloop, flag);
   172263:  115:	DLOOP_Assert(oldloop != NULL);
   172263:  116:	DLOOP_Handle_get_size_macro(handle, type_size);
   172263:  117:	DLOOP_Handle_get_extent_macro(handle, type_extent);
   172263:  118:        DLOOP_Handle_get_basic_type_macro(handle, el_type);
        -:  119:	/* a depth-1 contig type is folded into the builtin loop as a single final loop */
   172263:  120:	if (depth == 1 && ((oldloop->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG))
        -:  121:	{
     7123:  122:	    if (type_size == type_extent)
        -:  123:	    {
        -:  124:		/* size equals extent: use one contig of count * (inner count) elements */
     4960:  125:		sblp->kind                     = DLOOP_KIND_CONTIG | DLOOP_FINAL_MASK;
     4960:  126:		sblp->loop_params.c_t.count    = count * oldloop->loop_params.c_t.count;
     4960:  127:		sblp->loop_params.c_t.dataloop = NULL;
     4960:  128:		sblp->el_size                  = oldloop->el_size;
     4960:  129:		sblp->el_extent                = oldloop->el_extent;
     4960:  130:		sblp->el_type                  = oldloop->el_type;
        -:  131:	    }
        -:  132:	    else
        -:  133:	    {
        -:  134:		/* use a vector, with extent of original type becoming the stride */
     2163:  135:		sblp->kind                      = DLOOP_KIND_VECTOR | DLOOP_FINAL_MASK;
     2163:  136:		sblp->loop_params.v_t.count     = count;
     2163:  137:		sblp->loop_params.v_t.blocksize = oldloop->loop_params.c_t.count;
     2163:  138:		sblp->loop_params.v_t.stride    = type_extent;
     2163:  139:		sblp->loop_params.v_t.dataloop  = NULL;
     2163:  140:		sblp->el_size                   = oldloop->el_size;
     2163:  141:		sblp->el_extent                 = oldloop->el_extent;
     2163:  142:		sblp->el_type                   = oldloop->el_type;
        -:  143:	    }
        -:  144:	}
        -:  145:	else
        -:  146:	{
        -:  147:	    /* general case: builtin becomes a non-final contig whose child is the type's own loop */
   165140:  148:	    sblp->kind                     = DLOOP_KIND_CONTIG;
   165140:  149:	    sblp->loop_params.c_t.count    = count;
   165140:  150:	    sblp->loop_params.c_t.dataloop = oldloop;
   165140:  151:	    sblp->el_size                  = type_size;
   165140:  152:	    sblp->el_extent                = type_extent;
   165140:  153:	    sblp->el_type                  = el_type;
        -:  154:
   165140:  155:	    depth++; /* we're adding to the depth with the builtin */
        -:  156:	}
        -:  157:
   172263:  158:	dlp = sblp;
        -:  159:    }
        -:  160:
        -:  161:    /* initialize the rest of the segment values */
  1246624:  162:    segp->handle = handle;
  1246624:  163:    segp->ptr = (DLOOP_Buffer) buf;
  1246624:  164:    segp->stream_off = 0;
  1246624:  165:    segp->cur_sp = 0;
  1246624:  166:    segp->valid_sp = 0;
        -:  167:
        -:  168:    /* initialize the first stackelm in its entirety */
  1246624:  169:    elmp = &(segp->stackelm[0]);
  1246624:  170:    DLOOP_Stackelm_load(elmp, dlp, 0);
  1246624:  171:    branch_detected = elmp->may_require_reloading;
        -:  172:
        -:  173:    /* Fill in parameters not set by DLOOP_Stackelm_load */
  1246624:  174:    elmp->orig_offset = 0;
  1246624:  175:    elmp->curblock    = elmp->orig_block;
        -:  176:    /* DLOOP_Stackelm_offset assumes correct orig_count, curcount, loop_p */
  1246624:  177:    elmp->curoffset   = /* elmp->orig_offset + */ DLOOP_Stackelm_offset(elmp);
        -:  178:    /* walk down the loop chain, loading one stack element per level until a final loop is reached */
  1246624:  179:    i = 1;
  2665403:  180:    while(!(dlp->kind & DLOOP_FINAL_MASK))
        -:  181:    {
        -:  182:        /* get pointer to next dataloop */
   172155:  183:        switch (dlp->kind & DLOOP_KIND_MASK)
        -:  184:        {
        -:  185:            case DLOOP_KIND_CONTIG:
        -:  186:            case DLOOP_KIND_VECTOR:
        -:  187:            case DLOOP_KIND_BLOCKINDEXED:
        -:  188:            case DLOOP_KIND_INDEXED:
   172155:  189:                dlp = dlp->loop_params.cm_t.dataloop;
   172155:  190:                break;
        -:  191:            case DLOOP_KIND_STRUCT:
    #####:  192:                dlp = dlp->loop_params.s_t.dataloop_array[0];
    #####:  193:                break;
        -:  194:            default:
        -:  195:                /* --BEGIN ERROR HANDLING-- */
    #####:  196:                DLOOP_Assert(0);
        -:  197:                break;
        -:  198:                /* --END ERROR HANDLING-- */
        -:  199:        }
        -:  200:
        -:  201:	/* loop_p, orig_count, orig_block, and curcount are all filled by us now.
        -:  202:	 * the rest are filled in at processing time.
        -:  203:	 */
   172155:  204:	elmp = &(segp->stackelm[i]);
        -:  205:	/* NOTE(review): i is not range-checked against the stackelm array here; confirm that depth bounds it */
   172155:  206:	DLOOP_Stackelm_load(elmp, dlp, branch_detected);
   172155:  207:	branch_detected = elmp->may_require_reloading;
   172155:  208:        i++;
        -:  209:
        -:  210:    }
        -:  211:
  1246624:  212:    segp->valid_sp = depth-1;
        -:  213:
  1246624:  214:    return 0;
        -:  215:}
        -:  216:
        -:  217:/* Segment_alloc
        -:  218: * Returns DLOOP_Malloc(sizeof(struct DLOOP_Segment)) as a segment pointer; the result is neither checked for NULL nor initialized here.
        -:  219: */
        -:  220:struct DLOOP_Segment * PREPEND_PREFIX(Segment_alloc)(void)
  1117705:  221:{
  1117705:  222:    return (struct DLOOP_Segment *) DLOOP_Malloc(sizeof(struct DLOOP_Segment));
        -:  223:}
        -:  224:
        -:  225:/* Segment_free
        -:  226: * Releases a segment by handing the pointer to DLOOP_Free; nothing else is torn down here.
        -:  227: * Input Parameters:
        -:  228: * segp - pointer to segment (presumably one obtained from Segment_alloc; confirm against callers)
        -:  229: */
        -:  230:void PREPEND_PREFIX(Segment_free)(struct DLOOP_Segment *segp)
  1117705:  231:{
  1117705:  232:    DLOOP_Free(segp);
        -:  233:    return;
        -:  234:}
        -:  235:
        -:  236:/* DLOOP_Segment_manipulate - do something to a segment
        -:  237: *
        -:  238: * If you think of all the data to be manipulated (packed, unpacked, whatever),
        -:  239: * as a stream of bytes, it's easier to understand how first and last fit in.
        -:  240: *
        -:  241: * This function does all the work, calling the piecefn passed in when it
        -:  242: * encounters a datatype element which falls into the range of first..(last-1).
        -:  243: *
        -:  244: * piecefn can be NULL, in which case this function doesn't do anything when it
        -:  245: * hits a region.  This is used internally for repositioning within this stream.
        -:  246: *
        -:  247: * last is a byte offset to the byte just past the last byte in the stream
        -:  248: * to operate on.  this makes the calculations all over MUCH cleaner.
        -:  249: *
        -:  250: * stream_off, stream_el_size, first, and last are all working in terms of the
        -:  251: * types and sizes for the stream, which might be different from the local sizes
        -:  252: * (in the heterogeneous case).
        -:  253: *
        -:  254: * This is a horribly long function.  Too bad; it's complicated :)! -- Rob
        -:  255: *
        -:  256: * NOTE: THIS IMPLEMENTATION CANNOT HANDLE STRUCT DATALOOPS.
        -:  257: */
        -:  258:#define DLOOP_SEGMENT_SAVE_LOCAL_VALUES		\
        -:  259:{ /* store the local cursor state back into segp and report stream_off through *lastp */ \
        -:  260:    segp->cur_sp     = cur_sp;			\
        -:  261:    segp->valid_sp   = valid_sp;		\
        -:  262:    segp->stream_off = stream_off;		\
        -:  263:    *lastp           = stream_off;		\
        -:  264:}
        -:  265:/* Load the local values (last, cur_sp, valid_sp, stream_off, cur_elmp) from *lastp and segp. */
        -:  266:#define DLOOP_SEGMENT_LOAD_LOCAL_VALUES		\
        -:  267:{						\
        -:  268:    last       = *lastp;			\
        -:  269:    cur_sp     = segp->cur_sp;			\
        -:  270:    valid_sp   = segp->valid_sp;		\
        -:  271:    stream_off = segp->stream_off;		\
        -:  272:    cur_elmp   = &(segp->stackelm[cur_sp]);	\
        -:  273:}
        -:  274:/* Rewind segp to stream offset 0 and reinitialize stack element 0; writes segp fields and cur_elmp, not the cur_sp/stream_off locals. */
        -:  275:#define DLOOP_SEGMENT_RESET_VALUES				\
        -:  276:{								\
        -:  277:    segp->stream_off     = 0;					\
        -:  278:    segp->cur_sp         = 0; 					\
        -:  279:    cur_elmp             = &(segp->stackelm[0]);		\
        -:  280:    cur_elmp->curcount   = cur_elmp->orig_count;		\
        -:  281:    cur_elmp->orig_block = DLOOP_Stackelm_blocksize(cur_elmp);	\
        -:  282:    cur_elmp->curblock   = cur_elmp->orig_block;		\
        -:  283:    cur_elmp->curoffset  = cur_elmp->orig_offset +              \
        -:  284:                           DLOOP_Stackelm_offset(cur_elmp);     \
        -:  285:}
        -:  286:/* Pop one stack level; if cur_sp drops below 0, save the local values and return from the enclosing function. */
        -:  287:#define DLOOP_SEGMENT_POP_AND_MAYBE_EXIT			\
        -:  288:{								\
        -:  289:    cur_sp--;							\
        -:  290:    if (cur_sp >= 0) cur_elmp = &segp->stackelm[cur_sp];	\
        -:  291:    else {							\
        -:  292:	DLOOP_SEGMENT_SAVE_LOCAL_VALUES;			\
        -:  293:	return;							\
        -:  294:    }								\
        -:  295:}
        -:  296:/* Push: advance cur_sp and point cur_elmp at the new top stack element. */
        -:  297:#define DLOOP_SEGMENT_PUSH			\
        -:  298:{						\
        -:  299:    cur_sp++;					\
        -:  300:    cur_elmp = &segp->stackelm[cur_sp];		\
        -:  301:}
        -:  302:/* Accessors below index a stack element's loop-parameter arrays (offsets, block sizes, extents, child loops) by curcount_. */
        -:  303:#define DLOOP_STACKELM_BLOCKINDEXED_OFFSET(elmp_, curcount_) \
        -:  304:(elmp_)->loop_p->loop_params.bi_t.offset_array[(curcount_)]
        -:  305:
        -:  306:#define DLOOP_STACKELM_INDEXED_OFFSET(elmp_, curcount_) \
        -:  307:(elmp_)->loop_p->loop_params.i_t.offset_array[(curcount_)]
        -:  308:
        -:  309:#define DLOOP_STACKELM_INDEXED_BLOCKSIZE(elmp_, curcount_) \
        -:  310:(elmp_)->loop_p->loop_params.i_t.blocksize_array[(curcount_)]
        -:  311:
        -:  312:#define DLOOP_STACKELM_STRUCT_OFFSET(elmp_, curcount_) \
        -:  313:(elmp_)->loop_p->loop_params.s_t.offset_array[(curcount_)]
        -:  314:
        -:  315:#define DLOOP_STACKELM_STRUCT_BLOCKSIZE(elmp_, curcount_) \
        -:  316:(elmp_)->loop_p->loop_params.s_t.blocksize_array[(curcount_)]
        -:  317:
        -:  318:#define DLOOP_STACKELM_STRUCT_EL_EXTENT(elmp_, curcount_) \
        -:  319:(elmp_)->loop_p->loop_params.s_t.el_extent_array[(curcount_)]
        -:  320:
        -:  321:#define DLOOP_STACKELM_STRUCT_DATALOOP(elmp_, curcount_) \
        -:  322:(elmp_)->loop_p->loop_params.s_t.dataloop_array[(curcount_)]
        -:  323:
        -:  324:void PREPEND_PREFIX(Segment_manipulate)(struct DLOOP_Segment *segp,
        -:  325:					DLOOP_Offset first,
        -:  326:					DLOOP_Offset *lastp,
        -:  327:					int (*contigfn) (DLOOP_Offset *blocks_p,
        -:  328:							 DLOOP_Type el_type,
        -:  329:							 DLOOP_Offset rel_off,
        -:  330:							 DLOOP_Buffer bufp,
        -:  331:							 void *v_paramp),
        -:  332:					int (*vectorfn) (DLOOP_Offset *blocks_p,
        -:  333:							 DLOOP_Count count,
        -:  334:							 DLOOP_Count blklen,
        -:  335:							 DLOOP_Offset stride,
        -:  336:							 DLOOP_Type el_type,
        -:  337:							 DLOOP_Offset rel_off,
        -:  338:							 DLOOP_Buffer bufp,
        -:  339:							 void *v_paramp),
        -:  340:					int (*blkidxfn) (DLOOP_Offset *blocks_p,
        -:  341:							 DLOOP_Count count,
        -:  342:							 DLOOP_Count blklen,
        -:  343:							 DLOOP_Offset *offsetarray,
        -:  344:							 DLOOP_Type el_type,
        -:  345:							 DLOOP_Offset rel_off,
        -:  346:							 DLOOP_Buffer bufp,
        -:  347:							 void *v_paramp),
        -:  348:					int (*indexfn) (DLOOP_Offset *blocks_p,
        -:  349:							DLOOP_Count count,
        -:  350:							DLOOP_Count *blockarray,
        -:  351:							DLOOP_Offset *offsetarray,
        -:  352:							DLOOP_Type el_type,
        -:  353:							DLOOP_Offset rel_off,
        -:  354:							DLOOP_Buffer bufp,
        -:  355:							void *v_paramp),
        -:  356:					DLOOP_Offset (*sizefn) (DLOOP_Type el_type),
        -:  357:					void *pieceparams)
  1786974:  358:{
        -:  359:    /* these four are the "local values": cur_sp, valid_sp, last, stream_off */
        -:  360:    int cur_sp, valid_sp;
        -:  361:    DLOOP_Offset last, stream_off;
        -:  362:
        -:  363:    struct DLOOP_Dataloop_stackelm *cur_elmp;
  1786974:  364:    enum { PF_NULL, PF_CONTIG, PF_VECTOR, PF_BLOCKINDEXED, PF_INDEXED } piecefn_type = PF_NULL;
        -:  365:
  1786974:  366:    DLOOP_SEGMENT_LOAD_LOCAL_VALUES;
        -:  367:
  1786974:  368:    if (first == *lastp) {
        -:  369:	/* nothing to do */
    #####:  370:	DLOOP_dbg_printf("dloop_segment_manipulate: warning: first == last (" MPI_AINT_FMT_DEC_SPEC ")\n", first);
    #####:  371:	return;
        -:  372:    }
        -:  373:
        -:  374:    /* first we ensure that stream_off and first are in the same spot */
  1786974:  375:    if (first != stream_off) {
        -:  376:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  377:	DLOOP_dbg_printf("first=" MPI_AINT_FMT_DEC_SPEC "; stream_off=" MPI_AINT_FMT_DEC_SPEC "; resetting.\n",
        -:  378:			 first, stream_off);
        -:  379:#endif
        -:  380:
    64763:  381:	if (first < stream_off) {
    64763:  382:	    DLOOP_SEGMENT_RESET_VALUES;
    64763:  383:	    stream_off = 0;
        -:  384:	}
        -:  385:
    64763:  386:	if (first != stream_off) {
    #####:  387:	    DLOOP_Offset tmp_last = first;
        -:  388:
        -:  389:	    /* use manipulate function with a NULL piecefn to advance
        -:  390:	     * stream offset
        -:  391:	     */
    #####:  392:	    PREPEND_PREFIX(Segment_manipulate)(segp,
        -:  393:					       stream_off,
        -:  394:					       &tmp_last,
        -:  395:					       NULL, /* contig fn */
        -:  396:					       NULL, /* vector fn */
        -:  397:					       NULL, /* blkidx fn */
        -:  398:					       NULL, /* index fn */
        -:  399:					       sizefn,
        -:  400:                                               NULL);
        -:  401:
        -:  402:	    /* --BEGIN ERROR HANDLING-- */
        -:  403:	    /* verify that we're in the right location */
    #####:  404:	    if (tmp_last != first) DLOOP_Assert(0);
        -:  405:	    /* --END ERROR HANDLING-- */
        -:  406:	}
        -:  407:
    64763:  408:	DLOOP_SEGMENT_LOAD_LOCAL_VALUES;
        -:  409:
        -:  410:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  411:	DLOOP_dbg_printf("done repositioning stream_off; first=" MPI_AINT_FMT_DEC_SPEC ", stream_off=" MPI_AINT_FMT_DEC_SPEC ", last=" MPI_AINT_FMT_DEC_SPEC "\n",
        -:  412:		   first, stream_off, last);
        -:  413:#endif
        -:  414:    }
        -:  415:
        -:  416:    for (;;) {
        -:  417:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  418:#if 0
        -:  419:        DLOOP_dbg_printf("looptop; cur_sp=%d, cur_elmp=%x\n",
        -:  420:			 cur_sp, (unsigned) cur_elmp);
        -:  421:#endif
        -:  422:#endif
        -:  423:
 28565039:  424:	if (cur_elmp->loop_p->kind & DLOOP_FINAL_MASK) {
 15309657:  425:	    int piecefn_indicated_exit = -1;
        -:  426:	    DLOOP_Offset myblocks, local_el_size, stream_el_size;
        -:  427:	    DLOOP_Type el_type;
        -:  428:
        -:  429:	    /* structs are never finals (leaves) */
 15309657:  430:	    DLOOP_Assert((cur_elmp->loop_p->kind & DLOOP_KIND_MASK) !=
        -:  431:		   DLOOP_KIND_STRUCT);
        -:  432:
        -:  433:	    /* pop immediately on zero count */
 15309657:  434:	    if (cur_elmp->curcount == 0) DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  435:
        -:  436:	    /* size on this system of the int, double, etc. that is
        -:  437:	     * the elementary type.
        -:  438:	     */
 15309657:  439:	    local_el_size  = cur_elmp->loop_p->el_size;
 15309657:  440:	    el_type        = cur_elmp->loop_p->el_type;
 15309657:  441:	    stream_el_size = (sizefn) ? sizefn(el_type) : local_el_size;
        -:  442:
        -:  443:	    /* calculate number of elem. types to work on and function to use.
        -:  444:	     * default is to use the contig piecefn (if there is one).
        -:  445:	     */
 15309657:  446:	    myblocks = cur_elmp->curblock;
 15309657:  447:	    piecefn_type = (contigfn ? PF_CONTIG : PF_NULL);
        -:  448:
        -:  449:	    /* check for opportunities to use other piecefns */
 15309657:  450:	    switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  451:		case DLOOP_KIND_CONTIG:
        -:  452:		    break;
        -:  453:         	case DLOOP_KIND_BLOCKINDEXED:
        -:  454:		    /* only use blkidx piecefn if at start of blkidx type */
    97380:  455:		    if (blkidxfn &&
        -:  456:			cur_elmp->orig_block == cur_elmp->curblock &&
        -:  457:			cur_elmp->orig_count == cur_elmp->curcount)
        -:  458:		    {
        -:  459:			/* TODO: RELAX CONSTRAINTS */
     4067:  460:			myblocks = cur_elmp->curblock * cur_elmp->curcount;
     4067:  461:			piecefn_type = PF_BLOCKINDEXED;
        -:  462:		    }
        -:  463:		    break;
        -:  464:		case DLOOP_KIND_INDEXED:
        -:  465:		    /* only use index piecefn if at start of the index type.
        -:  466:		     *   count test checks that we're on first block.
        -:  467:		     *   block test checks that we haven't made progress on first block.
        -:  468:		     */
 13359680:  469:		    if (indexfn &&
        -:  470:			cur_elmp->orig_count == cur_elmp->curcount &&
        -:  471:			cur_elmp->curblock == DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, 0))
        -:  472:		    {
        -:  473:			/* TODO: RELAX CONSTRAINT ON COUNT? */
 12546492:  474:			myblocks = cur_elmp->loop_p->loop_params.i_t.total_blocks;
 12546492:  475:			piecefn_type = PF_INDEXED;
        -:  476:		    }
        -:  477:		    break;
        -:  478:		case DLOOP_KIND_VECTOR:
        -:  479:		    /* only use the vector piecefn if at the start of a
        -:  480:		     * contiguous block.
        -:  481:		     */
   759477:  482:		    if (vectorfn && cur_elmp->orig_block == cur_elmp->curblock)
        -:  483:		    {
   759133:  484:			myblocks = cur_elmp->curblock * cur_elmp->curcount;
   759133:  485:			piecefn_type = PF_VECTOR;
        -:  486:		    }
        -:  487:		    break;
        -:  488:		default:
        -:  489:		    /* --BEGIN ERROR HANDLING-- */
    #####:  490:		    DLOOP_Assert(0);
        -:  491:		    break;
        -:  492:		    /* --END ERROR HANDLING-- */
        -:  493:	    }
        -:  494:
        -:  495:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  496:	    DLOOP_dbg_printf("\thit leaf; cur_sp=%d, elmp=%x, piece_sz=" MPI_AINT_FMT_DEC_SPEC "\n",
        -:  497:			     cur_sp,
        -:  498:		             (unsigned) cur_elmp, myblocks * local_el_size);
        -:  499:#endif
        -:  500:
        -:  501:	    /* enforce the last parameter if necessary by reducing myblocks */
 15309657:  502:	    if (last != SEGMENT_IGNORE_LAST &&
        -:  503:		(stream_off + (myblocks * stream_el_size) > last))
        -:  504:	    {
   293715:  505:		myblocks = ((last - stream_off) / stream_el_size);
        -:  506:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  507:		DLOOP_dbg_printf("\tpartial block count=" MPI_AINT_FMT_DEC_SPEC " (" MPI_AINT_FMT_DEC_SPEC " bytes)\n",
        -:  508:				 myblocks,
        -:  509:                                 myblocks * stream_el_size);
        -:  510:#endif
   293715:  511:		if (myblocks == 0) {
    14628:  512:		    DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
    14628:  513:		    return;
        -:  514:		}
        -:  515:	    }
        -:  516:
        -:  517:	    /* call piecefn to perform data manipulation */
 15295029:  518:	    switch (piecefn_type) {
        -:  519:		case PF_NULL:
    #####:  520:		    piecefn_indicated_exit = 0;
        -:  521:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  522:		    DLOOP_dbg_printf("\tNULL piecefn for this piece\n");
        -:  523:#endif
    #####:  524:		    break;
        -:  525:		case PF_CONTIG:
  1999310:  526:		    DLOOP_Assert(myblocks <= cur_elmp->curblock);
  1999310:  527:		    piecefn_indicated_exit =
        -:  528:			contigfn(&myblocks,
        -:  529:				 el_type,
        -:  530:				 cur_elmp->curoffset, /* relative to segp->ptr */
        -:  531:				 segp->ptr, /* start of buffer (from segment) */
        -:  532:				 pieceparams);
  1999310:  533:		    break;
        -:  534:		case PF_VECTOR:
   754569:  535:		    piecefn_indicated_exit =
        -:  536:			vectorfn(&myblocks,
        -:  537:				 cur_elmp->curcount,
        -:  538:				 cur_elmp->orig_block,
        -:  539:				 cur_elmp->loop_p->loop_params.v_t.stride,
        -:  540:				 el_type,
        -:  541:				 cur_elmp->curoffset,
        -:  542:				 segp->ptr,
        -:  543:				 pieceparams);
   754569:  544:		    break;
        -:  545:		case PF_BLOCKINDEXED:
     4067:  546:		    piecefn_indicated_exit =
        -:  547:			blkidxfn(&myblocks,
        -:  548:				 cur_elmp->curcount,
        -:  549:				 cur_elmp->orig_block,
        -:  550:				 cur_elmp->loop_p->loop_params.bi_t.offset_array,
        -:  551:				 el_type,
        -:  552:				 cur_elmp->orig_offset, /* blkidxfn adds offset */
        -:  553:				 segp->ptr,
        -:  554:				 pieceparams);
     4067:  555:		    break;
        -:  556:		case PF_INDEXED:
 12537083:  557:		    piecefn_indicated_exit =
        -:  558:			indexfn(&myblocks,
        -:  559:				cur_elmp->curcount,
        -:  560:				cur_elmp->loop_p->loop_params.i_t.blocksize_array,
        -:  561:				cur_elmp->loop_p->loop_params.i_t.offset_array,
        -:  562:				el_type,
        -:  563:				cur_elmp->orig_offset, /* indexfn adds offset value */
        -:  564:				segp->ptr,
        -:  565:				pieceparams);
        -:  566:		    break;
        -:  567:	    }
        -:  568:
        -:  569:	    /* update local values based on piecefn returns (myblocks and
        -:  570:	     * piecefn_indicated_exit)
        -:  571:	     */
 15295029:  572:	    DLOOP_Assert(piecefn_indicated_exit >= 0);
 15295029:  573:	    DLOOP_Assert(myblocks >= 0);
 15295029:  574:	    stream_off += myblocks * stream_el_size;
        -:  575:
        -:  576:	    /* myblocks of 0 or less than cur_elmp->curblock indicates
        -:  577:	     * that we should stop processing and return.
        -:  578:	     */
 15295029:  579:	    if (myblocks == 0) {
    36118:  580:		DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
    36118:  581:		return;
        -:  582:	    }
 15258911:  583:	    else if (myblocks < (DLOOP_Offset)(cur_elmp->curblock)) {
   273957:  584:		cur_elmp->curoffset += myblocks * local_el_size;
   273957:  585:		cur_elmp->curblock  -= myblocks;
        -:  586:
   273957:  587:		DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
   273957:  588:		return;
        -:  589:	    }
        -:  590:	    else /* myblocks >= cur_elmp->curblock */ {
 14984954:  591:		int count_index = 0;
        -:  592:
        -:  593:		/* this assumes we're either *just* processing the last parts
        -:  594:		 * of the current block, or we're processing as many blocks as
        -:  595:		 * we like starting at the beginning of one.
        -:  596:		 */
        -:  597:
 14984954:  598:		switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  599:		    case DLOOP_KIND_INDEXED:
        -:  604:
        -:  606:				cur_elmp->curcount;
        -:  608:				DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp,
        -:  609:								 count_index);
        -:  610:			}
        -:  611:
 13312919:  612:			if (cur_elmp->curcount == 0) {
        -:  613:			    /* don't bother to fill in values; we're popping anyway */
 12618981:  614:			    DLOOP_Assert(myblocks == 0);
 12618981:  615:			    DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  616:			}
        -:  617:			else {
   693938:  618:			    cur_elmp->orig_block = cur_elmp->curblock;
   693938:  619:			    cur_elmp->curoffset  = cur_elmp->orig_offset +
        -:  620:				DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp,
        -:  621:							      count_index);
        -:  622:
   693938:  623:			    cur_elmp->curblock  -= myblocks;
   693938:  624:			    cur_elmp->curoffset += myblocks * local_el_size;
        -:  625:			}
        -:  626:			break;
        -:  627:		    case DLOOP_KIND_VECTOR:
        -:  628:			/* this math relies on assertions at top of code block */
   754641:  629:			cur_elmp->curcount -= myblocks / (DLOOP_Offset)(cur_elmp->curblock);
   754641:  630:			if (cur_elmp->curcount == 0) {
   520507:  631:			    DLOOP_Assert(myblocks % ((DLOOP_Offset)(cur_elmp->curblock)) == 0);
   520507:  632:			    DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  633:			}
        -:  634:			else {
        -:  635:			    /* this math relies on assertions at top of code
        -:  636:			     * block
        -:  637:			     */
   234134:  638:			    cur_elmp->curblock = cur_elmp->orig_block -
        -:  639:				(myblocks % (DLOOP_Offset)(cur_elmp->curblock));
        -:  640:			    /* new offset = original offset +
        -:  641:			     *              stride * whole blocks +
        -:  642:			     *              leftover bytes
        -:  643:			     */
   234134:  644:			    cur_elmp->curoffset = cur_elmp->orig_offset +
        -:  645:				(((DLOOP_Offset)(cur_elmp->orig_count - cur_elmp->curcount)) *
        -:  646:				 cur_elmp->loop_p->loop_params.v_t.stride) +
        -:  647:				(((DLOOP_Offset)(cur_elmp->orig_block - cur_elmp->curblock)) *
        -:  648:				 local_el_size);
        -:  649:			}
        -:  650:			break;
        -:  651:		    case DLOOP_KIND_CONTIG:
        -:  652:			/* contigs that reach this point have always been
        -:  653:			 * completely processed
        -:  654:			 */
   838127:  655:			DLOOP_Assert(myblocks == (DLOOP_Offset)(cur_elmp->curblock) &&
        -:  656:			       cur_elmp->curcount == 1);
   838127:  657:			DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  658:			break;
        -:  659:		    case DLOOP_KIND_BLOCKINDEXED:
   370979:  660:			while (myblocks > 0 && myblocks >= (DLOOP_Offset)(cur_elmp->curblock))
        -:  661:			{
   291712:  662:			    myblocks -= (DLOOP_Offset)(cur_elmp->curblock);
   291712:  663:			    cur_elmp->curcount--;
   291712:  664:			    DLOOP_Assert(cur_elmp->curcount >= 0);
        -:  665:
   291712:  666:			    count_index = cur_elmp->orig_count -
        -:  667:				cur_elmp->curcount;
   291712:  668:			    cur_elmp->curblock = cur_elmp->orig_block;
        -:  669:			}
    79267:  670:			if (cur_elmp->curcount == 0) {
        -:  671:			    /* popping */
     4724:  672:			    DLOOP_Assert(myblocks == 0);
     4724:  673:			    DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  674:			}
        -:  675:			else {
        -:  676:			    /* cur_elmp->orig_block = cur_elmp->curblock; */
    74543:  677:			    cur_elmp->curoffset = cur_elmp->orig_offset +
        -:  678:				DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp,
        -:  679:								   count_index);
    74543:  680:			    cur_elmp->curblock  -= myblocks;
    74543:  681:			    cur_elmp->curoffset += myblocks * local_el_size;
        -:  682:			}
        -:  683:			break;
        -:  684:		}
        -:  685:	    }
        -:  686:
 13913904:  687:	    if (piecefn_indicated_exit) {
        -:  688:		/* piece function indicated that we should quit processing */
   229427:  689:		DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
   229427:  690:		return;
        -:  691:	    }
        -:  692:	} /* end of if leaf */
 13255382:  693:	else if (cur_elmp->curblock == 0) {
        -:  694:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  695:	    DLOOP_dbg_printf("\thit end of block; elmp=%x [%d]\n",
        -:  696:			    (unsigned) cur_elmp, cur_sp);
        -:  697:#endif
   308719:  698:	    cur_elmp->curcount--;
        -:  699:
        -:  700:	    /* new block.  for indexed and struct reset orig_block.
        -:  701:	     * reset curblock for all types
        -:  702:	     */
   308719:  703:	    switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  704:		case DLOOP_KIND_CONTIG:
        -:  705:		case DLOOP_KIND_VECTOR:
        -:  706:		case DLOOP_KIND_BLOCKINDEXED:
        -:  707:		    break;
        -:  708:		case DLOOP_KIND_INDEXED:
       34:  709:		    cur_elmp->orig_block =
        -:  710:			DLOOP_STACKELM_INDEXED_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0);
       34:  711:		    break;
        -:  712:		case DLOOP_KIND_STRUCT:
    #####:  713:		    cur_elmp->orig_block =
        -:  714:			DLOOP_STACKELM_STRUCT_BLOCKSIZE(cur_elmp, cur_elmp->curcount ? cur_elmp->orig_count - cur_elmp->curcount : 0);
    #####:  715:		    break;
        -:  716:		default:
        -:  717:		    /* --BEGIN ERROR HANDLING-- */
    #####:  718:		    DLOOP_Assert(0);
        -:  719:		    break;
        -:  720:		    /* --END ERROR HANDLING-- */
        -:  721:	    }
   308719:  722:	    cur_elmp->curblock = cur_elmp->orig_block;
        -:  723:
   308719:  724:	    if (cur_elmp->curcount == 0) {
        -:  725:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  726:		DLOOP_dbg_printf("\talso hit end of count; elmp=%x [%d]\n",
        -:  727:				(unsigned) cur_elmp, cur_sp);
        -:  728:#endif
   183317:  729:		DLOOP_SEGMENT_POP_AND_MAYBE_EXIT;
        -:  730:	    }
        -:  731:	}
        -:  732:	else /* push the stackelm */ {
        -:  733:	    DLOOP_Dataloop_stackelm *next_elmp;
        -:  734:	    int count_index, block_index;
        -:  735:
 12946663:  736:	    count_index = cur_elmp->orig_count - cur_elmp->curcount;
 12946663:  737:	    block_index = cur_elmp->orig_block - cur_elmp->curblock;
        -:  738:
        -:  739:	    /* reload the next stackelm if necessary */
 12946663:  740:	    next_elmp = &(segp->stackelm[cur_sp + 1]);
 12946663:  741:	    if (cur_elmp->may_require_reloading) {
    #####:  742:		DLOOP_Dataloop *load_dlp = NULL;
    #####:  743:		switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  744:		    case DLOOP_KIND_CONTIG:
        -:  745:		    case DLOOP_KIND_VECTOR:
        -:  746:		    case DLOOP_KIND_BLOCKINDEXED:
        -:  747:		    case DLOOP_KIND_INDEXED:
    #####:  748:			load_dlp = cur_elmp->loop_p->loop_params.cm_t.dataloop;
    #####:  749:			break;
        -:  750:		    case DLOOP_KIND_STRUCT:
    #####:  751:			load_dlp = DLOOP_STACKELM_STRUCT_DATALOOP(cur_elmp,
        -:  752:								  count_index);
    #####:  753:			break;
        -:  754:		    default:
        -:  755:			/* --BEGIN ERROR HANDLING-- */
    #####:  756:			DLOOP_Assert(0);
        -:  757:			break;
        -:  758:			/* --END ERROR HANDLING-- */
        -:  759:		}
        -:  760:
        -:  761:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  762:		DLOOP_dbg_printf("\tloading dlp=%x, elmp=%x [%d]\n",
        -:  763:				 (unsigned) load_dlp,
        -:  764:				 (unsigned) next_elmp,
        -:  765:				 cur_sp+1);
        -:  766:#endif
        -:  767:
    #####:  768:		DLOOP_Stackelm_load(next_elmp,
        -:  769:				    load_dlp,
        -:  770:				    1);
        -:  771:	    }
        -:  772:
        -:  773:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  774:	    DLOOP_dbg_printf("\tpushing type, elmp=%x [%d], count=%d, block=%d\n",
        -:  775:			    (unsigned) cur_elmp, cur_sp, count_index,
        -:  776:			     block_index);
        -:  777:#endif
        -:  778:	    /* set orig_offset and all cur values for new stackelm.
        -:  779:	     * this is done in two steps: first set orig_offset based on
        -:  780:	     * current stackelm, then set cur values based on new stackelm.
        -:  781:	     */
 12946663:  782:	    switch (cur_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  783:		case DLOOP_KIND_CONTIG:
 12790507:  784:		    next_elmp->orig_offset = cur_elmp->curoffset +
        -:  785:			(DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent;
 12790507:  786:		    break;
        -:  787:		case DLOOP_KIND_VECTOR:
        -:  788:		    /* note: stride is in bytes */
   154846:  789:		    next_elmp->orig_offset = cur_elmp->orig_offset +
        -:  790:			(DLOOP_Offset) count_index * cur_elmp->loop_p->loop_params.v_t.stride +
        -:  791:			(DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent;
   154846:  792:		    break;
        -:  793:		case DLOOP_KIND_BLOCKINDEXED:
     1139:  794:		    next_elmp->orig_offset = cur_elmp->orig_offset +
        -:  795:			(DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent +
        -:  796:			DLOOP_STACKELM_BLOCKINDEXED_OFFSET(cur_elmp,
        -:  797:							   count_index);
     1139:  798:		    break;
        -:  799:		case DLOOP_KIND_INDEXED:
      171:  800:		    next_elmp->orig_offset = cur_elmp->orig_offset +
        -:  801:			(DLOOP_Offset) block_index * cur_elmp->loop_p->el_extent +
        -:  802:			DLOOP_STACKELM_INDEXED_OFFSET(cur_elmp, count_index);
      171:  803:		    break;
        -:  804:		case DLOOP_KIND_STRUCT:
    #####:  805:		    next_elmp->orig_offset = cur_elmp->orig_offset +
        -:  806:			(DLOOP_Offset) block_index * DLOOP_STACKELM_STRUCT_EL_EXTENT(cur_elmp, count_index) +
        -:  807:			DLOOP_STACKELM_STRUCT_OFFSET(cur_elmp, count_index);
    #####:  808:		    break;
        -:  809:		default:
        -:  810:		    /* --BEGIN ERROR HANDLING-- */
    #####:  811:		    DLOOP_Assert(0);
        -:  812:		    break;
        -:  813:		    /* --END ERROR HANDLING-- */
        -:  814:	    }
        -:  815:
        -:  816:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  817:	    DLOOP_dbg_printf("\tstep 1: next orig_offset = " MPI_AINT_FMT_DEC_SPEC " (0x" MPI_AINT_FMT_HEX_SPEC ")\n",
        -:  818:			     next_elmp->orig_offset,
        -:  819:			     next_elmp->orig_offset);
        -:  820:#endif
        -:  821:
 12946663:  822:	    switch (next_elmp->loop_p->kind & DLOOP_KIND_MASK) {
        -:  823:		case DLOOP_KIND_CONTIG:
        -:  824:		case DLOOP_KIND_VECTOR:
   327835:  825:		    next_elmp->curcount  = next_elmp->orig_count;
   327835:  826:		    next_elmp->curblock  = next_elmp->orig_block;
   327835:  827:		    next_elmp->curoffset = next_elmp->orig_offset;
   327835:  828:		    break;
        -:  829:		case DLOOP_KIND_BLOCKINDEXED:
      560:  830:		    next_elmp->curcount  = next_elmp->orig_count;
      560:  831:		    next_elmp->curblock  = next_elmp->orig_block;
      560:  832:		    next_elmp->curoffset = next_elmp->orig_offset +
        -:  833:			DLOOP_STACKELM_BLOCKINDEXED_OFFSET(next_elmp, 0);
      560:  834:		    break;
        -:  835:		case DLOOP_KIND_INDEXED:
 12618268:  836:		    next_elmp->curcount  = next_elmp->orig_count;
 12618268:  837:		    next_elmp->curblock  =
        -:  838:			DLOOP_STACKELM_INDEXED_BLOCKSIZE(next_elmp, 0);
 12618268:  839:		    next_elmp->curoffset = next_elmp->orig_offset +
        -:  840:			DLOOP_STACKELM_INDEXED_OFFSET(next_elmp, 0);
 12618268:  841:		    break;
        -:  842:		case DLOOP_KIND_STRUCT:
    #####:  843:		    next_elmp->curcount = next_elmp->orig_count;
    #####:  844:		    next_elmp->curblock =
        -:  845:			DLOOP_STACKELM_STRUCT_BLOCKSIZE(next_elmp, 0);
    #####:  846:		    next_elmp->curoffset = next_elmp->orig_offset +
        -:  847:			DLOOP_STACKELM_STRUCT_OFFSET(next_elmp, 0);
    #####:  848:		    break;
        -:  849:		default:
        -:  850:		    /* --BEGIN ERROR HANDLING-- */
    #####:  851:		    DLOOP_Assert(0);
        -:  852:		    break;
        -:  853:		    /* --END ERROR HANDLING-- */
        -:  854:	    }
        -:  855:
        -:  856:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  857:	    DLOOP_dbg_printf("\tstep 2: next curoffset = " MPI_AINT_FMT_DEC_SPEC " (0x" MPI_AINT_FMT_HEX_SPEC ")\n",
        -:  858:			     next_elmp->curoffset,
        -:  859:			     next_elmp->curoffset);
        -:  860:#endif
        -:  861:
 12946663:  862:	    cur_elmp->curblock--;
 12946663:  863:	    DLOOP_SEGMENT_PUSH;
        -:  864:	} /* end of else push the stackelm */
        -:  865:    } /* end of for (;;) */
        -:  866:
        -:  867:#ifdef DLOOP_DEBUG_MANIPULATE
        -:  868:    DLOOP_dbg_printf("hit end of datatype\n");
        -:  869:#endif
        -:  870:
        -:  871:    DLOOP_SEGMENT_SAVE_LOCAL_VALUES;
        -:  872:    return;
        -:  873:}
        -:  874:
        -:  875:/* DLOOP_Stackelm_blocksize - returns the block size for a stackelm, taken
        -:  876: * from the loop_params of the dataloop it points at (elmp->loop_p).
        -:  877: *
        -:  878: * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct
        -:  879: * before this is called: indexed and struct loops read entry
        -:  880: * (orig_count - curcount) of their blocksize_array.
        -:  881: */
        -:  882:static inline DLOOP_Count DLOOP_Stackelm_blocksize(struct DLOOP_Dataloop_stackelm *elmp)
  1483542:  883:{
  1483542:  884:    struct DLOOP_Dataloop *dlp = elmp->loop_p;
        -:  885:
  1483542:  886:    switch(dlp->kind & DLOOP_KIND_MASK) {
        -:  887:	case DLOOP_KIND_CONTIG:
        -:  888:	    /* NOTE: we're dropping the count into the
        -:  889:	     * blksize field for contigs, as described
        -:  890:	     * in the init call.
        -:  891:	     */
   729814:  892:	    return dlp->loop_params.c_t.count;
        -:  893:	    break; /* not reached */
        -:  894:	case DLOOP_KIND_VECTOR:
   567777:  895:	    return dlp->loop_params.v_t.blocksize; /* one blocksize for every block */
        -:  896:	    break; /* not reached */
        -:  897:	case DLOOP_KIND_BLOCKINDEXED:
     5370:  898:	    return dlp->loop_params.bi_t.blocksize; /* one blocksize for every block */
        -:  899:	    break; /* not reached */
        -:  900:	case DLOOP_KIND_INDEXED:
   180581:  901:	    return dlp->loop_params.i_t.blocksize_array[elmp->orig_count - elmp->curcount]; /* per-block size */
        -:  902:	    break; /* not reached */
        -:  903:	case DLOOP_KIND_STRUCT:
    #####:  904:	    return dlp->loop_params.s_t.blocksize_array[elmp->orig_count - elmp->curcount]; /* per-block size */
        -:  905:	    break; /* not reached */
        -:  906:	default:
        -:  907:	    /* --BEGIN ERROR HANDLING-- */
    #####:  908:	    DLOOP_Assert(0); /* kind matched none of the cases above */
        -:  909:	    break;
        -:  910:	    /* --END ERROR HANDLING-- */
        -:  911:    }
        -:  912:    return -1; /* reached only via the default case, if DLOOP_Assert(0) returns */
        -:  913:}
        -:  914:
        -:  915:/* DLOOP_Stackelm_offset - returns starting offset (displacement) for stackelm
        -:  916: * based on current count in stackelm: 0 for vector and contig loops, otherwise
        -:  917: * entry (orig_count - curcount) of the loop's offset_array.
        -:  918: *
        -:  919: * NOTE: loop_p, orig_count, and curcount members of stackelm MUST be correct
        -:  920: * before this is called!
        -:  921: *
        -:  922: * Also, this really is only good at init time for vectors and contigs
        -:  923: * (all the time for indexed) at the moment.
        -:  924: */
        -:  925:static inline DLOOP_Offset DLOOP_Stackelm_offset(struct DLOOP_Dataloop_stackelm *elmp)
  1311387:  926:{
  1311387:  927:    struct DLOOP_Dataloop *dlp = elmp->loop_p;
        -:  928:
  1311387:  929:    switch(dlp->kind & DLOOP_KIND_MASK) {
        -:  930:	case DLOOP_KIND_VECTOR:
        -:  931:	case DLOOP_KIND_CONTIG:
  1290602:  932:	    return 0; /* no offset array is consulted for these kinds */
        -:  933:	    break; /* not reached */
        -:  934:	case DLOOP_KIND_BLOCKINDEXED:
     5314:  935:	    return dlp->loop_params.bi_t.offset_array[elmp->orig_count - elmp->curcount];
        -:  936:	    break; /* not reached */
        -:  937:	case DLOOP_KIND_INDEXED:
    15471:  938:	    return dlp->loop_params.i_t.offset_array[elmp->orig_count - elmp->curcount];
        -:  939:	    break; /* not reached */
        -:  940:	case DLOOP_KIND_STRUCT:
    #####:  941:	    return dlp->loop_params.s_t.offset_array[elmp->orig_count - elmp->curcount];
        -:  942:	    break; /* not reached */
        -:  943:	default:
        -:  944:	    /* --BEGIN ERROR HANDLING-- */
    #####:  945:	    DLOOP_Assert(0); /* kind matched none of the cases above */
        -:  946:	    break;
        -:  947:	    /* --END ERROR HANDLING-- */
        -:  948:    }
        -:  949:    return -1; /* reached only via the default case, if DLOOP_Assert(0) returns */
        -:  950:}
        -:  951:
        -:  952:/* DLOOP_Stackelm_load - point stackelm 'elmp' at dataloop 'dlp'.
        -:  953: * loop_p, orig_count, orig_block, curcount, and may_require_reloading are set here;
        -:  954: * no other member is written by this function (the rest are filled in at processing time).
        -:  955: */
        -:  956:static inline void DLOOP_Stackelm_load(struct DLOOP_Dataloop_stackelm *elmp,
        -:  957:				       struct DLOOP_Dataloop *dlp,
        -:  958:				       int branch_flag)
  1418779:  959:{
  1418779:  960:    elmp->loop_p = dlp;
        -:  961:
  1418779:  962:    if ((dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_CONTIG) {
   725453:  963:	elmp->orig_count = 1; /* the contig count ends up in orig_block instead (set below) */
        -:  964:    }
        -:  965:    else {
   693326:  966:	elmp->orig_count = dlp->loop_params.count;
        -:  967:    }
        -:  968:
  1418779:  969:    if (branch_flag || (dlp->kind & DLOOP_KIND_MASK) == DLOOP_KIND_STRUCT) /* nonzero flag or a struct loop */
        -:  970:    {
    #####:  971:	elmp->may_require_reloading = 1;
        -:  972:    }
        -:  973:    else {
  1418779:  974:	elmp->may_require_reloading = 0;
        -:  975:    }
        -:  976:
        -:  977:    /* required by DLOOP_Stackelm_blocksize, which indexes with (orig_count - curcount) */
  1418779:  978:    elmp->curcount = elmp->orig_count;
        -:  979:
  1418779:  980:    elmp->orig_block = DLOOP_Stackelm_blocksize(elmp);
        -:  981:    /* TODO: GO AHEAD AND FILL IN CURBLOCK? */
  1418779:  982:}
        -:  983:
        -:  984:/*
        -:  985: * Local variables:
        -:  986: * c-indent-tabs-mode: nil
        -:  987: * End:
        -:  988: */