-:    0:Source:/home/MPI/testing/mpich2/mpich2/src/mpid/common/datatype/dataloop/dataloop_create_indexed.c
        -:    0:Graph:dataloop_create_indexed.gcno
        -:    0:Data:dataloop_create_indexed.gcda
        -:    0:Runs:3086
        -:    0:Programs:893
        -:    1:/* -*- Mode: C; c-basic-offset:4 ; -*- */
        -:    2:
        -:    3:/*
        -:    4: *  (C) 2001 by Argonne National Laboratory.
        -:    5: *      See COPYRIGHT in top-level directory.
        -:    6: */
        -:    7:
        -:    8:#include <stdlib.h>
        -:    9:
        -:   10:#include "./dataloop.h"
        -:   11:
        -:   12:static void DLOOP_Type_indexed_array_copy(DLOOP_Count count, /* forward declaration of the file-local helper defined later in this file */
        -:   13:					  DLOOP_Count contig_count,
        -:   14:					  int *input_blocklength_array, /* named in_blklen_array in the definition */
        -:   15:					  void *input_displacement_array, /* named in_disp_array in the definition */
        -:   16:					  DLOOP_Count *output_blocklength_array, /* named out_blklen_array in the definition */
        -:   17:					  DLOOP_Offset *out_disp_array,
        -:   18:					  int dispinbytes,
        -:   19:					  DLOOP_Offset old_extent);
        -:   20:
        -:   21:/*@
        -:   22:   DLOOP_Dataloop_create_indexed - build a dataloop for an indexed type, delegating to the contiguous or blockindexed constructors when the input allows it
        -:   23:
        -:   24:   Arguments:
        -:   25:+  int icount - number of entries in the blocklength and displacement arrays
        -:   26:.  int *blocklength_array - length of each block, counted in elements of oldtype
        -:   27:.  void *displacement_array - displacement of each block (either ints or MPI_Aints)
        -:   28:.  int dispinbytes - nonzero: displacements are read as MPI_Aints in bytes; zero: read as ints and scaled by oldtype's extent
        -:   29:.  MPI_Datatype oldtype - type of the elements that make up each block
        -:   30:.  DLOOP_Dataloop **dlp_p - on the indexed path, receives the newly allocated dataloop
        -:   31:.  int *dlsz_p - on the indexed path, receives the size value produced by the allocation call
        -:   32:.  int *dldepth_p - on the indexed path, receives oldtype's loop depth plus one (1 for a builtin)
        -:   33:-  int flag - forwarded to the handle macros and to the delegated constructors; compared against DLOOP_DATALOOP_ALL_BYTES for builtin element types
        -:   34:   Return value: MPI_SUCCESS from the indexed path, -1 if allocation there fails, otherwise whatever the delegated constructor returns.
        -:   35:.N Errors
        -:   36:.N Returns 0 on success, -1 on error.
        -:   37:@*/
        -:   38:
        -:   39:int PREPEND_PREFIX(Dataloop_create_indexed)(int icount,
        -:   40:					    int *blocklength_array,
        -:   41:					    void *displacement_array,
        -:   42:					    int dispinbytes,
        -:   43:					    MPI_Datatype oldtype,
        -:   44:					    DLOOP_Dataloop **dlp_p,
        -:   45:					    int *dlsz_p,
        -:   46:					    int *dldepth_p,
        -:   47:					    int flag)
    72493:   48:{
        -:   49:    int err, is_builtin;
        -:   50:    int i, new_loop_sz, old_loop_depth, blksz;
        -:   51:    DLOOP_Count first; /* index of the first block whose length is nonzero */
        -:   52:
    72493:   53:    DLOOP_Count old_type_count = 0, contig_count, count;
        -:   54:    DLOOP_Offset old_extent;
        -:   55:    struct DLOOP_Dataloop *new_dlp;
        -:   56:
    72493:   57:    count = (DLOOP_Count) icount; /* avoid subsequent casting */
        -:   58:
        -:   59:
        -:   60:    /* if count is zero, delegate to the contiguous constructor with a count of 0, using MPI_INT as a stand-in element type */
    72493:   61:    if (count == 0)
        -:   62:    {
    #####:   63:	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
        -:   64:							 MPI_INT,
        -:   65:							 dlp_p,
        -:   66:							 dlsz_p,
        -:   67:							 dldepth_p,
        -:   68:							 flag);
    #####:   69:	return err;
        -:   70:    }
        -:   71:
        -:   72:    /* Skip any initial zero-length blocks; first ends up equal to count when every block length is zero */
    78109:   73:    for (first = 0; first < count; first++)
    77275:   74:        if ((DLOOP_Count) blocklength_array[first])
    71659:   75:            break;
        -:   76:    
        -:   77:
    72493:   78:    is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1; /* builtin means the handle has no dataloop of its own */
        -:   79:
    72493:   80:    if (is_builtin)
        -:   81:    {
    71137:   82:	DLOOP_Handle_get_extent_macro(oldtype, old_extent);
    71137:   83:	old_loop_depth = 0; /* a builtin contributes no nesting below the new loop */
        -:   84:    }
        -:   85:    else
        -:   86:    {
     1356:   87:	DLOOP_Handle_get_extent_macro(oldtype, old_extent);
     1356:   88:	DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
        -:   89:    }
        -:   90:
        -:   92:    { /* NOTE(review): source lines 91 and 93 are absent from this listing, so the statement owning this brace pair is not visible. No visible line updates old_type_count after it is initialized to 0; presumably the missing lines total the block lengths into it -- confirm against the original file. */
        -:   94:    }
        -:   95:
    72493:   96:    contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count,
        -:   97:                                                             blocklength_array,
        -:   98:                                                             displacement_array,
        -:   99:                                                             dispinbytes,
        -:  100:                                                             old_extent);
        -:  101:
        -:  102:    /* if contig_count is zero (no data), delegate to the contiguous constructor exactly as for count == 0 */
    72493:  103:    if (contig_count == 0)
        -:  104:    {
    #####:  105:	err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
        -:  106:							 MPI_INT,
        -:  107:							 dlp_p,
        -:  108:							 dlsz_p,
        -:  109:							 dldepth_p,
        -:  110:							 flag);
    #####:  111:	return err;
        -:  112:    }
        -:  113:
        -:  114:    /* optimization:
        -:  115:     *
        -:  116:     * if contig_count == 1 and block starts at displacement 0,
        -:  117:     * store it as a contiguous rather than an indexed dataloop.
        -:  118:     */    
    72493:  119:    if ((contig_count == 1) &&
        -:  120:	((!dispinbytes && ((int *) displacement_array)[first] == 0) ||
        -:  121:	 (dispinbytes && ((MPI_Aint *) displacement_array)[first] == 0))) /* NOTE(review): if every block length is zero, first == count and these reads index past the icount entries -- confirm callers cannot pass such input */
        -:  122:    {
    29190:  123:	err = PREPEND_PREFIX(Dataloop_create_contiguous)((int) old_type_count,
        -:  124:							 oldtype,
        -:  125:							 dlp_p,
        -:  126:							 dlsz_p,
        -:  127:							 dldepth_p,
        -:  128:							 flag);
    29190:  129:	return err;
        -:  130:    }
        -:  131:
        -:  132:    /* optimization:
        -:  133:     *
        -:  134:     * if contig_count == 1 (and displacement != 0), store this as
        -:  135:     * a single element blockindexed rather than a lot of individual
        -:  136:     * blocks.
        -:  137:     */
    43303:  138:    if (contig_count == 1)
        -:  139:    {
     5014:  140:	err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
        -:  141:							   (int) old_type_count,
        -:  142:							   &(((int *)displacement_array)[first]), /* NOTE(review): indexed as int even when dispinbytes is set, unlike the MPI_Aint read in the test above -- confirm this yields the intended address when first != 0 */
        -:  143:							   dispinbytes,
        -:  144:							   oldtype,
        -:  145:							   dlp_p,
        -:  146:							   dlsz_p,
        -:  147:							   dldepth_p,
        -:  148:							   flag);
        -:  149:
     5014:  150:	return err;
        -:  151:    }
        -:  152:
        -:  153:    /* optimization:
        -:  154:     *
        -:  155:     * if block length is the same for all blocks, store it as a
        -:  156:     * blockindexed rather than an indexed dataloop.
        -:  157:     */
    38289:  158:    blksz = blocklength_array[first];
 84194654:  159:    for (i = first+1; i < count; i++)
        -:  160:    {
 84171621:  161:	if (blocklength_array[i] != blksz)
        -:  162:	{
    15256:  163:	    blksz--; /* make blksz differ from blocklength_array[first] so the equality test after the loop fails */
    15256:  164:	    break;
        -:  165:	}
        -:  166:    }
    38289:  167:    if (blksz == blocklength_array[first]) /* true only when the loop above found no differing length */
        -:  168:    {
    23033:  169:	err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first,
        -:  170:							   blksz,
        -:  171:							   &(((int *)displacement_array)[first]), /* NOTE(review): same int-typed indexing regardless of dispinbytes as in the single-block case above -- confirm */
        -:  172:							   dispinbytes,
        -:  173:							   oldtype,
        -:  174:							   dlp_p,
        -:  175:							   dlsz_p,
        -:  176:							   dldepth_p,
        -:  177:							   flag);
        -:  178:
    23033:  179:	return err;
        -:  180:    }
        -:  181:
        -:  182:    /* note: blockindexed looks for the vector optimization */
        -:  183:
        -:  184:    /* TODO: optimization:
        -:  185:     *
        -:  186:     * if an indexed of a contig, absorb the contig into the blocklen array
        -:  187:     * and keep the same overall depth
        -:  188:     */
        -:  189:
        -:  190:    /* otherwise storing as an indexed dataloop */
        -:  191:
    15256:  192:    if (is_builtin)
        -:  193:    {
    15252:  194:	PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
        -:  195:				       count, /* this branch passes count; the non-builtin branch below passes contig_count */
        -:  196:				       &new_dlp,
        -:  197:				       &new_loop_sz);
        -:  198:	/* --BEGIN ERROR HANDLING-- */
    15252:  199:	if (!new_dlp) return -1;
        -:  200:	/* --END ERROR HANDLING-- */
        -:  201:
    15252:  202:	new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK; /* only the builtin branch sets DLOOP_FINAL_MASK */
        -:  203:
    15252:  204:	if (flag == DLOOP_DATALOOP_ALL_BYTES)
        -:  205:	{
        -:  206:	    /* element becomes a single byte; blocklengths are scaled by old_extent after the array copy below */
    #####:  207:	    new_dlp->el_size   = 1;
    #####:  208:	    new_dlp->el_extent = 1;
    #####:  209:	    new_dlp->el_type   = MPI_BYTE;
        -:  210:	}
        -:  211:	else
        -:  212:	{
    15252:  213:	    new_dlp->el_size   = old_extent; /* size and extent are both taken from old_extent for a builtin */
    15252:  214:	    new_dlp->el_extent = old_extent;
    15252:  215:	    new_dlp->el_type   = oldtype;
        -:  216:	}
        -:  217:    }
        -:  218:    else
        -:  219:    {
        4:  220:	DLOOP_Dataloop *old_loop_ptr = NULL;
        4:  221:	int old_loop_sz = 0;
        -:  222:
        4:  223:	DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
        4:  224:	DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
        -:  225:
        4:  226:	PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
        -:  227:						contig_count,
        -:  228:						old_loop_ptr, /* oldtype's existing dataloop and its size are handed to the allocator */
        -:  229:						old_loop_sz,
        -:  230:						&new_dlp,
        -:  231:						&new_loop_sz);
        -:  232:	/* --BEGIN ERROR HANDLING-- */
        4:  233:	if (!new_dlp) return -1;
        -:  234:	/* --END ERROR HANDLING-- */
        -:  235:
        4:  236:	new_dlp->kind = DLOOP_KIND_INDEXED;
        -:  237:
        4:  238:	DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
        4:  239:	DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
        4:  240:	DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
        -:  241:    }
        -:  242:
    15256:  243:    new_dlp->loop_params.i_t.count        = contig_count;
    15256:  244:    new_dlp->loop_params.i_t.total_blocks = old_type_count; /* see the NOTE(review) on old_type_count earlier in this function */
        -:  245:
        -:  246:    /* copy in blocklength and displacement parameters (in that order)
        -:  247:     *
        -:  248:     * regardless of dispinbytes, we store displacements in bytes in loop.
        -:  249:     */
    15256:  250:    DLOOP_Type_indexed_array_copy(count,
        -:  251:				  contig_count,
        -:  252:				  blocklength_array,
        -:  253:				  displacement_array,
        -:  254:				  new_dlp->loop_params.i_t.blocksize_array,
        -:  255:				  new_dlp->loop_params.i_t.offset_array,
        -:  256:				  dispinbytes,
        -:  257:				  old_extent);
        -:  258:
    15256:  259:    if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES))
        -:  260:    {
        -:  261:	DLOOP_Count *tmp_blklen_array =
    #####:  262:	    new_dlp->loop_params.i_t.blocksize_array;
        -:  263:
    #####:  264:	for (i=0; i < contig_count; i++)
        -:  265:	{
        -:  266:	    /* increase block lengths so they are in bytes */
    #####:  267:	    tmp_blklen_array[i] *= old_extent;
        -:  268:	}
        -:  269:
    #####:  270:        new_dlp->loop_params.i_t.total_blocks *= old_extent; /* the total is scaled by old_extent as well, matching the per-block lengths */
        -:  271:    }
        -:  272:
    15256:  273:    *dlp_p     = new_dlp;
    15256:  274:    *dlsz_p    = new_loop_sz;
    15256:  275:    *dldepth_p = old_loop_depth + 1; /* the new indexed loop adds one level on top of oldtype */
        -:  276:
    15256:  277:    return MPI_SUCCESS;
        -:  278:}
        -:  279:
        -:  280:/* DLOOP_Type_indexed_array_copy()
        -:  281: *
        -:  282: * Copies arrays into place, combining adjacent contiguous regions and
        -:  283: * dropping zero-length regions.
        -:  284: * count is the number of input entries; contig_count is the number of output entries the caller expects, which is checked with DLOOP_Assert.
        -:  285: * Extent passed in is for the original type.
        -:  286: * in_disp_array is read as int and multiplied by old_extent when dispinbytes is zero, and read as MPI_Aint otherwise.
        -:  287: * Output displacements are always output in bytes, while block
        -:  288: * lengths are always output in terms of the base type.
        -:  289: */
        -:  290:static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
        -:  291:					  DLOOP_Count contig_count,
        -:  292:					  int *in_blklen_array,
        -:  293:					  void *in_disp_array,
        -:  294:					  DLOOP_Count *out_blklen_array,
        -:  295:					  DLOOP_Offset *out_disp_array,
        -:  296:					  int dispinbytes,
        -:  297:					  DLOOP_Offset old_extent)
    15256:  298:{
    15256:  299:    DLOOP_Count i, first, cur_idx = 0; /* cur_idx is the output slot currently being filled */
        -:  300:
        -:  301:    /* Skip any initial zero-length blocks; first would equal count if every block length were zero */
    15256:  302:    for (first = 0; first < count; ++first)
    15256:  303:        if ((DLOOP_Count) in_blklen_array[first])
    15256:  304:            break;
        -:  305:    
    15256:  306:    out_blklen_array[0] = (DLOOP_Count) in_blklen_array[first]; /* output slot 0 starts as the first nonzero block */
        -:  307:
    15256:  308:    if (!dispinbytes)
        -:  309:    {
     5298:  310:	out_disp_array[0] = (DLOOP_Offset)
        -:  311:	    ((int *) in_disp_array)[first] * old_extent; /* int displacement scaled to bytes */
        -:  312:	
  2103662:  313:	for (i = first+1; i < count; ++i)
        -:  314:	{
  2098364:  315:	    if (in_blklen_array[i] == 0)
        -:  316:	    {
    #####:  317:		continue; /* zero-length blocks produce no output entry */
        -:  318:	    }
  2098364:  319:	    else if (out_disp_array[cur_idx] +
        -:  320:		     ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
        -:  321:		     ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent) /* byte offset just past the current output block equals this block's byte offset */
        -:  322:	    {
        -:  323:		/* adjacent to current block; add to block */
     2052:  324:		out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i];
        -:  325:	    }
        -:  326:	    else
        -:  327:	    {
  2096312:  328:		cur_idx++; /* not adjacent: open a new output entry */
  2096312:  329:		DLOOP_Assert(cur_idx < contig_count);
  2096312:  330:		out_disp_array[cur_idx] =
        -:  331:		    ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
  2096312:  332:		out_blklen_array[cur_idx] = in_blklen_array[i];
        -:  333:	    }
        -:  334:	}
        -:  335:    }
        -:  336:    else /* input displacements already in bytes */
        -:  337:    {
     9958:  338:	out_disp_array[0] = (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[first];
        -:  339:	
   110393:  340:	for (i = first+1; i < count; ++i)
        -:  341:	{
   100435:  342:	    if (in_blklen_array[i] == 0)
        -:  343:	    {
        2:  344:		continue; /* zero-length blocks produce no output entry */
        -:  345:	    }
   100433:  346:	    else if (out_disp_array[cur_idx] +
        -:  347:		     ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
        -:  348:		     ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i])) /* same adjacency test as the int branch, with no scaling of the input displacement */
        -:  349:	    {
        -:  350:		/* adjacent to current block; add to block */
    21848:  351:		out_blklen_array[cur_idx] += in_blklen_array[i];
        -:  352:	    }
        -:  353:	    else
        -:  354:	    {
    78585:  355:		cur_idx++; /* not adjacent: open a new output entry */
    78585:  356:		DLOOP_Assert(cur_idx < contig_count);
    78585:  357:		out_disp_array[cur_idx] =
        -:  358:		    (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i];
    78585:  359:		out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i];
        -:  360:	    }
        -:  361:	}
        -:  362:    }
        -:  363:
    15256:  364:    DLOOP_Assert(cur_idx == contig_count - 1); /* the number of entries written must match the caller's contig_count */
        -:  365:    return;
        -:  366:}
        -:  367:
        -:  368:/* DLOOP_Type_indexed_count_contig()
        -:  369: *
        -:  370: * Determines the actual number of contiguous blocks represented by the
        -:  371: * blocklength/displacement arrays.  This might be less than count (as
        -:  372: * few as 1).
        -:  373: * Displacements are read as int when dispinbytes is zero and as MPI_Aint otherwise; zero-length blocks are not counted.
        -:  374: * Extent passed in is for the original type.
        -:  375: */
        -:  376:DLOOP_Count PREPEND_PREFIX(Type_indexed_count_contig)(DLOOP_Count count,
        -:  377:                                                      int *blocklength_array,
        -:  378:                                                      void *displacement_array,
        -:  379:                                                      int dispinbytes,
        -:  380:                                                      DLOOP_Offset old_extent)
    93604:  381:{
    93604:  382:    DLOOP_Count i, contig_count = 1; /* starts at 1; the visible code only ever increments it, so count == 0 also returns 1 */
        -:  383:    DLOOP_Count cur_blklen, first;
        -:  384:
    93604:  385:    if (count)
        -:  386:    {
        -:  387:        /* Skip any initial zero-length blocks */
   101322:  388:        for (first = 0; first < count; ++first)
   100488:  389:            if ((DLOOP_Count) blocklength_array[first])
    92770:  390:                break;
        -:  391:        
    93604:  392:        cur_blklen = (DLOOP_Count) blocklength_array[first]; /* NOTE(review): if every block length is zero, first == count and this reads blocklength_array[count], as do the displacement reads below -- confirm whether such input can reach here */
    93604:  393:        if (!dispinbytes)
        -:  394:        {
        -:  395:            DLOOP_Offset cur_tdisp =
    59145:  396:                (DLOOP_Offset) ((int *) displacement_array)[first];
        -:  397:	
        -:  399:            { /* NOTE(review): source lines 398, 400, 404 and 412-414 are absent from this listing. By analogy with the byte-displacement branch below they presumably hold the loop header, the zero-length test, the adjacency test and the new-block bookkeeping -- confirm against the original file. */
        -:  401:                {
     6485:  402:                    continue;
        -:  403:                }
        -:  405:                         (DLOOP_Offset) ((int *) displacement_array)[i])
        -:  406:                {
        -:  407:                    /* adjacent to current block; add to block */
  7908948:  408:                    cur_blklen += (DLOOP_Count) blocklength_array[i];
        -:  409:                }
        -:  410:                else
        -:  411:                {
        -:  415:                }
        -:  416:            }
        -:  417:        }
        -:  418:        else
        -:  419:        {
        -:  420:            DLOOP_Offset cur_bdisp =
    34459:  421:                (DLOOP_Offset) ((MPI_Aint *) displacement_array)[first];
        -:  422:	
 13300604:  423:            for (i = first+1; i < count; ++i)
        -:  424:            {
 13266145:  425:                if (blocklength_array[i] == 0)
        -:  426:                {
        3:  427:                    continue; /* zero-length blocks neither extend nor start a block */
        -:  428:                }
 13266142:  429:                else if (cur_bdisp + (DLOOP_Offset) cur_blklen * old_extent ==
        -:  430:                         (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i]) /* byte offset just past the current block equals this block's byte displacement */
        -:  431:                {
        -:  432:                    /* adjacent to current block; add to block */
 13181419:  433:                    cur_blklen += (DLOOP_Count) blocklength_array[i];
        -:  434:                }
        -:  435:                else
        -:  436:                {
    84723:  437:                    cur_bdisp  =
        -:  438:                        (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i]; /* not adjacent: restart tracking at this block and count it */
    84723:  439:                    cur_blklen = (DLOOP_Count) blocklength_array[i];
    84723:  440:                    contig_count++;
        -:  441:                }
        -:  442:            }
        -:  443:        }
        -:  444:    }
    93604:  445:    return contig_count; /* NOTE(review): no visible path returns 0, although the caller in this file tests contig_count == 0 -- confirm */
        -:  446:}