/*===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 */
#include <vdb/extern.h>

#include <klib/defs.h>
#include <klib/rc.h>
#include <vdb/table.h>
#include <vdb/xform.h>
#include <vdb/schema.h>
#include <kdb/meta.h>
#include <klib/data-buffer.h>
#include <bitstr.h>
#include <sysalloc.h>

#include <klib/out.h>

#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>

#include <insdc/insdc.h>

/* Per-function state for the cigar transform: the ALIGN_cigar factory fills
 * one of these in, copies it to the heap, and cigar_impl reads it per row. */
typedef struct trim_self_struct {
    uint8_t version; /* factory parameter; ALIGN_cigar accepts only 0 or 1 */
} self_t;

/*
 * cigar_string
 *  Builds the CIGAR text for one alignment row.
 *
 *  cigar_buf, bmax   output buffer and its capacity in bytes
 *  bsize             [OUT] number of characters written
 *  version           1: matches and mismatches are emitted separately as
 *                    '=' and 'X'; any other value: both are merged into 'M'
 *  has_mismatch      one flag per read base ( len entries )
 *  has_ref_offset    one flag per read base ( len entries )
 *  ref_offset        one entry per set has_ref_offset flag ( ro_len entries );
 *                    > 0 is emitted as 'D', < 0 as 'I', or as 'S' when it
 *                    sits at the very start or reaches the end of the read
 *  len, ro_len       element counts of the arrays above
 *
 *  A run of mismatches that reaches the end of the read is emitted as 'S'.
 *
 *  Returns 0 on success, rcData/rcInvalid for inconsistent input and
 *  rcBuffer/rcInsufficient when fewer than the reserved number of bytes
 *  remain in cigar_buf.
 */
static
rc_t cigar_string(char cigar_buf[], size_t bmax, size_t *bsize, int version,
                  uint8_t const has_mismatch[],
                  uint8_t const has_ref_offset[],
                  int32_t const ref_offset[],
                  unsigned len,
                  unsigned ro_len)
{
    size_t bsz = 0;
    uint32_t i,roi,m,mm;
    
/* emit the pending run of matches ( version 1 only ) */
#define MACRO_FLUSH_MATCH \
if(m > 0){ \
    if(bsz > bmax - 10) /*** reserve some space for %u%c ***/ \
        return RC(rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient ); \
    bsz += sprintf(cigar_buf+bsz,"%u=",(unsigned)m); \
    m = 0; \
}

/* emit the pending run of mismatches ( version 1 only ); 'S' at end of read */
#define MACRO_FLUSH_MISMATCH \
if(mm > 0){ \
    if(bsz > bmax - 10) /*** reserve some space for %u%c ***/ \
        return RC(rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient ); \
    if(i==len) bsz += sprintf(cigar_buf+bsz,"%uS",(unsigned)mm); \
    else       bsz += sprintf(cigar_buf+bsz,"%uX",(unsigned)mm); \
    mm = 0; \
}

/* emit matches and mismatches merged into 'M'; at end of read the trailing \
   mismatches are split off as 'S' */
#define MACRO_FLUSH_BOTH \
if(m+mm > 0) { \
    if(i==len) { \
        if(bsz > bmax - 20) /*** reserve some space for %u%c%u%c ***/ \
            return RC(rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient ); \
        if(m > 0)  bsz += sprintf(cigar_buf+bsz,"%uM",(unsigned)m); \
        if(mm > 0) bsz += sprintf(cigar_buf+bsz,"%uS",(unsigned)mm); \
    } else { \
        if(bsz > bmax - 10) /*** reserve some space for %u%c ***/ \
            return RC(rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient ); \
        bsz += sprintf(cigar_buf+bsz,"%uM",(unsigned)(m+mm)); \
    } \
    m=mm=0; \
}

#define MACRO_FLUSH \
if(version==1){ \
    MACRO_FLUSH_MATCH; \
    MACRO_FLUSH_MISMATCH; \
} else { \
    MACRO_FLUSH_BOTH; \
}
    for ( i = roi = bsz = m = mm = 0; i < len; ++i)
    {
        if(has_ref_offset[i]){ /*** this base carries an offset against the reference **/
            uint32_t offset;
            
            if( roi >= ro_len) /*** bad data ***/
                return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
            if( ref_offset[roi] > 0 ){ /*** insert in the reference, delete in sequence ***/
                if(i==0) /**** deletes in the beginning are disallowed, REF_START should have been moved and delete converted to insert **/
                    return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
                MACRO_FLUSH;
                if(bsz > bmax - 10) /*** reserve some space for %d%c ***/
                    return RC(rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient );
                bsz += sprintf(cigar_buf+bsz,"%dD",ref_offset[roi]);
            } else if ( ref_offset[roi] < 0){ /**** delete from the reference ***/
                offset = -ref_offset[roi];
                if( i + offset > len)
                    return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
                if(bsz > bmax - 10) /*** reserve some space for %u%c ***/
                    return RC(rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient );
                if(i > 0) { /** normally indels are after the current base ***/
                    MACRO_FLUSH;
                    if( i + offset == len) /*** end soft clip **/
                        bsz += sprintf(cigar_buf+bsz,"%uS",(unsigned)offset);
                    else 
                        bsz += sprintf(cigar_buf+bsz,"%uI",(unsigned)offset);
                } else { /***  this  is a soft clip at the beginning ***/
                    m=mm=0;
                    bsz += sprintf(cigar_buf+bsz,"%uS",(unsigned)offset);
                }
                i += offset; /* skip the inserted / clipped bases */
            } else { /*** Not possible ??? ***/
                return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
            }
            roi++;
            /* The skip above may have consumed the rest of the read; there is
               then no base left to classify, and has_mismatch[len] must not
               be read. Both run counters are zero here, so nothing is lost. */
            if(i >= len)
                break;
        }
        if(has_mismatch[i]){
            if(version==1){
                MACRO_FLUSH_MATCH;
            }
            mm++;
        } else {
            if(version==1) {
                MACRO_FLUSH_MISMATCH;
            } else {
                m+=mm;
                mm=0;
            }
            m++;
        }
    }
    MACRO_FLUSH;
    *bsize = bsz;
    return 0;
}

/*
 * cigar_impl
 *  Row function: renders the CIGAR text for one row into a local buffer via
 *  cigar_string and copies it to the output row as 8-bit elements.
 *
 *  argv[0] has_mismatch, argv[1] has_ref_offset ( 8-bit flags, same count ),
 *  argv[2] ref_offset ( 32-bit ).  data points at the self_t holding the
 *  version passed on to cigar_string.
 */
static
rc_t CC cigar_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    const self_t *params = data;
    uint32_t n_bases   = argv[0].u.data.elem_count;
    uint32_t n_offsets = argv[2].u.data.elem_count;
    const uint8_t *mismatch_flags = argv [ 0 ] . u . data . base;
    const uint8_t *offset_flags   = argv [ 1 ] . u . data . base;
    const int32_t *offsets        = argv [ 2 ] . u . data . base;
    char   text[1024];
    size_t text_len;
    rc_t rc;

    assert(argv[0].u.data.elem_bits == 8);
    assert(argv[1].u.data.elem_bits == 8);
    assert(argv[2].u.data.elem_bits == 32);

    assert(n_bases == argv[1].u.data.elem_count);

    /* step each input to the first element of this row */
    mismatch_flags += argv [ 0 ] . u . data . first_elem;
    offset_flags   += argv [ 1 ] . u . data . first_elem;
    offsets        += argv [ 2 ] . u . data . first_elem;

    rc = cigar_string(text, sizeof(text), &text_len, params->version,
                      mismatch_flags, offset_flags, offsets, n_bases, n_offsets);
    if (rc)
        return rc;

    /* size the output row to hold exactly the generated characters */
    rslt -> data -> elem_bits = 8;
    rc = KDataBufferResize ( rslt -> data, text_len );
    if ( rc == 0 ) {
        rslt -> elem_count = text_len;
        if ( text_len > 0 )
            memcpy ( rslt -> data -> base, text, text_len );
    }
    return rc;
}

/* Destructor hook stored in the function descriptor's whack slot:
 * releases the heap block handed over as the descriptor's self. */
static
void CC self_whack( void *ptr )
{
    free ( ptr );
}


/* 
 * function
 * ascii ALIGN:cigar #1 ( bool has_mismatch, bool has_ref_offset, I32 ref_offset);
 */
VTRANSFACT_IMPL ( ALIGN_cigar, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* Factory for the cigar row function.
     * Reads the version from the first factory parameter, accepts only 0 or 1
     * ( rcParam/rcIncorrect otherwise ), and hands cigar_impl a heap copy of
     * it that self_whack frees.  Returns rcMemory/rcExhausted if that copy
     * cannot be allocated. */
    self_t *self;
    uint8_t const version = cp -> argv [ 0 ] . data . u8 [ 0 ];

    switch ( version ) {
    case 0:
    case 1:
        break;
    default:
        return RC ( rcXF, rcFunction, rcConstructing, rcParam, rcIncorrect );
    }

    self = malloc ( sizeof * self );
    if ( self == NULL )
        return RC ( rcXF, rcFunction, rcConstructing, rcMemory, rcExhausted );
    self -> version = version;

    rslt -> u . rf = cigar_impl;
    rslt -> variant = vftRow;
    rslt -> self = self;
    rslt -> whack = self_whack;

    return 0;
}

/*
 * edit_distance_impl
 *  Row function producing a single 32-bit count per row.
 *
 *  argv[0] has_mismatch, argv[1] has_ref_offset ( 8-bit flags, same count ),
 *  argv[2] ref_offset ( 32-bit ).
 *
 *  The count is the number of mismatching bases in runs that are followed by
 *  a matching base.  Scanning starts after the leading clip ( first base
 *  flagged with a negative offset ), and a run of mismatches that reaches the
 *  end of the read is not counted.  Offsets after the first are not examined.
 *
 *  Returns rcData/rcInvalid when the first base is flagged but carries no
 *  offset entry, or carries a positive one.
 */
static
rc_t CC edit_distance_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    uint32_t i,mrun;
    uint32_t len    = argv[0].u.data.elem_count;
    uint32_t ro_len = argv[2].u.data.elem_count;
    uint32_t *dst;

    const uint8_t *has_mismatch   = argv [ 0 ] . u . data . base;
    const uint8_t *has_ref_offset = argv [ 1 ] . u . data . base;
    const int32_t *ref_offset     = argv [ 2 ] . u . data . base;
    
    assert(argv[0].u.data.elem_bits == 8);
    assert(argv[1].u.data.elem_bits == 8);
    assert(argv[2].u.data.elem_bits == 32);

    assert(len == argv[1].u.data.elem_count);

    has_mismatch   += argv [ 0 ] . u . data . first_elem;
    has_ref_offset += argv [ 1 ] . u . data . first_elem;
    ref_offset     += argv [ 2 ] . u . data . first_elem;

    /* one output element, wide enough for the uint32_t stored below */
    rslt -> data -> elem_bits = sizeof ( dst [ 0 ] ) * 8;
    rc = KDataBufferResize ( rslt -> data, 1);
    if ( rc != 0 ) return rc;
    rslt -> elem_count = 1;
    dst = rslt -> data -> base;
    dst[0]=0;
    if(len == 0) return 0; /** nothing to do **/

    if(has_ref_offset[0]){/** skip mismatches from the beginning == soft clip ***/
        if(ro_len == 0) /*** flag set but no offset recorded: bad data ***/
            return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
        if(ref_offset[0] > 0)  /**** deletes in the beginning are disallowed, REF_START should have been moved and delete converted to insert **/
            return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
        i=-ref_offset[0];
    } else {
        i=0;
    }
    for(mrun=0;i<len;i++){
        if(has_mismatch[i]){
            mrun++;
        } else {/*** intentionally skipping last run of mismatches **/
            dst[0]+=mrun;
            mrun=0;
        }
    }
    return 0;
}

/*
 * function
 * U32 NCBI:align:edit_distance #1 ( bool has_mismatch, bool has_ref_offset, I32 ref_offset);
 */
VTRANSFACT_IMPL ( NCBI_align_edit_distance, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs edit_distance_impl with variant vftRow;
       no factory parameters are read */
    rslt -> variant = vftRow;
    rslt -> u . rf = edit_distance_impl;
    return 0;
}


/*
 * function bool ALIGN:generate_has_mismatch #1 (INSDC:4na:bin reference,INSDC:4na:bin subject, bool has_ref_offset, I32 ref_offset);
 */
/*
 * generate_has_mismatch_impl
 *  Row function: for every subject base writes 0 when it shares at least one
 *  bit with the reference base it is aligned to, and 1 otherwise ( including
 *  when the aligned position falls outside the reference ).
 *
 *  argv[0] reference, argv[1] subject, argv[2] has_ref_offset ( one flag per
 *  subject base ), argv[3] ref_offset ( one entry per set flag, consumed in
 *  order ).  Each consumed offset shifts the reference position.
 *
 *  Returns rcData/rcInvalid when more flags are set than offsets supplied.
 */
static
rc_t CC generate_has_mismatch_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    int32_t si,ri,roi;
    uint32_t ref_len = argv[0].u.data.elem_count;
    uint32_t sbj_len = argv[1].u.data.elem_count;
    uint32_t hro_len = argv[2].u.data.elem_count;
    uint32_t ro_len  = argv[3].u.data.elem_count;
    const uint8_t *ref  = argv [ 0 ] . u . data . base;
    const uint8_t *sbj  = argv [ 1 ] . u . data . base;
    const uint8_t *has_ref_offset     = argv [ 2 ] . u . data . base;
    const int32_t *ref_offset = argv [ 3 ] . u . data . base;

    uint8_t * dst;
    uint32_t  len;

    assert(sbj_len == hro_len );
    len = sbj_len;

    ref            += argv [ 0 ] . u . data . first_elem;
    sbj            += argv [ 1 ] . u . data . first_elem;
    has_ref_offset += argv [ 2 ] . u . data . first_elem;
    ref_offset     += argv [ 3 ] . u . data . first_elem;

    /* one output flag per subject base */
    rslt -> data -> elem_bits = 8;
    rc = KDataBufferResize ( rslt -> data, len );
    if ( rc != 0 )
        return rc;
    rslt -> elem_count = len;
    dst = rslt -> data->base;
    for(si=ri=roi=0;si< len;si++,ri++){
        if(has_ref_offset[si] != 0){/*** need to offset the reference ***/
            if( roi >= ro_len){
                return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
            }
            /* consume this flag's offset so the next flag gets the next entry */
            ri += ref_offset[roi++];
        }
        if(ri >=0 && ri < ref_len && (sbj[si]&ref[ri])!=0)
            dst[si]=0;
        else
            dst[si]=1;
    }
    return 0;
}


VTRANSFACT_IMPL ( ALIGN_generate_has_mismatch, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs generate_has_mismatch_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = generate_has_mismatch_impl;
    return 0;
}

/*
 * function INSDC:4na:bin ALIGN:generate_mismatch #1 (INSDC:4na:bin reference,INSDC:4na:bin subject, bool has_ref_offset, I32 ref_offset);
 */
/*
 * generate_mismatch_impl
 *  Row function: outputs, in order, every subject base that shares no bit
 *  with the reference base it is aligned to ( or whose aligned position falls
 *  outside the reference ).
 *
 *  argv[0] reference, argv[1] subject, argv[2] has_ref_offset ( one flag per
 *  subject base ), argv[3] ref_offset ( one entry per set flag, consumed in
 *  order ).
 *
 *  Returns rcData/rcInvalid when more flags are set than offsets supplied and
 *  rcBuffer/rcInsufficient when the mismatches do not fit the local buffer.
 */
static
rc_t CC generate_mismatch_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    int32_t si,ri,roi;
    uint32_t ref_len = argv[0].u.data.elem_count;
    uint32_t sbj_len = argv[1].u.data.elem_count;
    uint32_t hro_len = argv[2].u.data.elem_count;
    uint32_t ro_len  = argv[3].u.data.elem_count;
    const uint8_t *ref  = argv [ 0 ] . u . data . base;
    const uint8_t *sbj  = argv [ 1 ] . u . data . base;
    const uint8_t *has_ref_offset     = argv [ 2 ] . u . data . base;
    const int32_t *ref_offset = argv [ 3 ] . u . data . base;
    uint8_t   buf[5*1024];
    uint32_t  len;

    assert(sbj_len == hro_len );

    ref            += argv [ 0 ] . u . data . first_elem;
    sbj            += argv [ 1 ] . u . data . first_elem;
    has_ref_offset += argv [ 2 ] . u . data . first_elem;
    ref_offset     += argv [ 3 ] . u . data . first_elem;

    for(si=ri=roi=0,len=0;si<sbj_len;si++,ri++){
        if(has_ref_offset[si] != 0){/*** need to offset the reference ***/
            if( roi >= ro_len){
                return RC(rcXF, rcFunction, rcExecuting, rcData, rcInvalid );
            }
            /* consume this flag's offset so the next flag gets the next entry */
            ri += ref_offset[roi++];
        }
        if(ri >=0 && ri < ref_len && (sbj[si]&ref[ri])!=0){/*noop*/}
        else {
            /* buf[len] is written next, so len must stay below the capacity */
            if(len >= sizeof(buf)) return RC(rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient);
            buf[len++]=sbj[si];
        }
    }
    /* size the output row to the number of mismatching bases collected */
    rslt -> data -> elem_bits = 8;
    rc = KDataBufferResize ( rslt -> data, len );
    if ( rc != 0 )
        return rc;
    rslt -> elem_count = len;
    if(len > 0) memcpy(rslt -> data->base,buf,len);
    return 0;
}

VTRANSFACT_IMPL ( ALIGN_generate_mismatch, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs generate_mismatch_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = generate_mismatch_impl;
    return 0;
}



/*
 * function INSDC:quality:phred NCBI:align:generate_mismatch_qual #1 (INSDC:quality:phred qual,bool has_mismatch)
 */
/*
 * generate_mismatch_qual_impl
 *  Row function: outputs, in order, the quality values ( argv[0] ) of the
 *  positions whose has_mismatch flag ( argv[1] ) is set.
 *
 *  Returns rcBuffer/rcInsufficient when the selected values do not fit the
 *  local staging buffer.
 */
static
rc_t CC generate_mismatch_qual_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    const uint8_t *q  = argv[0].u.data.base; 
    const uint8_t *h_mm           = argv[1].u.data.base;
    uint8_t     buf[5*1024];
    uint32_t    mm_cnt,i;

    q    += argv[0].u.data.first_elem;
    h_mm += argv[1].u.data.first_elem;
    for(mm_cnt=0,i=0;i<argv[0].u.data.elem_count;i++){
        if(h_mm[i]){
            /* never write past the staging buffer */
            if(mm_cnt >= sizeof(buf))
                return RC(rcXF, rcFunction, rcExecuting, rcBuffer, rcInsufficient);
            buf[mm_cnt++]=q[i];
        }
    }
    /* size the output row to the number of values collected */
    rslt -> data -> elem_bits = 8;
    rc = KDataBufferResize ( rslt -> data, mm_cnt );
    if ( rc != 0 )
        return rc;
    rslt -> elem_count = mm_cnt;
    if(mm_cnt > 0) memcpy(rslt -> data->base,buf,mm_cnt);
    return 0;
}


VTRANSFACT_IMPL ( ALIGN_generate_mismatch_qual, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs generate_mismatch_qual_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = generate_mismatch_qual_impl;
    return 0;
}



/*
 * function ascii NCBI:align:get_mismatch_read #1
 *    ( bool has_mismatch, INSDC:dna:text mismatch )
 */
/*
 * get_mismatch_read_impl
 *  Row function: produces one character per has_mismatch flag ( argv[0] ).
 *  A set flag takes the next unused character of mismatch ( argv[1] ); a
 *  clear flag yields '='.
 */
static
rc_t CC get_mismatch_read_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    unsigned const readlen = argv[0].u.data.elem_count;
    bool const *flags;
    char const *bases;
    char *out;
    unsigned pos;
    unsigned used = 0;
    rc_t rc;

    rslt->data->elem_bits = sizeof(out[0]) * 8;
    rslt->elem_count = readlen;
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc != 0)
        return rc;

    flags = argv[0].u.data.base;
    bases = argv[1].u.data.base;

    assert(argv[0].u.data.elem_bits == sizeof(flags[0]) * 8);
    flags += argv[0].u.data.first_elem;

    assert(argv[1].u.data.elem_bits == sizeof(bases[0]) * 8);
    bases += argv[1].u.data.first_elem;

    out = rslt->data->base;
    for (pos = 0; pos < readlen; ++pos) {
        if (flags[pos])
            out[pos] = bases[used++];
        else
            out[pos] = '=';
    }
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_mismatch_read, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs get_mismatch_read_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = get_mismatch_read_impl;
    return 0;
}



/*
 * function INSDC:coord:len NCBI:align:get_left_soft_clip #1 (
 *     bool has_ref_offset, INSDC:coord:zero ref_offset )
 */
/*
 * left_soft_clip_impl
 *  Row function: outputs one length.  It is the magnitude of the first
 *  ref_offset ( argv[1] ) when the first has_ref_offset flag ( argv[0] ) is
 *  set and that offset is negative, and 0 in every other case.
 */
static
rc_t CC left_soft_clip_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    unsigned const n_offsets = argv[1].u.data.elem_count;
    INSDC_coord_len clip = 0;
    rc_t rc;

    if (n_offsets > 0) {
        bool const *flags = argv[0].u.data.base;
        int32_t const *offsets = argv[1].u.data.base;

        assert(argv[0].u.data.elem_bits == sizeof(flags[0]) * 8);
        assert(argv[1].u.data.elem_bits == sizeof(offsets[0]) * 8);

        flags += argv[0].u.data.first_elem;
        offsets += argv[1].u.data.first_elem;

        /* only a negative offset on the very first base counts as a clip */
        if (flags[0] && offsets[0] < 0)
            clip = -offsets[0];
    }

    rslt->data->elem_bits = sizeof(clip) * 8;
    rslt->elem_count = 1;
    rc = KDataBufferResize(rslt->data, 1);
    if (rc != 0)
        return rc;

    memcpy(rslt->data->base, &clip, sizeof(clip));
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_left_soft_clip, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs left_soft_clip_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = left_soft_clip_impl;
    return 0;
}



/*
 * function INSDC:coord:len NCBI:align:get_right_soft_clip #1 ( 
 *     bool has_mismatch, INSDC:coord:len left_clip )
 */
/*
 * right_soft_clip_impl
 *  Row function shared by both registered versions: outputs one length, the
 *  size of the soft clip at the right end of the read.
 *
 *  argv[0] has_mismatch, argv[1] left clip length; optionally
 *  argv[2] has_ref_offset ( argc > 2 ) and argv[3] ref_offset ( argc > 3 ).
 *
 *  The clip is the trailing run of mismatching bases, never reaching into the
 *  left clip and, when has_ref_offset is supplied, stopping at a base that
 *  carries an offset.  When ref_offset is supplied and its last entry is
 *  negative, the result is afterwards reduced as coded below.
 */
static
rc_t CC right_soft_clip_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    INSDC_coord_len result = 0;
    INSDC_coord_len left = 0;
    uint32_t    right = argv[0].u.data.elem_count;
    bool const *has_mismatch = argv[0].u.data.base;
    bool const *has_ref_offset = NULL;
    int32_t    last_ref_offset = 0;

    assert(argv[0].u.data.elem_bits == sizeof(has_mismatch[0]) * 8);
    has_mismatch += argv[0].u.data.first_elem;

    if( argc > 2 ) {
        has_ref_offset = argv[2].u.data.base;
        has_ref_offset += argv[2].u.data.first_elem;
        if(argc > 3 ) {
            int32_t const *ro = argv[3].u.data.base;
            ro         += argv[3].u.data.first_elem;
            if(argv[3].u.data.elem_count > 0){
                last_ref_offset = ro[argv[3].u.data.elem_count-1];
            }
        }
    }

    assert(argv[1].u.data.elem_bits == sizeof(left) * 8);
    left = ((INSDC_coord_len const *)argv[1].u.data.base)[argv[1].u.data.first_elem];
    
    /* '>' rather than '!=' so a left clip longer than the read cannot walk
       right below zero */
    while (right > left && has_mismatch[right - 1] &&
           (has_ref_offset == NULL || has_ref_offset[right - 1] == 0) ) {
        ++result;
        --right;
    }
    /* has_ref_offset is NULL when called with two arguments: test it ( and the
       offset, which is 0 in that case ) before indexing */
    while(has_ref_offset != NULL && last_ref_offset < 0 &&
          right > 0 && has_ref_offset[right - 1] == 0){ /*** some mismatches from left needs to be recovered to cover for inserts **/
        last_ref_offset++;
        right--;
    }
    if(last_ref_offset < -1 ){
        last_ref_offset ++;
        if(result < -last_ref_offset) result=0;
        else result += last_ref_offset;
    }
    
    rslt->data->elem_bits = sizeof(result) * 8;
    rslt->elem_count = 1;
    rc = KDataBufferResize(rslt->data, 1);
    if (rc == 0)
        memcpy(rslt->data->base, &result, sizeof(result));
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_right_soft_clip, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs right_soft_clip_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = right_soft_clip_impl;
    return 0;
}
VTRANSFACT_IMPL ( NCBI_align_get_right_soft_clip_2, 2, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory for the second registered version: installs the same
       right_soft_clip_impl with variant vftRow; no private state */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = right_soft_clip_impl;
    return 0;
}




/*
 * function ascii NCBI:align:get_clipped_cigar #1 ( ascii cigar )
 */
/*
 * clipped_cigar_impl
 *  Row function: copies the input CIGAR text ( argv[0] ) without a leading
 *  "<digits>S" operation and without a trailing "<digits>S" operation.
 *  An empty input, or one consisting only of clips, yields an empty row.
 */
static
rc_t CC clipped_cigar_impl ( void *data, const VXformInfo *info, int64_t row_id,
                              VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    char const *cigar = argv[0].u.data.base;
    unsigned const ciglen = argv[0].u.data.elem_count;
    unsigned n;
    unsigned start = 0;
    unsigned end = ciglen;
    
    assert(argv[0].u.data.elem_bits == sizeof(cigar[0]) * 8);
    cigar += argv[0].u.data.first_elem;
    
    /* length of the leading count; the cast keeps isdigit defined for any
       char value */
    for (n = 0; n < ciglen; ++n) {
        if (!isdigit((unsigned char)cigar[n]))
            break;
    }
    /* n == ciglen means the text ended inside the count: nothing to inspect */
    if (n < ciglen && cigar[n] == 'S')
        start = n + 1;
    
    /* an empty text has no last character to look at */
    if (end > 0 && cigar[end - 1] == 'S') {
        --end;
        while (end > start && isdigit((unsigned char)cigar[end - 1]))
            --end;
    }
    
    rslt->data->elem_bits = sizeof(cigar[0]) * 8;
    rslt->elem_count = (end > start)? end - start:0;
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc == 0 && rslt->elem_count > 0)
        memcpy(rslt->data->base, &cigar[start], rslt->elem_count);
    
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_clipped_cigar, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs clipped_cigar_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = clipped_cigar_impl;
    return 0;
}



/*
 * function I32 NCBI:align:get_clipped_ref_offset #1 (
 *     bool has_ref_offset, I32 ref_offset )
 */
/*
 * clipped_ref_offset_impl
 *  Row function: copies ref_offset ( argv[1] ) to the output, leaving out
 *  its first entry when the first has_ref_offset flag ( argv[0] ) is set and
 *  that entry is negative.
 */
static
rc_t CC clipped_ref_offset_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    unsigned const n_offsets = argv[1].u.data.elem_count;
    int32_t const *offsets = argv[1].u.data.base;
    unsigned skip = 0;
    rc_t rc;

    assert(argv[1].u.data.elem_bits == sizeof(offsets[0]) * 8);
    offsets += argv[1].u.data.first_elem;

    if (n_offsets > 0) {
        bool const *flags = argv[0].u.data.base;

        assert(argv[0].u.data.elem_bits == sizeof(flags[0]) * 8);
        flags += argv[0].u.data.first_elem;

        /* a negative offset on the very first base is dropped */
        if (flags[0] && offsets[0] < 0)
            skip = 1;
    }

    rslt->data->elem_bits = sizeof(offsets[0]) * 8;
    rslt->elem_count = n_offsets - skip;
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc != 0)
        return rc;

    memcpy(rslt->data->base, &offsets[skip], sizeof(offsets[0]) * rslt->elem_count);
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_clipped_ref_offset, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs clipped_ref_offset_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = clipped_ref_offset_impl;
    return 0;
}



/*
 * function INSDC:coord:len NCBI:align:get_ref_len #1 (
 *     bool has_ref_offset, I32 ref_offset, INSDC:coord:len right_clip )
 */
/*
 * get_ref_len_impl
 *  Row function: outputs one length, computed as
 *      ( element count of argv[0] ) - right_clip + sum of all ref_offset
 *  where right_clip is the single value of argv[2] and ref_offset is argv[1].
 */
static
rc_t CC get_ref_len_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    unsigned const n_offsets = argv[1].u.data.elem_count;
    INSDC_coord_len const *right = argv[2].u.data.base;
    int32_t const *ref_offset = argv[1].u.data.base;
    INSDC_coord_len result;
    int32_t sum = 0;
    unsigned k = 0;
    rc_t rc;

    assert(argv[2].u.data.elem_bits == sizeof(right[0]) * 8);
    assert(argv[0].u.data.elem_count >= right[argv[2].u.data.first_elem]);
    assert(argv[1].u.data.elem_bits == sizeof(ref_offset[0]) * 8);

    ref_offset += argv[1].u.data.first_elem;

    /* read length without the right clip ... */
    result = argv[0].u.data.elem_count - right[argv[2].u.data.first_elem];

    /* ... adjusted by the net effect of all offsets */
    while (k != n_offsets) {
        sum += ref_offset[k];
        ++k;
    }
    assert((int32_t)result >= -sum);
    result += sum;

    rslt->data->elem_bits = sizeof(result) * 8;
    rslt->elem_count = 1;
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc != 0)
        return rc;

    memcpy(rslt->data->base, &result, sizeof(result));
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_ref_len, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs get_ref_len_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = get_ref_len_impl;
    return 0;
}



/*
 * function < type T > T NCBI:align:clip #1 ( T value,
 *     INSDC:coord:len left, INSDC:coord:len right )
 */
/*
 * clip_impl
 *  Row function: copies argv[0] to the output without its first `left`
 *  ( argv[1] ) and last `right` ( argv[2] ) elements.  The element width is
 *  taken from the input; whole-byte widths are copied with memcpy, other
 *  widths with bitcpy.
 */
static
rc_t CC clip_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    INSDC_coord_len left =  ((INSDC_coord_len const *)argv[1].u.data.base)[argv[1].u.data.first_elem];
    INSDC_coord_len right = ((INSDC_coord_len const *)argv[2].u.data.base)[argv[2].u.data.first_elem];
    rc_t rc;

    rslt->data->elem_bits = argv[0].u.data.elem_bits;
    rslt->elem_count = argv[0].u.data.elem_count - left - right;
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc != 0)
        return rc;

    if ((rslt->data->elem_bits & 7) != 0) {
        /* element width is not a whole number of bytes: copy bit-wise */
        bitcpy(rslt->data->base, 0, argv[0].u.data.base,
               (left + argv[0].u.data.first_elem) * argv[0].u.data.elem_bits,
               rslt->elem_count * rslt->data->elem_bits);
    }
    else {
        /* byte-aligned elements: convert bit offsets and sizes to bytes */
        memcpy(rslt->data->base,
               &((char const *)argv[0].u.data.base)[((left + argv[0].u.data.first_elem) * argv[0].u.data.elem_bits) >> 3],
               (rslt->elem_count * rslt->data->elem_bits) >> 3);
    }
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_clip, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs clip_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = clip_impl;
    return 0;
}



/*
 * function bool NCBI:align:get_ref_mismatch #1
 *     ( bool has_mismatch, bool has_ref_offset, I32 ref_offset,
 *       INSDC:coord:len ref_len )
 */
/*
 * get_ref_mismatch_impl
 *  Row function: produces ref_len ( argv[3] ) flags, one per reference
 *  position, set where the read base aligned to that position has its
 *  has_mismatch flag ( argv[0] ) set.
 *
 *  The read is walked with has_ref_offset ( argv[1] ) / ref_offset
 *  ( argv[2] ): a positive offset advances the reference position, any other
 *  offset advances the read position by its magnitude.  Walking stops once
 *  the reference position leaves the output.
 *
 *  NOTE(review): a flagged base whose offset is 0 does not advance the read
 *  position, and the offset index is not bounded here.
 */
static
rc_t CC get_ref_mismatch_impl ( void *data, const VXformInfo *info, int64_t row_id,
                   VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    bool const *has_mismatch       = argv [ 0 ] . u . data . base;
    bool const *has_ref_offset     = argv [ 1 ] . u . data . base;
    int32_t const *ref_offset      = argv [ 2 ] . u . data . base;
    INSDC_coord_len const *ref_len = argv [ 3 ] . u . data . base; 
    bool *result;
    unsigned next_offset = 0;
    unsigned ref_pos = 0;
    unsigned read_pos = 0;
    rc_t rc;

    assert(argv[0].u.data.elem_bits == sizeof(has_mismatch[0]) * 8);
    assert(argv[1].u.data.elem_bits == sizeof(has_ref_offset[0]) * 8);
    assert(argv[2].u.data.elem_bits == sizeof(ref_offset[0]) * 8);
    assert(argv[3].u.data.elem_bits == sizeof(ref_len[0]) * 8);

    has_mismatch   += argv[0].u.data.first_elem;
    has_ref_offset += argv[1].u.data.first_elem;
    ref_offset     += argv[2].u.data.first_elem;

    rslt->data->elem_bits = sizeof(bool) * 8;
    rslt->elem_count = ref_len[argv[3].u.data.first_elem];
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc != 0)
        return rc;

    result = (bool *)rslt->data->base;
    memset(result, 0, sizeof(result[0]) * rslt->elem_count);

    while (read_pos < argv[0].u.data.elem_count) {
        if (has_ref_offset[read_pos]) {
            int const offset = ref_offset[next_offset++];

            if (offset <= 0) {
                /* skip read bases that have no reference position */
                read_pos -= offset;
                continue;
            }
            /* skip reference positions that have no read base */
            ref_pos += offset;
        }
        if (ref_pos >= rslt->elem_count)
            break;
        if (has_mismatch[read_pos])
            result[ref_pos] = 1;
        ++read_pos;
        ++ref_pos;
    }
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_ref_mismatch, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs get_ref_mismatch_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = get_ref_mismatch_impl;
    return 0;
}



/*
 * function bool NCBI:align:get_ref_insert #1
 *     ( bool has_mismatch, bool has_ref_offset, I32 ref_offset,
 *       INSDC:coord:len ref_len )
 */
/*
 * get_ref_insert_impl
 *  Row function: produces ref_len ( argv[3] ) flags, one per reference
 *  position.  For every flagged read base other than the first whose offset
 *  is not positive, the current reference position and the one before it
 *  are set ( where they lie inside the output ).
 *
 *  The read is walked with has_ref_offset ( argv[1] ) / ref_offset
 *  ( argv[2] ): a positive offset advances the reference position, any other
 *  offset advances the read position by its magnitude.  has_mismatch
 *  ( argv[0] ) only supplies the read length.
 *
 *  NOTE(review): a flagged base whose offset is 0 does not advance the read
 *  position, and the offset index is not bounded here.
 */
static
rc_t CC get_ref_insert_impl ( void *data, const VXformInfo *info, int64_t row_id,
                               VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    bool const *has_mismatch       = argv [ 0 ] . u . data . base;
    bool const *has_ref_offset     = argv [ 1 ] . u . data . base;
    int32_t const *ref_offset      = argv [ 2 ] . u . data . base;
    INSDC_coord_len const *ref_len = argv [ 3 ] . u . data . base; 
    
    assert(argv[0].u.data.elem_bits == sizeof(has_mismatch[0]) * 8);
    assert(argv[1].u.data.elem_bits == sizeof(has_ref_offset[0]) * 8);
    assert(argv[2].u.data.elem_bits == sizeof(ref_offset[0]) * 8);
    assert(argv[3].u.data.elem_bits == sizeof(ref_len[0]) * 8);
    
    has_mismatch   += argv[0].u.data.first_elem;
    has_ref_offset += argv[1].u.data.first_elem;
    ref_offset     += argv[2].u.data.first_elem;
    
    rslt->data->elem_bits = sizeof(bool) * 8;
    rslt->elem_count = ref_len[argv[3].u.data.first_elem];
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc == 0) {
        bool *result = (bool *)rslt->data->base;
        unsigned j;
        unsigned ri;
        unsigned si;
        
        memset(result, 0, sizeof(result[0]) * rslt->elem_count);
        for (j = ri = si = 0; si < argv[0].u.data.elem_count;) {
            if (has_ref_offset[si]) {
                int offset = ref_offset[j++];
                
                if (offset > 0) {
                    ri += offset;
                }
                else {
                    if (si) {
                        /* ri can sit at or past the end of the output ( e.g.
                           a trailing clip ), so both writes are bounded */
                        if (ri >= 1 && ri - 1 < rslt->elem_count) result[ri-1] = 1;
                        if (ri < rslt->elem_count) result[ri] = 1;
                    }
                    si -= offset;
                    continue;
                }
            }
            ++si;
            ++ri;
        }
    }
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_ref_insert, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs get_ref_insert_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = get_ref_insert_impl;
    return 0;
}



/*
 * function bool NCBI:align:get_ref_delete #1
 *     ( bool has_mismatch, bool has_ref_offset, I32 ref_offset,
 *       INSDC:coord:len ref_len )
 */
/*
 * get_ref_delete_impl
 *  Row function: produces ref_len ( argv[3] ) flags, one per reference
 *  position, set for the reference positions skipped by a positive
 *  ref_offset ( clamped to the output size ).
 *
 *  The read is walked with has_ref_offset ( argv[1] ) / ref_offset
 *  ( argv[2] ): a positive offset advances the reference position, any other
 *  offset advances the read position by its magnitude.  has_mismatch
 *  ( argv[0] ) only supplies the read length.
 *
 *  NOTE(review): a flagged base whose offset is 0 does not advance the read
 *  position, and the offset index is not bounded here.
 */
static
rc_t CC get_ref_delete_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    bool const *has_mismatch       = argv [ 0 ] . u . data . base;
    bool const *has_ref_offset     = argv [ 1 ] . u . data . base;
    int32_t const *ref_offset      = argv [ 2 ] . u . data . base;
    INSDC_coord_len const *ref_len = argv [ 3 ] . u . data . base; 
    
    assert(argv[0].u.data.elem_bits == sizeof(has_mismatch[0]) * 8);
    assert(argv[1].u.data.elem_bits == sizeof(has_ref_offset[0]) * 8);
    assert(argv[2].u.data.elem_bits == sizeof(ref_offset[0]) * 8);
    assert(argv[3].u.data.elem_bits == sizeof(ref_len[0]) * 8);
    
    has_mismatch   += argv[0].u.data.first_elem;
    has_ref_offset += argv[1].u.data.first_elem;
    ref_offset     += argv[2].u.data.first_elem;
    
    rslt->data->elem_bits = sizeof(bool) * 8;
    rslt->elem_count = ref_len[argv[3].u.data.first_elem];
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc == 0) {
        bool *result = (bool *)rslt->data->base;
        unsigned j;
        unsigned ri;
        unsigned si;
        
        memset(result, 0, sizeof(result[0]) * rslt->elem_count);
        for (j = ri = si = 0; si < argv[0].u.data.elem_count;) {
            if (has_ref_offset[si]) {
                int offset = ref_offset[j++];
                
                if (offset > 0) {
                    /* mark only the part of the skipped span that lies
                       inside the output buffer */
                    if (ri < rslt->elem_count) {
                        uint64_t const room = rslt->elem_count - ri;
                        size_t const fill = ((uint64_t)offset < room) ? (size_t)offset : (size_t)room;

                        memset(&result[ri], 1, fill);
                    }
                    ri += offset;
                }
                else {
                    si -= offset;
                    continue;
                }
            }
            ++si;
            ++ri;
        }
    }
    return rc;
}

VTRANSFACT_IMPL ( NCBI_align_get_ref_delete, 1, 0, 0 ) ( const void *Self, const VXfactInfo *info,
    VFuncDesc *rslt, const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* factory: installs get_ref_delete_impl with variant vftRow;
       no private state, hence no destructor */
    rslt -> self = NULL;
    rslt -> whack = NULL;
    rslt -> variant = vftRow;
    rslt -> u . rf = get_ref_delete_impl;
    return 0;
}



/* When non-zero, one extra position on each side of every preserved
 * region is marked as well (see the #if blocks below). */
#define USE_BIGGER_PRESERVE_BORDER 1
/*
 * function bool NCBI:align:get_ref_preserve_qual #1
 *     ( bool has_mismatch, bool has_ref_offset, I32 ref_offset,
 *       INSDC:coord:len ref_len )
 *
 * Row function producing one bool per reference position (the first element
 * of ref_len gives the count). A position is set to 1 when it is
 *   - skipped by a positive ref_offset (deletion), or borders one;
 *   - adjacent to a non-positive ref_offset (insert) that does not occur
 *     at sequence position 0;
 *   - aligned to a base whose has_mismatch flag is set (plus its
 *     neighbours when USE_BIGGER_PRESERVE_BORDER is non-zero).
 * All other positions are 0.
 *
 * data, info, row_id and argc are not used.
 * Returns the rc of KDataBufferResize; on failure nothing is written.
 *
 * Every write is bounded by the result length and every read of ref_offset
 * by its element count, so an inconsistent row (or an insert / deletion that
 * ends exactly at the end of the reference span) cannot touch memory outside
 * the result buffer.
 *
 * NOTE(review): a zero offset takes the insert branch, leaves the sequence
 * position unchanged and consumes the next ref_offset entry for the same
 * flag - confirm that this is intended.
 */
static
rc_t CC get_ref_preserve_qual_impl ( void *data, const VXformInfo *info, int64_t row_id,
    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    bool const *has_mismatch       = argv [ 0 ] . u . data . base;
    bool const *has_ref_offset     = argv [ 1 ] . u . data . base;
    int32_t const *ref_offset      = argv [ 2 ] . u . data . base;
    INSDC_coord_len const *ref_len = argv [ 3 ] . u . data . base;
    uint64_t const offset_count    = argv [ 2 ] . u . data . elem_count;
    uint64_t seq_count             = argv [ 0 ] . u . data . elem_count;

    assert(argv[0].u.data.elem_bits == sizeof(has_mismatch[0]) * 8);
    assert(argv[1].u.data.elem_bits == sizeof(has_ref_offset[0]) * 8);
    assert(argv[2].u.data.elem_bits == sizeof(ref_offset[0]) * 8);
    assert(argv[3].u.data.elem_bits == sizeof(ref_len[0]) * 8);

    has_mismatch   += argv[0].u.data.first_elem;
    has_ref_offset += argv[1].u.data.first_elem;
    ref_offset     += argv[2].u.data.first_elem;

    /* both flag arrays are indexed by the same position: stay inside both */
    if (seq_count > argv[1].u.data.elem_count)
        seq_count = argv[1].u.data.elem_count;

    rslt->data->elem_bits = sizeof(bool) * 8;
    rslt->elem_count = ref_len[argv[3].u.data.first_elem];
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc == 0) {
        bool *result = (bool *)rslt->data->base;
        uint64_t const ref_count = rslt->elem_count;
        unsigned j;     /* next unread entry of ref_offset */
        unsigned ri;    /* current reference position; kept <= ref_count */
        unsigned si;    /* current sequence position */

        memset(result, 0, sizeof(result[0]) * rslt->elem_count);
        for (j = ri = si = 0; si < seq_count;) {
            if (has_ref_offset[si]) {
                int offset;

                if (j >= offset_count)
                    break;          /* more flags than offsets: stop */
                offset = ref_offset[j++];

                if (offset > 0) {
                    uint64_t const room = ref_count - ri;

                    /* Preserve the qualities for deleted bases plus the border */
#if USE_BIGGER_PRESERVE_BORDER
                    if (ri >= 2) result[ri-2] = 1;
#endif
                    if (ri >= 1) result[ri-1] = 1;
                    if ((uint64_t)offset >= room) {
                        /* deletion reaches (or passes) the end of the
                           result: mark what fits, nothing can follow */
                        if (room > 0)
                            memset(&result[ri], 1, (size_t)room);
                        break;
                    }
                    memset(&result[ri], 1, (size_t)offset);
                    ri += offset;
                    result[ri] = 1;     /* in range: offset < room */
#if USE_BIGGER_PRESERVE_BORDER
                    if ((uint64_t)ri + 1 < ref_count) result[ri+1] = 1;
#endif
                }
                else {
                    if (si) {
                        /* Preserve the qualities for the bases bordering the insert */
#if USE_BIGGER_PRESERVE_BORDER
                        if (ri >= 2) result[ri-2] = 1;
#endif
                        if (ri >= 1) result[ri-1] = 1;
                        /* ri equals ref_count when the insert follows the
                           last reference position */
                        if (ri < ref_count) result[ri] = 1;
#if USE_BIGGER_PRESERVE_BORDER
                        if ((uint64_t)ri + 1 < ref_count) result[ri+1] = 1;
#endif
                    }
                    /* skip the -offset sequence positions of the insert */
                    si -= offset;
                    continue;
                }
            }
            if (ri >= ref_count) break;
            if (has_mismatch[si]) {
#if USE_BIGGER_PRESERVE_BORDER
                if (ri >= 1) result[ri-1] = 1;
#endif
                result[ri] = 1;
#if USE_BIGGER_PRESERVE_BORDER
                if ((uint64_t)ri + 1 < ref_count) result[ri+1] = 1;
#endif
            }
            ++si;
            ++ri;
        }
    }
    return rc;
}

/*
 * Factory NCBI_align_get_ref_preserve_qual (VTRANSFACT_IMPL arguments 1, 0, 0).
 * Fills in the function descriptor with get_ref_preserve_qual_impl as a row
 * function and attaches no self object. Always returns 0.
 */
VTRANSFACT_IMPL ( NCBI_align_get_ref_preserve_qual, 1, 0, 0 )
    ( const void *Self, const VXfactInfo *info, VFuncDesc *rslt,
      const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* no self object, and so no whack callback for it either */
    rslt->self    = NULL;
    rslt->whack   = NULL;

    rslt->variant = vftRow;
    rslt->u.rf    = get_ref_preserve_qual_impl;

    return 0;
}



/*
 * function bool NCBI:align:get_seq_preserve_qual #1
 *    ( bool ref_preserve_qual, bool has_ref_offset, I32 ref_offset );
 *
 * Row function producing one bool per element of has_ref_offset, i.e. per
 * sequence position. Every element starts out as 1. Positions that line up
 * with a reference position receive the ref_preserve_qual value of that
 * reference position; positions skipped by a non-positive ref_offset, and
 * any positions left once the reference flags run out, keep the initial 1.
 *
 * data, info, row_id and argc are not used.
 * Returns the rc of KDataBufferResize; on failure nothing is written.
 *
 * Reads of ref_offset are bounded by its element count and the reference
 * index is checked before it is advanced, so an inconsistent row cannot
 * cause an out-of-bounds read or an index wrap-around.
 *
 * NOTE(review): a zero offset leaves the sequence position unchanged and
 * consumes the next ref_offset entry for the same flag - confirm that this
 * is intended.
 */
static
rc_t CC get_seq_preserve_qual_impl ( void *data, const VXformInfo *info, int64_t row_id,
                                    VRowResult *rslt, uint32_t argc, const VRowData argv [] )
{
    rc_t rc;
    bool const *ref_pq             = argv [ 0 ] . u . data . base;
    bool const *has_ref_offset     = argv [ 1 ] . u . data . base;
    int32_t const *ref_offset      = argv [ 2 ] . u . data . base;
    uint64_t const ref_count       = argv [ 0 ] . u . data . elem_count;
    uint64_t const seq_count       = argv [ 1 ] . u . data . elem_count;
    uint64_t const offset_count    = argv [ 2 ] . u . data . elem_count;

    assert(argv[0].u.data.elem_bits == sizeof(ref_pq[0]) * 8);
    assert(argv[1].u.data.elem_bits == sizeof(has_ref_offset[0]) * 8);
    assert(argv[2].u.data.elem_bits == sizeof(ref_offset[0]) * 8);

    ref_pq         += argv[0].u.data.first_elem;
    has_ref_offset += argv[1].u.data.first_elem;
    ref_offset     += argv[2].u.data.first_elem;

    rslt->data->elem_bits = sizeof(bool) * 8;
    rslt->elem_count = argv[1].u.data.elem_count;
    rc = KDataBufferResize(rslt->data, rslt->elem_count);
    if (rc == 0) {
        bool *result = (bool *)rslt->data->base;
        unsigned j;     /* next unread entry of ref_offset */
        unsigned ri;    /* current reference position; kept <= ref_count */
        unsigned si;    /* current sequence position */

        /* default: preserve everything, then overwrite the aligned bases */
        memset(result, 1, sizeof(result[0]) * rslt->elem_count);
        for (j = ri = si = 0; si < seq_count;) {
            if (has_ref_offset[si]) {
                int offset;

                if (j >= offset_count)
                    break;          /* more flags than offsets: stop */
                offset = ref_offset[j++];

                if (offset > 0) {
                    /* skipping 'offset' reference positions would land at
                       or beyond the end of ref_pq: nothing left to copy */
                    if ((uint64_t)offset >= ref_count - ri)
                        break;
                    ri += offset;
                }
                else {
                    /* skipped sequence positions keep the initial 1 */
                    si -= offset;
                    continue;
                }
            }
            if (ri >= ref_count) break;
            result[si] = ref_pq[ri];
            ++si;
            ++ri;
        }
    }
    return rc;
}

/*
 * Factory NCBI_align_get_seq_preserve_qual (VTRANSFACT_IMPL arguments 1, 0, 0).
 * Fills in the function descriptor with get_seq_preserve_qual_impl as a row
 * function and attaches no self object. Always returns 0.
 */
VTRANSFACT_IMPL ( NCBI_align_get_seq_preserve_qual, 1, 0, 0 )
    ( const void *Self, const VXfactInfo *info, VFuncDesc *rslt,
      const VFactoryParams *cp, const VFunctionParams *dp )
{
    /* no self object, and so no whack callback for it either */
    rslt->self    = NULL;
    rslt->whack   = NULL;

    rslt->variant = vftRow;
    rslt->u.rf    = get_seq_preserve_qual_impl;

    return 0;
}
