| #include "mltaln.h" |
| |
| #define DEBUG 0 |
| #define IODEBUG 0 |
| #define SCOREOUT 0 |
| |
| #define NODIST -9999 |
| |
| static char *whereispairalign; |
| static char *laraparams; |
| static char foldalignopt[1000]; |
| static int out_align_instead_of_hat3; |
| |
| static void t2u( char *seq ) |
| { |
| while( *seq ) |
| { |
| if ( *seq == 'A' ) *seq = 'a'; |
| else if( *seq == 'a' ) *seq = 'a'; |
| else if( *seq == 'T' ) *seq = 'u'; |
| else if( *seq == 't' ) *seq = 'u'; |
| else if( *seq == 'U' ) *seq = 'u'; |
| else if( *seq == 'u' ) *seq = 'u'; |
| else if( *seq == 'G' ) *seq = 'g'; |
| else if( *seq == 'g' ) *seq = 'g'; |
| else if( *seq == 'C' ) *seq = 'c'; |
| else if( *seq == 'c' ) *seq = 'c'; |
| else *seq = 'n'; |
| seq++; |
| } |
| } |
| |
| static float recallpairfoldalign( char **mseq1, char **mseq2, int m1, int m2, int *of1pt, int *of2pt, int alloclen ) |
| { |
| static FILE *fp = NULL; |
| float value; |
| char *aln1; |
| char *aln2; |
| int of1tmp, of2tmp; |
| |
| if( fp == NULL ) |
| { |
| fp = fopen( "_foldalignout", "r" ); |
| if( fp == NULL ) |
| { |
| fprintf( stderr, "Cannot open _foldalignout\n" ); |
| exit( 1 ); |
| } |
| } |
| |
| aln1 = calloc( alloclen, sizeof( char ) ); |
| aln2 = calloc( alloclen, sizeof( char ) ); |
| |
| readpairfoldalign( fp, *mseq1, *mseq2, aln1, aln2, m1, m2, &of1tmp, &of2tmp, alloclen ); |
| |
| if( strstr( foldalignopt, "-global") ) |
| { |
| fprintf( stderr, "Calling G__align11\n" ); |
| value = G__align11( mseq1, mseq2, alloclen ); |
| *of1pt = 0; |
| *of2pt = 0; |
| } |
| else |
| { |
| fprintf( stderr, "Calling L__align11\n" ); |
| value = L__align11( mseq1, mseq2, alloclen, of1pt, of2pt ); |
| } |
| |
| // value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // nennnotame |
| |
| if( aln1[0] == 0 ) |
| { |
| fprintf( stderr, "FOLDALIGN returned no alignment between %d and %d. Sequence alignment is used instead.\n", m1+1, m2+1 ); |
| } |
| else |
| { |
| strcpy( *mseq1, aln1 ); |
| strcpy( *mseq2, aln2 ); |
| *of1pt = of1tmp; |
| *of2pt = of2tmp; |
| } |
| |
| // value = naivepairscore11( *mseq1, *mseq2, penalty ); // v6.511 ha kore wo tsukau, global nomi dakara. |
| |
| // fclose( fp ); // saigo dake yatta houga yoi. |
| |
| // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); |
| // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); |
| |
| |
| free( aln1 ); |
| free( aln2 ); |
| |
| return( value ); |
| } |
| |
| static void callfoldalign( int nseq, char **mseq ) |
| { |
| FILE *fp; |
| int i; |
| int res; |
| static char com[10000]; |
| |
| for( i=0; i<nseq; i++ ) |
| t2u( mseq[i] ); |
| |
| fp = fopen( "_foldalignin", "w" ); |
| if( !fp ) |
| { |
| fprintf( stderr, "Cannot open _foldalignin\n" ); |
| exit( 1 ); |
| } |
| for( i=0; i<nseq; i++ ) |
| { |
| fprintf( fp, ">%d\n", i+1 ); |
| fprintf( fp, "%s\n", mseq[i] ); |
| } |
| fclose( fp ); |
| |
| sprintf( com, "env PATH=%s foldalign210 %s _foldalignin > _foldalignout ", whereispairalign, foldalignopt ); |
| fprintf(stderr, "Cannot execute system calls!\n"); |
| abort(); |
| res = 0; |
| // res = system( com ); |
| if( res ) |
| { |
| fprintf( stderr, "Error in foldalign\n" ); |
| exit( 1 ); |
| } |
| |
| } |
| |
| static void calllara( int nseq, char **mseq, char *laraarg ) |
| { |
| FILE *fp; |
| int i; |
| int res; |
| static char com[10000]; |
| |
| for( i=0; i<nseq; i++ ) |
| |
| fp = fopen( "_larain", "w" ); |
| if( !fp ) |
| { |
| fprintf( stderr, "Cannot open _larain\n" ); |
| exit( 1 ); |
| } |
| for( i=0; i<nseq; i++ ) |
| { |
| fprintf( fp, ">%d\n", i+1 ); |
| fprintf( fp, "%s\n", mseq[i] ); |
| } |
| fclose( fp ); |
| |
| |
| // fprintf( stderr, "calling LaRA\n" ); |
| sprintf( com, "env PATH=%s:/bin:/usr/bin mafft_lara -i _larain -w _laraout -o _lara.params %s", whereispairalign, laraarg ); |
| fprintf(stderr, "Cannot execute system calls!\n"); |
| abort(); |
| res = 0; |
| // res = system( com ); |
| if( res ) |
| { |
| fprintf( stderr, "Error in lara\n" ); |
| exit( 1 ); |
| } |
| } |
| |
| static float recalllara( char **mseq1, char **mseq2, int alloclen ) |
| { |
| static FILE *fp = NULL; |
| static char *ungap1; |
| static char *ungap2; |
| static char *ori1; |
| static char *ori2; |
| // int res; |
| static char com[10000]; |
| float value; |
| |
| |
| if( fp == NULL ) |
| { |
| fp = fopen( "_laraout", "r" ); |
| if( fp == NULL ) |
| { |
| fprintf( stderr, "Cannot open _laraout\n" ); |
| exit( 1 ); |
| } |
| ungap1 = AllocateCharVec( alloclen ); |
| ungap2 = AllocateCharVec( alloclen ); |
| ori1 = AllocateCharVec( alloclen ); |
| ori2 = AllocateCharVec( alloclen ); |
| } |
| |
| |
| strcpy( ori1, *mseq1 ); |
| strcpy( ori2, *mseq2 ); |
| |
| fgets( com, 999, fp ); |
| myfgets( com, 9999, fp ); |
| strcpy( *mseq1, com ); |
| myfgets( com, 9999, fp ); |
| strcpy( *mseq2, com ); |
| |
| gappick0( ungap1, *mseq1 ); |
| gappick0( ungap2, *mseq2 ); |
| t2u( ungap1 ); |
| t2u( ungap2 ); |
| t2u( ori1 ); |
| t2u( ori2 ); |
| |
| if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) |
| { |
| fprintf( stderr, "SEQUENCE CHANGED!!\n" ); |
| fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); |
| fprintf( stderr, "ungap1 = %s\n", ungap1 ); |
| fprintf( stderr, "ori1 = %s\n", ori1 ); |
| fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); |
| fprintf( stderr, "ungap2 = %s\n", ungap2 ); |
| fprintf( stderr, "ori2 = %s\n", ori2 ); |
| exit( 1 ); |
| } |
| |
| value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); |
| |
| // fclose( fp ); // saigo dake yatta houga yoi. |
| |
| return( value ); |
| } |
| |
| static float recallmxscarna( char **mseq1, char **mseq2, int m1, int m2, int alloclen ) |
| { |
| FILE *fp = NULL; |
| static char *ungap1 = NULL; |
| static char *ungap2 = NULL; |
| static char *ori1 = NULL; |
| static char *ori2 = NULL; |
| // int res; |
| static char com[10000]; |
| float value; |
| |
| |
| if( ungap1 == NULL ) |
| { |
| ungap1 = AllocateCharVec( alloclen ); |
| ungap2 = AllocateCharVec( alloclen ); |
| ori1 = AllocateCharVec( alloclen ); |
| ori2 = AllocateCharVec( alloclen ); |
| } |
| |
| sprintf( com, "_%d-_%d.fasta", m1, m2 ); |
| fp = fopen( com, "r" ); |
| if( fp == NULL ) |
| { |
| fprintf( stderr, "Cannot open %s.\n", com ); |
| exit( 1 ); |
| } |
| |
| strcpy( ori1, *mseq1 ); |
| strcpy( ori2, *mseq2 ); |
| |
| fgets( com, 999, fp ); |
| load1SeqWithoutName_new( fp, *mseq1 ); |
| fgets( com, 999, fp ); |
| load1SeqWithoutName_new( fp, *mseq2 ); |
| |
| gappick0( ungap1, *mseq1 ); |
| gappick0( ungap2, *mseq2 ); |
| t2u( ungap1 ); |
| t2u( ungap2 ); |
| t2u( ori1 ); |
| t2u( ori2 ); |
| |
| if( strcmp( ungap1, ori1 ) || strcmp( ungap2, ori2 ) ) |
| { |
| fprintf( stderr, "SEQUENCE CHANGED!!\n" ); |
| fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); |
| fprintf( stderr, "ungap1 = %s\n", ungap1 ); |
| fprintf( stderr, "ori1 = %s\n", ori1 ); |
| fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); |
| fprintf( stderr, "ungap2 = %s\n", ungap2 ); |
| fprintf( stderr, "ori2 = %s\n", ori2 ); |
| exit( 1 ); |
| } |
| |
| value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); |
| |
| fclose( fp ); |
| |
| return( value ); |
| } |
| |
| static float callmxscarna_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen ) |
| { |
| FILE *fp; |
| int res; |
| static char com[10000]; |
| float value; |
| |
| fp = fopen( "_bpporg", "w" ); |
| if( !fp ) |
| { |
| fprintf( stderr, "Cannot write to _bpporg\n" ); |
| exit( 1 ); |
| } |
| fprintf( fp, ">a\n" ); |
| while( *bpp1 ) |
| fprintf( fp, *bpp1++ ); |
| |
| fprintf( fp, ">b\n" ); |
| while( *bpp2 ) |
| fprintf( fp, *bpp2++ ); |
| fclose( fp ); |
| |
| fprintf(stderr, "Cannot execute system calls!\n"); |
| abort(); |
| // system( "tr -d '\\r' < _bpporg > _bpp" ); // for cygwin, wakaran |
| |
| t2u( *mseq1 ); |
| t2u( *mseq2 ); |
| fp = fopen( "_mxscarnainorg", "w" ); |
| if( !fp ) |
| { |
| fprintf( stderr, "Cannot open _mxscarnainorg\n" ); |
| exit( 1 ); |
| } |
| fprintf( fp, ">1\n" ); |
| // fprintf( fp, "%s\n", *mseq1 ); |
| write1seq( fp, *mseq1 ); |
| fprintf( fp, ">2\n" ); |
| // fprintf( fp, "%s\n", *mseq2 ); |
| write1seq( fp, *mseq2 ); |
| fclose( fp ); |
| |
| fprintf(stderr, "Cannot execute system calls!\n"); |
| abort(); |
| // system( "tr -d '\\r' < _mxscarnainorg > _mxscarnain" ); // for cygwin, wakaran |
| |
| sprintf( com, "env PATH=%s mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum", whereispairalign ); |
| fprintf(stderr, "Cannot execute system calls!\n"); |
| abort(); |
| res = 0; |
| // res = system( com ); |
| if( res ) |
| { |
| fprintf( stderr, "Error in mxscarna\n" ); |
| exit( 1 ); |
| } |
| |
| fp = fopen( "_mxscarnaout", "r" ); |
| if( !fp ) |
| { |
| fprintf( stderr, "Cannot open _mxscarnaout\n" ); |
| exit( 1 ); |
| } |
| |
| fgets( com, 999, fp ); |
| load1SeqWithoutName_new( fp, *mseq1 ); |
| fgets( com, 999, fp ); |
| load1SeqWithoutName_new( fp, *mseq2 ); |
| |
| fclose( fp ); |
| |
| // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); |
| // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); |
| |
| value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); |
| |
| return( value ); |
| } |
| |
| static float callmxscarna_slow( char **mseq1, char **mseq2, int alloclen ) |
| { |
| FILE *fp; |
| int res; |
| static char com[10000]; |
| float value; |
| |
| |
| t2u( *mseq1 ); |
| t2u( *mseq2 ); |
| fp = fopen( "_mxscarnain", "w" ); |
| if( !fp ) |
| { |
| fprintf( stderr, "Cannot open _mxscarnain\n" ); |
| exit( 1 ); |
| } |
| fprintf( fp, ">1\n" ); |
| fprintf( fp, "%s\n", *mseq1 ); |
| fprintf( fp, ">2\n" ); |
| fprintf( fp, "%s\n", *mseq2 ); |
| fclose( fp ); |
| |
| sprintf( com, "env PATH=%s mxscarnamod _mxscarnain > _mxscarnaout 2>_dum", whereispairalign ); |
| fprintf(stderr, "Cannot execute system calls!\n"); |
| abort(); |
| res = 0; |
| // res = system( com ); |
| if( res ) |
| { |
| fprintf( stderr, "Error in mxscarna\n" ); |
| exit( 1 ); |
| } |
| |
| fp = fopen( "_mxscarnaout", "r" ); |
| if( !fp ) |
| { |
| fprintf( stderr, "Cannot open _mxscarnaout\n" ); |
| exit( 1 ); |
| } |
| |
| fgets( com, 999, fp ); |
| load1SeqWithoutName_new( fp, *mseq1 ); |
| fgets( com, 999, fp ); |
| load1SeqWithoutName_new( fp, *mseq2 ); |
| |
| fclose( fp ); |
| |
| // fprintf( stderr, "*mseq1 = %s\n", *mseq1 ); |
| // fprintf( stderr, "*mseq2 = %s\n", *mseq2 ); |
| |
| value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); |
| |
| return( value ); |
| } |
| |
| static void readhat4( FILE *fp, char ***bpp ) |
| { |
| char oneline[1000]; |
| int bppsize; |
| int onechar; |
| // double prob; |
| // int posi, posj; |
| |
| bppsize = 0; |
| // fprintf( stderr, "reading hat4\n" ); |
| onechar = getc(fp); |
| // fprintf( stderr, "onechar = %c\n", onechar ); |
| if( onechar != '>' ) |
| { |
| fprintf( stderr, "Format error\n" ); |
| exit( 1 ); |
| } |
| ungetc( onechar, fp ); |
| fgets( oneline, 999, fp ); |
| while( 1 ) |
| { |
| onechar = getc(fp); |
| ungetc( onechar, fp ); |
| if( onechar == '>' || onechar == EOF ) |
| { |
| // fprintf( stderr, "Next\n" ); |
| *bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); |
| (*bpp)[bppsize] = NULL; |
| break; |
| } |
| fgets( oneline, 999, fp ); |
| // fprintf( stderr, "oneline=%s\n", oneline ); |
| // sscanf( oneline, "%d %d %f", &posi, &posj, &prob ); |
| // fprintf( stderr, "%d %d -> %f\n", posi, posj, prob ); |
| *bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) ); |
| (*bpp)[bppsize] = calloc( 100, sizeof( char ) ); |
| strcpy( (*bpp)[bppsize], oneline ); |
| bppsize++; |
| } |
| } |
| |
| static void preparebpp( int nseq, char ***bpp ) |
| { |
| FILE *fp; |
| int i; |
| |
| fp = fopen( "hat4", "r" ); |
| if( !fp ) |
| { |
| fprintf( stderr, "Cannot open hat4\n" ); |
| exit( 1 ); |
| } |
| for( i=0; i<nseq; i++ ) |
| readhat4( fp, bpp+i ); |
| fclose( fp ); |
| } |
| |
| void arguments( int argc, char *argv[] ) |
| { |
| int c; |
| |
| foldalignopt[0] = 0; |
| laraparams = NULL; |
| inputfile = NULL; |
| fftkeika = 0; |
| pslocal = -1000.0; |
| constraint = 0; |
| nblosum = 62; |
| fmodel = 0; |
| calledByXced = 0; |
| devide = 0; |
| use_fft = 0; |
| fftscore = 1; |
| fftRepeatStop = 0; |
| fftNoAnchStop = 0; |
| weight = 3; |
| utree = 1; |
| tbutree = 1; |
| refine = 0; |
| check = 1; |
| cut = 0.0; |
| disp = 0; |
| outgap = 1; |
| alg = 'A'; |
| mix = 0; |
| tbitr = 0; |
| scmtd = 5; |
| tbweight = 0; |
| tbrweight = 3; |
| checkC = 0; |
| treemethod = 'x'; |
| contin = 0; |
| scoremtx = 1; |
| kobetsubunkatsu = 0; |
| divpairscore = 0; |
| out_align_instead_of_hat3 = 0; |
| dorp = NOTSPECIFIED; |
| ppenalty = NOTSPECIFIED; |
| ppenalty_OP = NOTSPECIFIED; |
| ppenalty_ex = NOTSPECIFIED; |
| ppenalty_EX = NOTSPECIFIED; |
| poffset = NOTSPECIFIED; |
| kimuraR = NOTSPECIFIED; |
| pamN = NOTSPECIFIED; |
| geta2 = GETA2; |
| fftWinSize = NOTSPECIFIED; |
| fftThreshold = NOTSPECIFIED; |
| RNAppenalty = NOTSPECIFIED; |
| RNApthr = NOTSPECIFIED; |
| |
| while( --argc > 0 && (*++argv)[0] == '-' ) |
| { |
| while ( ( c = *++argv[0] ) ) |
| { |
| switch( c ) |
| { |
| case 'i': |
| inputfile = *++argv; |
| fprintf( stderr, "inputfile = %s\n", inputfile ); |
| --argc; |
| goto nextoption; |
| case 'f': |
| ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); |
| --argc; |
| goto nextoption; |
| case 'g': |
| ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); |
| --argc; |
| goto nextoption; |
| case 'O': |
| ppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 ); |
| --argc; |
| goto nextoption; |
| case 'E': |
| ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 ); |
| --argc; |
| goto nextoption; |
| case 'h': |
| poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); |
| --argc; |
| goto nextoption; |
| case 'k': |
| kimuraR = atoi( *++argv ); |
| // fprintf( stderr, "kimuraR = %d\n", kimuraR ); |
| --argc; |
| goto nextoption; |
| case 'b': |
| nblosum = atoi( *++argv ); |
| scoremtx = 1; |
| // fprintf( stderr, "blosum %d\n", nblosum ); |
| --argc; |
| goto nextoption; |
| case 'j': |
| pamN = atoi( *++argv ); |
| scoremtx = 0; |
| TMorJTT = JTT; |
| fprintf( stderr, "jtt %d\n", pamN ); |
| --argc; |
| goto nextoption; |
| case 'm': |
| pamN = atoi( *++argv ); |
| scoremtx = 0; |
| TMorJTT = TM; |
| fprintf( stderr, "TM %d\n", pamN ); |
| --argc; |
| goto nextoption; |
| case 'l': |
| ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 ); |
| pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5); |
| // fprintf( stderr, "ppslocal = %d\n", ppslocal ); |
| // fprintf( stderr, "pslocal = %d\n", pslocal ); |
| --argc; |
| goto nextoption; |
| case 'd': |
| whereispairalign = *++argv; |
| fprintf( stderr, "whereispairalign = %s\n", whereispairalign ); |
| --argc; |
| goto nextoption; |
| case 'p': |
| laraparams = *++argv; |
| fprintf( stderr, "laraparams = %s\n", laraparams ); |
| --argc; |
| goto nextoption; |
| case 'n': |
| out_align_instead_of_hat3 = 1; |
| break; |
| #if 1 |
| case 'a': |
| fmodel = 1; |
| break; |
| #endif |
| case 'r': |
| fmodel = -1; |
| break; |
| case 'D': |
| dorp = 'd'; |
| break; |
| case 'P': |
| dorp = 'p'; |
| break; |
| case 'e': |
| fftscore = 0; |
| break; |
| #if 0 |
| case 'O': |
| fftNoAnchStop = 1; |
| break; |
| #endif |
| case 'Q': |
| calledByXced = 1; |
| break; |
| case 'x': |
| disp = 1; |
| break; |
| #if 0 |
| case 'a': |
| alg = 'a'; |
| break; |
| #endif |
| case 'S': |
| alg = 'S'; |
| break; |
| case 't': |
| alg = 't'; |
| break; |
| case 'L': |
| alg = 'L'; |
| break; |
| case 's': |
| alg = 's'; |
| break; |
| case 'B': |
| alg = 'B'; |
| break; |
| case 'T': |
| alg = 'T'; |
| break; |
| case 'H': |
| alg = 'H'; |
| break; |
| case 'M': |
| alg = 'M'; |
| break; |
| case 'R': |
| alg = 'R'; |
| break; |
| case 'N': |
| alg = 'N'; |
| break; |
| case 'K': |
| alg = 'K'; |
| break; |
| case 'A': |
| alg = 'A'; |
| break; |
| case 'V': |
| alg = 'V'; |
| break; |
| case 'C': |
| alg = 'C'; |
| break; |
| case 'F': |
| use_fft = 1; |
| break; |
| case 'v': |
| tbrweight = 3; |
| break; |
| case 'y': |
| divpairscore = 1; |
| break; |
| /* Modified 01/08/27, default: user tree */ |
| case 'J': |
| tbutree = 0; |
| break; |
| /* modification end. */ |
| case 'o': |
| // foldalignopt = *++argv; |
| strcat( foldalignopt, " " ); |
| strcat( foldalignopt, *++argv ); |
| fprintf( stderr, "foldalignopt = %s\n", foldalignopt ); |
| --argc; |
| goto nextoption; |
| case 'z': |
| fftThreshold = atoi( *++argv ); |
| --argc; |
| goto nextoption; |
| case 'w': |
| fftWinSize = atoi( *++argv ); |
| --argc; |
| goto nextoption; |
| case 'Z': |
| checkC = 1; |
| break; |
| default: |
| fprintf( stderr, "illegal option %c\n", c ); |
| argc = 0; |
| break; |
| } |
| } |
| nextoption: |
| ; |
| } |
| if( argc == 1 ) |
| { |
| cut = atof( (*argv) ); |
| argc--; |
| } |
| if( argc != 0 ) |
| { |
| fprintf( stderr, "options: Check source file !\n" ); |
| exit( 1 ); |
| } |
| if( tbitr == 1 && outgap == 0 ) |
| { |
| fprintf( stderr, "conflicting options : o, m or u\n" ); |
| exit( 1 ); |
| } |
| if( alg == 'C' && outgap == 0 ) |
| { |
| fprintf( stderr, "conflicting options : C, o\n" ); |
| exit( 1 ); |
| } |
| } |
| |
| int countamino( char *s, int end ) |
| { |
| int val = 0; |
| while( end-- ) |
| if( *s++ != '-' ) val++; |
| return( val ); |
| } |
| |
| static void pairalign( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *effarr, int alloclen ) |
| { |
| int i, j, ilim; |
| int clus1, clus2; |
| int off1, off2; |
| float pscore = 0.0; // by D.Mathog |
| static char *indication1, *indication2; |
| FILE *hat2p, *hat3p; |
| static double **distancemtx; |
| static double *effarr1 = NULL; |
| static double *effarr2 = NULL; |
| char *pt; |
| char *hat2file = "hat2"; |
| LocalHom **localhomtable, *tmpptr; |
| static char **pair; |
| int intdum; |
| double bunbo; |
| char ***bpp; // mxscarna no toki dake |
| |
| localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); |
| for( i=0; i<njob; i++) |
| { |
| localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) ); |
| for( j=0; j<njob; j++) |
| { |
| localhomtable[i][j].start1 = -1; |
| localhomtable[i][j].end1 = -1; |
| localhomtable[i][j].start2 = -1; |
| localhomtable[i][j].end2 = -1; |
| localhomtable[i][j].opt = -1.0; |
| localhomtable[i][j].next = NULL; |
| localhomtable[i][j].nokori = 0; |
| } |
| } |
| |
| if( effarr1 == NULL ) |
| { |
| distancemtx = AllocateDoubleMtx( njob, njob ); |
| effarr1 = AllocateDoubleVec( njob ); |
| effarr2 = AllocateDoubleVec( njob ); |
| indication1 = AllocateCharVec( 150 ); |
| indication2 = AllocateCharVec( 150 ); |
| #if 0 |
| #else |
| pair = AllocateCharMtx( njob, njob ); |
| #endif |
| } |
| |
| #if 0 |
| fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold ); |
| #endif |
| |
| #if 0 |
| for( i=0; i<njob; i++ ) |
| fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] ); |
| #endif |
| |
| |
| // writePre( njob, name, nlen, aseq, 0 ); |
| |
| for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) pair[i][j] = 0; |
| for( i=0; i<njob; i++ ) pair[i][i] = 1; |
| |
| if( alg == 'H' ) |
| { |
| fprintf( stderr, "Calling FOLDALIGN with option '%s'\n", foldalignopt ); |
| callfoldalign( njob, seq ); |
| fprintf( stderr, "done.\n" ); |
| } |
| if( alg == 'B' ) |
| { |
| fprintf( stderr, "Running LARA (Bauer et al. http://www.planet-lisa.net/)\n" ); |
| calllara( njob, seq, "" ); |
| fprintf( stderr, "done.\n" ); |
| } |
| if( alg == 'T' ) |
| { |
| fprintf( stderr, "Running SLARA (Bauer et al. http://www.planet-lisa.net/)\n" ); |
| calllara( njob, seq, "-s" ); |
| fprintf( stderr, "done.\n" ); |
| } |
| if( alg == 's' ) |
| { |
| fprintf( stderr, "Preparing bpp\n" ); |
| // bpp = AllocateCharCub( njob, nlenmax, 0 ); |
| bpp = calloc( njob, sizeof( char ** ) ); |
| preparebpp( njob, bpp ); |
| fprintf( stderr, "done.\n" ); |
| fprintf( stderr, "Running MXSCARNA (Tabei et al. http://www.ncrna.org/software/mxscarna)\n" ); |
| } |
| |
| ilim = njob - 1; |
| for( i=0; i<ilim; i++ ) |
| { |
| fprintf( stderr, "% 5d / %d\r", i, njob ); |
| for( j=i+1; j<njob; j++ ) |
| { |
| |
| if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 ) |
| { |
| distancemtx[i][j] = NODIST; |
| continue; |
| } |
| |
| strcpy( aseq[i], seq[i] ); |
| strcpy( aseq[j], seq[j] ); |
| clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 ); |
| clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 ); |
| // fprintf( stderr, "mseq1 = %s\n", mseq1[0] ); |
| // fprintf( stderr, "mseq2 = %s\n", mseq2[0] ); |
| |
| #if 0 |
| fprintf( stderr, "group1 = %.66s", indication1 ); |
| fprintf( stderr, "\n" ); |
| fprintf( stderr, "group2 = %.66s", indication2 ); |
| fprintf( stderr, "\n" ); |
| #endif |
| // for( l=0; l<clus1; l++ ) fprintf( stderr, "## STEP-eff for mseq1-%d %f\n", l, effarr1[l] ); |
| |
| #if 1 |
| if( use_fft ) |
| { |
| pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum ); |
| // fprintf( stderr, "pscore (fft) = %f\n", pscore ); |
| off1 = off2 = 0; |
| } |
| else |
| #endif |
| { |
| switch( alg ) |
| { |
| case( 'a' ): |
| pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen ); |
| off1 = off2 = 0; |
| break; |
| case( 't' ): |
| pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen ); |
| off1 = off2 = 0; |
| break; |
| case( 'A' ): |
| pscore = G__align11( mseq1, mseq2, alloclen ); |
| off1 = off2 = 0; |
| break; |
| #if 0 |
| case( 'V' ): |
| pscore = VAalign11( mseq1, mseq2, alloclen, &off1, &off2, localhomtable[i]+j ); |
| fprintf( stderr, "i,j = %d,%d, score = %f\n", i,j, pscore ); |
| break; |
| case( 'S' ): |
| fprintf( stderr, "aligning %d-%d\n", i, j ); |
| pscore = suboptalign11( mseq1, mseq2, alloclen, &off1, &off2, localhomtable[i]+j ); |
| fprintf( stderr, "i,j = %d,%d, score = %f\n", i,j, pscore ); |
| break; |
| #endif |
| case( 'N' ): |
| pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen ); |
| genL__align11( mseq1, mseq2, alloclen, &off1, &off2 ); |
| // fprintf( stderr, "pscore = %f\n", pscore ); |
| break; |
| case( 'K' ): |
| pscore = genG__align11( mseq1, mseq2, alloclen ); |
| off1 = off2 = 0; |
| break; |
| case( 'L' ): |
| pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen ); |
| L__align11( mseq1, mseq2, alloclen, &off1, &off2 ); |
| // fprintf( stderr, "pscore (1) = %f\n", pscore ); |
| // pscore = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // nennnotame |
| // fprintf( stderr, "pscore (2) = %f\n\n", pscore ); |
| break; |
| case( 'H' ): |
| pscore = recallpairfoldalign( mseq1, mseq2, i, j, &off1, &off2, alloclen ); |
| break; |
| case( 'B' ): |
| case( 'T' ): |
| pscore = recalllara( mseq1, mseq2, alloclen ); |
| off1 = off2 = 0; |
| // fprintf( stderr, "lara, pscore = %f\n", pscore ); |
| break; |
| case( 's' ): |
| pscore = callmxscarna_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen ); |
| // pscore = G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, alloclen ); |
| off1 = off2 = 0; |
| // fprintf( stderr, "scarna, pscore = %f\n", pscore ); |
| break; |
| case( 'M' ): |
| // pscore = MSalign11( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL ); |
| pscore = MSalign11( mseq1, mseq2, alloclen ); |
| // fprintf( stderr, "pscore (M)= %f\n", pscore ); |
| break; |
| ErrorExit( "ERROR IN SOURCE FILE" ); |
| } |
| } |
| if( alg == 't' || ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) // 't' no jouken ha iranai to omou. if( ( mseq1[0][0] != 0 && mseq2[0][0] != 0 ) ) |
| { |
| distancemtx[i][j] = pscore; |
| #if SCOREOUT |
| fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j ); |
| #endif |
| // fprintf( stderr, "pslocal = %d\n", pslocal ); |
| // offset = makelocal( *mseq1, *mseq2, pslocal ); |
| #if 0 |
| fprintf( stderr, "off1 = %d, off2 = %d\n", off1, off2 ); |
| #endif |
| |
| if( alg == 't' || out_align_instead_of_hat3 == 1 ) |
| ; |
| else if( alg == 'H' ) |
| putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); |
| else if( alg != 'S' && alg != 'V' ) |
| putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) ); |
| } |
| else |
| { |
| distancemtx[i][j] = NODIST; |
| } |
| |
| if( out_align_instead_of_hat3 && alg != 't' ) |
| { |
| fprintf( stdout, "sequence %d - sequence %d, pairwise score = %.0f\n", i+1, j+1, pscore ); |
| fprintf( stdout, ">%s\n", name[i] ); |
| write1seq( stdout, mseq1[0] ); |
| fprintf( stdout, ">%s\n", name[j] ); |
| write1seq( stdout, mseq2[0] ); |
| fprintf( stdout, "\n" ); |
| } |
| } |
| } |
| for( i=0; i<njob; i++ ) |
| { |
| pscore = 0.0; |
| for( pt=seq[i]; *pt; pt++ ) |
| pscore += amino_dis[(int)*pt][(int)*pt]; |
| distancemtx[i][i] = pscore; |
| |
| } |
| |
| ilim = njob-1; |
| for( i=0; i<ilim; i++ ) |
| { |
| for( j=i+1; j<njob; j++ ) |
| { |
| |
| #if 0 |
| fprintf( stdout, "distancemtx[i][j] = %f\n", distancemtx[i][j] ); |
| fprintf( stdout, "distancemtx[i][i] = %f\n", distancemtx[i][i] ); |
| fprintf( stdout, "distancemtx[j][j] = %f\n", distancemtx[j][j] ); |
| fprintf( stdout, "bunbo = %f\n", MIN( distancemtx[i][i], distancemtx[j][j] ) ); |
| #endif |
| if( distancemtx[i][j] == NODIST || (bunbo=MIN( distancemtx[i][i], distancemtx[j][j] )) == 0.0 || bunbo < distancemtx[i][j] ) |
| distancemtx[i][j] = 2.0; |
| else |
| distancemtx[i][j] = ( 1.0 - distancemtx[i][j] / bunbo ) * 2.0; |
| #if 0 |
| fprintf( stdout, "distancemtx[i][j] => %f\n", distancemtx[i][j] ); |
| #endif |
| } |
| } |
| |
| hat2p = fopen( hat2file, "w" ); |
| if( !hat2p ) ErrorExit( "Cannot open hat2." ); |
| if( out_align_instead_of_hat3 == 0 ) |
| WriteHat2( hat2p, njob, name, distancemtx ); |
| else |
| WriteHat2plain( hat2p, njob, distancemtx ); |
| fclose( hat2p ); |
| |
| if( out_align_instead_of_hat3 == 0 ) |
| { |
| fprintf( stderr, "##### writing hat3\n" ); |
| #ifndef LLVM |
| hat3p = fopen( "hat3", "w" ); |
| if( !hat3p ) ErrorExit( "Cannot open hat3." ); |
| #endif |
| ilim = njob-1; |
| for( i=0; i<ilim; i++ ) |
| { |
| for( j=i+1; j<njob; j++ ) |
| { |
| for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) |
| { |
| if( tmpptr->opt == -1.0 ) continue; |
| // tmptmptmptmptmp |
| // if( alg == 'B' || alg == 'T' ) |
| // fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, 1.0, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); |
| // else |
| #ifdef LLVM |
| fprintf( stdout, "%d %d %d %7.5f %d %d %d %d\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 ); |
| #else |
| fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); |
| #endif |
| } |
| } |
| } |
| #ifndef LLVM |
| fclose( hat3p ); |
| #endif |
| } |
| #if DEBUG |
| fprintf( stderr, "calling FreeLocalHomTable\n" ); |
| #endif |
| FreeLocalHomTable( localhomtable, njob ); |
| #if DEBUG |
| fprintf( stderr, "done. FreeLocalHomTable\n" ); |
| #endif |
| |
| if( alg == 's' ) |
| { |
| char **ptpt; |
| for( i=0; i<njob; i++ ) |
| { |
| ptpt = bpp[i]; |
| while( 1 ) |
| { |
| if( *ptpt ) free( *ptpt ); |
| else break; |
| ptpt++; |
| } |
| free( bpp[i] ); |
| } |
| free( bpp ); |
| } |
| } |
| |
| static void WriteOptions( FILE *fp ) |
| { |
| |
| if( dorp == 'd' ) fprintf( fp, "DNA\n" ); |
| else |
| { |
| if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN ); |
| else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum ); |
| else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" ); |
| } |
| fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); |
| if( use_fft ) fprintf( fp, "FFT on\n" ); |
| |
| fprintf( fp, "tree-base method\n" ); |
| if( tbrweight == 0 ) fprintf( fp, "unweighted\n" ); |
| else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" ); |
| if( tbitr || tbweight ) |
| { |
| fprintf( fp, "iterate at each step\n" ); |
| if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" ); |
| if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" ); |
| if( tbweight ) fprintf( fp, " weighted\n" ); |
| fprintf( fp, "\n" ); |
| } |
| |
| fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); |
| |
| if( alg == 'a' ) |
| fprintf( fp, "Algorithm A\n" ); |
| else if( alg == 'A' ) |
| fprintf( fp, "Algorithm A+\n" ); |
| else if( alg == 'S' ) |
| fprintf( fp, "Apgorithm S\n" ); |
| else if( alg == 'C' ) |
| fprintf( fp, "Apgorithm A+/C\n" ); |
| else |
| fprintf( fp, "Unknown algorithm\n" ); |
| |
| if( use_fft ) |
| { |
| fprintf( fp, "FFT on\n" ); |
| if( dorp == 'd' ) |
| fprintf( fp, "Basis : 4 nucleotides\n" ); |
| else |
| { |
| if( fftscore ) |
| fprintf( fp, "Basis : Polarity and Volume\n" ); |
| else |
| fprintf( fp, "Basis : 20 amino acids\n" ); |
| } |
| fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold ); |
| fprintf( fp, "window size of anchors = %dsites\n", fftWinSize ); |
| } |
| else |
| fprintf( fp, "FFT off\n" ); |
| fflush( fp ); |
| } |
| |
| |
| int main( int argc, char *argv[] ) |
| { |
| static int nlen[M]; |
| static char name[M][B], **seq; |
| static char **mseq1, **mseq2; |
| static char **aseq; |
| static char **bseq; |
| static double *eff; |
| int i; |
| FILE *infp; |
| char c; |
| int alloclen; |
| |
| arguments( argc, argv ); |
| |
| if( inputfile ) |
| { |
| infp = fopen( inputfile, "r" ); |
| if( !infp ) |
| { |
| fprintf( stderr, "Cannot open %s\n", inputfile ); |
| exit( 1 ); |
| } |
| } |
| else |
| infp = stdin; |
| |
| getnumlen( infp ); |
| rewind( infp ); |
| |
| if( njob < 2 ) |
| { |
| fprintf( stderr, "At least 2 sequences should be input!\n" |
| "Only %d sequence found.\n", njob ); |
| exit( 1 ); |
| } |
| if( njob > M ) |
| { |
| fprintf( stderr, "The number of sequences must be < %d\n", M ); |
| fprintf( stderr, "Please try the splittbfast program for such large data.\n" ); |
| exit( 1 ); |
| } |
| |
| seq = AllocateCharMtx( njob, nlenmax*9+1 ); |
| aseq = AllocateCharMtx( njob, nlenmax*9+1 ); |
| bseq = AllocateCharMtx( njob, nlenmax*9+1 ); |
| mseq1 = AllocateCharMtx( njob, 0 ); |
| mseq2 = AllocateCharMtx( njob, 0 ); |
| alloclen = nlenmax*9; |
| |
| eff = AllocateDoubleVec( njob ); |
| |
| #if 0 |
| Read( name, nlen, seq ); |
| #else |
| readData( infp, name, nlen, seq ); |
| #endif |
| fclose( infp ); |
| |
| constants( njob, seq ); |
| |
| #if 0 |
| fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset ); |
| #endif |
| |
| initSignalSM(); |
| |
| initFiles(); |
| |
| WriteOptions( trap_g ); |
| |
| c = seqcheck( seq ); |
| if( c ) |
| { |
| fprintf( stderr, "Illegal character %c\n", c ); |
| exit( 1 ); |
| } |
| |
| // writePre( njob, name, nlen, seq, 0 ); |
| |
| for( i=0; i<njob; i++ ) eff[i] = 1.0; |
| |
| |
| for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] ); |
| |
| pairalign( name, nlen, bseq, aseq, mseq1, mseq2, eff, alloclen ); |
| |
| fprintf( trap_g, "done.\n" ); |
| #if DEBUG |
| fprintf( stderr, "closing trap_g\n" ); |
| #endif |
| fclose( trap_g ); |
| |
| // writePre( njob, name, nlen, aseq, !contin ); |
| #if 0 |
| writeData( stdout, njob, name, nlen, aseq ); |
| #endif |
| #if IODEBUG |
| fprintf( stderr, "OSHIMAI\n" ); |
| #endif |
| SHOWVERSION; |
| return( 0 ); |
| } |