From bd53d2af21b21deb20cfb033d4f3e991f5c61a91 Mon Sep 17 00:00:00 2001 From: William R Pearson Date: Mon, 14 Nov 2022 16:42:47 -0500 Subject: [PATCH] add new -m8Cr option for raw scores, update documentation --- doc/README_v36.3.8i.md | 2 ++ doc/fasta_guide.tex | 6 +++++- doc/readme.v36 | 8 +++++++- src/defs.h | 1 + src/doinit.c | 3 +++ src/mshowbest.c | 33 +++++++++++++++++++++++++++++---- 6 files changed, 47 insertions(+), 6 deletions(-) diff --git a/doc/README_v36.3.8i.md b/doc/README_v36.3.8i.md index b38fdc2..2ba51c4 100644 --- a/doc/README_v36.3.8i.md +++ b/doc/README_v36.3.8i.md @@ -79,6 +79,8 @@ Changes in **fasta-36.3.8i** Nov, 2022 4. changes to annotation scripts for Pfam shutdown; new ann_pfam_www.py, ann_pfam_sql.py +5. a new option, `r` for `-m 8CB` that displays the raw optimal alignment score (typically Smith-Waterman). + Changes in **fasta-36.3.8i** Sept, 2021 1. Enable translation table -t 9 for Echinoderms. This bug has existed diff --git a/doc/fasta_guide.tex b/doc/fasta_guide.tex index e5ad091..255d251 100644 --- a/doc/fasta_guide.tex +++ b/doc/fasta_guide.tex @@ -906,11 +906,15 @@ \subsubsection{Command line options} \texttt{-m 8CBd} adds a new field with the raw domain coordinate information for the query and subject, with \texttt{DX/XD} used to indicate the domains. \texttt{-m 8CBL} provides both query/subject lengths and raw domain information. + + \begin{verbatim} |DX:3-82;C=C.Thio|DX:105-201;C=C.GST_C|XD:3-82;C=C.Thio|XD:105-201;C=C.GST_C \end{verbatim} - These \texttt{-m 8CB} options, \texttt{'l/L'}, \texttt{'s'}, \texttt{'d'} can be combined in any order. + \texttt{-m 8CBr} (v36.3.8i release, 14-Nov-2022) adds a new field with the raw optimal alignment score (typically Smith-Waterman for proteins). + + These \texttt{-m 8CB} options, \texttt{'l/L'}, \texttt{'s'}, \texttt{'d'}, and \texttt{'r'} can be combined in any order. \item[\texttt{-m 9}] display alignment coordinates and scores with the best score information. 
\texttt{-m 9i} provides alignment length, diff --git a/doc/readme.v36 b/doc/readme.v36 index d4b4ea1..f3059d8 100644 --- a/doc/readme.v36 +++ b/doc/readme.v36 @@ -16,6 +16,12 @@ because clan information is not yet available (or I do not know how to get it). In addition, ann_pfam_sql.py is available, which largely replaces ann_pfam_sql.pl. +>>Nov 14, 2022 +[defs.h, doinit.c, mshowbest.c] + +Add option 'r' to -m8CB to display raw optimal score in BLAST tabular +output, e.g. "-m8Cr" or "-m8CBlr". + >>Nov 7, 2022 [doinit.c/initenv()] Check string size for query and library file names, and abort if they @@ -82,7 +88,7 @@ Format (PEFF) files (http://www.psidev.info/peff). >>June 2, 2020 [src/scaleswn.c] -Modifiy line describing query in -R .res file. +Modify line describing query in -R .res file. Add line to report ave_n1, sample_fract, zs_off in -R .res file. Add line to report properties of library to -R .res file. diff --git a/src/defs.h b/src/defs.h index a65ac6b..e191a03 100644 --- a/src/defs.h +++ b/src/defs.h @@ -158,6 +158,7 @@ #define MX_RES_ALIGN_SCORE (1<<20) /* show residue alignment score, not alignment */ #define MX_M8_BTAB_LEN (1<<21) /* show query/subject seq. lens in -m 8 output */ #define MX_M8_BTAB_SIM (1<<22) /* show similarity + identity in -m 8 output */ +#define MX_M8_BTAB_RAW (1<<23) /* show raw score */ /* codes for -m 9, -m 8C? 
*/ #define SHOW_CODE_ID 1 /* identity only */ diff --git a/src/doinit.c b/src/doinit.c index 9988208..cd714c2 100644 --- a/src/doinit.c +++ b/src/doinit.c @@ -938,6 +938,9 @@ parse_markx(char *optarg, struct markx_str *this) { if (strchr(stmp,'s')) { this->markx |= MX_M8_BTAB_SIM; } + if (strchr(stmp,'r')) { + this->markx |= MX_M8_BTAB_RAW; + } if (strchr(stmp,'d')) { this->show_code |= SHOW_CODE_DOMINFO; } diff --git a/src/mshowbest.c b/src/mshowbest.c index 3a8656c..b012fa9 100644 --- a/src/mshowbest.c +++ b/src/mshowbest.c @@ -250,18 +250,39 @@ void showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1save, int maxn, /* line below copied from BLAST+ output */ if (m_msp->markx & MX_M8_BTAB_LEN) { /* yes qslen */ if (m_msp->markx & MX_M8_BTAB_SIM) { /* yes similarity */ - fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, %% similar, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score"); + + if (m_msp->markx & MX_M8_BTAB_RAW) { + fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, %% similar, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, raw score"); + } + else { + fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, %% similar, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score"); + } } else { /* no similarity */ - fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score"); + if (m_msp->markx & MX_M8_BTAB_RAW) { + fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score, raw score"); + } + else { + fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score"); + } } } else { /* no qslen */ if (m_msp->markx & MX_M8_BTAB_SIM) { /* yes similarity */ - fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, %% simlar, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score"); + if (m_msp->markx & MX_M8_BTAB_RAW) { + fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, %% simlar, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, raw score"); + } + else { + fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, %% simlar, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score"); + } } else { /* no similarity */ - fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score"); + if (m_msp->markx & MX_M8_BTAB_RAW) { + fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score, raw score"); + } + else { + fprintf(fp,"# Fields: query id, query length, subject id, subject length, %% identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. 
end, evalue, bit score"); + } } } @@ -635,6 +656,10 @@ void showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1save, int maxn, zs_to_E(lzscore,n1,ppst->dnaseq,ppst->zdb_size,m_msp->db), lbits); + if (m_msp->markx & MX_M8_BTAB_RAW) { + fprintf(fp,"\t%d",cur_ares_p->sw_score); + } + if (ppst->zsflag > 20) { fprintf(fp,"\t%.2g",zs_to_E(lzscore2, n1, ppst->dnaseq, ppst->zdb_size, m_msp->db)); }