52 :
CAppJob(
"Align Tab Export"), m_Params(params)
59 vector<CSeq_align::TDim> anchors;
74 anchors.push_back(
row);
78 if (anchors.empty()) {
80 for (
size_t level = 0; level <= 5 && anchors.empty(); ++level) {
89 anchors.push_back(
row);
96 if (anchors.empty()) {
98 "Can find the anchor sequence in the alignment!");
125 defline =
s_DefGen.GenerateDefline(bsh);
131 string out_label =
label;
133 if (out_label.length() > 20 && out_label.find(
"\\panfs\\") != string::npos) {
134 size_t pos = out_label.find_last_of(
"\\");
135 if (pos != string::npos) {
136 out_label = out_label.substr(pos + 1);
137 if (out_label.length() > 20) {
138 pos = out_label.rfind(
'.');
139 if (pos != string::npos && pos > 0) {
140 pos = out_label.rfind(
'.', pos - 1);
141 if (pos != string::npos) {
142 out_label = out_label.substr(pos + 1);
153 string out_label =
label;
155 size_t pos = out_label.find(
"\\panfs\\");
156 if (pos != string::npos) {
157 pos = out_label.find(
"\\", pos + 7);
158 if (pos != string::npos) {
159 out_label = out_label.substr(pos);
173 vector<bool> strands;
185 if ( !curr_text.empty() ) {
195 ids.push_back(
label);
205 if (num_rows > ids.size()) {
206 curr_text +=
" x ... [total ";
211 fieldData[
"Alignment"] = curr_text;
213 if (num_rows > ids.size()) {
217 bool is_protein =
false;
218 if (ids.size() == 2 && num_rows == 2 && anchorRow >= 0 && anchorRow < 2) {
220 curr_text = ids[anchorRow] +
" (";
225 fieldData[
"Anchor"] = curr_text;
234 fieldData[
"Query"] = curr_text;
246 if (!is_protein && num_rows == 2 && strands.size() == 2) {
247 fieldData[
"Strand"] = (strands[0] == strands[1]) ?
"forward" :
"reverse";
257 tag_name =
"Aligned ";
258 tag_name += is_protein ?
"residues" :
"bases";
266 double coverage = -1.0;
274 if (coverage >= 0.0) {
275 if (coverage < 100.0 && coverage > 99.9) {
278 sprintf(
buf,
"%2.1f", coverage);
282 fieldData[
"Coverage"] = curr_text;
285 if (align_length < 1000000) {
299 if (identities >= 0) {
300 double identity = 0.0;
302 identity = identities * 100.0 / align_length;
304 if (identity < 100.0 && identity > 99.9) {
307 sprintf(
buf,
"%2.1f", identity);
311 fieldData[
"Identity"] = curr_text;
313 if (mismatches >= 0) {
353 ITERATE(vector<string>, it, fields)
357 for (
CAlign_CI align_iter(handle,
range, sel); align_iter; ++align_iter) {
365 ITERATE(vector<string>, it, fields)
366 exporter.
Field(fieldData[*it]);
371 err_msg =
"Failed to save file:\n";
375 if (err_msg.empty()) {
User-defined methods of the data storage class.
static CFastMutex s_DFLock
USING_SCOPE(ncbi::objects)
static sequence::CDeflineGenerator s_DefGen
This class seems to be non-thread safe even as a stack var.
static CSeq_align::TDim sFindAnchorRow(CBioseq_Handle handle, const CSeq_align &align)
static string s_ShortenIdLabel(const string &label)
static void s_GetDefline(CBioseq_Handle bsh, string &defline)
static size_t s_CountSegments(const CSeq_align &align)
static string s_RemovePanfsBasePath(const string &label)
static map< string, string > sGenerateFields(CBioseq_Handle handle, const CSeq_align &align, CSeq_align::TDim anchorRow)
CAlignTabExportJob(const CAlignTabExportParams &params)
CAlignTabExportParams m_Params
virtual EJobState Run()
Function that does all the useful work, called by the Engine.
const SConstScopedObject & GetObject() const
wxString GetFileName() const
string GetAlignmentName() const
vector< string > GetFields() const
CAppJobError Default implementation for IAppJobError - encapsulates a text error message.
CAppJob - default implementation of IAppJob that could be used as a base class.
void Field(const string &value)
TSeqPos GetAlignLength(const CSeq_align &align, bool ungapped=false)
Compute the length of the alignment (= length of all segments, gaps + aligned)
int GetGapCount(const CSeq_align &align)
Compute the number of gaps in the alignment.
double GetPercentCoverage(CScope &scope, const CSeq_align &align, unsigned query=0)
Compute percent coverage of the query (sequence 0) (range 0-100)
int GetMismatchCount(CScope &scope, const CSeq_align &align)
Compute the number of mismatches in the alignment.
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
TDim CheckNumRows(void) const
Validators.
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
void Info(CExceptionArgs_Base &args)
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
static void SetAnnot(objects::SAnnotSelector &sel, const string &annot)
help function for setting up an annotation.
static void SetResolveDepth(objects::SAnnotSelector &sel, bool adaptive, int depth=-1)
help function for setting selector resolve depth.
CRef< CAppJobError > m_Error
CRef< objects::CScope > scope
string m_Descr
mutex to sync our internals
virtual bool IsCanceled() const override
EJobState
Job states (describe FSM)
CConstRef< CObject > object
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
CConstRef< CSeq_id > GetSeqIdOrNull(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
@ eContent
Untagged human-readable accession or the like.
TRange GetTotalRange(void) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function)
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
bool IsProtein(void) const
bool ContainsSegment(const CSeq_id &id, size_t resolve_depth=kMax_Int, EFindSegment limit_flag=eFindSegment_NoLimit) const
Check if the seq-id describes a segment of the bioseq.
CScope & GetScope(void) const
Get scope this handle belongs to.
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
TObjectType & GetObject(void)
Get object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
@ fWithCommas
Use commas as thousands separator.
static const char label[]
E_Choice Which(void) const
Which variant is currently selected.
const TSpliced & GetSpliced(void) const
Get the variant data.
const TExons & GetExons(void) const
Get the Exons member data.
const TDisc & GetDisc(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
range(_Ty, _Ty) -> range< _Ty >
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
#define row(bind, expected)