89 m_CommentInternalIndent(0),
91 m_NeedPeriod(need_period)
103 (
const string& comment,
107 m_CommentInternalIndent(0),
112 if (!
ctx.Config().IsFormatGBSeq() && !
ctx.Config().IsFormatINSDSeq()) {
124 m_CommentInternalIndent(0),
139 m_CommentInternalIndent(0),
156 m_CommentInternalIndent(0),
182 if( ends_with_ellipsis ) {
197 const string & next_comment_first_string = next_comment.
m_Comment.front();
198 bool next_comment_starts_with_empty_line =
false;
199 ITERATE(
string, next_com_line_it, next_comment_first_string ) {
200 const char ch = *next_com_line_it;
202 next_comment_starts_with_empty_line =
true;
209 if( ! next_comment_starts_with_empty_line ) {
216 string & last_str_of_comment =
m_Comment.back();
217 if( last_str_of_comment.empty() ) {
221 string::size_type pos = (last_str_of_comment.length() - 1);
222 if( last_str_of_comment[pos] ==
'\n' ) {
227 for( ; pos < last_str_of_comment.length(); --pos ) {
228 const char ch = last_str_of_comment[pos];
232 last_str_of_comment.erase(pos);
254 static const string kNsAreGaps =
"The strings of n's in this record represent " \
255 "gaps between contigs, and the length of each string corresponds " \
256 "to the length of the gap.";
277 seglist.push_back(
TAln(&aln) );
296 static const string tpa_string =
297 "THIRD PARTY DATABASE: This TPA record uses data from DDBJ/EMBL/GenBank ";
299 if ( !
ctx.IsTPA() ||
ctx.IsRefSeq() ) {
313 vector<string> histaccns;
324 if (other_id->
IsGi()) {
333 if (other_id->
IsGi()) {
344 if ( !tid.empty() ) {
348 if ( histaccns.empty() ) {
352 sort( histaccns.begin(), histaccns.end() );
353 histaccns.erase( unique( histaccns.begin(), histaccns.end() ), histaccns.end() );
356 text << tpa_string << ((histaccns.size() > 1) ?
"entries " :
"entry ");
358 size_t size = histaccns.size();
361 for (
size_t i = 0;
i <
size; ) {
362 text << histaccns[
i];
373 vector<string> accessions;
381 if( !(*ufi)->CanGetData() || !(*ufi)->GetData().IsStr() ||
382 !(*ufi)->CanGetLabel() ) {
388 string acc = (*ufi)->GetData().
GetStr();
389 if ( !acc.empty() ) {
395 if ( accessions.empty() ) {
400 text << tpa_string << ((accessions.size() > 1) ?
"entries " :
"entry ");
402 size_t size = accessions.size();
405 for (
size_t i = 0;
i <
size; ) {
406 text << accessions[
i];
424 const string *uvc =
nullptr, *bic =
nullptr, *smc =
nullptr;
426 if ( uo.
HasField(
"UniVecComment") ) {
432 if ( uo.
HasField(
"AdditionalComment") ) {
438 if ( uo.
HasField(
"SmartComment") && dump_mode ) {
448 text << pfx <<
"Vector Explanation: " << *uvc;
452 text << pfx <<
"Bankit Comment: " << *bic;
456 text << pfx <<
"Bankit Comment: " << *smc;
470 vector<string> assembly_pieces;
481 if ( !(*fit)->GetData().IsFields() ) {
495 (*fit)->GetData().GetFields())
504 if(
label ==
"accession" ) {
506 }
else if(
label ==
"name" ) {
510 if(
label ==
"gi" ) {
515 }
else if(
label ==
"from" ) {
517 }
else if(
label ==
"to" ) {
523 if ( ! accession.empty() ) {
538 ctx.Config().GetHTMLFormatter().FormatGeneralId(oss, accession);
543 if( from > 0 && to > 0 ) {
544 oss <<
" (range: " << from <<
"-" << to <<
")";
548 assembly_pieces.push_back( new_piece );
549 }
else if( ! name.empty() ) {
550 assembly_pieces.push_back( name );
555 if( ! assembly_pieces.empty() ) {
557 oss <<
" The reference sequence was derived from ";
559 size_t assembly_size = assembly_pieces.size();
560 for (
size_t ii = 0; ii < assembly_size; ++ii ) {
562 oss << ((ii < assembly_size - 1) ?
", " :
" and ");
564 oss << assembly_pieces[ii];
620 bool is_html =
ctx.Config().DoHTML();
635 if ( uo.
HasField(
"Collaborator") ) {
643 if ( uo.
HasField(
"GenomicSource") ) {
650 string identical_to_start;
651 string identical_to_end;
655 enum EIdenticalToPriority {
656 eIdenticalToPriority_Nothing = 1,
657 eIdenticalToPriority_Gi,
658 eIdenticalToPriority_Name,
659 eIdenticalToPriority_Accn
661 int identical_to_priority = eIdenticalToPriority_Nothing;
666 if ( !(*it)->GetData().IsFields() ) {
677 if (sub.
GetLabel().
GetStr() ==
"accession" && identical_to_priority <= eIdenticalToPriority_Accn ) {
679 identical_to_priority = eIdenticalToPriority_Accn;
681 if (sub.
GetLabel().
GetStr() ==
"name" && identical_to_priority <= eIdenticalToPriority_Name ) {
683 identical_to_priority = eIdenticalToPriority_Name;
685 if (sub.
GetLabel().
GetStr() ==
"gi" && identical_to_priority <= eIdenticalToPriority_Gi ) {
686 identical_to =
"gi:" +
688 identical_to_priority = eIdenticalToPriority_Gi;
700 oss << status_str <<
' '
705 oss <<
" This record is predicted by genome sequence analysis and is "
706 <<
"not yet supported by experimental evidence.";
710 if ( !build_num.empty() ) {
711 oss <<
" Features on this sequence have been produced for build "
712 << build_num <<
" of the NCBI's genome annotation"
717 oss <<
"documentation";
723 oss <<
" NCBI contigs are derived from assembled genomic sequence data.~"
725 <<
" Documentation of NCBI's Annotation Process ";
730 if (collaborator.empty()) {
731 oss <<
" This record has not yet been subject to final NCBI review.";
733 oss <<
" This record is based on preliminary "
734 "annotation provided by " << collaborator <<
'.';
738 oss <<
" This record has not been reviewed and the function is unknown.";
741 oss <<
" This record has undergone validation or preliminary review.";
744 oss <<
" This record has been curated by "
745 << (collaborator.empty() ?
"NCBI staff" : collaborator) <<
'.';
748 oss <<
" This record is predicted by automated computational analysis.";
751 oss <<
" This record is provided to represent a collection of "
752 <<
"whole genome shotgun sequences.";
755 oss <<
" This record is provided to represent a collection of "
756 <<
"transcriptome shotgun assembly sequences.";
764 !collaborator.empty() ) {
765 oss <<
" This record has been curated by " << collaborator <<
'.';
769 oss <<
" This record is derived from an annotated genomic sequence ("
773 if ( !identical_to.empty() ) {
774 oss <<
" The reference sequence is identical to ";
775 const bool add_link = (is_html && identical_to_priority != eIdenticalToPriority_Name);
777 ctx.Config().GetHTMLFormatter().FormatGeneralId(oss, identical_to);
783 if( ! identical_to_start.empty() && ! identical_to_end.empty() ) {
784 oss <<
" (range: " << identical_to_start <<
"-" <<
785 identical_to_end <<
")";
797 const static string kRefSeqGeneLink =
"<a href=\"https://www.ncbi.nlm.nih.gov/refseq/rsg/\">RefSeqGene</a>";
798 const static string kRefSeqGene =
"RefSeqGene";
802 desc_it; ++desc_it) {
807 if (
f &&
f->GetData().IsStr()) {
808 const string& status1 =
f->GetData().GetStr();
809 if (status1 ==
"Reference Standard") {
810 oss <<
"~This sequence is a reference standard in the "
811 << (is_html ? kRefSeqGeneLink : kRefSeqGene)
833 const static string kRefSeqCat =
"RefSeq Category";
836 result_oss << kRefSeqCat <<
": ";
838 if( pCategoryField &&
841 const string & sCategory = pCategoryField->
GetData().
GetStr();
842 result_oss << sCategory <<
'\n';
844 result_oss <<
"(?UNKNOWN?)" <<
'\n';
851 if( pDetailsField ) {
855 const static char * arrFieldNames[] = {
856 "CALC",
"CCA",
"CLI",
"COM",
"FGS",
"MOD",
"PHY",
"PRT",
"QfO",
"TYS",
"UPR"
860 const CTempString sFieldName( arrFieldNames[field_idx] );
862 field_name += sFieldName;
865 mapFieldNameToRef.
find(field_name);
866 if( find_iter == mapFieldNameToRef.
end() ) {
877 if( sFieldName.
length() < kRefSeqCat.length() ) {
879 (kRefSeqCat.length() - sFieldName.
length()),
' ');
882 result_oss << sFieldName <<
": "
883 << find_iter->second->GetData().GetStr() <<
'\n';
894 static const string default_str =
"?";
896 if (!
ctx.IsWGSMaster()) {
900 const string& wgsaccn =
ctx.GetWGSMasterAccn();
901 const string& wgsname =
ctx.GetWGSMasterName();
907 const string* taxname = &default_str;
916 const string*
first = &default_str, *
last = &default_str;
921 if (uo.
HasField(
"WGS_accession_first")) {
928 if (uo.
HasField(
"WGS_accession_last")) {
946 text <<
"The " << *taxname
947 <<
" whole genome shotgun (WGS) project has the project accession "
948 << wgsaccn <<
". This version of the project (" <<
version
949 <<
") has the accession number " << wgsname <<
",";
951 text <<
" and consists of sequences " << *
first <<
"-" << *
last <<
".";
953 text <<
" and consists of sequence " << *
first <<
".";
961 static const string default_str =
"?";
963 if (!
ctx.IsTSAMaster()) {
967 const string& tsaaccn =
ctx.GetTSAMasterAccn();
968 const string& tsaname =
ctx.GetTSAMasterName();
974 const string* taxname = &default_str;
983 const string*
first = &default_str, *
last = &default_str;
990 if (uo.
HasField(
"Accession_first")) {
996 }
else if (uo.
HasField(
"TSA_accession_first")) {
1003 if (uo.
HasField(
"Accession_last")) {
1009 }
else if (uo.
HasField(
"TSA_accession_last")) {
1023 text <<
"The " << *taxname
1024 <<
" transcriptome shotgun assembly (TSA) project has the project accession "
1025 << tsaaccn <<
". This version of the project (" <<
version
1026 <<
") has the accession number " << tsaname <<
",";
1028 text <<
" and consists of sequences " << *
first <<
"-" << *
last <<
".";
1030 text <<
" and consists of sequence " << *
first <<
".";
1038 static const string default_str =
"?";
1040 if (!
ctx.IsTLSMaster()) {
1044 const string& tlsaccn =
ctx.GetTLSMasterAccn();
1045 const string& tlsname =
ctx.GetTLSMasterName();
1051 const string* taxname = &default_str;
1060 const string*
first = &default_str, *
last = &default_str;
1066 if (uo.
HasField(
"TLS_accession_first")) {
1073 if (uo.
HasField(
"TLS_accession_last")) {
1087 text <<
"The " << *taxname
1088 <<
" targeted locus study (TLS) project has the project accession "
1089 << tlsaccn <<
". This version of the project (" <<
version
1090 <<
") has the accession number " << tlsname <<
",";
1092 text <<
" and consists of sequences " << *
first <<
"-" << *
last <<
".";
1094 text <<
" and consists of sequence " << *
first <<
".";
1104 bool is_prot =
ctx.IsProt();
1108 return "COMPLETENESS: full length";
1111 return "COMPLETENESS: not full length";
1114 return (is_prot ?
"COMPLETENESS: incomplete on the amino end" :
1115 "COMPLETENESS: incomplete on the 5' end");
1118 return (is_prot ?
"COMPLETENESS: incomplete on the carboxy end" :
1119 "COMPLETENESS: incomplete on the 3' end");
1122 return "COMPLETENESS: incomplete on both ends";
1125 return (is_prot ?
"COMPLETENESS: complete on the amino end" :
1126 "COMPLETENESS: complete on the 5' end");
1129 return (is_prot ?
"COMPLETENESS: complete on the carboxy end" :
1130 "COMPLETENESS: complete on the 3' end");
1133 return "COMPLETENESS: unknown";
1143 if (
ctx.IsDelta()) {
1149 text <<
"* NOTE: This is a partial genome representation.";
1151 text <<
" It currently~* consists of " << (summary.
num_gaps + 1) <<
" contigs. The true order of the pieces~"
1152 <<
"* is not known and their order in this sequence record is~"
1153 <<
"* arbitrary. Gaps between the contigs are represented as~"
1154 <<
"* runs of N, but the exact sizes of the gaps are unknown.";
1169 if (
ctx.IsDelta()) {
1179 text <<
"* NOTE: This record contains " << (summary.
num_gaps + 1) <<
" individual~"
1180 <<
"* sequencing reads that have not been assembled into~"
1181 <<
"* contigs. Runs of N are used to separate the reads~"
1182 <<
"* and the order in which they appear is completely~"
1183 <<
"* arbitrary. Low-pass sequence sampling is useful for~"
1184 <<
"* identifying clones that may be gene-rich and allows~"
1185 <<
"* overlap relationships among clones to be deduced.~"
1186 <<
"* However, it should not be assumed that this clone~"
1187 <<
"* will be sequenced to completion. In the event that~"
1188 <<
"* the record is updated, the accession number will~"
1189 <<
"* be preserved.";
1194 text <<
"* NOTE: This is a \"working draft\" sequence.";
1196 text <<
" It currently~"
1197 <<
"* consists of " << (summary.
num_gaps + 1) <<
" contigs. The true order of the pieces~"
1198 <<
"* is not known and their order in this sequence record is~"
1199 <<
"* arbitrary. Gaps between the contigs are represented as~"
1200 <<
"* runs of N, but the exact sizes of the gaps are unknown.";
1202 text <<
"~* This record will be updated with the finished sequence~"
1203 <<
"* as soon as it is available and the accession number will~"
1204 <<
"* be preserved."
1208 text <<
"* NOTE: This is a \"working draft\" sequence.";
1210 text <<
" It currently~* consists of " << (summary.
num_gaps + 1)
1211 <<
" contigs. Gaps between the contigs~"
1212 <<
"* are represented as runs of N. The order of the pieces~"
1213 <<
"* is believed to be correct as given, however the sizes~"
1214 <<
"* of the gaps between them are based on estimates that have~"
1215 <<
"* provided by the submitter.";
1217 text <<
"~* This sequence will be replaced~"
1218 <<
"* by the finished sequence as soon as it is available and~"
1219 <<
"* the accession number will be preserved."
1235 const bool bHtml =
ctx.Config().DoHTML();
1242 ctx.Config().GetHTMLFormatter().FormatModelEvidence(me_name, me);
1244 text <<
"MODEL " << *refseq <<
": " <<
"This record is predicted by "
1245 <<
"automated computational analysis. This record is derived from "
1246 <<
"a genomic sequence (" << me_name <<
")";
1250 text <<
" and transcript sequence";
1259 ctx.Config().GetHTMLFormatter().FormatTranscript(tr_name, *
str);
1260 text << prefix << tr_name;
1262 if (num_assm ==
count + 1) {
1271 if ( !me.
method.empty() ) {
1272 text <<
" annotated using gene prediction method: " << me.
method;
1276 text <<
", supported by ";
1278 text <<
"mRNA and EST ";
1279 }
else if ( me.
mrnaEv ) {
1288 const char *documentation_str = ( bHtml ?
1289 "<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/process/\">Documentation</a>" :
1292 text <<
".~Also see:~"
1293 <<
" " << documentation_str <<
" of NCBI's Annotation Process ";
1299 (
string& chromosome,
1300 string& assembly_date,
1301 string& ncbi_annotation,
1315 if (uo.
HasField(
"NcbiAnnotation")) {
1324 const string* name =
nullptr;
1329 name = &(*st)->GetName();
1344 assembly_date =
"?";
1347 ncbi_annotation =
"?";
1355 const static string kEncodeProjLink =
"https://www.nhgri.nih.gov/10005107";
1357 const bool bHtml =
ctx.Config().DoHTML();
1359 if (!
ctx.IsEncode()) {
1364 str <<
"REFSEQ: This record was provided by the ";
1366 str <<
"<a href=\"" << kEncodeProjLink <<
"\">";
1374 string chromosome, assembly_date, ncbi_annotation;
1376 str <<
" It is defined by coordinates on the sequence of chromosome "
1377 << chromosome <<
" from the " << assembly_date
1378 <<
" assembly of the human genome (NCBI build " << ncbi_annotation
1387 const bool bHtml =
ctx.Config().DoHTML();
1389 const string & sAuthorizedAccess =
ctx.GetAuthorizedAccess();
1390 if( sAuthorizedAccess.empty() ) {
1396 str <<
"These data are available through the dbGaP authorized access system. ";
1399 <<
"https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?adddataset="
1400 << sAuthorizedAccess <<
"&page=login\">";
1401 str <<
"Request access";
1403 str <<
" to Study ";
1405 <<
"https://www.ncbi.nlm.nih.gov/projects/gap/cgi-bin/study.cgi?study_id="
1406 << sAuthorizedAccess <<
"\">";
1407 str << sAuthorizedAccess;
1410 str <<
"Request access to Study ";
1411 str << sAuthorizedAccess;
1420 const bool bHtml =
ctx.Config().DoHTML();
1423 if( ! pOpticalMapPoints ||
1429 const string & sFiletrackURL =
ctx.GetFiletrackURL();
1431 const bool bIsCircular =
FIELD_EQUALS(
ctx.GetHandle(), Inst_Topology,
1441 _ASSERT( ! vecOfPoints.empty() );
1444 if( bHtml && ! sFiletrackURL.empty() ) {
1445 str <<
"<a href=\"" << sFiletrackURL <<
"\">";
1448 if( bHtml && ! sFiletrackURL.empty() ) {
1453 size_t uNumFrags = pOpticalMapPoints->
GetPoints().size();
1459 if (uNumFrags > 1 && vecOfPoints[uNumFrags-1] < uBioseqLength - 1) {
1464 str <<
" piece" << ( (uNumFrags > 1) ?
"s" :
"" ) <<
":";
1468 TSeqPos thisEndPos = vecOfPoints[0] + 1;
1471 if ( ! bIsCircular ) {
1473 str, prevEndPos, thisEndPos, uBioseqLength,
1476 prevEndPos = thisEndPos + 1;
1479 for(
size_t idx = 1; idx < vecOfPoints.size(); ++idx ) {
1480 thisEndPos = vecOfPoints[idx] + 1;
1482 str, prevEndPos, thisEndPos, uBioseqLength,
1484 prevEndPos = thisEndPos + 1;
1489 thisEndPos = ( bIsCircular ? vecOfPoints[0] + 1 : uBioseqLength );
1490 if ( bIsCircular || prevEndPos < uBioseqLength - 1 ) {
1492 str, prevEndPos, thisEndPos, uBioseqLength,
1503 const bool bHtml =
ctx.Config().DoHTML();
1505 const vector< string > & sBasemodURLs =
ctx.GetBasemodURLs();
1506 int numBases = (
int) sBasemodURLs.size();
1510 if ( numBases < 1 ) {
1514 if ( numBases == 1 ) {
1515 str <<
"This genome has a ";
1519 if ( ! url.empty() ) {
1521 str <<
"<a href=\"" << url <<
"\">" <<
"base modification file" <<
"</a>";
1525 str <<
"base modification file";
1527 str <<
" available.";
1529 str <<
"There are ";
1531 str <<
" base modification files";
1538 if ( ! url.empty() ) {
1541 str << pfx <<
"<a href=\"" << url <<
"\">" << j <<
"</a>";
1542 if ( numBases == 2 ) {
1544 }
else if ( j == numBases - 1 ) {
1554 str <<
" available for this genome.";
1562 if( !
ctx.IsRSUniqueProt() ) {
1571 str <<
"REFSEQ: This record represents a single, non-redundant, protein "
1572 <<
"sequence which may be annotated on many different RefSeq "
1573 <<
"genomes from the same, or different, species.";
1608 const char* provider,
const char* status,
bool has_name,
const char* organism,
1609 const char*
source,
const char* category,
const char* accession )
1617 result <<
"<a href=\"http://genomesonline.org/cgi-bin/GOLD/bin/GOLDCards.cgi?goldstamp=" << data_str
1618 <<
"\">" << data_str <<
"</a>";
1621 if ( label_str ==
"Annotation Software Version") {
1622 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/release_notes/#version"
1624 <<
"\">" << data_str <<
"</a>";
1629 if (
NStr::Find(data_str,
"Updated Annotation Release") !=
NPOS) {
1630 NStr::Replace( data_str,
" Updated Annotation Release ",
"/", fst );
1632 NStr::Replace( data_str,
" Annotation Release ",
"/", fst );
1636 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/"
1638 <<
"\">" << data_str <<
"</a>";
1640 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/"
1643 <<
"\">" << data_str <<
"</a>";
1649 NStr::Replace( data_str,
" Annotation Release ",
"/", fst );
1651 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_euk/"
1653 <<
"\">" << data_str <<
"</a>";
1656 string accn = data_str;
1661 result <<
"<a href=\"https://www.ebi.ac.uk/interpro/entry/pfam/"
1663 <<
"\">" << data_str <<
"</a>";
1668 NStr::Replace( data_str,
"Domain architecture ID ",
"", fst );
1670 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/Structure/sparcle/archview.html?archid="
1672 <<
"\">" << data_str <<
"</a>";
1674 }
else if (
NStr::Equal (label_str,
"Evidence Category") &&
1675 NStr::Equal (data_str,
"Antimicrobial Resistance Allele") &&
1676 NStr::Equal (
source,
"Bacterial Antimicrobial Resistance Reference Gene Database") ) {
1677 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/bioproject/"
1679 <<
"\">" << data_str <<
"</a>";
1681 }
else if (
NStr::Equal (label_str,
"Evidence Accession") &&
1682 NStr::Equal (
source,
"Bacterial Antimicrobial Resistance Reference Gene Database") ) {
1683 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/nuccore/"
1685 <<
"\">" << data_str <<
"</a>";
1688 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_prok/evidence/"
1690 <<
"\">" << data_str <<
"</a>";
1693 result <<
"<a href=\"https://www.ncbi.nlm.nih.gov/genome/annotation_prok/evidence/"
1695 <<
"\">" << data_str <<
"</a>";
1709 list<string> &out_lines,
1710 int &out_prefix_len,
1711 const bool is_first,
1712 const bool is_html )
1714 static const int kFieldLenThreshold = 45;
1717 const char* prefix =
"##Metadata-START##";
1718 const char* suffix =
"##Metadata-END##";
1719 const char* provider =
"";
1720 const char* status =
"";
1722 const char* category =
"";
1723 const char* organism =
"";
1725 bool has_name =
false;
1727 bool fieldOverThreshold =
false;
1731 string::size_type longest_label_len = 1;
1733 if( (*it_for_len)->GetLabel().IsStr() &&
1734 (*it_for_len)->GetData().IsStr() && ! (*it_for_len)->GetData().GetStr().empty() ) {
1735 const string &
label = (*it_for_len)->GetLabel().GetStr();
1737 if(
label ==
"StructuredCommentPrefix" ) {
1738 prefix = (*it_for_len)->GetData().GetStr().c_str();
1739 }
else if(
label ==
"StructuredCommentSuffix" ) {
1740 suffix = (*it_for_len)->GetData().GetStr().c_str();
1742 if (
label ==
"Annotation Provider" ) {
1743 provider = (*it_for_len)->GetData().GetStr().c_str();
1744 }
else if (
label ==
"Annotation Status" ) {
1745 status = (*it_for_len)->GetData().GetStr().c_str();
1746 }
else if (
label ==
"Annotation Name" ) {
1748 }
else if (
label ==
"URL Organism" ) {
1749 organism = (*it_for_len)->GetData().GetStr().c_str();
1750 }
else if (
NStr::EqualNocase(prefix,
"##Evidence-For-Name-Assignment-START##")) {
1751 if (
label ==
"Evidence Source" ) {
1752 source = (*it_for_len)->GetData().GetStr().c_str();
1754 if (
label ==
"Evidence Category" ) {
1755 category = (*it_for_len)->GetData().GetStr().c_str();
1757 if (
label ==
"Evidence Accession" ) {
1758 string accn = (*it_for_len)->GetData().GetStr();
1763 const string::size_type label_len =
label.length();
1764 if( (label_len > longest_label_len) && (label_len <= kFieldLenThreshold) ) {
1765 longest_label_len = label_len;
1767 if( label_len > kFieldLenThreshold ) {
1768 fieldOverThreshold =
true;
1773 out_prefix_len = (longest_label_len + 4);
1780 out_lines.push_back( prefix );
1781 out_lines.back().append(
"\n" );
1786 if( ! (*it)->GetLabel().IsStr() || (*it)->GetLabel().GetStr().empty() ) {
1791 if( ! (*it)->GetData().IsStr() || (*it)->GetData().GetStr().empty() ) {
1796 if( (*it)->GetLabel().GetStr() ==
"StructuredCommentPrefix" ||
1797 (*it)->GetLabel().GetStr() ==
"StructuredCommentSuffix" ||
1798 (*it)->GetLabel().GetStr() ==
"Annotation Freeze" ||
1799 (*it)->GetLabel().GetStr() ==
"URL Organism" ) {
1804 out_lines.push_back( (*it)->GetLabel().GetStr() );
1805 string &next_line = out_lines.back();
1811 if( ! fieldOverThreshold ) {
1812 next_line.resize(
max( next_line.size(), longest_label_len),
' ' );
1814 next_line.append(
" :: " );
1816 provider, status, has_name, organism,
source, category, accession.c_str() ) );
1817 next_line.append(
"\n" );
1822 out_lines.push_back( suffix );
1823 out_lines.back().append(
"\n" );
1831 string prefix,
str, suffix;
1832 switch ( desc.
Which() ) {
1851 if ( oid.
IsStr() ) {
1852 prefix =
"Map location: ";
1856 prefix =
"Map location: (Database ";
1866 prefix =
"Region: ";
1887 if(
type.IsStr() &&
type.GetStr() ==
"StructuredComment" ) {
1901 if (
str.empty() ||
str ==
".") {
1923 if(
type.IsStr() &&
type.GetStr() ==
"StructuredComment" ) {
1950 if (!
ctx.Config().IsFormatGBSeq() && !
ctx.Config().IsFormatINSDSeq()) {
1957 (
const string& prefix,
1959 const string& suffix,
1964 string comment = prefix;
1968 if (!
ctx.Config().IsFormatGBSeq() && !
ctx.Config().IsFormatINSDSeq()) {
1976 size_t pos = comment.find_last_not_of(
" \n\t\r.~");
1977 if (pos != comment.length() - 1) {
1978 size_t period = comment.find_last_of(
'.');
1979 bool add_period = period > pos;
2009 << setw(7) << (prevEndPos)
2011 << setw(7) << (thisEndPos)
2012 <<
": fragment of ";
2014 bool bLengthIsOkay =
true;
2016 (thisEndPos <= prevEndPos) )
2018 bLengthIsOkay =
false;
2020 (thisEndPos >= prevEndPos) )
2022 bLengthIsOkay =
false;
2025 if( ! bLengthIsOkay ) {
2026 str <<
"(ERROR: CANNOT CALCULATE LENGTH)";
2027 }
else if( (thisEndPos > uBioseqLength) ||
2028 (prevEndPos > uBioseqLength) )
2030 str <<
"(ERROR: FRAGMENT IS OUTSIDE BIOSEQ BOUNDS)";
2033 str << (thisEndPos - prevEndPos + 1);
2035 str << (uBioseqLength + thisEndPos - prevEndPos + 1);
2038 str <<
" bp in length";
2049 const string& build_num) :
2060 if ( uo.
HasField(
"NcbiAnnotation") ) {
2068 if ( uo.
HasField(
"NcbiVersion") ) {
2072 build_num +=
" version ";
2078 }
else if ( uo.
HasField(
"Annotation") ) {
2082 static const string prefix =
"NCBI build ";
2108 const bool bHtml =
ctx.Config().DoHTML();
2114 text <<
"GENOME ANNOTATION " << *refseq <<
": ";
2116 text <<
"Features on this sequence have been produced for build "
2122 text <<
"documentation";
2128 text <<
"NCBI contigs are derived from assembled genomic sequence data."
2130 <<
" Documentation of NCBI's Annotation Process ";
2135 desc_it; ++desc_it) {
2167 (
const string& prefix,
2168 const string& suffix,
2178 hist.
GetDate().
GetDate(&date,
"%{%3N%|???%} %{%D%|??%}, %{%4Y%|????%}");
2183 if ( (*id)->IsGi() ) {
2184 gis.push_back((*id)->GetGi());
2190 text << prefix << ((gis.size() > 1) ?
" or before " :
" ") << date
2193 if ( gis.empty() ) {
2215 text <<
'.' <<
'\n';
2226 if(
ctx.IsWGSMaster() ||
ctx.IsTSAMaster() ) {
2229 "this project was updated. The new version is",
2235 "this sequence was replaced by",
2243 "this sequence version replaced",
2288 if (! desc.
IsUser())
continue;
2292 if (! oi.
IsStr())
continue;
2317 if ( orig_id.length() < 1000 ) {
2318 msg <<
"LocalID: " << orig_id;
2320 msg <<
"LocalID string too large";
2331 msg <<
"LocalID string too large";
2363 msg <<
"FileID string too large";
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
CBioseqContext * GetContext(void)
void x_SetObject(const CSerialObject &obj)
const CSerialObject * GetObject(void) const
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
namespace ncbi::objects::
Base class for all serializable objects.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
@ fFieldMapFlags_ExcludeThis
= 0x1 (excludes this CUser_field's name and mapping to self from results)
void GetFieldsMap(CUser_field::TMapFieldNameToRef &out_mapFieldNameToRef, TFieldMapFlags fFieldMapFlags=0, const SFieldNameChain &parent_name=SFieldNameChain()) const
Recursively get the map of field names like the input for GetFieldRef to the user-field.
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
Utility macros and typedefs for exploring NCBI objects from general.asn.
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
#define ITERATE_0_IDX(idx, up_to)
idx loops from 0 (inclusive) to up_to (exclusive)
unsigned int TSeqPos
Type for sequence locations and lengths.
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CConstRef< CSeq_id > GetSeqId(void) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
string GetAccessionForGi(TGi gi, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession for a given GI.
@ eWithAccessionVersion
accession.version (when possible)
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
const TInst_Hist & GetInst_Hist(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsSetInst_Hist(void) const
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
size_type length(void) const
Return the length of the represented array.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
list< CRef< CSubSource > > TSubtype
const TOrg & GetOrg(void) const
Get the Org member data.
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
bool CanGetType(void) const
Check if it is safe to call GetType method.
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsId(void) const
Check if variant Id is selected.
const TData & GetData(void) const
Get the Data member data.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
bool CanGetData(void) const
Check if it is safe to call GetData method.
bool CanGetTag(void) const
Check if it is safe to call GetTag method.
bool IsSetTag(void) const
appropriate tag Check if a value has been assigned to Tag data member.
const TFields & GetFields(void) const
Get the variant data.
const TDb & GetDb(void) const
Get the Db member data.
vector< CRef< CUser_field > > TFields
E_Choice Which(void) const
Which variant is currently selected.
bool IsFields(void) const
Check if variant Fields is selected.
bool IsInt(void) const
Check if variant Int is selected.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
TInt GetInt(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
bool CanGetLabel(void) const
Check if it is safe to call GetLabel method.
vector< CRef< CUser_field > > TData
TId GetId(void) const
Get the variant data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
bool IsDisc(void) const
Check if variant Disc is selected.
const TDisc & GetDisc(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
const TData & GetData(void) const
Get the Data member data.
const TComment & GetComment(void) const
Get the Comment member data.
bool IsComment(void) const
Check if variant Comment is selected.
bool CanGetComment(void) const
Check if it is safe to call GetComment method.
bool IsGeneral(void) const
Check if variant General is selected.
vector< TSeqPos > TPoints
const TPoints & GetPoints(void) const
Get the Points member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
const TUser & GetUser(void) const
Get the variant data.
bool IsSetAssembly(void) const
How was this assembled? Check if a value has been assigned to Assembly data member.
const TMaploc & GetMaploc(void) const
Get the variant data.
const TAssembly & GetAssembly(void) const
Get the Assembly member data.
list< CRef< CSeq_id > > TIds
const TIds & GetIds(void) const
Get the Ids member data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
bool CanGetCompleteness(void) const
Check if it is safe to call GetCompleteness method.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
const TReplaces & GetReplaces(void) const
Get the Replaces member data.
const TDate & GetDate(void) const
Get the Date member data.
const TReplaced_by & GetReplaced_by(void) const
Get the Replaced_by member data.
const TComment & GetComment(void) const
Get the variant data.
const TName & GetName(void) const
Get the variant data.
const TRegion & GetRegion(void) const
Get the variant data.
bool IsUser(void) const
Check if variant User is selected.
@ eCompleteness_has_left
5' or NH3 end present
@ eCompleteness_complete
complete biological entity
@ eCompleteness_has_right
3' or COOH end present
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_htgs_0
single genomic reads for coordination
@ e_User
user defined object
@ e_Comment
a more extensive comment
@ e_Region
overall region (globin locus)
@ e_Maploc
map location of this sequence
@ e_Name
a name for this sequence
@ e_Source
source of materials, includes Org-ref
unsigned int
A callback function used to compare two keys in a database.
static void text(MDB_val *v)
constexpr auto sort(_Init &&init)
const string version
version string
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
bool TrimSpacesAndJunkFromEnds(string &str, bool allow_ellipsis=false)
bool IsValidAccession(const string &accn, EAccValFlag flag=eValidateAcc)
void ExpandTildes(string &s, ETildeStyle style)
void GetDeltaSeqSummary(const CBioseq_Handle &seq, SDeltaSeqSummary &summary)
void AddPeriod(string &str)
void NcbiId(CNcbiOstream &os, const T &id, bool html=false)
const string & GetTechString(int tech)
void ConvertQuotes(string &str)
Utility macros and typedefs for exploring NCBI objects from seq.asn.
#define FOR_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQDESC_ON_BIOSEQ / EDIT_EACH_SEQDESC_ON_BIOSEQ.
Generic utility macros and templates for exploring NCBI objects.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define FOR_EACH_STRING_IN_LIST(Itr, Var)
FOR_EACH_STRING_IN_LIST / EDIT_EACH_STRING_IN_LIST.
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR / EDIT_EACH_STRING_IN_VECTOR.
#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)
RAW_FIELD_IS_EMPTY_OR_UNSET macro.
#define GET_FIELD_OR_DEFAULT(Var, Fld, Dflt)
GET_FIELD_OR_DEFAULT base macro.
#define FIELD_EQUALS(Var, Fld, Value)
FIELD_EQUALS base macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
For functions that don't use delims, we instead use a chain of names.