132 using namespace validator;
133 using namespace unit_test_util;
137 : m_Accession(accession), m_Severity(severity), m_ErrCode(err_code), m_ErrMsg(err_msg)
// Work on a local copy of the reported message so it can be trimmed.
155 string msg = err_item.
GetMsg();
// Look for the marker that introduces appended exception text.
156 size_t pos =
NStr::Find(msg,
" EXCEPTION: NCBI C++ Exception:");
// If the marker is present, keep only the text in front of it.
157 if (pos != string::npos) {
158 msg = msg.substr(0, pos);
// Work on a local copy of the reported message so it can be trimmed.
178 string msg = err_item.
GetMsg();
// Look for the marker that introduces appended exception text.
179 size_t pos =
NStr::Find(msg,
" EXCEPTION: NCBI C++ Exception:");
// If the marker is present, keep only the text in front of it.
180 if (pos != string::npos) {
181 msg = msg.substr(0, pos);
189 string description = err_item.
GetAccnver() +
":"
193 printf(
"%s\n", description.c_str());
203 printf(
"%s\n", description.c_str());
225 vector<CExpectedError*>& expected_errors)
// Set to true at each point below where a mismatch is recorded.
233 bool problem_found =
false;
// expected_found[i] tracks whether expected_errors[i] has been accounted for.
240 vector<bool> expected_found;
241 for (
size_t i = 0;
i < expected_errors.size();
i++) {
// A non-null expected error starts out as "not yet found".
242 if (expected_errors[
i]) {
243 expected_found.push_back(
false);
// Presumably the else branch for null entries (the line in between is not
// visible here): such slots are pre-marked as found so later loops skip them.
245 expected_found.push_back(
true);
// Pass over the expected errors using Match(*vit): a still-unmatched entry
// that accepts the item is marked as found.
// NOTE(review): vit is declared on a line not visible here; presumably it
// iterates over the errors reported by the validator - confirm.
251 for (
size_t i = 0;
i < expected_errors.size();
i++) {
252 if (!expected_found[
i] && expected_errors[
i]->Match(*vit)) {
253 expected_found[
i] =
true;
// Second pass, this time calling Match(*vit, true).
// NOTE(review): the meaning of the second argument is not visible here;
// presumably it relaxes the comparison - confirm against Match.
259 for (
size_t i = 0;
i < expected_errors.size();
i++) {
260 if (!expected_found[
i] && expected_errors[
i]->Match(*vit,
true)) {
// Print a prefix, then let Test(*vit) run on the entry that matched.
261 printf(
"Problem with ");
263 expected_errors[
i]->Test(*vit);
264 expected_found[
i] =
true;
266 problem_found =
true;
// The two literals differ, so this check fails whenever it is reached; the
// condition guarding it is on a line not visible here.
272 BOOST_CHECK_EQUAL(
"Unexpected error",
"Error not found");
274 problem_found =
true;
// Final sweep: every expected error that was never marked as found is
// reported by comparing its message with a fixed literal.
278 for (
size_t i = 0;
i < expected_errors.size();
i++) {
279 if (!expected_found[
i]) {
280 BOOST_CHECK_EQUAL(expected_errors[
i]->GetErrMsg(),
"Expected error not found");
281 problem_found =
true;
// Print a heading, then iterate over the expected errors (the loop body is
// not visible here).
288 printf(
"Expected:\n");
289 for (
auto it : expected_errors) {
// Start at the first element of seen; it2 is declared on a line not visible
// here and is compared against expected.end() below.
300 auto it1 = seen.begin();
// While both sequences still have elements, check the current pair for equality.
303 while (it1 != seen.end() && it2 !=
expected.end()) {
304 BOOST_CHECK_EQUAL(*it1, *it2);
// Elements left over in seen are each compared against a fixed literal.
311 while (it1 != seen.end()) {
312 BOOST_CHECK_EQUAL(*it1,
"Unexpected string");
// Compares a fixed literal against *it2; presumably applied to leftover
// expected entries - the enclosing loop is not visible here.
317 BOOST_CHECK_EQUAL(
"Missing string", *it2);
324 auto it1 = seen.begin();
325 while (it1 != seen.end()) {
326 printf(
"%s\n", (*it1).c_str());
329 printf(
"Expected:\n");
332 printf(
"%s\n", (*it2).c_str());
343 static void SetCountryOnSrc(
CBioSource& src,
string country)
// Assign the same accession to every entry of expected_errors.
// NOTE(review): entries are dereferenced without a null check, while other
// code in this file stores nullptr in vectors of this kind after deleting an
// entry - confirm callers never pass a vector containing null slots.
384 size_t i,
len = expected_errors.size();
385 for (
i = 0;
i <
len;
i++) {
386 expected_errors[
i]->SetAccession(accession);
396 "debug_mode",
"Debugging mode writes errors seen for each test");
404 if (args[
"debug_mode"]) {
413 "ChromosomeWithoutLocation",
414 "INDEXER_ONLY - source contains chromosome value '1' but the BioSource location is not set to chromosome"));
419 if (entry->
IsSeq()) {
422 }
else if (entry->
IsSet()) {
470 "Structured Comment is non-compliant, keyword should be removed"));
472 "Required field finishing_strategy is missing when investigation_type has value 'eukaryote'"));
474 "Structured Comment invalid; the field value and/or name are incorrect"));
476 eval = validator.Validate(seh, options);
481 delete expected_errors[0];
482 expected_errors[0] =
nullptr;
483 eval = validator.Validate(seh, options);
491 eval = validator.Validate(seh, options);
498 eval = validator.Validate(seh, options);
523 "Longitude should be set to W (western hemisphere)"));
524 eval = validator.Validate(seh, options);
531 expected_errors[0]->SetErrMsg(
"Latitude should be set to S (southern hemisphere)");
532 eval = validator.Validate(seh, options);
550 "Latitude and longitude values appear to be exchanged"));
551 eval = validator.Validate(seh, options);
// Runs the validator once for a country / lat_lon combination and inspects
// the resulting report. Several lines of the body are not visible here, so
// only what the visible lines show is described.
//
// country, lat_lon - strings supplied by the caller; how they are applied to
//                    the test entry is on lines not visible here.
// error            - both visible uses are guarded by !error.empty();
//                    presumably an empty string means no error is expected
//                    (TODO confirm).
// use_state        - defaults to false; its use is not visible here.
// err_code         - defaults to "LatLonCountry"; its use is not visible here.
558 void TestOneLatLonCountry(
const string& country,
const string& lat_lon,
const string&
error,
bool use_state =
false,
const string& err_code =
"LatLonCountry")
// Guard on a non-empty error text (the guarded statements are not visible here).
571 if (!
error.empty()) {
// Run the validator on the entry handle with the current options.
574 eval = validator.Validate(seh, options);
// Second guard on a non-empty error text.
577 if (!
error.empty()) {
// Heading string appended to the expected list.
580 expected.push_back(
"LatLonCountry Errors");
// Build the complete submitter report from the validation result and walk its
// entries; sublist is filled on a line not visible here before the inner loop.
585 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
586 for (
const string& it : cat_list) {
587 vector<string> sublist;
589 for (
const string& sit : sublist) {
610 "Lat_lon '46.5 N 20 E' maps to 'Hungary' instead of 'Romania' - claimed region 'Romania' is at distance 45 km");
611 TestOneLatLonCountry(
"Romania",
"34 N 65 E",
"Lat_lon '34 N 65 E' maps to 'Afghanistan' instead of 'Romania'");
612 TestOneLatLonCountry(
"Romania",
"48 N 15 E",
"Lat_lon '48 N 15 E' maps to 'Austria' instead of 'Romania'");
613 TestOneLatLonCountry(
"Romania",
"48 N 15 W",
"Lat_lon '48 N 15 W' is in water 'Atlantic Ocean'",
false,
"LatLonWater");
633 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[44] =
'A';
634 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[45] =
'G';
639 other_intron->
SetData().SetImp().SetKey(
"intron");
641 gene->
SetData().SetGene().SetLocus_tag(
"fake_locustag");
646 prot->SetData().SetProt().SetEc().push_back(
"1.2.3.10");
647 prot->SetData().SetProt().SetEc().push_back(
"1.1.3.22");
648 prot->SetData().SetProt().SetEc().push_back(
"1.1.99.n");
649 prot->SetData().SetProt().SetEc().push_back(
"1.1.1.17");
650 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.44");
651 prot->SetData().SetProt().SetEc().push_back(
"11.22.n33.44");
652 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.n44");
666 eval = validator.Validate(seh, options);
671 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
672 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
673 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
674 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
675 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
676 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
677 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
678 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
679 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
680 expected.push_back(
"lcl|nuc:Lat_lon '30 N 30 E' maps to 'Egypt' instead of 'Panama'");
681 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
682 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
683 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
687 string val =
format.FormatForSubmitterReport(*vit, scope);
695 seen.push_back(vit->GetErrCode());
697 expected.push_back(
"NotSpliceConsensusDonor");
698 expected.push_back(
"NotSpliceConsensusDonorTerminalIntron");
699 expected.push_back(
"NotSpliceConsensusAcceptor");
700 expected.push_back(
"DeletedEcNumber");
701 expected.push_back(
"ReplacedEcNumber");
702 expected.push_back(
"BadEcNumberValue");
703 expected.push_back(
"BadEcNumberFormat");
704 expected.push_back(
"BadEcNumberValue");
705 expected.push_back(
"NotSpliceConsensusDonor");
706 expected.push_back(
"LatLonCountry");
707 expected.push_back(
"BadInstitutionCode");
708 expected.push_back(
"BadInstitutionCode");
709 expected.push_back(
"BadInstitutionCode");
714 vector<CValidErrItem::TErrIndex> codes =
format.GetListOfErrorCodes(*eval);
719 expected.push_back(
"LatLonCountry");
720 expected.push_back(
"BadInstitutionCode");
721 expected.push_back(
"BadEcNumberFormat");
722 expected.push_back(
"BadEcNumberValue");
723 expected.push_back(
"NotSpliceConsensusDonor");
724 expected.push_back(
"NotSpliceConsensusAcceptor");
725 expected.push_back(
"DeletedEcNumber");
726 expected.push_back(
"ReplacedEcNumber");
727 expected.push_back(
"NotSpliceConsensusDonorTerminalIntron");
734 expected.push_back(
"Not Splice Consensus");
735 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
736 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
744 expected.push_back(
"Not Splice Consensus");
745 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
746 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
747 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
748 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
754 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
755 for (
const string& it : cat_list) {
756 vector<string> sublist;
758 for (
const string& sit : sublist) {
762 expected.push_back(
"Not Splice Consensus");
763 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
764 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
765 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
766 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
768 expected.push_back(
"EC Number Format");
769 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
771 expected.push_back(
"EC Number Value");
772 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
773 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
774 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
775 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
777 expected.push_back(
"Bad Institution Codes");
778 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
779 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
780 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
782 expected.push_back(
"LatLonCountry Errors");
783 expected.push_back(
"lcl|nuc:Lat_lon '30 N 30 E' maps to 'Egypt' instead of 'Panama'");
797 eval = validator.Validate(seh, options);
803 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
804 for (
const string& it : cat_list) {
805 vector<string> sublist;
807 for (
const string& sit : sublist) {
812 expected.push_back(
"lcl|good:Sebaea microphylla");
829 "Lat_lon '36 N 80 W' maps to 'USA: North Carolina' instead of 'USA: South Carolina' - claimed region 'USA: South Carolina' is at distance 130 km"));
832 eval = validator.Validate(seh, options);
843 prot->SetData().SetProt().SetEc().push_back(
"1.2.3.10");
844 prot->SetData().SetProt().SetEc().push_back(
"1.1.3.22");
845 prot->SetData().SetProt().SetEc().push_back(
"1.1.99.n");
846 prot->SetData().SetProt().SetEc().push_back(
"1.1.1.17");
847 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.44");
848 prot->SetData().SetProt().SetEc().push_back(
"11.22.n33.44");
849 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.n44");
862 "EC_number 1.2.3.10 was deleted"));
864 "EC_number 1.1.3.22 was transferred and is no longer valid"));
866 "11.22.33.44 is not a legal value for qualifier EC_number"));
868 "11.22.n33.44 is not in proper EC_number format"));
870 "11.22.33.n44 is not a legal preliminary value for qualifier EC_number"));
872 eval = validator.Validate(seh, options);
875 scope.RemoveTopLevelSeqEntry(seh);
876 prot->SetData().SetProt().ResetEc();
878 misc->
SetData().SetImp().SetKey(
"exon");
887 expected_errors[1]->SetErrMsg(
"EC_number 1.1.3.22 was replaced");
888 seh = scope.AddTopLevelSeqEntry(*entry);
889 eval = validator.Validate(seh, options);
900 misc->
SetData().SetImp().SetKey(
"repeat_region");
906 "repeat_region /rpt_unit and underlying sequence do not match"));
908 eval = validator.Validate(seh, options);
911 scope.RemoveTopLevelSeqEntry(seh);
914 misc->
SetData().SetImp().SetKey(
"repeat_region");
916 seh = scope.AddTopLevelSeqEntry(*entry);
917 expected_errors[0]->SetErrCode(
"InvalidRepeatUnitLength");
918 expected_errors[0]->SetErrMsg(
"Length of rpt_unit_seq is greater than feature length");
920 eval = validator.Validate(seh, options);
940 eval = validator.Validate(seh, options);
945 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
946 expected_errors[0]->SetErrMsg(
"Bioseq-ext not allowed on raw Bioseq");
947 eval = validator.Validate(seh, options);
952 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
953 expected_errors[0]->SetErrMsg(
"Missing Seq-data on raw Bioseq");
955 eval = validator.Validate(seh, options);
959 eval = validator.Validate(seh, options);
964 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
966 expected_errors[0]->SetErrCode(
"ExtNotAllowed");
967 expected_errors[0]->SetErrMsg(
"Bioseq-ext not allowed on constructed Bioseq");
968 eval = validator.Validate(seh, options);
973 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
974 expected_errors[0]->SetErrMsg(
"Missing Seq-data on constructed Bioseq");
976 eval = validator.Validate(seh, options);
980 eval = validator.Validate(seh, options);
986 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
987 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on map Bioseq");
989 eval = validator.Validate(seh, options);
993 eval = validator.Validate(seh, options);
997 eval = validator.Validate(seh, options);
1001 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
1002 expected_errors[0]->SetErrCode(
"SeqDataNotAllowed");
1003 expected_errors[0]->SetErrMsg(
"Seq-data not allowed on map Bioseq");
1004 eval = validator.Validate(seh, options);
1012 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
1013 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on reference Bioseq");
1014 eval = validator.Validate(seh, options);
1028 expected_errors[0]->SetErrCode(
"ReprInvalid");
1029 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 6");
1030 eval = validator.Validate(seh, options);
1035 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 0");
1036 eval = validator.Validate(seh, options);
1041 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 255");
1042 eval = validator.Validate(seh, options);
1048 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
1050 expected_errors[0]->SetErrCode(
"SeqDataNotAllowed");
1051 expected_errors[0]->SetErrMsg(
"Seq-data not allowed on delta Bioseq");
1052 eval = validator.Validate(seh, options);
1058 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
1059 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on delta Bioseq");
1060 eval = validator.Validate(seh, options);
1076 eval = validator.Validate(seh, options);
1079 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 255");
1081 eval = validator.Validate(seh, options);
1084 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 6");
1086 eval = validator.Validate(seh, options);
1114 vector<CExpectedError*> expected_errors;
1115 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Warning,
"TerminalNs",
"N at end of sequence"));
1116 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Warning,
"GeneLocusCollidesWithLocusTag",
"locus collides with locus_tag in another gene"));
1117 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"CollidingLocusTags",
"Colliding locus_tags in gene features"));
1118 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"CollidingLocusTags",
"Colliding locus_tags in gene features"));
1119 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoMolInfoFound",
"No Mol-info applies to this Bioseq"));
1120 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"LocusTagGeneLocusMatch",
"Gene locus and locus_tag 'foo' match"));
1121 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoPubFound",
"No publications anywhere on this entire record."));
1122 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Info,
"MissingPubRequirement",
"No submission citation anywhere on this entire record."));
1123 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoSourceDescriptor",
"No source information included on this record."));
1134 local str \"LocusCollidesWithLocusTag\" } ,\
1140 iupacna \"AATTGGCCAANNAATTGGCCAANN\" } ,\
1149 locus-tag \"foo\" } ,\
1156 local str \"LocusCollidesWithLocusTag\" } } ,\
1161 locus-tag \"foo\" } ,\
1168 local str \"LocusCollidesWithLocusTag\" } } ,\
1173 locus-tag \"baz\" } ,\
1180 local str \"LocusCollidesWithLocusTag\" } } ,\
1185 locus-tag \"baz\" } ,\
1192 local str \"LocusCollidesWithLocusTag\" } } } } } }\
1202 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"CircularProtein",
"Non-linear topology set on protein"));
1208 eval = validator.Validate(seh, options);
1212 eval = validator.Validate(seh, options);
1216 eval = validator.Validate(seh, options);
1223 eval = validator.Validate(seh, options);
1228 eval = validator.Validate(seh, options);
1241 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadProteinMoltype",
"Protein not single stranded"));
1245 eval = validator.Validate(seh, options);
1249 eval = validator.Validate(seh, options);
1253 eval = validator.Validate(seh, options);
1262 eval = validator.Validate(seh, options);
1266 eval = validator.Validate(seh, options);
1283 eval = validator.Validate(seh, options);
1286 expected_errors[0]->SetErrCode(
"MolOther");
1287 expected_errors[0]->SetErrMsg(
"Bioseq.mol is type other");
1289 eval = validator.Validate(seh, options);
1292 expected_errors[0]->SetErrCode(
"MolNuclAcid");
1293 expected_errors[0]->SetErrMsg(
"Bioseq.mol is type nucleic acid");
1295 eval = validator.Validate(seh, options);
1312 eval = validator.Validate(seh, options);
1315 expected_errors[0]->SetErrMsg(
"Fuzzy length on const Bioseq");
1317 eval = validator.Validate(seh, options);
1321 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
1322 expected_errors[0]->SetErrMsg(
"Missing Seq-data on constructed Bioseq");
1325 eval = validator.Validate(seh, options);
1350 vector<CExpectedError*> expected_errors;
1351 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidAlphabet",
"Using a nucleic acid alphabet on a protein sequence"));
1358 eval = validator.
Validate(prot_seh, options);
1362 eval = validator.
Validate(prot_seh, options);
1366 eval = validator.
Validate(prot_seh, options);
1370 eval = validator.
Validate(prot_seh, options);
1379 expected_errors[0]->SetErrMsg(
"Using a protein alphabet on a nucleic acid");
1381 eval = validator.
Validate(seh, options);
1385 eval = validator.
Validate(seh, options);
1389 eval = validator.
Validate(seh, options);
1393 eval = validator.
Validate(seh, options);
1397 eval = validator.
Validate(seh, options);
1410 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1411 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1412 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1413 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1414 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1415 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1416 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1417 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1418 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1419 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1420 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFE');
1421 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFE');
1422 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFF');
1423 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFF');
1425 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'E' at position [5]"));
1426 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'F' at position [6]"));
1427 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'I' at position [9]"));
1428 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'J' at position [10]"));
1429 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'L' at position [12]"));
1430 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'O' at position [15]"));
1431 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'P' at position [16]"));
1432 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Q' at position [17]"));
1433 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'U' at position [21]"));
1434 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'X' at position [24]"));
1435 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Z' at position [26]"));
1436 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'E' at position [31]"));
1437 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'F' at position [32]"));
1438 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'I' at position [35]"));
1439 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'J' at position [36]"));
1440 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'L' at position [38]"));
1441 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'O' at position [41]"));
1442 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'P' at position [42]"));
1443 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Q' at position [43]"));
1444 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'U' at position [47]"));
1445 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'X' at position [50]"));
1446 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Z' at position [52]"));
1457 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"More than 10 invalid residues. Checking stopped"));
1458 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Fatal,
"NonAsciiAsn",
"Non-ASCII character '251' found in item"));
1461 eval = validator.Validate(seh, options);
1466 delete expected_errors[8];
1467 expected_errors[8] =
nullptr;
1468 delete expected_errors[19];
1469 expected_errors[19] =
nullptr;
1470 eval = validator.Validate(seh, options);
1476 if (it->IsMolinfo()) {
1480 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1481 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1482 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1483 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1484 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1485 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1486 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1487 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1488 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1489 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1490 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFE');
1491 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFE');
1492 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFF');
1493 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFF');
1496 feat->
SetData().SetProt().SetName().push_back(
"fake protein name");
1497 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
1501 scope.RemoveEntry(*entry);
1502 seh = scope.AddTopLevelSeqEntry(*entry);
1504 for (
int j = 0; j < 22; j++) {
1505 if (expected_errors[j]) {
1506 delete expected_errors[j];
1507 expected_errors[j] =
nullptr;
1510 eval = validator.Validate(seh, options);
1516 scope.RemoveEntry(*entry);
1518 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"abcdefghijklmnopqrstuvwxyz");
1520 seh = scope.AddTopLevelSeqEntry(*entry);
1521 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Sequence contains lower-case characters"));
1523 eval = validator.Validate(seh, options);
1526 scope.RemoveEntry(*entry);
1528 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"protein");
1529 seh = scope.AddTopLevelSeqEntry(*entry);
1530 eval = validator.Validate(seh, options);
1536 scope.RemoveEntry(*entry);
1541 seg->SetLiteral().SetSeq_data().SetIupacna().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1542 seg->SetLiteral().SetLength(52);
1543 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(seg);
1545 seh = scope.AddTopLevelSeqEntry(*entry);
1570 eval = validator.Validate(seh, options);
1576 scope.RemoveEntry(*entry);
1581 seg2->SetLiteral().SetSeq_data().SetIupacaa().Set(
"1234567");
1582 seg2->SetLiteral().SetLength(7);
1583 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(seg2);
1585 seh = scope.AddTopLevelSeqEntry(*entry);
1596 eval = validator.Validate(seh, options);
1641 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MP*K*E*N");
1642 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"GTGCCCTAAAAATAAGAGTAAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1651 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
1652 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"ExceptionProblem",
"unclassified translation discrepancy is not a legal exception explanation"));
1653 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"InternalStop",
"3 internal stops (and illegal start codon). Genetic code [0]"));
1655 "CDS has unnecessary translated product replaced exception"));
1658 eval = validator.Validate(seh, options);
1669 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
1670 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StartCodon",
"Illegal start codon (and 3 internal stops). Probably wrong genetic code [0]"));
1671 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"InternalStop",
"3 internal stops (and illegal start codon). Genetic code [0]"));
1674 eval = validator.Validate(seh, options);
1679 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCTAAAAATAAGAGTAAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1684 delete expected_errors[1];
1685 expected_errors[1] =
nullptr;
1686 expected_errors[2]->SetErrMsg(
"3 internal stops. Genetic code [0]");
1687 eval = validator.Validate(seh, options);
1706 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(loc1);
1709 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(loc2);
1712 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"PartialInconsistent",
"Partial segmented sequence without MolInfo partial"));
1717 eval = validator.Validate(seh, options);
1721 eval = validator.Validate(seh, options);
1725 eval = validator.Validate(seh, options);
1733 eval = validator.Validate(seh, options);
1737 eval = validator.Validate(seh, options);
1741 eval = validator.Validate(seh, options);
1749 eval = validator.Validate(seh, options);
1753 eval = validator.Validate(seh, options);
1757 eval = validator.Validate(seh, options);
1765 expected_errors[0]->SetErrMsg(
"Complete segmented sequence with MolInfo partial");
1766 eval = validator.Validate(seh, options);
1774 expected_errors[0]->SetErrMsg(
"No-left inconsistent with segmented SeqLoc");
1775 eval = validator.Validate(seh, options);
1779 eval = validator.Validate(seh, options);
1783 eval = validator.Validate(seh, options);
1791 expected_errors[0]->SetErrMsg(
"No-right inconsistent with segmented SeqLoc");
1792 eval = validator.Validate(seh, options);
1796 eval = validator.Validate(seh, options);
1800 eval = validator.Validate(seh, options);
1806 expected_errors[0]->SetErrMsg(
"No-ends inconsistent with segmented SeqLoc");
1809 eval = validator.Validate(seh, options);
1813 eval = validator.Validate(seh, options);
1817 eval = validator.Validate(seh, options);
1831 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"MPR");
1833 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetTo(2);
1837 pdb_id->SetMol().Set(
"foo");
1839 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetPdb(*pdb_id);
1840 scope.RemoveTopLevelSeqEntry(seh);
1841 seh = scope.AddTopLevelSeqEntry(*entry);
1842 eval = validator.Validate(seh, options);
1847 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"PartialsInconsistent",
"Molinfo completeness and protein feature partials conflict"));
1848 expected_errors[0]->SetAccession(
"lcl|good");
1849 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
1850 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
1851 scope.RemoveTopLevelSeqEntry(seh);
1852 seh = scope.AddTopLevelSeqEntry(*entry);
1855 eval = validator.Validate(seh, options);
1858 eval = validator.Validate(seh, options);
1861 eval = validator.Validate(seh, options);
1864 eval = validator.Validate(seh, options);
1873 if (it->IsMolinfo()) {
1874 it->SetMolinfo().ResetCompleteness();
1877 eval = validator.Validate(seh, options);
1880 eval = validator.Validate(seh, options);
1883 eval = validator.Validate(seh, options);
1886 eval = validator.Validate(seh, options);
1890 scope.RemoveTopLevelSeqEntry(seh);
1892 seh = scope.AddTopLevelSeqEntry(*entry);
1893 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCTTT");
1895 expected_errors[0]->SetErrMsg(
"Sequence only 9 residues");
1896 eval = validator.Validate(seh, options);
1903 scope.RemoveTopLevelSeqEntry(seh);
1904 seh = scope.AddTopLevelSeqEntry(*entry);
1905 eval = validator.Validate(seh, options);
1938 if (entry->
IsSeq()) {
1940 }
else if (entry->
IsSet()) {
1948 if (entry->
IsSeq()) {
1950 if (it->IsUser() && it->GetUser().IsRefGeneTracking()) {
1951 it->SetUser().SetData().front()->SetData().SetStr(status);
1954 }
else if (entry->
IsSet()) {
1956 if (it->IsUser() && it->GetUser().IsRefGeneTracking()) {
1957 it->SetUser().SetData().front()->SetData().SetStr(status);
1969 auto& cont = entry->
SetDescr().Set();
1970 for (
auto it = cont.begin(); it != cont.end();) {
1971 if ((*it)->IsTitle()) {
1974 it = cont.erase(it);
1977 (*it)->SetTitle(title);
1997 if (it->IsGenbank()) {
1998 it->SetGenbank().SetKeywords().push_back(keyword);
2016 eval = validator.Validate(seh, options);
2018 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"NoKeywordHasTechnique",
"Molinfo.tech barcode without BARCODE keyword"));
2021 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2022 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"genomic\" is not appropriate for sequences that use the TSA technique."));
2023 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAseqGapProblem",
"TSA submission includes wrong gap type. Gaps for TSA should be Assembly Gaps with linkage evidence."));
2025 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
2043 eval = validator.Validate(seh, options);
2045 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ProteinTechniqueOnNucleotide",
"Nucleic acid with protein sequence method"));
2051 eval = validator.Validate(seh, options);
2064 start_gap_seg->SetLiteral().SetLength(10);
2065 start_gap_seg->SetLiteral().SetSeq_data().SetGap();
2066 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().insert(entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().begin(), start_gap_seg);
2067 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2068 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2071 end_gap_seg->SetLiteral().SetLength(10);
2072 end_gap_seg->SetLiteral().SetSeq_data().SetGap();
2073 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(end_gap_seg);
2074 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2081 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadDeltaSeq",
"There is 1 adjacent gap in delta seq"));
2092 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
2094 eval = validator.Validate(seh, options);
2108 if (it->IsMolinfo()) {
2114 scope.RemoveTopLevelSeqEntry(seh);
2115 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
2116 seh = scope.AddTopLevelSeqEntry(*entry);
2117 eval = validator.Validate(seh, options);
2122 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NT_123456");
2123 scope.RemoveTopLevelSeqEntry(seh);
2124 seh = scope.AddTopLevelSeqEntry(*entry);
2125 eval = validator.Validate(seh, options);
2132 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
2133 scope.RemoveTopLevelSeqEntry(seh);
2134 seh = scope.AddTopLevelSeqEntry(*entry);
2137 vector<CMolInfo::TTech> allowed_list;
2151 bool allowed =
false;
2185 for (
auto it : linkage_evidence) {
2197 vector<CLinkage_evidence::EType> evidence;
2199 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2200 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2201 && it->GetLiteral().GetSeq_data().IsGap()) {
2202 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2211 "SeqGapBadLinkage",
"Seq-gap of type 3 should not have linkage evidence"));
2213 eval = validator.Validate(seh, options);
2218 scope.RemoveTopLevelSeqEntry(seh);
2219 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2220 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2221 && it->GetLiteral().GetSeq_data().IsGap()) {
2222 CSeq_gap& gap = it->SetLiteral().SetSeq_data().SetGap();
2227 seh = scope.AddTopLevelSeqEntry(*entry);
2231 "SeqGapBadLinkage",
"Seq-gap with linkage evidence must have linkage field set to linked"));
2233 eval = validator.Validate(seh, options);
2238 scope.RemoveTopLevelSeqEntry(seh);
2240 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2241 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2242 && it->GetLiteral().GetSeq_data().IsGap()) {
2243 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2247 seh = scope.AddTopLevelSeqEntry(*entry);
2251 "SeqGapBadLinkage",
"Linkage evidence 'align genus' appears 2 times"));
2253 eval = validator.Validate(seh, options);
2258 evidence.pop_back();
2260 scope.RemoveTopLevelSeqEntry(seh);
2261 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2262 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2263 && it->GetLiteral().GetSeq_data().IsGap()) {
2264 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2268 seh = scope.AddTopLevelSeqEntry(*entry);
2272 "SeqGapBadLinkage",
"Seq-gap type has unspecified and additional linkage evidence"));
2274 eval = validator.Validate(seh, options);
2279 scope.RemoveTopLevelSeqEntry(seh);
2282 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2283 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2284 && it->GetLiteral().GetSeq_data().IsGap()) {
2285 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2289 seh = scope.AddTopLevelSeqEntry(*entry);
2293 "SeqGapBadLinkage",
"Single Seq-gap has unknown type and unspecified linkage"));
2295 eval = validator.Validate(seh, options);
2300 scope.RemoveTopLevelSeqEntry(seh);
2302 gap_seg->SetLiteral().SetLength(10);
2303 AdjustGap(gap_seg->SetLiteral().SetSeq_data().SetGap(),
2307 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCCATGATGATGTACCGTACGTTTTCCCATGATGATGTACCGTACGTTTT");
2308 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetLength(50);
2309 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
2313 seh = scope.AddTopLevelSeqEntry(*entry);
2317 "SeqGapBadLinkage",
"All 2 Seq-gaps have unknown type and unspecified linkage"));
2319 eval = validator.Validate(seh, options);
2328 for (
auto it : expected_errors) {
2330 it->SetAccession(acc);
2342 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (lcl|good - lcl|bad)"));
2346 scope.RemoveTopLevelSeqEntry(seh);
2350 seh = scope.AddTopLevelSeqEntry(*entry);
2351 eval = validator.Validate(seh, options);
2355 scope.RemoveTopLevelSeqEntry(seh);
2359 seh = scope.AddTopLevelSeqEntry(*entry);
2361 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (bbs|1 - bbs|2)");
2362 eval = validator.Validate(seh, options);
2366 scope.RemoveTopLevelSeqEntry(seh);
2369 seh = scope.AddTopLevelSeqEntry(*entry);
2371 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (bbm|1 - bbm|2)");
2372 eval = validator.Validate(seh, options);
2376 scope.RemoveTopLevelSeqEntry(seh);
2381 seh = scope.AddTopLevelSeqEntry(*entry);
2383 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (gi|1 - gi|2)");
2384 eval = validator.Validate(seh, options);
2389 scope.RemoveTopLevelSeqEntry(seh);
2394 seh = scope.AddTopLevelSeqEntry(*entry);
2397 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"IdOnMultipleBioseqs",
"BioseqFind (gim|1) unable to find itself - possible internal error"));
2398 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gim|1 - gim|2)"));
2399 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"IdOnMultipleBioseqs",
"BioseqFind (gim|2) unable to find itself - possible internal error"));
2401 eval = validator.Validate(seh, options);
2406 scope.RemoveTopLevelSeqEntry(seh);
2413 seh = scope.AddTopLevelSeqEntry(*entry);
2414 expected_errors.push_back(
new CExpectedError(
"pat|USA|1|1",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (pat|USA|1|1 - pat|USA|2|2)"));
2416 eval = validator.Validate(seh, options);
2420 scope.RemoveTopLevelSeqEntry(seh);
2423 seh = scope.AddTopLevelSeqEntry(*entry);
2425 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (pdb|good| - pdb|badd| )");
2426 eval = validator.Validate(seh, options);
2430 scope.RemoveTopLevelSeqEntry(seh);
2435 seh = scope.AddTopLevelSeqEntry(*entry);
2437 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (gnl|a|good - gnl|a|bad)");
2438 eval = validator.Validate(seh, options);
2443 scope.RemoveTopLevelSeqEntry(seh);
2445 seh = scope.AddTopLevelSeqEntry(*entry);
2447 eval = validator.Validate(seh, options);
2451 scope.RemoveTopLevelSeqEntry(seh);
2452 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gb|AY222222|)"));
2455 seh = scope.AddTopLevelSeqEntry(*entry);
2456 eval = validator.Validate(seh, options);
2460 scope.RemoveTopLevelSeqEntry(seh);
2463 seh = scope.AddTopLevelSeqEntry(*entry);
2466 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.2|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gb|AY123456.2|)"));
2467 eval = validator.Validate(seh, options);
2471 scope.RemoveTopLevelSeqEntry(seh);
2473 seh = scope.AddTopLevelSeqEntry(*entry);
2475 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gpp|AY123456|)"));
2477 eval = validator.Validate(seh, options);
2481 scope.RemoveTopLevelSeqEntry(seh);
2484 seh = scope.AddTopLevelSeqEntry(*entry);
2486 expected_errors[0]->SetErrMsg(
"LRG sequence needs NG_ accession");
2488 eval = validator.Validate(seh, options);
2491 scope.RemoveTopLevelSeqEntry(seh);
2493 seh = scope.AddTopLevelSeqEntry(*entry);
2496 eval = validator.Validate(seh, options);
2510 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MolNuclAcid",
"Bioseq.mol is type nucleic acid"));
2513 eval = validator.Validate(seh, options);
2527 vector<CMolInfo::TTech> genomic_list;
2538 bool genomic =
false;
2548 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InconsistentMolType",
"Molecule type (DNA) does not match biomol (RNA)"));
2556 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 raw seq has no gaps and no graphs"));
2559 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"HTGS_STS_GSS_WGSshouldBeGenomic",
"HTGS/STS/GSS/WGS sequence should be genomic"));
2560 eval = validator.Validate(seh, options);
2564 delete expected_errors[0];
2565 expected_errors[0] =
nullptr;
2566 expected_errors.back()->SetErrCode(
"HTGS_STS_GSS_WGSshouldNotBeRNA");
2567 expected_errors.back()->SetErrMsg(
"HTGS/STS/GSS/WGS sequence should not be RNA");
2568 eval = validator.Validate(seh, options);
2572 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ProteinTechniqueOnNucleotide",
"Nucleic acid with protein sequence method"));
2575 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"NoKeywordHasTechnique",
"Molinfo.tech barcode without BARCODE keyword"));
2577 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2578 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"cRNA\" is not appropriate for sequences that use the TSA technique."));
2580 eval = validator.Validate(seh, options);
2589 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InconsistentMolType",
"Molecule type (DNA) does not match biomol (RNA)"));
2590 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2591 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"cRNA\" is not appropriate for sequences that use the TSA technique."));
2592 eval = validator.Validate(seh, options);
2597 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2598 eval = validator.GetTSAConflictingBiomolTechErrors(seh);
2600 eval = validator.GetTSAConflictingBiomolTechErrors(*(seh.GetSeq().GetCompleteBioseq()));
2609 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
2610 entry->
SetSeq().
SetId().front()->SetOther().SetName(
"good one");
2614 expected_errors.push_back(
new CExpectedError(
"ref|NC_123456|good one",
eDiag_Critical,
"SeqIdNameHasSpace",
"Seq-id.name 'good one' should be a single word without any spaces"));
2617 eval = validator.Validate(seh, options);
2632 seg1->
SetWhole().SetGenbank().SetAccession(
"AY123456");
2633 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(seg1);
2635 seg2->
SetWhole().SetGenbank().SetAccession(
"AY123456");
2636 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(seg2);
2658 vector<CExpectedError*> expected_errors;
2659 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLocOrder",
"Segmented BioseqIntervals out of order in SeqLoc [[gb|AY123456|, gb|AY123456|]]"));
2660 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"DuplicateSegmentReferences",
"Segmented sequence has multiple references to gb|AY123456"));
2663 eval = validator.
Validate(seh, options);
2666 seg2->
SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
2667 seg2->
SetInt().SetFrom(0);
2668 seg2->
SetInt().SetTo(484);
2669 expected_errors[0]->SetErrMsg(
"Segmented BioseqIntervals out of order in SeqLoc [[gb|AY123456|, 1-485]]");
2671 expected_errors[1]->SetErrMsg(
"Segmented sequence has multiple references to gb|AY123456 that are not SEQLOC_WHOLE");
2672 eval = validator.
Validate(seh, options);
2685 CRef<CSeq_feat> prot_feat =
prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
2687 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATANNNNNN");
2688 nuc->SetSeq().SetInst().SetLength(27);
2689 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MPRKTEIXX");
2690 prot->SetSeq().SetInst().SetLength(9);
2703 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
2706 eval = validator.Validate(seh, options);
2729 expected_errors.push_back(
new CExpectedError(
"gb|" + id_str +
"|",
eDiag_Error,
"BadSeqIdFormat",
"Bad accession " + id_str));
2732 eval = validator.Validate(seh, options);
2753 eval = validator.Validate(seh, options);
2770 bool is_wgs =
false;
2771 if (id_str.length() == 12 || id_str.length() == 13 || id_str.length() == 14 || id_str.length() == 15) {
2781 eval = validator.Validate(seh, options);
2799 vector<string> bad_ids;
2800 bad_ids.push_back(
"AY123456ABC");
2801 bad_ids.push_back(
"A1234");
2802 bad_ids.push_back(
"A123456");
2803 bad_ids.push_back(
"AY12345");
2804 bad_ids.push_back(
"AY1234567");
2805 bad_ids.push_back(
"ABC1234");
2806 bad_ids.push_back(
"ABC123456");
2807 bad_ids.push_back(
"ABCD1234567");
2808 bad_ids.push_back(
"ABCDE123456");
2809 bad_ids.push_back(
"ABCDE12345678");
2811 vector<string> bad_nuc_ids;
2812 bad_nuc_ids.push_back(
"ABC12345");
2814 vector<string> bad_prot_ids;
2815 bad_prot_ids.push_back(
"AY123456");
2816 bad_prot_ids.push_back(
"A12345");
2818 vector<string> good_ids;
2820 vector<string> good_nuc_ids;
2821 good_nuc_ids.push_back(
"AY123456");
2822 good_nuc_ids.push_back(
"A12345");
2823 good_nuc_ids.push_back(
"ABCD123456789");
2824 good_nuc_ids.push_back(
"ABCD1234567890");
2826 vector<string> good_prot_ids;
2827 good_prot_ids.push_back(
"ABC12345");
2838 for (
const string& id_str : bad_ids) {
2839 const string acc_str =
"gb|" + id_str +
"|";
2841 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2844 scope.RemoveTopLevelSeqEntry(seh);
2845 scope.ResetDataAndHistory();
2849 seh = scope.AddTopLevelSeqEntry(*entry);
2850 eval = validator.Validate(seh, options);
2852 scope.RemoveTopLevelSeqEntry(seh);
2853 scope.ResetDataAndHistory();
2856 seh = scope.AddTopLevelSeqEntry(*entry);
2857 eval = validator.Validate(seh, options);
2861 for (
const string& id_it : bad_ids) {
2862 const string id_str =
"B" + id_it.substr(1);
2863 expected_errors[0]->SetAccession(
"embl|" + id_str +
"|");
2864 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2867 scope.RemoveTopLevelSeqEntry(seh);
2868 scope.ResetDataAndHistory();
2872 seh = scope.AddTopLevelSeqEntry(*entry);
2873 eval = validator.Validate(seh, options);
2874 expected_errors[0]->SetAccession(
"emb|" + id_str +
"|");
2876 scope.RemoveTopLevelSeqEntry(seh);
2877 scope.ResetDataAndHistory();
2880 seh = scope.AddTopLevelSeqEntry(*entry);
2881 eval = validator.Validate(seh, options);
2885 for (
const string& id_it : bad_ids) {
2886 const string id_str =
"C" + id_it.substr(1);
2887 expected_errors[0]->SetAccession(
"dbj|" + id_str +
"|");
2888 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2891 scope.RemoveTopLevelSeqEntry(seh);
2892 scope.ResetDataAndHistory();
2896 seh = scope.AddTopLevelSeqEntry(*entry);
2897 eval = validator.Validate(seh, options);
2898 expected_errors[0]->SetAccession(
"dbj|" + id_str +
"|");
2900 scope.RemoveTopLevelSeqEntry(seh);
2901 scope.ResetDataAndHistory();
2904 seh = scope.AddTopLevelSeqEntry(*entry);
2905 eval = validator.Validate(seh, options);
2910 for (
const string& id_str : bad_nuc_ids) {
2912 scope.RemoveTopLevelSeqEntry(seh);
2915 expected_errors[0]->SetAccession(
"gb|" + id_str +
"|");
2916 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2917 seh = scope.AddTopLevelSeqEntry(*entry);
2918 eval = validator.Validate(seh, options);
2923 for (
auto id_it : bad_prot_ids) {
2930 for (
const string& id_str : good_ids) {
2932 scope.RemoveTopLevelSeqEntry(seh);
2935 seh = scope.AddTopLevelSeqEntry(*entry);
2936 eval = validator.Validate(seh, options);
2939 scope.RemoveTopLevelSeqEntry(seh);
2942 seh = scope.AddTopLevelSeqEntry(*entry);
2943 eval = validator.Validate(seh, options);
2949 for (
const string& id_it : good_nuc_ids) {
2954 for (
const string& id_it : good_prot_ids) {
2959 scope.RemoveTopLevelSeqEntry(seh);
2966 seh = scope.AddTopLevelSeqEntry(*entry);
2967 eval = validator.Validate(seh, options);
2969 "Accession AY123456 has 0 version"));
2970 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123456|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
2979 scope.RemoveTopLevelSeqEntry(seh);
2980 bad_id->
SetLocal().
SetStr(
"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345678901234");
2982 seh = scope.AddTopLevelSeqEntry(*entry);
2983 eval = validator.Validate(seh, options);
2990 scope.RemoveTopLevelSeqEntry(seh);
2994 seh = scope.AddTopLevelSeqEntry(*entry);
2995 eval = validator.Validate(seh, options);
3002 scope.RemoveTopLevelSeqEntry(seh);
3009 seh = scope.AddTopLevelSeqEntry(*entry);
3011 "General database longer than 20 characters"));
3014 eval = validator.Validate(seh, options);
3020 scope.RemoveTopLevelSeqEntry(seh);
3022 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"a/b");
3023 seh = scope.AddTopLevelSeqEntry(*entry);
3024 eval = validator.Validate(seh, options);
3036 id->SetGeneral().SetDb(db);
3037 id->SetGeneral().SetTag().SetStr(
tag);
3042 string acc_str =
"lcl|good";
3043 if (!errmsg.empty()) {
3048 eval = validator.Validate(seh, options);
3057 TestOneGeneralSeqId(
"PRJNA318798",
" CpPA02_0001",
"Bad character ' ' in sequence ID 'gnl|PRJNA318798| CpPA02_0001'");
3058 TestOneGeneralSeqId(
"PRJNA3 18798",
"CpPA02_0001",
"Bad character ' ' in sequence ID 'gnl|PRJNA3 18798|CpPA02_0001'");
3066 id->SetGeneral().SetDb(
"lgsi");
3067 id->SetGeneral().SetTag().SetStr(
"thisidentifierismorethanfiftycharactersinlengthsoitshouldberejected");
3080 string acc_str =
"lcl|good";
3082 "General identifier longer than 50 characters"));
3085 eval = validator.Validate(seh, options);
3101 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3106 gbdesc->SetGenbank().SetExtra_accessions().push_back(
"AY123456");
3109 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"BadSecondaryAccn",
"AY123456 used for both primary and secondary accession"));
3111 eval = validator.Validate(seh, options);
3114 gbdesc->SetEmbl().SetExtra_acc().push_back(
"AY123456");
3115 eval = validator.Validate(seh, options);
3130 expected_errors.push_back(
new CExpectedError(
"gi|0",
eDiag_Error,
"GiWithoutAccession",
"No accession on sequence with gi number"));
3132 eval = validator.Validate(seh, options);
3142 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3143 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3151 hist_id->SetGi(
GI_CONST(21914627));
3152 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetIds().push_back(hist_id);
3153 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetDate().SetStd().SetYear(2008);
3155 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.1|",
eDiag_Error,
"HistoryGiCollision",
"Replaced by gi (21914627) is same as current Bioseq"));
3157 eval = validator.Validate(seh, options);
3161 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetIds().push_back(hist_id);
3162 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetDate().SetStd().SetYear(2008);
3163 expected_errors[0]->SetErrMsg(
"Replaces gi (21914627) is same as current Bioseq");
3164 eval = validator.Validate(seh, options);
3171 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetIds().push_back(hist_id);
3172 eval = validator.Validate(seh, options);
3177 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetIds().push_back(hist_id);
3178 eval = validator.Validate(seh, options);
3192 expected_errors.push_back(
new CExpectedError(
"gi|123456",
eDiag_Error,
"GiWithoutAccession",
"No accession on sequence with gi number"));
3194 eval = validator.Validate(seh, options);
3204 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3205 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3210 string acc_str =
"gb|AY123456.1|";
3216 "Conflicting ids on a Bioseq: (gb|AY123456.1| - " + other_acc->
AsFastaString() +
")"));
3218 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"MultipleAccessions",
"Multiple accessions on sequence with gi number"));
3220 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123457.1|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
3224 "TPA record gb|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3227 eval = validator.Validate(seh, options);
3286 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3287 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3297 string acc_str =
"gb|AY123456.1|";
3298 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"INSDRefSeqPackaging",
"INSD and RefSeq records should not be present in the same set"));
3299 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"MultipleAccessions",
"Multiple accessions on sequence with gi number"));
3301 eval = validator.Validate(seh, options);
3311 tpg_entry->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3312 tpg_entry->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3315 tpe_entry->
SetSeq().
SetId().front()->SetTpe().SetAccession(
"AY123456");
3316 tpe_entry->
SetSeq().
SetId().front()->SetTpe().SetVersion(1);
3319 tpd_entry->
SetSeq().
SetId().front()->SetTpd().SetAccession(
"AY123456");
3320 tpd_entry->
SetSeq().
SetId().front()->SetTpd().SetVersion(1);
3325 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Info,
"HistAssemblyMissing",
"TPA record tpg|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3327 eval = validator.Validate(seh, options);
3331 scope.RemoveTopLevelSeqEntry(seh);
3332 seh = scope.AddTopLevelSeqEntry(*tpe_entry);
3334 expected_errors[0]->SetErrMsg(
"TPA record tpe|AY123456.1| should have Seq-hist.assembly for PRIMARY block");
3335 eval = validator.Validate(seh, options);
3340 scope.RemoveTopLevelSeqEntry(seh);
3341 seh = scope.AddTopLevelSeqEntry(*tpd_entry);
3343 expected_errors[0]->SetErrMsg(
"TPA record tpd|AY123456.1| should have Seq-hist.assembly for PRIMARY block");
3344 eval = validator.Validate(seh, options);
3351 block->SetGenbank().SetKeywords().push_back(
"TPA:reassembly");
3353 scope.RemoveTopLevelSeqEntry(seh);
3354 seh = scope.AddTopLevelSeqEntry(*tpg_entry);
3355 eval = validator.Validate(seh, options);
3359 block->SetEmbl().SetKeywords().push_back(
"TPA:reassembly");
3360 eval = validator.Validate(seh, options);
3368 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNNAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCCAANNNNNNNNNN");
3376 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
3378 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
3380 eval = validator.Validate(seh, options);
3384 scope.RemoveTopLevelSeqEntry(seh);
3385 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3386 seh = scope.AddTopLevelSeqEntry(*entry);
3390 eval = validator.Validate(seh, options);
3396 scope.RemoveTopLevelSeqEntry(seh);
3398 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNCCC");
3399 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCCNNNNNNNNN");
3400 seh = scope.AddTopLevelSeqEntry(*entry);
3402 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 3 bases"));
3405 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 52 percent Ns"));
3406 eval = validator.Validate(seh, options);
3410 scope.RemoveTopLevelSeqEntry(seh);
3412 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNNCC");
3413 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCNNNNNNNNNN");
3414 seh = scope.AddTopLevelSeqEntry(*entry);
3415 expected_errors[0]->SetErrMsg(
"Maximum contig length is 2 bases");
3416 expected_errors.back()->SetErrMsg(
"Sequence contains 58 percent Ns");
3417 eval = validator.Validate(seh, options);
3421 scope.RemoveTopLevelSeqEntry(seh);
3422 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3423 seh = scope.AddTopLevelSeqEntry(*entry);
3427 eval = validator.Validate(seh, options);
3431 scope.RemoveTopLevelSeqEntry(seh);
3432 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
3433 seh = scope.AddTopLevelSeqEntry(*entry);
3437 eval = validator.Validate(seh, options);
3440 scope.RemoveTopLevelSeqEntry(seh);
3441 entry->
SetSeq().
SetId().front()->SetPatent().SetSeqid(1);
3442 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetCountry(
"USA");
3443 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetId().SetNumber(
"1");
3444 seh = scope.AddTopLevelSeqEntry(*entry);
3446 delete expected_errors.back();
3447 expected_errors.pop_back();
3448 eval = validator.Validate(seh, options);
3456 "Maximum contig length is 2 bases"));
3458 "Suspicious use of complete"));
3461 eval = validator.Validate(seh, options);
3471 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123457");
3472 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3479 expected_errors.push_back(
new CExpectedError(
"gb|AY123457.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123457.1|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
3481 eval = validator.Validate(seh, options);
3485 scope.RemoveTopLevelSeqEntry(seh);
3486 entry->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3487 entry->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3488 seh = scope.AddTopLevelSeqEntry(*entry);
3490 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Info,
"HistAssemblyMissing",
"TPA record tpg|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3491 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"Loss of accession (gb|AY123456.1|) on gi (21914627) compared to the NCBI sequence repository"));
3492 eval = validator.Validate(seh, options);
3509 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqLit",
"Run of 20 Ns in delta component 5 that starts at base 45"));
3510 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
3519 eval = validator.Validate(seh, options);
3527 "Run of 81 Ns in delta component 7 that starts at base 79"));
3536 eval = validator.Validate(seh, options);
3540 eval = validator.Validate(seh, options);
3544 eval = validator.Validate(seh, options);
3547 unit_test_util::AddToDeltaSeq(entry,
"AANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGG");
3549 expected_errors[0]->SetErrMsg(
"Run of 101 Ns in delta component 9 that starts at base 174");
3550 eval = validator.Validate(seh, options);
3561 delta_seq->SetLiteral().SetLength(0);
3562 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(delta_seq);
3566 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLitGapLength0",
"Gap of length 0 in delta chain"));
3569 eval = validator.Validate(seh, options);
3574 eval = validator.Validate(seh, options);
3577 delta_seq->SetLiteral().SetFuzz().
Reset();
3578 delta_seq->SetLiteral().SetFuzz().SetP_m(10);
3579 eval = validator.Validate(seh, options);
3583 delta_seq->SetLiteral().SetFuzz().
Reset();
3585 expected_errors[0]->SetErrMsg(
"Gap of length 0 with unknown fuzz in delta chain");
3586 eval = validator.Validate(seh, options);
3590 scope.RemoveTopLevelSeqEntry(seh);
3591 entry->
SetSeq().
SetId().front()->SetSwissprot().SetAccession(
"AY123456");
3592 seh = scope.AddTopLevelSeqEntry(*entry);
3595 eval = validator.Validate(seh, options);
3598 delta_seq->SetLiteral().SetFuzz().SetP_m(10);
3599 expected_errors[0]->SetErrMsg(
"Gap of length 0 in delta chain");
3600 eval = validator.Validate(seh, options);
3603 delta_seq->SetLiteral().SetFuzz().
Reset();
3605 eval = validator.Validate(seh, options);
3608 delta_seq->SetLiteral().ResetFuzz();
3609 eval = validator.Validate(seh, options);
3624 field->
SetData().SetStr(
"Data");
3634 member1->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
3638 member2->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good2");
3647 eval = validator.Validate(seh, options);
3652 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TpaAssemblyProblem",
"There are 1 TPAs with history and 1 without history in this record."));
3653 eval = validator.Validate(seh, options);
3657 scope.RemoveTopLevelSeqEntry(seh);
3658 member1->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3659 member1->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3663 seh = scope.AddTopLevelSeqEntry(*entry);
3667 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"Loss of accession (gb|AY123456.1|) on gi (21914627) compared to the NCBI sequence repository"));
3668 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Error,
"TpaAssemblyProblem",
"There are 1 TPAs with history and 1 without history in this record."));
3669 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"TpaAssemblyProblem",
"There are 1 TPAs without history in this record, but the record has a gi number assignment."));
3672 eval = validator.Validate(seh, options);
3683 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
3684 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
3685 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(9);
3692 eval = validator.Validate(seh, options);
3695 scope.RemoveTopLevelSeqEntry(seh);
3698 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
3699 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
3700 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(10);
3702 seh = scope.AddTopLevelSeqEntry(*entry);
3703 eval = validator.Validate(seh, options);
3721 eval = validator.Validate(seh, options);
3725 eval = validator.Validate(seh, options);
3729 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MissingGaps",
"HTGS delta seq should have gaps between all sequence runs"));
3730 eval = validator.Validate(seh, options);
3734 eval = validator.Validate(seh, options);
3738 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 delta seq has no gaps and no graphs"));
3739 eval = validator.Validate(seh, options);
3743 scope.RemoveTopLevelSeqEntry(seh);
3744 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
3746 seh = scope.AddTopLevelSeqEntry(*entry);
3749 eval = validator.Validate(seh, options);
3751 delete expected_errors[1];
3752 expected_errors.pop_back();
3755 eval = validator.Validate(seh, options);
3759 eval = validator.Validate(seh, options);
3770 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3772 SetTitle(entry,
"Foo complete genome");
3776 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Warning,
"CompleteTitleProblem",
"Complete genome in title without complete flag set"));
3779 eval = validator.Validate(seh, options);
3787 eval = validator.Validate(seh, options);
3792 scope.RemoveTopLevelSeqEntry(seh);
3794 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3796 SetTitle(entry,
"Foo complete genome");
3798 seh = scope.AddTopLevelSeqEntry(*entry);
3801 "CompleteGenomeHasGaps",
"Title contains 'complete genome' but sequence has gaps"));
3803 eval = validator.Validate(seh, options);
3819 "CompleteCircleProblem",
3820 "Circular topology without complete flag set"));
3823 eval = validator.Validate(seh, options);
3828 scope.RemoveTopLevelSeqEntry(seh);
3829 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3830 SetTitle(entry,
"This is just a title");
3832 seh = scope.AddTopLevelSeqEntry(*entry);
3834 "CompleteCircleProblem",
3835 "Circular topology has complete flag set, but title should say complete sequence or complete genome"));
3837 "UnwantedCompleteFlag",
3838 "Suspicious use of complete"));
3841 eval = validator.Validate(seh, options);
3859 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MissingGaps",
"HTGS delta seq should have gaps between all sequence runs"));
3860 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 delta seq has no gaps and no graphs"));
3861 eval = validator.Validate(seh, options);
3864 delete expected_errors[1];
3865 expected_errors.pop_back();
3869 eval = validator.Validate(seh, options);
3874 scope.RemoveTopLevelSeqEntry(seh);
3877 seh = scope.AddTopLevelSeqEntry(*raw_entry);
3878 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 raw seq has no gaps and no graphs"));
3880 eval = validator.Validate(seh, options);
3888 eval = validator.Validate(seh, options);
3897 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_DRAFT keyword"));
3898 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_PREFIN keyword"));
3899 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_ACTIVEFIN keyword"));
3900 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_FULLTOP keyword"));
3901 eval = validator.Validate(seh, options);
3904 scope.RemoveTopLevelSeqEntry(seh);
3905 seh = scope.AddTopLevelSeqEntry(*delta_entry);
3910 eval = validator.Validate(seh, options);
3921 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"PRK-EIN");
3925 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GapInProtein",
"[1] internal gap symbols in protein sequence (gene? - fake protein name)"));
3927 eval = validator.Validate(seh, options);
3932 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"-RKTEIN");
3933 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadProteinStart",
"gap symbol at start of protein sequence (gene? - fake protein name)"));
3935 eval = validator.Validate(seh, options);
3938 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"-RK-EIN");
3939 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GapInProtein",
"[1] internal gap symbols in protein sequence (gene? - fake protein name)"));
3940 eval = validator.Validate(seh, options);
3952 first_seg->SetLiteral().SetLength(9);
3953 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_front(first_seg);
3955 last_seg->SetLiteral().SetLength(9);
3956 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(last_seg);
3973 eval = validator.Validate(seh, options);
3977 scope.RemoveTopLevelSeqEntry(seh);
3978 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetLength(10);
3979 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetLength(10);
3981 seh = scope.AddTopLevelSeqEntry(*entry);
3982 eval = validator.Validate(seh, options);
3986 scope.RemoveTopLevelSeqEntry(seh);
3987 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
3988 seh = scope.AddTopLevelSeqEntry(*entry);
3994 eval = validator.Validate(seh, options);
3997 scope.RemoveTopLevelSeqEntry(seh);
3998 entry->
SetSeq().
SetId().front()->SetPatent().SetSeqid(1);
3999 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetCountry(
"USA");
4000 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetId().SetNumber(
"1");
4001 seh = scope.AddTopLevelSeqEntry(*entry);
4003 eval = validator.Validate(seh, options);
4012 "Suspicious use of complete"));
4015 eval = validator.Validate(seh, options);
4028 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 0, 10);
4029 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 5, 15);
4030 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 20, 30);
4031 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 25, 35);
4036 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"OverlappingDeltaRange",
"Overlapping delta range 6-16 and 1-11 on a Bioseq gb|AY123456|"));
4037 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"OverlappingDeltaRange",
"Overlapping delta range 26-36 and 21-31 on a Bioseq gb|AY123456|"));
4039 eval = validator.Validate(seh, options);
4050 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"XROTEIN");
4056 eval = validator.Validate(seh, options);
4066 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTTT");
4071 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqRaw",
"Run of 100 Ns in raw sequence starting at base 6"));
4072 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4073 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 90 percent Ns"));
4075 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4077 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4079 eval = validator.Validate(seh, options);
4085 scope.RemoveTopLevelSeqEntry(seh);
4086 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAANNNNNNNNNNNNNNNNNNNNTTTTT");
4088 seh = scope.AddTopLevelSeqEntry(*entry);
4089 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4090 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 66 percent Ns"));
4092 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4094 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4096 eval = validator.Validate(seh, options);
4103 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqRaw",
"Run of 20 Ns in raw sequence starting at base 6"));
4104 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4105 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 66 percent Ns"));
4107 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4109 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4111 eval = validator.Validate(seh, options);
4122 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"ATGATGATGNNN");
4123 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNATGATGATG");
4127 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 9 bases"));
4128 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InternalNsAdjacentToGap",
"Ambiguous residue N is adjacent to a gap around position 13"));
4129 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InternalNsAdjacentToGap",
"Ambiguous residue N is adjacent to a gap around position 23"));
4136 eval = validator.Validate(seh, options);
4146 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
4147 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(11);
4148 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGi(
ZERO_GI);
4153 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"DeltaSeqError",
"Unable to find far delta sequence component"));
4156 eval = validator.Validate(seh, options);
4167 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGGCCAAAATTGGCCAAAATTGG-CAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
4172 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalGapsInSeqRaw",
"Raw nucleotide should not contain gap characters"));
4175 eval = validator.Validate(seh, options);
4186 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
4187 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(11);
4188 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetLocal().SetStr(
"good");
4192 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"SelfReferentialSequence",
"Self-referential delta sequence"));
4193 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InstantiatedGapMismatch",
"Exception 4 in GapByGapInst"));
4196 eval = validator.Validate(seh, options);
4207 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetWhole().SetGenbank().SetAccession(
"AY123456");
4212 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WholeComponent",
"Delta seq component should not be of type whole"));
4215 eval = validator.Validate(seh, options);
4227 seq.
SetId().front()->Assign(*gnl);
4230 seq.
SetId().push_back(lcl);
4231 seq.
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().Assign(*gnl);
4245 eval = validator.Validate(seh, options);
4249 scope.RemoveTopLevelSeqEntry(seh);
4255 cds->
SetProduct().SetWhole().SetGeneral().SetDb(
"a");
4256 cds->
SetProduct().SetWhole().SetGeneral().SetTag().SetStr(
"b");
4257 seh = scope.AddTopLevelSeqEntry(*entry);
4259 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Info,
"ProteinsHaveGeneralID",
"INDEXER_ONLY - Protein bioseqs have general seq-id."));
4262 eval = validator.Validate(seh, options);
4273 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCNNNNNNNNNNNAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTT");
4281 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 11 percent Ns"));
4283 eval = validator.Validate(seh, options);
4286 scope.RemoveTopLevelSeqEntry(seh);
4287 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCNNNNNNNNNNNNNNNNTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTT");
4288 seh = scope.AddTopLevelSeqEntry(*entry);
4289 expected_errors[0]->SetErrMsg(
"Sequence contains 16 percent Ns");
4290 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentStretch",
"Sequence has a stretch of 16 Ns"));
4291 eval = validator.Validate(seh, options);
4296 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentStretch",
"Sequence has a stretch of 16 Ns"));
4297 eval = validator.GetTSANStretchErrors(seh);
4299 eval = validator.GetTSANStretchErrors(entry->
GetSeq());
4304 scope.RemoveTopLevelSeqEntry(seh);
4305 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AANNNNNNNNNNGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGTTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCNNNNNNNNNNAAA");
4306 seh = scope.AddTopLevelSeqEntry(*entry);
4308 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4310 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4312 "Sequence contains 20 percent Ns"));
4314 "Sequence has a stretch of at least 10 Ns within the first 20 bases"));
4316 "Sequence has a stretch of at least 10 Ns within the last 20 bases"));
4318 eval = validator.Validate(seh, options);
4323 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNcontent5Prime",
"Sequence has a stretch of at least 10 Ns within the first 20 bases"));
4324 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNcontent3Prime",
"Sequence has a stretch of at least 10 Ns within the last 20 bases"));
4325 eval = validator.GetTSANStretchErrors(seh);
4327 eval = validator.GetTSANStretchErrors(entry->
GetSeq());
4332 scope.RemoveTopLevelSeqEntry(seh);
4335 gap_seg->SetLiteral().SetSeq_data().SetGap();
4336 gap_seg->SetLiteral().SetLength(10);
4337 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
4340 seh = scope.AddTopLevelSeqEntry(*entry);
4350 eval = validator.Validate(seh, options);
4362 CDelta_ext::Tdata::iterator seg_it = entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().begin();
4364 (*seg_it)->SetLiteral().SetSeq_data().SetIupacna().Set();
4365 (*seg_it)->SetLiteral().SetLength(0);
4371 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLitDataLength0",
"Seq-lit of length 0 in delta chain"));
4373 eval = validator.Validate(seh, options);
4388 gap_seg->SetLiteral().SetLength(101);
4390 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
4404 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"UnknownLengthGapNot100",
"Gap of unknown length should have length 100"));
4412 eval = validator.Validate(seh, options);
4430 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"mRNAshouldBeSingleStranded",
"mRNA should be single stranded not double stranded"));
4432 eval = validator.Validate(seh, options);
4437 eval = validator.Validate(seh, options);
4442 eval = validator.Validate(seh, options);
4451 eval = validator.Validate(seh, options);
4457 eval = validator.Validate(seh, options);
4462 eval = validator.Validate(seh, options);
4478 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"BioSourceMissing",
"Nuc-prot set does not contain expected BioSource descriptor"));
4479 expected_errors.push_back(
new CExpectedError(
"lcl|prot",
eDiag_Fatal,
"NoOrgFound",
"No organism name included in the source. Other qualifiers may exist."));
4482 eval = validator.Validate(seh, options);
4496 entry->
SetDescr().Set().push_back(desc);
4499 entry->
SetDescr().Set().push_back(desc);
4502 entry->
SetDescr().Set().push_back(desc);
4505 entry->
SetDescr().Set().push_back(desc);
4511 "Nucleic acid with protein sequence method"));
4513 "MolType descriptor is obsolete"));
4515 "Modif descriptor is obsolete"));
4517 "Method descriptor is obsolete"));
4519 "OrgRef descriptor is obsolete"));
4523 eval = validator.Validate(seh, options);
4528 scope.RemoveTopLevelSeqEntry(seh);
4529 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
4534 seh = scope.AddTopLevelSeqEntry(*entry);
4536 "Non-TPA record gb|AY123456| should not have TpaAssembly object"));
4539 eval = validator.Validate(seh, options);
4542 scope.RemoveTopLevelSeqEntry(seh);
4543 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
4544 seh = scope.AddTopLevelSeqEntry(*entry);
4546 expected_errors[0]->SetErrMsg(
"Non-TPA record ref|NC_123456| should not have TpaAssembly object");
4547 eval = validator.Validate(seh, options);
4552 entry->
SetDescr().Set().push_back(desc);
4554 "Nucleic acid with GIBB-mol = peptide"));
4556 "MolType descriptor is obsolete"));
4557 eval = validator.Validate(seh, options);
4561 expected_errors[1]->SetErrMsg(
"GIBB-mol unknown or other used");
4562 eval = validator.Validate(seh, options);
4566 eval = validator.Validate(seh, options);
4571 scope.RemoveTopLevelSeqEntry(seh);
4575 entry->
SetDescr().Set().push_back(desc);
4576 seh = scope.AddTopLevelSeqEntry(*entry);
4578 "GIBB-mol [1] used on protein"));
4580 "MolType descriptor is obsolete"));
4582 eval = validator.Validate(seh, options);
4586 expected_errors[0]->SetErrMsg(
"GIBB-mol [2] used on protein");
4587 eval = validator.Validate(seh, options);
4591 expected_errors[0]->SetErrMsg(
"GIBB-mol [3] used on protein");
4592 eval = validator.Validate(seh, options);
4596 expected_errors[0]->SetErrMsg(
"GIBB-mol [4] used on protein");
4597 eval = validator.Validate(seh, options);
4601 expected_errors[0]->SetErrMsg(
"GIBB-mol [5] used on protein");
4602 eval = validator.Validate(seh, options);
4606 expected_errors[0]->SetErrMsg(
"GIBB-mol [6] used on protein");
4607 eval = validator.Validate(seh, options);
4611 expected_errors[0]->SetErrMsg(
"GIBB-mol [7] used on protein");
4612 eval = validator.Validate(seh, options);
4616 expected_errors[0]->SetErrMsg(
"GIBB-mol [9] used on protein");
4617 eval = validator.Validate(seh, options);
4621 expected_errors[0]->SetErrMsg(
"GIBB-mol [10] used on protein");
4622 eval = validator.Validate(seh, options);
4631 "Nucleic acid GIBB-mod [0] on protein"));
4633 "Nucleic acid GIBB-mod [1] on protein"));
4635 "Modif descriptor is obsolete"));
4637 eval = validator.Validate(seh, options);
4642 scope.RemoveTopLevelSeqEntry(seh);
4645 if (it->IsSource()) {
4649 seh = scope.AddTopLevelSeqEntry(*entry);
4652 "Molinfo-biomol other should be used if Biosource-location is synthetic"));
4654 eval = validator.Validate(seh, options);
4660 if (it->IsSource()) {
4661 it->SetSource().ResetOrigin();
4667 "Nucleic acid with Molinfo = peptide"));
4669 eval = validator.Validate(seh, options);
4675 "MoltypeOtherGenetic",
"Molinfo-biomol = other genetic"));
4677 eval = validator.Validate(seh, options);
4683 "MoltypeUnknown",
"Molinfo-biomol unknown used"));
4685 eval = validator.Validate(seh, options);
4691 "MoltypeOther",
"Molinfo-biomol other used"));
4693 eval = validator.Validate(seh, options);
4697 scope.RemoveTopLevelSeqEntry(seh);
4699 seh = scope.AddTopLevelSeqEntry(*entry);
4702 "InvalidForType",
"Molinfo-biomol [1] used on protein"));
4705 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [1] used on protein");
4706 eval = validator.Validate(seh, options);
4710 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [2] used on protein");
4711 eval = validator.Validate(seh, options);
4715 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [3] used on protein");
4716 eval = validator.Validate(seh, options);
4720 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [4] used on protein");
4721 eval = validator.Validate(seh, options);
4725 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [5] used on protein");
4726 eval = validator.Validate(seh, options);
4730 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [6] used on protein");
4731 eval = validator.Validate(seh, options);
4735 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [7] used on protein");
4736 eval = validator.Validate(seh, options);
4740 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [10] used on protein");
4741 eval = validator.Validate(seh, options);
4745 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [11] used on protein");
4746 eval = validator.Validate(seh, options);
4750 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [12] used on protein");
4751 eval = validator.Validate(seh, options);
4755 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [13] used on protein");
4756 eval = validator.Validate(seh, options);
4760 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [14] used on protein");
4761 eval = validator.Validate(seh, options);
4765 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [15] used on protein");
4766 eval = validator.Validate(seh, options);
4771 scope.RemoveTopLevelSeqEntry(seh);
4773 seh = scope.AddTopLevelSeqEntry(*entry);
4776 "synthetic construct should have other-genetic"));
4778 "synthetic construct should have artificial origin"));
4780 eval = validator.Validate(seh, options);
4789 "Nucleic acid with protein sequence method"));
4791 eval = validator.Validate(seh, options);
4795 eval = validator.Validate(seh, options);
4799 eval = validator.Validate(seh, options);
4803 eval = validator.Validate(seh, options);
4807 eval = validator.Validate(seh, options);
4811 eval = validator.Validate(seh, options);
4816 scope.RemoveTopLevelSeqEntry(seh);
4818 seh = scope.AddTopLevelSeqEntry(*entry);
4820 "NucleotideTechniqueOnProtein",
"Protein with nucleic acid sequence method"));
4822 "EST sequence should be mRNA"));
4826 eval = validator.Validate(seh, options);
4832 "Protein with nucleic acid sequence method"));
4835 eval = validator.Validate(seh, options);
4839 eval = validator.Validate(seh, options);
4843 eval = validator.Validate(seh, options);
4847 eval = validator.Validate(seh, options);
4851 "HTGS/STS/GSS/WGS sequence should be genomic"));
4853 eval = validator.Validate(seh, options);
4857 eval = validator.Validate(seh, options);
4861 eval = validator.Validate(seh, options);
4865 eval = validator.Validate(seh, options);
4869 eval = validator.Validate(seh, options);
4873 eval = validator.Validate(seh, options);
4880 "HTGS 2 raw seq has no gaps and no graphs"));
4882 "Protein with nucleic acid sequence method"));
4884 "HTGS/STS/GSS/WGS sequence should be genomic"));
4888 eval = validator.Validate(seh, options);
4894 "Molinfo.tech barcode without BARCODE keyword"));
4896 "Protein with nucleic acid sequence method"));
4900 eval = validator.Validate(seh, options);
4913 entry->
SetDescr().Set().push_back(desc);
4918 "Modif descriptor is obsolete"));
4920 "GIBB-mod = other used"));
4923 eval = validator.Validate(seh, options);
4934 set->SetSet().SetSeq_set().push_back(member);
4948 "No publications anywhere on this entire record."));
4950 "No submission citation anywhere on this entire record."));
4952 eval = validator.Validate(seh, options);
4958 scope.RemoveTopLevelSeqEntry(seh);
4961 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetId().push_back(id_suppress);
4962 seh = scope.AddTopLevelSeqEntry(*entry);
4964 "No submission citation anywhere on this entire record."));
4966 eval = validator.Validate(seh, options);
4972 scope.RemoveTopLevelSeqEntry(seh);
4975 seh = scope.AddTopLevelSeqEntry(*gps);
4977 "GenomicProductPackagingProblem",
4978 "Nucleotide bioseq should be product of mRNA feature on contig, but is not"));
4980 "GenomicProductPackagingProblem",
4981 "Protein bioseq should be product of CDS feature on contig, but is not"));
4983 "No submission citation anywhere on this entire record."));
4986 eval = validator.Validate(seh, options);
4992 scope.RemoveTopLevelSeqEntry(seh);
4996 seh = scope.AddTopLevelSeqEntry(*entry);
4998 "No publications refer to this Bioseq."));
5000 "Expected submission citation is missing for this Bioseq"));
5003 eval = validator.Validate(seh, options);
5009 scope.RemoveTopLevelSeqEntry(seh);
5011 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetId().push_back(id_suppress);
5013 seh = scope.AddTopLevelSeqEntry(*entry);
5016 "Expected submission citation is missing for this Bioseq"));
5018 eval = validator.Validate(seh, options);
5034 "Nuc-prot set does not contain expected BioSource descriptor"));
5036 "No source information included on this record."));
5038 eval = validator.Validate(seh, options);
5044 "Nuc-prot set does not contain expected BioSource descriptor"));
5047 scope.RemoveTopLevelSeqEntry(seh);
5053 seh = scope.AddTopLevelSeqEntry(*entry);
5054 eval = validator.Validate(seh, options);
5057 scope.RemoveTopLevelSeqEntry(seh);
5059 pdb_id->SetMol().Set(
"foo");
5061 seh = scope.AddTopLevelSeqEntry(*entry);
5063 eval = validator.Validate(seh, options);
5067 scope.RemoveTopLevelSeqEntry(seh);
5070 seh = scope.AddTopLevelSeqEntry(*entry);
5073 "No organism name included in the source. Other qualifiers may exist."));
5076 eval = validator.Validate(seh, options);
5085 "No organism name included in the source. Other qualifiers may exist."));
5087 "Nuc-prot set has 1 protein with a BioSource descriptor"));
5089 "Nuc-prot set does not contain expected BioSource descriptor"));
5092 eval = validator.Validate(seh, options);
5108 "Undesired multiple source descriptors"));
5112 eval = validator.Validate(seh, options);
5128 "No Mol-info applies to this Bioseq"));
5131 eval = validator.Validate(seh, options);
5147 "BioSource is missing taxon ID"));
5149 eval = validator.Validate(seh, options);
5164 second->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good2");
5178 "Population set contains inconsistent organism names."));
5182 eval = validator.Validate(seh, options);
5195 eval = validator.Validate(seh, options);
5207 eval = validator.Validate(seh, options);
5225 eval = validator.Validate(seh, options);
5241 "No lineage for this BioSource."));
5244 eval = validator.Validate(seh, options);
5248 eval = validator.Validate(seh, options);
5252 scope.RemoveTopLevelSeqEntry(seh);
5253 entry->
SetSeq().
SetId().front()->SetEmbl().SetAccession(
"B12345");
5254 seh = scope.AddTopLevelSeqEntry(*entry);
5257 eval = validator.Validate(seh, options);
5261 scope.RemoveTopLevelSeqEntry(seh);
5262 entry->
SetSeq().
SetId().front()->SetDdbj().SetAccession(
"C12345");
5263 seh = scope.AddTopLevelSeqEntry(*entry);
5266 eval = validator.Validate(seh, options);
5271 scope.RemoveTopLevelSeqEntry(seh);
5272 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
5273 seh = scope.AddTopLevelSeqEntry(*entry);
5276 eval = validator.Validate(seh, options);
5283 "BioSource is missing taxon ID"));
5284 eval = validator.Validate(seh, options);
5302 "Comment may refer to reference by serial number - attach reference specific comments to the reference REMARK instead."));
5305 eval = validator.Validate(seh, options);
5321 "BioSource descriptor must have focus or transgenic when BioSource feature with different taxname is present."));
5324 eval = validator.Validate(seh, options);
5333 eval = validator.Validate(seh, options);
5339 eval = validator.Validate(seh, options);
5355 "Only Kinetoplastida have kinetoplasts"));
5358 eval = validator.Validate(seh, options);
5362 expected_errors[0]->SetErrMsg(
"Only Chlorarachniophyceae and Cryptophyceae have nucleomorphs");
5364 "Taxonomy lookup does not have expected nucleomorph flag"));
5365 eval = validator.Validate(seh, options);
5371 "Only Ciliophora have macronuclear locations"));
5373 eval = validator.Validate(seh, options);
5382 "Taxonomy lookup does not have expected plastid flag"));
5383 eval = validator.Validate(seh, options);
5390 eval = validator.Validate(seh, options);
5407 "Multiple identical chromosome qualifiers present"));
5410 eval = validator.Validate(seh, options);
5414 expected_errors[0]->SetErrMsg(
"Multiple conflicting chromosome qualifiers present");
5415 eval = validator.Validate(seh, options);
5431 "Unknown subsource subtype 0"));
5434 eval = validator.Validate(seh, options);
5443 unique_ptr<CObjectOStream> os;
5451 if (entry.
IsSeq()) {
5454 if (it->IsSource() && it->GetSource().IsSetOrg()) {
5459 }
else if (entry.
IsSet()) {
5462 if (it->IsSource() && it->GetSource().IsSetOrg()) {
5494 "OrganismNotFound",
"Organism not found in taxonomy database (suggested:Sebaea microphylla var. c)"));
5497 "Unknown orgmod subtype 0"));
5499 "Unknown orgmod subtype 1"));
5501 "Multiple strain qualifiers on the same BioSource"));
5503 "Bad value for type_material"));
5505 "Variety value specified is not found in taxname"));
5507 "Specific host is identical to taxname"));
5514 eval = validator.Validate(seh, options);
5525 SetTaxname(entry,
"Sebaea microphylla var. x");
5532 "Orgmod variety should only be in plants, fungi, or cyanobacteria"));
5534 "NoTaxonID",
"BioSource is missing taxon ID"));
5536 "OrganismNotFound",
"Organism not found in taxonomy database"));
5538 eval = validator.Validate(seh, options);
5550 desc->
SetTitle(
"Not the correct title");
5551 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetDescr().Set().push_back(desc);
5556 "Instantiated protein title does not match automatically generated title"));
5559 eval = validator.Validate(seh, options);
5646 src_desc->
SetSource().
SetOrg().SetOrgname().SetLineage(
"some lineage");
5660 "TPA:experimental and TPA:inferential should not both be in the same set of keywords"));
5668 "Inconsistent organism names [Trichechus manatus] and [Sebaea microphylla]"));
5670 "Inconsistent Molinfo-biomol [1] and [11]"));
5672 "Inconsistent Molinfo-tech [5] and [17]"));
5674 "Inconsistent Molinfo-completeness [3] and [4]"));
5676 "Multiple GenBank blocks"));
5678 "Multiple EMBL blocks"));
5680 "Multiple PIR blocks"));
5682 "Multiple PDB blocks"));
5684 "Multiple PRF blocks"));
5686 "Multiple SWISS-PROT blocks"));
5688 "Inconsistent GIBB-mod [0] and [1]"));
5690 "Inconsistent GIBB-mod [4] and [7]"));
5692 "Inconsistent GIBB-mod [11] and [10]"));
5694 "Inconsistent GIBB-mod [11] and [16]"));
5696 "Inconsistent GIBB-mod [11] and [17]"));
5698 "Inconsistent GIBB-mol [1] and [2]"));
5700 "MolType descriptor is obsolete"));
5702 "MolType descriptor is obsolete"));
5704 "Modif descriptor is obsolete"));
5706 "Create date has error - BAD_DAY"));
5708 "Create date has error - BAD_DAY"));
5710 "Update date has error - BAD_DAY"));
5717 eval = validator.Validate(seh, options);
5723 scope.RemoveTopLevelSeqEntry(seh);
5725 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"ABCD12345678");
5726 seh = scope.AddTopLevelSeqEntry(*entry);
5729 "WGS accession should have Mol-info.tech of wgs"));
5731 eval = validator.Validate(seh, options);
5733 scope.RemoveTopLevelSeqEntry(seh);
5734 entry->
SetSeq().
SetId().front()->SetEmbl().SetAccession(
"ABCE12345678");
5736 seh = scope.AddTopLevelSeqEntry(*entry);
5737 eval = validator.Validate(seh, options);
5739 scope.RemoveTopLevelSeqEntry(seh);
5740 entry->
SetSeq().
SetId().front()->SetDdbj().SetAccession(
"ABCF12345678");
5742 seh = scope.AddTopLevelSeqEntry(*entry);
5743 eval = validator.Validate(seh, options);
5749 scope.RemoveTopLevelSeqEntry(seh);
5750 entry->
SetSeq().
SetId().front()->SetEmbl().SetAccession(
"AA123456");
5752 seh = scope.AddTopLevelSeqEntry(*entry);
5753 eval = validator.Validate(seh, options);
5758 scope.RemoveTopLevelSeqEntry(seh);
5759 entry->
SetSeq().
SetId().front()->SetDdbj().SetAccession(
"AB123456");
5761 seh = scope.AddTopLevelSeqEntry(*entry);
5762 eval = validator.Validate(seh, options);
5767 scope.RemoveTopLevelSeqEntry(seh);
5769 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AC123456");
5772 seh = scope.AddTopLevelSeqEntry(*entry);
5774 "Mol-info.tech of wgs should have WGS accession"));
5776 "Loss of general ID (BCMHGSC: PROJECT_GXOU.BAYLOR) on gi (25008031) compared to the NCBI sequence repository"));
5778 eval = validator.Validate(seh, options);
5783 scope.RemoveTopLevelSeqEntry(seh);
5784 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NM_123456");
5785 seh = scope.AddTopLevelSeqEntry(*entry);
5787 "Mol-info.tech of wgs should have WGS accession"));
5790 eval = validator.Validate(seh, options);
5793 scope.RemoveTopLevelSeqEntry(seh);
5794 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NP_123456");
5795 seh = scope.AddTopLevelSeqEntry(*entry);
5797 eval = validator.Validate(seh, options);
5800 scope.RemoveTopLevelSeqEntry(seh);
5801 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NG_123456");
5802 seh = scope.AddTopLevelSeqEntry(*entry);
5804 eval = validator.Validate(seh, options);
5807 scope.RemoveTopLevelSeqEntry(seh);
5808 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NR_123456");
5809 seh = scope.AddTopLevelSeqEntry(*entry);
5811 eval = validator.Validate(seh, options);
5817 scope.RemoveTopLevelSeqEntry(seh);
5818 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NX_123456");
5819 seh = scope.AddTopLevelSeqEntry(*entry);
5820 eval = validator.Validate(seh, options);
5827 vector<string> segset_accession_prefixes;
5828 segset_accession_prefixes.push_back(
"AH");
5829 segset_accession_prefixes.push_back(
"CH");
5830 segset_accession_prefixes.push_back(
"CM");
5831 segset_accession_prefixes.push_back(
"DS");
5832 segset_accession_prefixes.push_back(
"EM");
5833 segset_accession_prefixes.push_back(
"EN");
5834 segset_accession_prefixes.push_back(
"EP");
5835 segset_accession_prefixes.push_back(
"EQ");
5836 segset_accession_prefixes.push_back(
"FA");
5837 segset_accession_prefixes.push_back(
"GG");
5838 segset_accession_prefixes.push_back(
"GL");
5840 for (
const string& it : segset_accession_prefixes) {
5841 scope.RemoveTopLevelSeqEntry(seh);
5842 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(it +
"_123456");
5843 seh = scope.AddTopLevelSeqEntry(*entry);
5844 eval = validator.Validate(seh, options);
5851 scope.RemoveTopLevelSeqEntry(seh);
5852 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
5855 seh = scope.AddTopLevelSeqEntry(*entry);
5857 eval = validator.Validate(seh, options);
5863 eval = validator.Validate(seh, options);
5868 "genomic RefSeq accession should use genomic or cRNA moltype"));
5869 eval = validator.Validate(seh, options);
5872 eval = validator.Validate(seh, options);
5875 eval = validator.Validate(seh, options);
5878 eval = validator.Validate(seh, options);
5881 eval = validator.Validate(seh, options);
5884 eval = validator.Validate(seh, options);
5887 eval = validator.Validate(seh, options);
5890 eval = validator.Validate(seh, options);
5893 eval = validator.Validate(seh, options);
5896 eval = validator.Validate(seh, options);
5899 eval = validator.Validate(seh, options);
5902 eval = validator.Validate(seh, options);
5918 "Transposon and insertion sequence are no longer legal locations"));
5920 eval = validator.Validate(seh, options);
5924 eval = validator.Validate(seh, options);
5941 "Transposon name and insertion sequence name are no longer legal qualifiers"));
5943 "Transposon name and insertion sequence name are no longer legal qualifiers"));
5946 eval = validator.Validate(seh, options);
5961 "Source note has structured tag '"));
// Structured-tag prefixes used to drive one validation per entry.
// The range-for that follows this list rewrites expected_errors[0]'s message to
// "Source note has structured tag '<prefix>'" for each entry and then calls
// validator.Validate().
// NOTE(review): the eight *_primer_name / *_primer_seq entries carry no
// trailing ':' while every other entry does - presumably intentional; confirm
// against the validator's own tag list before normalizing.
5964 vector<string> tag_prefixes;
5965 tag_prefixes.push_back(
"acronym:");
5966 tag_prefixes.push_back(
"anamorph:");
5967 tag_prefixes.push_back(
"authority:");
5968 tag_prefixes.push_back(
"biotype:");
5969 tag_prefixes.push_back(
"biovar:");
5970 tag_prefixes.push_back(
"bio_material:");
5971 tag_prefixes.push_back(
"breed:");
5972 tag_prefixes.push_back(
"cell_line:");
5973 tag_prefixes.push_back(
"cell_type:");
5974 tag_prefixes.push_back(
"chemovar:");
5975 tag_prefixes.push_back(
"chromosome:");
5976 tag_prefixes.push_back(
"clone:");
5977 tag_prefixes.push_back(
"clone_lib:");
5978 tag_prefixes.push_back(
"collected_by:");
5979 tag_prefixes.push_back(
"collection_date:");
5980 tag_prefixes.push_back(
"common:");
5981 tag_prefixes.push_back(
"country:");
5982 tag_prefixes.push_back(
"cultivar:");
5983 tag_prefixes.push_back(
"culture_collection:");
5984 tag_prefixes.push_back(
"dev_stage:");
5985 tag_prefixes.push_back(
"dosage:");
5986 tag_prefixes.push_back(
"ecotype:");
5987 tag_prefixes.push_back(
"endogenous_virus_name:");
5988 tag_prefixes.push_back(
"environmental_sample:");
5989 tag_prefixes.push_back(
"forma:");
5990 tag_prefixes.push_back(
"forma_specialis:");
5991 tag_prefixes.push_back(
"frequency:");
// Primer-related entries: no trailing ':' (see NOTE above).
5992 tag_prefixes.push_back(
"fwd_pcr_primer_name");
5993 tag_prefixes.push_back(
"fwd_pcr_primer_seq");
5994 tag_prefixes.push_back(
"fwd_primer_name");
5995 tag_prefixes.push_back(
"fwd_primer_seq");
5996 tag_prefixes.push_back(
"genotype:");
5997 tag_prefixes.push_back(
"germline:");
5998 tag_prefixes.push_back(
"group:");
5999 tag_prefixes.push_back(
"haplogroup:");
6000 tag_prefixes.push_back(
"haplotype:");
6001 tag_prefixes.push_back(
"identified_by:");
6002 tag_prefixes.push_back(
"insertion_seq_name:");
6003 tag_prefixes.push_back(
"isolate:");
6004 tag_prefixes.push_back(
"isolation_source:");
6005 tag_prefixes.push_back(
"lab_host:");
6006 tag_prefixes.push_back(
"lat_lon:");
6007 tag_prefixes.push_back(
"left_primer:");
6008 tag_prefixes.push_back(
"linkage_group:");
6009 tag_prefixes.push_back(
"map:");
6010 tag_prefixes.push_back(
"mating_type:");
6011 tag_prefixes.push_back(
"metagenome_source:");
6012 tag_prefixes.push_back(
"metagenomic:");
6013 tag_prefixes.push_back(
"nat_host:");
6014 tag_prefixes.push_back(
"pathovar:");
6015 tag_prefixes.push_back(
"placement:");
6016 tag_prefixes.push_back(
"plasmid_name:");
6017 tag_prefixes.push_back(
"plastid_name:");
6018 tag_prefixes.push_back(
"pop_variant:");
6019 tag_prefixes.push_back(
"rearranged:");
// Primer-related entries: no trailing ':' (see NOTE above).
6020 tag_prefixes.push_back(
"rev_pcr_primer_name");
6021 tag_prefixes.push_back(
"rev_pcr_primer_seq");
6022 tag_prefixes.push_back(
"rev_primer_name");
6023 tag_prefixes.push_back(
"rev_primer_seq");
6024 tag_prefixes.push_back(
"right_primer:");
6025 tag_prefixes.push_back(
"segment:");
6026 tag_prefixes.push_back(
"serogroup:");
6027 tag_prefixes.push_back(
"serotype:");
6028 tag_prefixes.push_back(
"serovar:");
6029 tag_prefixes.push_back(
"sex:");
6030 tag_prefixes.push_back(
"specimen_voucher:");
6031 tag_prefixes.push_back(
"strain:");
6032 tag_prefixes.push_back(
"subclone:");
6033 tag_prefixes.push_back(
"subgroup:");
6034 tag_prefixes.push_back(
"substrain:");
6035 tag_prefixes.push_back(
"subtype:");
6036 tag_prefixes.push_back(
"sub_species:");
6037 tag_prefixes.push_back(
"synonym:");
6038 tag_prefixes.push_back(
"taxon:");
6039 tag_prefixes.push_back(
"teleomorph:");
6040 tag_prefixes.push_back(
"tissue_lib:");
6041 tag_prefixes.push_back(
"tissue_type:");
6042 tag_prefixes.push_back(
"transgenic:");
6043 tag_prefixes.push_back(
"transposon_name:");
6044 tag_prefixes.push_back(
"type:");
6045 tag_prefixes.push_back(
"variety:");
6047 for (
const string& it : tag_prefixes) {
6048 expected_errors[0]->SetErrMsg(
"Source note has structured tag '" + it +
"'");
6050 eval = validator.Validate(seh, options);
6054 eval = validator.Validate(seh, options);
6073 "BioSource descriptor has focus, but no BioSource feature"));
6076 eval = validator.Validate(seh, options);
6087 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
6094 field->
SetData().SetStr(
"Data");
6100 "RefGeneTracking object needs to have Status set"));
6103 eval = validator.Validate(seh, options);
6114 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
6116 SetTitle(entry,
"a title without the word");
6121 "Suspicious use of complete"));
6124 eval = validator.Validate(seh, options);
6130 eval = validator.Validate(seh, options);
6136 SetTitle(entry,
"complete sequence");
6138 eval = validator.Validate(seh, options);
6144 scope.RemoveTopLevelSeqEntry(seh);
6145 SetTitle(entry,
"a title without the word");
6146 entry->
SetSeq().
SetId().front()->SetEmbl().SetAccession(
"AY123457");
6149 seh = scope.AddTopLevelSeqEntry(*entry);
6150 eval = validator.Validate(seh, options);
6171 title1->SetName(
"First title");
6180 title2->SetName(
"Second title");
6188 "Multiple publications with identical PubMed ID"));
6191 eval = validator.Validate(seh, options);
6197 eval = validator.Validate(seh, options);
6201 title2->SetName(
"First title");
6203 "Multiple equivalent publications annotated on this sequence [Darwin|Ft; Darwin]"));
6204 eval = validator.Validate(seh, options);
6207 delete expected_errors[1];
6208 expected_errors.pop_back();
6211 scope.RemoveTopLevelSeqEntry(seh);
6216 seh = scope.AddTopLevelSeqEntry(*entry);
6217 expected_errors[0]->SetErrCode(
"CollidingPublications");
6218 expected_errors[0]->SetErrMsg(
"Multiple conflicting muids in a single publication");
6219 eval = validator.Validate(seh, options);
6222 expected_errors[0]->SetErrMsg(
"Multiple redundant muids in a single publication");
6223 eval = validator.Validate(seh, options);
6227 expected_errors[0]->SetErrMsg(
"Multiple conflicting pmids in a single publication");
6228 eval = validator.Validate(seh, options);
6231 expected_errors[0]->SetErrMsg(
"Multiple redundant pmids in a single publication");
6232 eval = validator.Validate(seh, options);
6248 "Transgenic source descriptor requires presence of source feature"));
6251 eval = validator.Validate(seh, options);
6256 scope.RemoveTopLevelSeqEntry(seh);
6259 seh = scope.AddTopLevelSeqEntry(*entry);
6262 eval = validator.Validate(seh, options);
6279 "BioSource is missing taxon ID"));
6281 "Organism not found in taxonomy database"));
6284 eval = validator.Validate(seh, options);
6290 "BioSource is missing taxon ID"));
6292 "Taxonomy lookup reports is_species_level FALSE"));
6295 eval = validator.Validate(seh, options);
6301 "BioSource is missing taxon ID"));
6303 "Taxonomy lookup reports taxonomy consultation needed"));
6306 eval = validator.Validate(seh, options);
6314 "Only Chlorarachniophyceae and Cryptophyceae have nucleomorphs"));
6316 "BioSource is missing taxon ID"));
6318 "Taxonomy lookup does not have expected nucleomorph flag"));
6320 eval = validator.Validate(seh, options);
6337 "BioSource is missing taxon ID"));
6339 "Taxonomy lookup reports taxonomy consultation needed"));
6342 eval = validator.Validate(seh, options);
6370 "Undesired multiple title descriptors"));
6373 eval = validator.Validate(seh, options);
6390 secondseq->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good2");
6396 "RefGeneTracking object should only be in RefSeq record"));
6400 eval = validator.Validate(seh, options);
6406 scope.RemoveTopLevelSeqEntry(seh);
6407 secondseq->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
6408 seh = scope.AddTopLevelSeqEntry(*entry);
6410 eval = validator.Validate(seh, options);
6430 "Variety value specified is not found in taxname"));
6432 "Organism not found in taxonomy database (suggested:Arabidopsis thaliana var. foo)"));
6435 eval = validator.Validate(seh, options);
6440 expected_errors[0]->SetErrMsg(
"Forma value specified is not found in taxname");
6441 expected_errors[1]->SetErrMsg(
"Organism not found in taxonomy database (suggested:Arabidopsis thaliana f. foo)");
6442 eval = validator.Validate(seh, options);
6447 expected_errors[0]->SetErrMsg(
"Subspecies value specified is not found in taxname");
6448 expected_errors[1]->SetErrMsg(
"Organism not found in taxonomy database (suggested:Arabidopsis thaliana subsp. foo)");
6449 eval = validator.Validate(seh, options);
6457 "OrgModMissingValue",
6458 "Forma specialis value specified is not found in taxname"));
6460 eval = validator.Validate(seh, options);
6469 eval = validator.Validate(seh, options);
6474 eval = validator.Validate(seh, options);
6497 "Germline qualifier should not have descriptive text"));
6499 "Rearranged qualifier should not have descriptive text"));
6501 "Transgenic qualifier should not have descriptive text"));
6503 "Environmental_sample qualifier should not have descriptive text"));
6505 "Metagenomic qualifier should not have descriptive text"));
6507 "Germline and rearranged should not both be present"));
6509 "Transgenic and environmental sample should not both be present"));
6511 "Environmental sample should also have isolation source or specific host annotated"));
6513 eval = validator.Validate(seh, options);
6530 "Unexpected use of /sex qualifier"));
6532 eval = validator.Validate(seh, options);
6538 "Unexpected use of /sex qualifier"));
6541 eval = validator.Validate(seh, options);
6544 eval = validator.Validate(seh, options);
6547 expected_errors[0]->SetErrMsg(
"Invalid value (a) for /sex qualifier");
6550 "No lineage for this BioSource."));
6551 eval = validator.Validate(seh, options);
6557 "No lineage for this BioSource."));
// Value list iterated by two later range-for loops, each of which ends in a
// validator.Validate() call; how each value is applied to the entry is not
// visible in this part of the file.
// Presumably these are the values accepted for the /sex qualifier (inferred
// from the variable name only) - TODO confirm against the validator.
// Both spellings of "monoecious"/"monecious" and "dioecious"/"diecious" are
// listed as separate entries.
6561 vector<string> ok_sex_vals;
6562 ok_sex_vals.push_back(
"female");
6563 ok_sex_vals.push_back(
"male");
6564 ok_sex_vals.push_back(
"hermaphrodite");
6565 ok_sex_vals.push_back(
"unisexual");
6566 ok_sex_vals.push_back(
"bisexual");
6567 ok_sex_vals.push_back(
"asexual");
6568 ok_sex_vals.push_back(
"monoecious");
6569 ok_sex_vals.push_back(
"monecious");
6570 ok_sex_vals.push_back(
"dioecious");
6571 ok_sex_vals.push_back(
"diecious");
6573 for (
const string& it : ok_sex_vals) {
6576 eval = validator.Validate(seh, options);
6587 "Unexpected use of /mating_type qualifier"));
6589 eval = validator.Validate(seh, options);
6593 eval = validator.Validate(seh, options);
6596 eval = validator.Validate(seh, options);
6599 eval = validator.Validate(seh, options);
6603 eval = validator.Validate(seh, options);
6608 for (
const string& it : ok_sex_vals) {
6611 eval = validator.Validate(seh, options);
6621 eval = validator.Validate(seh, options);
6642 "HIV with moltype DNA should be proviral"));
6643 eval = validator.Validate(seh, options);
6649 "HIV with mRNA molecule type is rare"));
6652 eval = validator.Validate(seh, options);
6672 "Plasmid subsource but not plasmid location"));
6673 eval = validator.Validate(seh, options);
6680 eval = validator.Validate(seh, options);
6686 "Plasmid location set but plasmid name missing. Add a plasmid source modifier with the plasmid name. Use unnamed if the name is not known."));
6687 eval = validator.Validate(seh, options);
// Plastid names used to drive one validation per entry.
// The range-for that follows this list sets expected_errors[0]'s message to
// "Plastid name subsource <name> but not <name> location" for each entry and
// then calls validator.Validate().
6705 vector<string> plastid_vals;
6706 plastid_vals.push_back(
"chloroplast");
6707 plastid_vals.push_back(
"chromoplast");
6708 plastid_vals.push_back(
"kinetoplast");
6709 plastid_vals.push_back(
"plastid");
6710 plastid_vals.push_back(
"apicoplast");
6711 plastid_vals.push_back(
"leucoplast");
6712 plastid_vals.push_back(
"proplastid");
6716 "Plastid name subsource chloroplast but not chloroplast location"));
6718 for (
const string& it : plastid_vals) {
6721 expected_errors[0]->SetErrMsg(
"Plastid name subsource " + it +
" but not " + it +
" location");
6722 eval = validator.Validate(seh, options);
6728 expected_errors[0]->SetErrMsg(
"Plastid name subsource contains unrecognized value");
6729 eval = validator.Validate(seh, options);
6748 "bad frequency qualifier value 1"));
6749 eval = validator.Validate(seh, options);
6754 expected_errors[0]->SetErrMsg(
"bad frequency qualifier value abc");
6755 eval = validator.Validate(seh, options);
6781 "Virus has unexpected Sex qualifier"));
6782 eval = validator.Validate(seh, options);
6786 expected_errors[0]->SetErrCode(
"BioSourceInconsistency");
6787 expected_errors[0]->SetErrMsg(
"Virus has unexpected Cell-line qualifier");
6788 eval = validator.Validate(seh, options);
6792 expected_errors[0]->SetErrMsg(
"Virus has unexpected Cell-type qualifier");
6793 eval = validator.Validate(seh, options);
6797 expected_errors[0]->SetErrCode(
"InvalidTissueType");
6798 expected_errors[0]->SetErrMsg(
"Virus has unexpected Tissue-type qualifier");
6799 eval = validator.Validate(seh, options);
6803 expected_errors[0]->SetErrCode(
"BioSourceInconsistency");
6804 expected_errors[0]->SetErrMsg(
"Virus has unexpected Dev-stage qualifier");
6805 eval = validator.Validate(seh, options);
6809 expected_errors[0]->SetErrMsg(
"Virus has unexpected Breed qualifier");
6810 eval = validator.Validate(seh, options);
6814 expected_errors[0]->SetErrMsg(
"Virus has unexpected Cultivar qualifier");
6815 eval = validator.Validate(seh, options);
6821 expected_errors[0]->SetErrMsg(
"Germline and rearranged should not both be present");
6822 eval = validator.Validate(seh, options);
6829 scope.RemoveTopLevelSeqEntry(seh);
6835 seh = scope.AddTopLevelSeqEntry(*entry);
6837 "Transgenic and environmental sample should not both be present"));
6840 eval = validator.Validate(seh, options);
6850 "Metagenomic should also have environmental sample annotated"));
6851 eval = validator.Validate(seh, options);
6861 "Sex and mating type should not both be present"));
6862 eval = validator.Validate(seh, options);
6871 "If metagenomes appears in lineage, BioSource should have metagenomic qualifier"));
6872 eval = validator.Validate(seh, options);
6882 "Uncultured should also have /environmental_sample"));
6883 eval = validator.Validate(seh, options);
6888 scope.RemoveTopLevelSeqEntry(seh);
6891 seh = scope.AddTopLevelSeqEntry(*entry);
6893 "EnvironSampleMissingQualifier",
6894 "Environmental sample should also have isolation source or specific host annotated"));
6896 eval = validator.Validate(seh, options);
6905 "BadOrganelleLocation",
6906 "Bacterial or viral source should not have organelle location"));
6907 eval = validator.Validate(seh, options);
6910 eval = validator.Validate(seh, options);
6918 "MissingEnvironmentalSample",
6919 "BioSource with ENV division is missing environmental sample subsource"));
6920 eval = validator.Validate(seh, options);
6930 "StrainWithEnvironSample",
6931 "Strain should not be present in an environmental sample"));
6932 eval = validator.Validate(seh, options);
6942 "MissingMetagenomicQualifier",
6943 "Metagenome source should also have metagenomic qualifier"));
6944 eval = validator.Validate(seh, options);
6953 "OrgModValueInvalid",
6954 "OrgMod synonym is identical to OrgMod gb_synonym"));
6955 eval = validator.Validate(seh, options);
6964 "InconsistentVirusMoltype",
6965 "cRNA note conflicts with molecule type"));
6966 eval = validator.Validate(seh, options);
6971 expected_errors[0]->SetErrMsg(
"cRNA note redundant with molecule type");
6972 eval = validator.Validate(seh, options);
6979 expected_errors[0]->SetErrMsg(
"Genomic DNA viral lineage indicates no DNA stage");
6980 eval = validator.Validate(seh, options);
6985 expected_errors[0]->SetErrMsg(
"cRNA note conflicts with molecule type");
6986 eval = validator.Validate(seh, options);
6991 expected_errors[0]->SetErrMsg(
"cRNA note redundant with molecule type");
6992 eval = validator.Validate(seh, options);
6997 scope.RemoveTopLevelSeqEntry(seh);
6999 seh = scope.AddTopLevelSeqEntry(*entry);
7006 f->SetLabel().SetStr(
"BioSample");
7007 f->SetData().SetStr(
"PRJNA12345");
7011 "Bacteria should have strain or isolate or environmental sample"));
7013 "Bad BioSample format - PRJNA12345"));
7015 eval = validator.Validate(seh, options);
7021 scope.RemoveTopLevelSeqEntry(seh);
7023 seh = scope.AddTopLevelSeqEntry(*entry);
7025 "Bad BioSample format - PRJNA12345"));
7027 eval = validator.Validate(seh, options);
7030 scope.RemoveTopLevelSeqEntry(seh);
7033 seh = scope.AddTopLevelSeqEntry(*entry);
7034 eval = validator.Validate(seh, options);
7037 scope.RemoveTopLevelSeqEntry(seh);
7040 seh = scope.AddTopLevelSeqEntry(*entry);
7042 "Environmental sample should also have isolation source or specific host annotated"));
7043 eval = validator.Validate(seh, options);
7059 "Tissue-type is inappropriate for bacteria"));
7061 eval = validator.Validate(seh, options);
7075 "Negative-sense single-stranded RNA virus with plus strand CDS should be cRNA"));
7076 expected_errors[0]->SetAccession(
"lcl|nuc");
7078 "Taxonomy indicates single-stranded RNA, molecule type (DNA) is conflicting."));
7079 expected_errors[1]->SetAccession(
"lcl|nuc");
7080 eval = validator.Validate(seh, options);
7087 "Negative-sense single-stranded RNA virus with plus strand CDS should be cRNA"));
7088 expected_errors[0]->SetAccession(
"lcl|nuc");
7090 eval = validator.Validate(seh, options);
7097 eval = validator.Validate(seh, options);
7101 eval = validator.Validate(seh, options);
7104 eval = validator.Validate(seh, options);
7107 eval = validator.Validate(seh, options);
7110 eval = validator.Validate(seh, options);
7115 eval = validator.Validate(seh, options);
7120 eval = validator.Validate(seh, options);
7125 eval = validator.Validate(seh, options);
7131 eval = validator.Validate(seh, options);
7136 scope.RemoveTopLevelSeqEntry(seh);
7138 seh = scope.AddTopLevelSeqEntry(*entry);
7140 eval = validator.Validate(seh, options);
7149 "CDS should not be on minus strand of mRNA molecule"));
7151 "Negative-sense single-stranded RNA virus with minus strand CDS should be genomic RNA"));
7153 eval = validator.Validate(seh, options);
7158 scope.RemoveTopLevelSeqEntry(seh);
7163 seh = scope.AddTopLevelSeqEntry(*entry);
7165 "Negative-sense single-stranded RNA virus with nonfunctional plus strand misc_feature should be cRNA"));
7167 "Taxonomy indicates single-stranded RNA, molecule type (DNA) is conflicting."));
7169 eval = validator.Validate(seh, options);
7176 "Negative-sense single-stranded RNA virus with nonfunctional plus strand misc_feature should be cRNA"));
7180 eval = validator.Validate(seh, options);
7187 eval = validator.Validate(seh, options);
7191 eval = validator.Validate(seh, options);
7194 eval = validator.Validate(seh, options);
7197 eval = validator.Validate(seh, options);
7200 eval = validator.Validate(seh, options);
7203 scope.RemoveTopLevelSeqEntry(seh);
7205 seh = scope.AddTopLevelSeqEntry(*entry);
7207 eval = validator.Validate(seh, options);
7214 "Ambisense virus should be genomic RNA or cRNA"));
7215 eval = validator.Validate(seh, options);
7224 "Negative-sense single-stranded RNA virus with nonfunctional minus strand misc_feature should be genomic RNA"));
7225 eval = validator.Validate(seh, options);
7242 "Positive-sense single-stranded RNA virus should be genomic RNA"));
7244 eval = validator.Validate(seh, options);
7252 eval = validator.Validate(seh, options);
7255 eval = validator.Validate(seh, options);
7258 eval = validator.Validate(seh, options);
7261 eval = validator.Validate(seh, options);
7265 eval = validator.Validate(seh, options);
7267 "Molinfo-biomol other should be used if Biosource-location is synthetic"));
7273 eval = validator.Validate(seh, options);
7277 eval = validator.Validate(seh, options);
7279 "artificial origin should have other-genetic"));
7281 "synthetic construct should have other-genetic"));
7297 "Title may have unparsed [...=...] construct"));
7300 eval = validator.Validate(seh, options);
7306 scope.RemoveTopLevelSeqEntry(seh);
7311 seh = scope.AddTopLevelSeqEntry(*entry);
7313 eval = validator.Validate(seh, options);
7317 scope.RemoveTopLevelSeqEntry(seh);
7319 seh = scope.AddTopLevelSeqEntry(*entry);
7321 eval = validator.Validate(seh, options);
7340 "Comment descriptor needs text"));
7343 eval = validator.Validate(seh, options);
7348 scope.RemoveTopLevelSeqEntry(seh);
7350 seh = scope.AddTopLevelSeqEntry(*entry);
7352 "TitleMissingText",
"Title descriptor needs text"));
7354 eval = validator.Validate(seh, options);
7359 scope.RemoveTopLevelSeqEntry(seh);
7361 seh = scope.AddTopLevelSeqEntry(*entry);
7363 "MissingText",
"Name descriptor needs text"));
7365 eval = validator.Validate(seh, options);
7370 scope.RemoveTopLevelSeqEntry(seh);
7372 seh = scope.AddTopLevelSeqEntry(*entry);
7374 "Region descriptor needs text"));
7376 eval = validator.Validate(seh, options);
7393 "Collection_date format is not in DD-Mmm-YYYY format"));
7396 eval = validator.Validate(seh, options);
7402 eval = validator.Validate(seh, options);
7408 eval = validator.Validate(seh, options);
7413 expected_errors[0]->SetErrMsg(
"Collection_date is in the future");
7414 eval = validator.Validate(seh, options);
7420 eval = validator.Validate(seh, options);
7429 eval = validator.Validate(seh, options);
7435 eval = validator.Validate(seh, options);
7439 eval = validator.Validate(seh, options);
7443 eval = validator.Validate(seh, options);
7446 bool bad_format =
false, in_future =
false;
7448 BOOST_CHECK_EQUAL(bad_format,
false);
7449 BOOST_CHECK_EQUAL(in_future,
false);
7452 BOOST_CHECK_EQUAL(bad_format,
false);
7453 BOOST_CHECK_EQUAL(in_future,
false);
7463 BOOST_CHECK_EQUAL(bad_ch,
'0');
7472 "PCR forward primer sequence format is incorrect, first bad character is '?'"));
7474 "PCR primer does not have both sequences"));
7477 eval = validator.Validate(seh, options);
7482 expected_errors[0]->SetErrMsg(
"PCR reverse primer sequence format is incorrect, first bad character is '0'");
7484 eval = validator.Validate(seh, options);
7489 expected_errors[0]->SetErrMsg(
"PCR reverse primer sequence format is incorrect, first bad character is 'q'");
7491 eval = validator.Validate(seh, options);
7496 expected_errors[0]->SetErrMsg(
"PCR reverse primer sequence format is incorrect, first bad character is '?'");
7498 eval = validator.Validate(seh, options);
7508 eval = validator.Validate(seh, options);
7514 if (it->IsSource()) {
7516 fwd->
SetName().Set(
"AATTGGCCAATTGGC");
7517 fwd->
SetSeq().Set(
"AATTGGCCAATTGG4C");
7521 rev->
SetName().Set(
"AATTGGCCAATTGGC");
7522 rev->
SetSeq().Set(
"AATTGGCCAATTGG5C");
7524 it->SetSource().SetPcr_primers().Set().push_back(reaction);
7529 "PCR forward primer sequence format is incorrect, first bad character is '4'"));
7531 "PCR forward primer name appears to be a sequence"));
7533 "PCR reverse primer sequence format is incorrect, first bad character is '5'"));
7535 "PCR reverse primer name appears to be a sequence"));
7537 eval = validator.Validate(seh, options);
7546 fwd_seq.assign(
"5-agtctctctc-");
7548 BOOST_CHECK_EQUAL(modified,
true);
7549 BOOST_CHECK_EQUAL(fwd_seq,
string(
"agtctctctc"));
7551 fwd_seq.assign(
"5`aattggccaattg3'");
7553 BOOST_CHECK_EQUAL(modified,
true);
7554 BOOST_CHECK_EQUAL(fwd_seq,
string(
"aattggccaattg"));
7556 fwd_seq.assign(
"aattggccaacct");
7558 BOOST_CHECK_EQUAL(modified,
false);
7559 BOOST_CHECK_EQUAL(fwd_seq,
string(
"aattggccaacct"));
7561 fwd_seq.assign(
"agttt<I>tagaga<i>gac");
7563 BOOST_CHECK_EQUAL(modified,
true);
7564 BOOST_CHECK_EQUAL(fwd_seq,
string(
"agttt<i>tagaga<i>gac"));
7566 fwd_seq.assign(
"agtccat<iagata>gtct");
7568 BOOST_CHECK_EQUAL(modified,
true);
7569 BOOST_CHECK_EQUAL(fwd_seq,
string(
"agtccat<i>agata>gtct"));
7571 fwd_seq.assign(
"agtccat<i>gtctaaa");
7573 BOOST_CHECK_EQUAL(modified,
false);
7574 BOOST_CHECK_EQUAL(fwd_seq,
string(
"agtccat<i>gtctaaa"));
7590 "Title descriptor ends in bad punctuation"));
7593 eval = validator.Validate(seh, options);
7598 eval = validator.Validate(seh, options);
7603 eval = validator.Validate(seh, options);
7608 eval = validator.Validate(seh, options);
7624 "PCR primer name appears to be a sequence"));
7627 eval = validator.Validate(seh, options);
7633 eval = validator.Validate(seh, options);
7643 eval = validator.Validate(seh, options);
7659 "Nuc-prot set has 1 protein with a BioSource descriptor"));
7663 eval = validator.Validate(seh, options);
7680 "BioSource uses db AFTOL multiple times"));
7683 eval = validator.Validate(seh, options);
7695 if (it->IsSource()) {
7696 src.
Reset(&(it->SetSource()));
7702 eval = validator.Validate(seh, options);
7704 if (
NStr::Equal(vit->GetErrCode(),
"DuplicatePCRPrimerSequence")) {
7722 "PCR primer sequence has duplicates"));
7725 eval = validator.Validate(seh, options);
7739 rset->
Set().push_back(r1);
7741 rset->
Set().push_back(r2);
7747 f1->SetSeq().Set(
"aa");
7759 rv2->
SetName().Set(
"a different name");
7780 "Undesired multiple name descriptors, identical text"));
7782 eval = validator.Validate(seh, options);
7786 expected_errors[0]->SetErrMsg(
"Undesired multiple name descriptors, different text");
7787 eval = validator.Validate(seh, options);
7808 "Undesired multiple comment descriptors, identical text"));
7810 eval = validator.Validate(seh, options);
7817 eval = validator.Validate(seh, options);
7834 "lat_lon format has extra text after correct dd.dd N|S ddd.dd E|W format"));
7837 eval = validator.Validate(seh, options);
7842 expected_errors[0]->SetErrMsg(
"lat_lon format is incorrect - should be dd.dd N|S ddd.dd E|W");
7844 eval = validator.Validate(seh, options);
7860 "latitude value is out of range - should be between 90.00 N and 90.00 S"));
7862 "longitude value is out of range - should be between 180.00 E and 180.00 W"));
7864 eval = validator.Validate(seh, options);
7869 eval = validator.Validate(seh, options);
7883 eval = validator.Validate(seh, options);
7890 "'123' is an invalid altitude value, altitude should be provided in meters"));
7892 eval = validator.Validate(seh, options);
7906 "'123 ft.' is an invalid altitude value, altitude should be provided in meters"));
7909 eval = validator.Validate(seh, options);
7945 eval = validator.Validate(seh, options);
7960 "Specific host value is misspelled: Metapone madagascaria"));
7962 eval = validator.Validate(seh, options);
7967 expected_errors[0]->SetErrMsg(
"Specific host value is incorrectly capitalized: Homo Sapiens");
7968 eval = validator.Validate(seh, options);
7973 expected_errors[0]->SetErrMsg(
"Invalid value for specific host: Homo nonrecognizedus");
7974 eval = validator.Validate(seh, options);
7982 eval = validator.Validate(seh, options);
7988 eval = validator.Validate(seh, options);
7996 eval = validator.Validate(seh, options);
7999 "Suspect Host Value - a prokaryote, fungus or virus is suspect as a host for a plant or animal"));
8015 string host, error_msg;
8017 host =
"home sapiens";
8019 BOOST_CHECK_EQUAL(error_msg,
"Specific host value is misspelled: home sapiens");
8021 host =
"Svalbard rock ptarmigan";
8023 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8027 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8031 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8035 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8039 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8042 host =
"Homo sapiens";
8044 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8046 host =
"Homo supiens";
8048 BOOST_CHECK_EQUAL(error_msg,
string(
"Invalid value for specific host: Homo supiens"));
8052 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8054 host =
"Gallus Gallus";
8056 BOOST_CHECK_EQUAL(error_msg,
string(
"Specific host value is incorrectly capitalized: Gallus Gallus"));
8058 host =
"Eschericia coli";
8060 BOOST_CHECK_EQUAL(error_msg,
string(
"Specific host value is misspelled: Eschericia coli"));
8064 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8068 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8072 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8076 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8080 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8082 host =
"Homo sapiens; sex: female";
8084 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8086 host =
"Guinea pig";
8088 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8092 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8096 BOOST_CHECK_EQUAL(error_msg,
kEmptyStr);
8102 string hostfix, host;
8104 host =
"home sapiens";
8106 BOOST_CHECK_EQUAL(hostfix,
"Homo sapiens");
8108 host =
"homo sapiens";
8110 BOOST_CHECK_EQUAL(hostfix,
"Homo sapiens");
8112 host =
"Homo supiens";
8118 BOOST_CHECK_EQUAL(hostfix,
"Pinus sp.");
8120 host =
"Gallus Gallus";
8122 BOOST_CHECK_EQUAL(hostfix,
string(
"Gallus gallus"));
8124 host =
"Eschericia coli";
8126 BOOST_CHECK_EQUAL(hostfix,
string(
"Escherichia coli"));
8130 BOOST_CHECK_EQUAL(hostfix, host);
8138 BOOST_CHECK_EQUAL(hostfix,
string(
"Bovine"));
8140 host =
"Homo sapiens";
8142 BOOST_CHECK_EQUAL(hostfix,
string(
"Homo sapiens"));
8146 BOOST_CHECK_EQUAL(hostfix,
string(
"Pig"));
8150 BOOST_CHECK_EQUAL(hostfix,
string(
"Chicken"));
8152 host =
"Homo sapiens; sex: female";
8154 BOOST_CHECK_EQUAL(hostfix, host);
8158 BOOST_CHECK_EQUAL(hostfix,
"Homo sapiens");
8166 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
8173 "RefGeneTracking object has illegal Status 'unknown'"));
8175 eval = validator.Validate(seh, options);
// Country names used to drive one validation per entry.
// The range-for that follows this list substitutes each name into the
// expected message for expected_errors[0] - either
// "Replaced geo_loc_name [<name>]" or "Replaced country name [<name>]" - and
// then calls validator.Validate(). The condition selecting between the two
// wordings is not visible in this part of the file.
8189 vector<string> old_countries;
8190 old_countries.push_back(
"Belgian Congo");
8191 old_countries.push_back(
"British Guiana");
8192 old_countries.push_back(
"Burma");
8193 old_countries.push_back(
"Czechoslovakia");
8194 old_countries.push_back(
"Korea");
8195 old_countries.push_back(
"Serbia and Montenegro");
8196 old_countries.push_back(
"Siam");
8197 old_countries.push_back(
"USSR");
8198 old_countries.push_back(
"Yugoslavia");
8199 old_countries.push_back(
"Zaire");
8200 old_countries.push_back(
"Macedonia");
8209 for (
const string& it : old_countries) {
8212 expected_errors[0]->SetErrMsg(
"Replaced geo_loc_name [" + it +
"]");
8214 expected_errors[0]->SetErrMsg(
"Replaced country name [" + it +
"]");
8216 eval = validator.Validate(seh, options);
8233 "Voucher is missing institution code"));
8236 eval = validator.Validate(seh, options);
8241 eval = validator.Validate(seh, options);
8246 eval = validator.Validate(seh, options);
// Institution codes iterated by the loop that follows; each code is spliced
// into the expected message
// "Institution code <code> needs to be qualified with a <COUNTRY> designation".
// Contents and order are exactly those of the former push_back sequence;
// the vector is left non-const because it is appended to again further down.
vector<string> ambig = {
    "BAH", "ACE", "SLU", "UAB", "CAIM", "HER", "DSC", "DNHM", "BNHM", "UI",
    "KMK", "MT", "MP", "NASC", "IZAC", "CCG", "PIN", "HSU", "CAUP", "ISU",
    "SDSU", "GC", "UNL", "MZUP", "MG", "HNHM", "PMS", "LE", "GCM", "TMP",
    "DMNH", "ZMUH", "SMF", "ZSP", "TAU", "MJG", "DUM", "ANU", "CPAP", "CSU",
    "WACA", "MMNH", "ALA", "RV", "ABS", "FM", "HNU", "PO", "GAM", "MCM",
    "LU", "SDM", "PMK", "VI", "IMM", "R", "CHM", "CMC", "JSPC", "YU",
    "STM", "RSM", "BB", "BHM", "CBU", "MCCM", "NMSU", "OTM", "LP", "SME",
    "PEM", "UMF", "CIS", "LBG", "CCAC", "SNP", "UT", "IBA", "UNCC", "NHMC",
    "BAC", "PMG", "MRC", "ETH", "OMC", "NMV", "MLS", "NJM", "INA", "BCM",
    "YM", "CAM", "UA", "OSM", "CPS", "POKM", "VSM", "ZMG", "IO", "USM",
    "UCS", "CN", "PCM", "MU", "ISC", "CIB", "GML", "NU", "NCSC", "MHNN",
    "NCC", "MSM", "RM", "MBM", "UPM", "MSU", "PI", "CENA", "IBRP", "CRE",
    "FSC", "ENCB", "BAS", "GOE", "PSS", "CCB", "SUM", "NMPG", "USP", "IPB",
    "BCC", "FNU", "SHM", "TNSC", "LS", "TMC", "HUT", "ZMUO", "ALM", "ITCC",
    "TM", "WB", "ZMK", "LBM", "NI", "CB",
    "MM", "PMU", "DM", "RIVE", "TARI", "CSCS", "PSU", "IMT", "MZV", "SZE",
    "CUVC", "LMJ", "UC", "ZIUS", "FRI", "CDA", "ZMUA", "MZUC", "BR", "UG",
    "MDH", "USD", "MNHM", "MAD", "PMA", "ICN", "TU", "PMNH", "SAU", "KM",
    "GMNH", "SSM", "MZ", "WSU", "CIAN", "ZMT", "IMS", "TCDU", "SIAC", "DFEC",
    "CBD", "SWC", "MD", "FU", "UV", "URM", "JNU", "IZ", "UAIC", "LEB",
    "MCSN", "UU", "PUC", "SNM", "AKU", "MH", "MOR", "IM", "MSNT", "IGM",
    "NAP", "NHMR", "MW", "PPCC", "CNHM", "IAL", "PCU", "HM"
};
8470 for (
const string& it :
ambig) {
8471 expected_errors[0]->SetErrMsg(
"Institution code " + it +
" needs to be qualified with a <COUNTRY> designation");
8473 eval = validator.Validate(seh, options);
8480 ambig.push_back(
"NASC");
8481 ambig.push_back(
"TCDU");
8483 for (
const string& it :
ambig) {
8484 expected_errors[0]->SetErrMsg(
"Institution code " + it +
" needs to be qualified with a <COUNTRY> designation");
8486 eval = validator.Validate(seh, options);
8493 ambig.push_back(
"CAIM");
8494 ambig.push_back(
"STM");
8495 ambig.push_back(
"HER");
8496 ambig.push_back(
"FSC");
8497 ambig.push_back(
"MDH");
8498 ambig.push_back(
"DSC");
8499 ambig.push_back(
"IFM");
8500 ambig.push_back(
"MCCM");
8501 ambig.push_back(
"CCB");
8502 ambig.push_back(
"LBG");
8503 ambig.push_back(
"BCC");
8504 ambig.push_back(
"CCAC");
8505 ambig.push_back(
"CCF");
8506 ambig.push_back(
"IBA");
8507 ambig.push_back(
"CAUP");
8508 ambig.push_back(
"MRC");
8509 ambig.push_back(
"ETH");
8510 ambig.push_back(
"TMC");
8511 ambig.push_back(
"CBD");
8512 ambig.push_back(
"HUT");
8513 ambig.push_back(
"URM");
8514 ambig.push_back(
"NJM");
8515 ambig.push_back(
"INA");
8516 ambig.push_back(
"BTCC");
8517 ambig.push_back(
"YM");
8518 ambig.push_back(
"IZ");
8519 ambig.push_back(
"ITCC");
8520 ambig.push_back(
"WB");
8521 ambig.push_back(
"LE");
8522 ambig.push_back(
"LCC");
8523 ambig.push_back(
"LBM");
8524 ambig.push_back(
"NI");
8525 ambig.push_back(
"CB");
8527 ambig.push_back(
"RIVE");
8528 ambig.push_back(
"DUM");
8529 ambig.push_back(
"AKU");
8530 ambig.push_back(
"CN");
8531 ambig.push_back(
"CCDM");
8532 ambig.push_back(
"PCM");
8533 ambig.push_back(
"MU");
8534 ambig.push_back(
"ISC");
8535 ambig.push_back(
"IMT");
8536 ambig.push_back(
"NU");
8537 ambig.push_back(
"RV");
8538 ambig.push_back(
"UC");
8539 ambig.push_back(
"NCSC");
8540 ambig.push_back(
"CCY");
8541 ambig.push_back(
"NCC");
8542 ambig.push_back(
"FRI");
8543 ambig.push_back(
"GAM");
8544 ambig.push_back(
"RM");
8545 ambig.push_back(
"MCM");
8546 ambig.push_back(
"PPCC");
8547 ambig.push_back(
"CDA");
8548 ambig.push_back(
"IAL");
8549 ambig.push_back(
"VI");
8550 ambig.push_back(
"PCU");
8551 ambig.push_back(
"CVCC");
8552 ambig.push_back(
"BR");
8553 ambig.push_back(
"MSU");
8554 for (
const string& it :
ambig) {
8555 expected_errors[0]->SetErrMsg(
"Institution code " + it +
" needs to be qualified with a <COUNTRY> designation");
8557 eval = validator.Validate(seh, options);
8562 expected_errors[0]->SetErrMsg(
"Institution code zzz is not in list");
8564 eval = validator.Validate(seh, options);
8568 eval = validator.Validate(seh, options);
8572 eval = validator.Validate(seh, options);
8576 expected_errors[0]->SetErrMsg(
"Institution code abrc exists, but correct capitalization is ABRC");
8578 eval = validator.Validate(seh, options);
8582 expected_errors[0]->SetErrMsg(
"Institution code a exists, but correct capitalization is A");
8584 eval = validator.Validate(seh, options);
8588 expected_errors[0]->SetErrMsg(
"Institution code abkmi exists, but correct capitalization is ABKMI");
8590 eval = validator.Validate(seh, options);
8599 eval = validator.Validate(seh, options);
8614 "Institution code ABRC exists, but collection ABRC:bar is not in list"));
8617 eval = validator.Validate(seh, options);
8621 expected_errors[0]->SetErrMsg(
"Institution code A exists, but collection A:bar is not in list");
8623 eval = validator.Validate(seh, options);
8627 expected_errors[0]->SetErrMsg(
"Institution code ABKMI exists, but collection ABKMI:bar is not in list");
8629 eval = validator.Validate(seh, options);
8638 eval = validator.Validate(seh, options);
8654 "Voucher is missing specific identifier"));
8657 eval = validator.Validate(seh, options);
8662 eval = validator.Validate(seh, options);
8667 eval = validator.Validate(seh, options);
8683 "Culture_collection should be structured, but is not"));
8686 eval = validator.Validate(seh, options);
8702 "INDEXER_ONLY - BioSource location is chromosome"));
8704 eval = validator.Validate(seh, options);
8719 "Multiple country qualifiers present"));
8723 eval = validator.Validate(seh, options);
8727 expected_errors[0]->SetErrMsg(
"Multiple lat_lon qualifiers present");
8730 eval = validator.Validate(seh, options);
8734 expected_errors[0]->SetErrMsg(
"Multiple fwd_primer_seq qualifiers present");
8735 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"MultipleSourceQualifiers",
"Multiple rev_primer_seq qualifiers present"));
8736 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"MultipleSourceQualifiers",
"Multiple fwd_primer_name qualifiers present"));
8737 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"MultipleSourceQualifiers",
"Multiple rev_primer_name qualifiers present"));
8746 eval = validator.Validate(seh, options);
8804 "Non-viral source feature should not have a segment qualifier"));
8808 "Unbalanced parentheses in subsource '" +
val +
"'"));
8809 eval = validator.Validate(seh, options);
8824 "Unbalanced parentheses in orgmod '" +
val +
"'"));
8827 eval = validator.Validate(seh, options);
8841 "Unbalanced parentheses in taxname 'Malio malefi (abc'"));
8843 "Organism not found in taxonomy database"));
8846 eval = validator.Validate(seh, options);
8849 expected_errors[0]->SetErrMsg(
"Unbalanced parentheses in taxname 'Malio malefi )abc'");
8851 eval = validator.Validate(seh, options);
8891 "Unbalanced parentheses in taxname 'Malio malefi (abc'"));
8895 expected_errors[0]->SetErrMsg(
"Unbalanced parentheses in orgmod 'no left (abc'");
8896 eval = validator.Validate(seh, options);
8899 expected_errors[0]->SetErrMsg(
"Unbalanced parentheses in orgmod 'no right )abc'");
8901 eval = validator.Validate(seh, options);
8912 eval = validator.Validate(seh, options);
8930 eval = validator.Validate(seh, options);
8936 eval = validator.Validate(seh, options);
8941 "Multiple vouchers with same institution:collection"));
8945 eval = validator.Validate(seh, options);
8949 expected_errors[0]->SetErrMsg(
"Multiple vouchers with same institution");
8953 eval = validator.Validate(seh, options);
8969 "Bad geo_loc_name capitalization [saint pierre and miquelon]"));
8972 "Bad country capitalization [saint pierre and miquelon]"));
8976 eval = validator.Validate(seh, options);
8991 "Institution code ABRC should be bio_material"));
8994 eval = validator.Validate(seh, options);
8998 eval = validator.Validate(seh, options);
9002 expected_errors[0]->SetErrMsg(
"Institution code ABKMI should be culture_collection");
9004 eval = validator.Validate(seh, options);
9008 eval = validator.Validate(seh, options);
9012 expected_errors[0]->SetErrMsg(
"Institution code AA should be specimen_voucher");
9014 eval = validator.Validate(seh, options);
9018 eval = validator.Validate(seh, options);
9030 SetTitle(entry,
"foo bar something something (PMID 1)");
9035 "Title descriptor has internal PMID"));
9037 eval = validator.Validate(seh, options);
9055 "BARCODE keyword without Molinfo.tech barcode"));
9057 eval = validator.Validate(seh, options);
9064 "NoKeywordHasTechnique",
"Molinfo.tech barcode without BARCODE keyword"));
9066 eval = validator.Validate(seh, options);
9077 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123458");
9078 SetTitle(entry,
"Something that does not start with organism");
9083 "RefSeq nucleotide title does not start with organism name"));
9085 eval = validator.Validate(seh, options);
9090 scope.RemoveTopLevelSeqEntry(seh);
9096 seh = scope.AddTopLevelSeqEntry(*entry);
9099 "RefSeq protein title does not end with organism name"));
9101 "Instantiated protein title does not match automatically generated title"));
9103 eval = validator.Validate(seh, options);
9114 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
9120 "Missing chromosome qualifier on NC or AC RefSeq record"));
9121 eval = validator.Validate(seh, options);
9128 eval = validator.Validate(seh, options);
9131 eval = validator.Validate(seh, options);
9135 eval = validator.Validate(seh, options);
9139 eval = validator.Validate(seh, options);
9142 eval = validator.Validate(seh, options);
9148 eval = validator.Validate(seh, options);
9154 eval = validator.Validate(seh, options);
9157 eval = validator.Validate(seh, options);
9161 eval = validator.Validate(seh, options);
9164 eval = validator.Validate(seh, options);
9167 eval = validator.Validate(seh, options);
9174 eval = validator.Validate(seh, options);
9179 eval = validator.Validate(seh, options);
9182 eval = validator.Validate(seh, options);
9185 eval = validator.Validate(seh, options);
9188 eval = validator.Validate(seh, options);
9206 "Structured Comment user object descriptor is empty"));
9208 "User object with no data"));
9210 "Structured Comment lacks prefix and/or suffix"));
9212 eval = validator.Validate(seh, options);
9219 prefix_field->SetLabel().SetStr(
"StructuredCommentPrefix");
9220 prefix_field->SetData().SetStr(
"Unknown prefix");
9222 eval = validator.Validate(seh, options);
9224 "Unknown prefix is not a valid value for StructuredCommentPrefix"));
9225 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9229 BOOST_CHECK_EQUAL(validator.IsValidStructuredComment(*desc),
false);
9234 prefix_field->SetData().SetStr(
"##Genome-Assembly-Data-START##");
9235 vector<string> required_fields;
9240 required_fields.push_back(
"Assembly Method");
9241 required_fields.push_back(
"Genome Coverage");
9242 required_fields.push_back(
"Sequencing Technology");
9247 for (
const string& it : required_fields) {
9248 expected_errors.push_back(
new CExpectedError(
"lcl|good", levels[
i],
"BadStrucCommMissingField",
9249 "Required field " + it +
" is missing"));
9252 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9255 eval = validator.Validate(seh, options);
9258 BOOST_CHECK_EQUAL(validator.IsValidStructuredComment(*desc),
false);
9263 for (
auto it = required_fields.crbegin(); it != required_fields.crend(); ++it) {
9266 field->
SetData().SetStr(
"bad value");
9271 for (
const string& it : required_fields) {
9272 if (pos < required_fields.size() - 1) {
9273 expected_errors.push_back(
new CExpectedError(
"lcl|good", levels[pos],
"BadStrucCommFieldOutOfOrder",
9274 it +
" field is out of order"));
9277 expected_errors.push_back(
new CExpectedError(
"lcl|good", levels[pos],
"BadStrucCommInvalidFieldValue",
9278 "bad value is not a valid value for " + it));
9282 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9285 eval = validator.Validate(seh, options);
9288 BOOST_CHECK_EQUAL(validator.IsValidStructuredComment(*desc),
false);
9292 prefix_field->SetData().SetStr(
"##MIGS-Data-START##");
9293 required_fields.clear();
9294 required_fields.push_back(
"alt_elev");
9295 required_fields.push_back(
"assembly");
9296 required_fields.push_back(
"collection_date");
9297 required_fields.push_back(
"country");
9298 required_fields.push_back(
"depth");
9299 required_fields.push_back(
"environment");
9300 required_fields.push_back(
"investigation_type");
9301 required_fields.push_back(
"isol_growth_condt");
9302 required_fields.push_back(
"lat_lon");
9303 required_fields.push_back(
"project_name");
9304 required_fields.push_back(
"sequencing_meth");
9306 for (
const string& it : required_fields) {
9308 "Required field " + it +
" is missing"));
9310 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9313 eval = validator.Validate(seh, options);
9316 BOOST_CHECK_EQUAL(validator.IsValidStructuredComment(*desc),
false);
9320 prefix_field->SetData().SetStr(
"##MIGS:4.0-Data-START##");
9321 required_fields.clear();
9322 required_fields.push_back(
"assembly");
9323 required_fields.push_back(
"collection_date");
9324 required_fields.push_back(
"env_biome");
9325 required_fields.push_back(
"env_feature");
9326 required_fields.push_back(
"env_material");
9327 required_fields.push_back(
"env_package");
9328 required_fields.push_back(
"geo_loc_name");
9329 required_fields.push_back(
"investigation_type");
9330 required_fields.push_back(
"isol_growth_condt");
9331 required_fields.push_back(
"lat_lon");
9332 required_fields.push_back(
"project_name");
9333 required_fields.push_back(
"seq_meth");
9335 for (
const string& it : required_fields) {
9337 "Required field " + it +
" is missing"));
9339 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9342 eval = validator.Validate(seh, options);
9345 BOOST_CHECK_EQUAL(validator.IsValidStructuredComment(*desc),
false);
9350 prefix_field->SetData().SetStr(
"##Assembly-Data-START##");
9355 field->
SetLabel().SetStr(
"Sequencing Technology");
9356 field->
SetData().SetStr(
"Singer");
9360 "Required field Assembly Method is missing when Sequencing Technology has value 'Singer'"));
9361 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9364 eval = validator.Validate(seh, options);
9367 BOOST_CHECK_EQUAL(validator.IsValidStructuredComment(*desc),
false);
9371 field->
SetData().SetStr(
"something else");
9373 "Required field Assembly Method is missing when Sequencing Technology has value 'something else'"));
9374 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9377 eval = validator.Validate(seh, options);
9380 BOOST_CHECK_EQUAL(validator.IsValidStructuredComment(*desc),
false);
9384 prefix_field->SetData().SetStr(
"##HumanSTR-START##");
9386 eval = validator.Validate(seh, options);
9389 "Sequencing Technology is not a valid field name"));
9391 "Required field STR locus name is missing"));
9393 "Required field Length-based allele is missing"));
9395 "Required field Bracketed repeat is missing"));
9396 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9407 f->SetLabel().SetStr(
label);
9408 f->SetData().SetStr(
val);
9418 user->
SetType().SetStr(
"StructuredComment");
9419 user->
SetData().push_back(
MkField(
"StructuredCommentPrefix",
"##Genome-Assembly-Data-START##"));
9423 user->
SetData().push_back(
MkField(
"Sequencing Technology",
"2"));
9432 "Assembly Name should not start with 'NCBI' or 'GenBank' in structured comment"));
9433 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"BadStrucCommInvalidFieldValue",
"Structured Comment invalid; the field value and/or name are incorrect"));
9436 eval = validator.Validate(seh, options);
9450 SetTitle(entry,
"Sebaea microphylla, complete genome.");
9454 "Non-viral complete genome not labeled as chromosome"));
9457 eval = validator.Validate(seh, options);
9466 eval = validator.Validate(seh, options);
9473 eval = validator.Validate(seh, options);
9478 SetTitle(entry,
"Sebaea microphylla, complete sequence.");
9479 eval = validator.Validate(seh, options);
9481 SetTitle(entry,
"Sebaea microphylla, complete genome.");
9487 "INDEXER_ONLY - BioSource location is chromosome"));
9488 eval = validator.Validate(seh, options);
9504 "Taxonomy indicates single-stranded RNA, molecule type (DNA) is conflicting."));
9507 eval = validator.Validate(seh, options);
9511 eval = validator.Validate(seh, options);
9515 eval = validator.Validate(seh, options);
9522 "Taxonomy indicates single-stranded RNA, molecule type (DNA) is conflicting."));
9526 eval = validator.Validate(seh, options);
9533 eval = validator.Validate(seh, options);
9539 eval = validator.Validate(seh, options);
9544 "Taxonomy indicates double-stranded RNA, molecule type (DNA) is conflicting."));
9545 eval = validator.Validate(seh, options);
9553 eval = validator.Validate(seh, options);
9558 "Taxonomy indicates single-stranded DNA, molecule type (RNA) is conflicting."));
9559 eval = validator.Validate(seh, options);
9565 expected_errors.back()->SetErrMsg(
"Taxonomy indicates double-stranded DNA, molecule type (RNA) is conflicting.");
9566 eval = validator.Validate(seh, options);
9572 eval = validator.Validate(seh, options);
9583 sdesc->
SetComment(
"This comment contains ::");
9589 "Comment may be formatted to look like a structured comment."));
9591 eval = validator.Validate(seh, options);
9610 "Structured Comment lacks prefix and/or suffix"));
9612 eval = validator.Validate(seh, options);
9634 vector<CExpectedError*> expected_errors;
9637 "Non-ascii chars in input ASN.1 strings"));
9639 eval = validator.
Validate(seh, options);
9647 eval = validator.
Validate(seh, options);
9661 "Personal collection does not have name of collector"));
9663 eval = validator.Validate(seh, options);
9675 author->
SetName().SetName().SetLast(
"et al.");
9679 art_title->SetName(
"article title");
9683 entry->
SetDescr().Set().push_back(desc);
9688 "Author list ends in et al."));
9690 eval = validator.Validate(seh, options);
9693 pub->
SetMan().
SetCit().SetAuthors().SetNames().SetStd().push_back(author);
9695 book_title->SetName(
"book title");
9696 pub->
SetMan().
SetCit().SetTitle().Set().push_back(book_title);
9697 eval = validator.Validate(seh, options);
9702 eval = validator.Validate(seh, options);
9705 pub->
SetProc().
SetBook().SetAuthors().SetNames().SetStd().push_back(author);
9707 eval = validator.Validate(seh, options);
9713 eval = validator.Validate(seh, options);
9723 eval = validator.Validate(seh, options);
9727 scope.RemoveTopLevelSeqEntry(seh);
9728 entry->
SetDescr().Set().pop_back();
9730 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
9733 feat->
SetData().SetPub().SetPub().Set().push_back(pub);
9735 annot->
SetData().SetFtable().push_back(feat);
9737 seh = scope.AddTopLevelSeqEntry(*entry);
9741 eval = validator.Validate(seh, options);
9744 pub->
SetMan().
SetCit().SetAuthors().SetNames().SetStd().push_back(author);
9745 pub->
SetMan().
SetCit().SetTitle().Set().push_back(book_title);
9746 eval = validator.Validate(seh, options);
9751 eval = validator.Validate(seh, options);
9754 pub->
SetProc().
SetBook().SetAuthors().SetNames().SetStd().push_back(author);
9756 eval = validator.Validate(seh, options);
9762 eval = validator.Validate(seh, options);
9772 eval = validator.Validate(seh, options);
9776 scope.RemoveTopLevelSeqEntry(seh);
9778 entry->
SetDescr().Set().push_back(desc);
9779 seh = scope.AddTopLevelSeqEntry(*entry);
9781 expected_errors[0]->SetErrMsg(
"Author list contains et al.");
9787 eval = validator.Validate(seh, options);
9790 pub->
SetMan().
SetCit().SetAuthors().SetNames().SetStd().push_back(author);
9791 pub->
SetMan().
SetCit().SetAuthors().SetNames().SetStd().push_back(author2);
9792 pub->
SetMan().
SetCit().SetTitle().Set().push_back(book_title);
9793 eval = validator.Validate(seh, options);
9799 eval = validator.Validate(seh, options);
9802 pub->
SetProc().
SetBook().SetAuthors().SetNames().SetStd().push_back(author);
9803 pub->
SetProc().
SetBook().SetAuthors().SetNames().SetStd().push_back(author2);
9805 eval = validator.Validate(seh, options);
9812 eval = validator.Validate(seh, options);
9823 eval = validator.Validate(seh, options);
9827 scope.RemoveTopLevelSeqEntry(seh);
9828 entry->
SetDescr().Set().pop_back();
9830 seh = scope.AddTopLevelSeqEntry(*entry);
9835 eval = validator.Validate(seh, options);
9838 pub->
SetMan().
SetCit().SetAuthors().SetNames().SetStd().push_back(author);
9839 pub->
SetMan().
SetCit().SetAuthors().SetNames().SetStd().push_back(author2);
9840 pub->
SetMan().
SetCit().SetTitle().Set().push_back(book_title);
9841 eval = validator.Validate(seh, options);
9847 eval = validator.Validate(seh, options);
9850 pub->
SetProc().
SetBook().SetAuthors().SetNames().SetStd().push_back(author);
9851 pub->
SetProc().
SetBook().SetAuthors().SetNames().SetStd().push_back(author2);
9853 eval = validator.Validate(seh, options);
9860 eval = validator.Validate(seh, options);
9871 eval = validator.Validate(seh, options);
9884 submit->
SetData().SetEntrys().push_back(entry);
9886 submit->
SetSub().SetCit().SetAuthors().SetNames().SetStd().push_back(author);
9887 submit->
SetSub().SetCit().SetAuthors().SetAffil().SetStd().SetAffil(
"some affiliation");
9889 submit->
SetSub().SetCit().SetDate().SetStd().SetYear(2009);
9890 submit->
SetSub().SetCit().SetDate().SetStd().SetMonth(12);
9891 submit->
SetSub().SetCit().SetDate().SetStd().SetDay(31);
9896 ids.push_back(
"good");
9897 ids.push_back(
"NC_123456");
9899 for (
const string& id_it : ids) {
9901 scope.RemoveTopLevelSeqEntry(seh);
9903 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(id_it);
9905 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(id_it);
9908 seh = scope.AddTopLevelSeqEntry(*entry);
9910 submit->
SetSub().SetCit().SetAuthors().ResetAffil();
9911 submit->
SetSub().SetCit().SetAuthors().SetAffil().SetStd().SetAffil(
"some affiliation");
9912 submit->
SetSub().ResetContact();
9913 string msg_acc =
NStr::StartsWith(id_it,
"NC") ?
"ref|" + id_it +
"|" :
"lcl|" + id_it;
9915 sev,
"MissingPubRequirement",
9916 "Submission citation affiliation has no country"));
9918 eval = validator.Validate(*submit, &scope, options);
9921 submit->
SetSub().SetCit().SetAuthors().SetAffil().SetStd().SetCountry(
"USA");
9922 expected_errors[0]->SetErrMsg(
"Submission citation affiliation has no state");
9924 eval = validator.Validate(*submit, &scope, options);
9928 submit->
SetSub().SetCit().SetAuthors().SetAffil().SetStd().SetSub(
"VA");
9929 submit->
SetSub().SetContact().SetContact().SetAffil().SetStd().SetAffil(
"some affiliation");
9930 expected_errors.push_back(
new CExpectedError(msg_acc, sev,
"MissingPubRequirement",
9931 "Submission citation affiliation has no country"));
9932 expected_errors[0]->SetAccession(
"");
9935 eval = validator.Validate(*submit, &scope, options);
9938 submit->
SetSub().SetContact().SetContact().SetAffil().SetStd().SetCountry(
"USA");
9939 expected_errors[0]->SetErrMsg(
"Submission citation affiliation has no state");
9941 eval = validator.Validate(*submit, &scope, options);
9945 scope.RemoveTopLevelSeqEntry(seh);
9949 entry->
SetDescr().Set().push_back(desc);
9951 pub->
SetSub().
SetAuthors().SetAffil().SetStd().SetAffil(
"some affiliation");
9957 seh = scope.AddTopLevelSeqEntry(*entry);
9959 expected_errors.push_back(
new CExpectedError(msg_acc, sev,
"MissingPubRequirement",
9960 "Submission citation affiliation has no country"));
9962 eval = validator.Validate(seh, options);
9966 expected_errors[0]->SetErrMsg(
"Submission citation affiliation has no state");
9968 eval = validator.Validate(seh, options);
9974 expected_errors[0]->SetErrMsg(
"Submission citation has no author names");
9976 eval = validator.Validate(seh, options);
9986 "MissingPubRequirement",
9987 "Submission citation has no affiliation"));
9989 eval = validator.Validate(seh, options);
9993 eval = validator.Validate(seh, options);
9998 eval = validator.Validate(seh, options);
10001 eval = validator.Validate(seh, options);
10004 eval = validator.Validate(seh, options);
10011 pub->
SetGen().
SetCit(
"Does not start with expected text");
10014 "Unpublished citation text invalid"));
10016 "Publication date missing"));
10019 eval = validator.Validate(seh, options);
10022 delete expected_errors[1];
10023 expected_errors[1] =
nullptr;
10025 pub->
SetGen().
SetCit(
"submitted starts with expected text");
10027 expected_errors[0]->SetErrMsg(
"Publication date marked as '?'");
10029 eval = validator.Validate(seh, options);
10033 expected_errors[0]->SetErrMsg(
"Publication date not set");
10034 eval = validator.Validate(seh, options);
10042 expected_errors[0]->SetErrMsg(
"Publication has no author names");
10043 eval = validator.Validate(seh, options);
10048 expected_errors[0]->SetErrMsg(
"Publication has no title");
10049 eval = validator.Validate(seh, options);
10053 art_title->SetName(
"article title");
10056 expected_errors[0]->SetErrMsg(
"Publication has no author names");
10060 eval = validator.Validate(seh, options);
10066 pub->
SetArticle().
SetFrom().SetJournal().SetImp().SetDate().SetStd().SetYear(2009);
10068 expected_errors[0]->SetErrMsg(
"Journal title missing");
10070 "ISO journal title abbreviation missing"));
10071 eval = validator.Validate(seh, options);
10074 journal_title->SetName(
"journal_title");
10075 pub->
SetArticle().
SetFrom().SetJournal().SetTitle().Set().push_back(journal_title);
10076 delete expected_errors[0];
10077 expected_errors[0] =
nullptr;
10078 eval = validator.Validate(seh, options);
10083 "Journal volume missing"));
10085 "Journal pages missing"));
10088 iso_jta->SetIso_jta(
"abbr");
10092 eval = validator.Validate(seh, options);
10096 "Journal pages missing"));
10099 eval = validator.Validate(seh, options);
10103 "Journal volume missing"));
10107 expected_errors[0]->SetErrMsg(
"Journal volume missing");
10108 eval = validator.Validate(seh, options);
10112 "Publication date missing"));
10116 expected_errors[0]->SetErrMsg(
"Publication date missing");
10118 eval = validator.Validate(seh, options);
10121 expected_errors[0]->SetErrMsg(
"Publication date marked as '?'");
10122 eval = validator.Validate(seh, options);
10124 pub->
SetArticle().
SetFrom().SetJournal().SetImp().SetDate().SetStd().SetYear(0);
10125 expected_errors[0]->SetErrMsg(
"Publication date not set");
10126 eval = validator.Validate(seh, options);
10132 pub->
SetArticle().
SetFrom().SetJournal().SetImp().SetDate().SetStd().SetYear(2009);
10134 journal_title->SetName(
"(er) Journal Title");
10135 eval = validator.Validate(seh, options);
10137 journal_title->SetName(
"(journal title");
10139 eval = validator.Validate(seh, options);
10144 "In-press is not expected to have page numbers"));
10145 eval = validator.Validate(seh, options);
10149 entry->
SetDescr().Set().pop_back();
10162 entry->
SetDescr().Set().push_back(desc);
10167 "Publication has unexpected internal Pub-equiv"));
10169 eval = validator.Validate(seh, options);
10183 entry->
SetDescr().Set().push_back(desc);
10189 "Page numbering has zero value"));
10191 eval = validator.Validate(seh, options);
10194 eval = validator.Validate(seh, options);
10197 expected_errors[0]->SetErrMsg(
"Page numbering has negative value");
10199 eval = validator.Validate(seh, options);
10202 expected_errors[0]->SetErrMsg(
"Page numbering out of order");
10204 eval = validator.Validate(seh, options);
10207 expected_errors[0]->SetErrMsg(
"Page numbering greater than 50");
10209 eval = validator.Validate(seh, options);
10212 expected_errors[0]->SetErrMsg(
"Page numbering stop looks strange");
10214 eval = validator.Validate(seh, options);
10217 expected_errors[0]->SetErrMsg(
"Page numbering start looks strange");
10219 eval = validator.Validate(seh, options);
10234 entry->
SetDescr().Set().push_back(desc);
10239 "Publication is medline entry"));
10241 eval = validator.Validate(seh, options);
10266 if (it->GetPub().GetPub().Get().front()->IsSub()) {
10267 subpub = it->SetPub().SetPub().Set().front();
10269 otherpub = it->SetPub().SetPub().Set().front();
10278 "Submission citation date has error - BAD_STR"));
10280 eval = validator.Validate(seh, options);
10284 expected_errors[0]->SetErrMsg(
"Submission citation date has error - BAD_YEAR");
10285 eval = validator.Validate(seh, options);
10290 expected_errors[0]->SetErrMsg(
"Submission citation date has error - BAD_MONTH");
10291 eval = validator.Validate(seh, options);
10297 expected_errors[0]->SetErrMsg(
"Submission citation date has error - BAD_DAY");
10298 eval = validator.Validate(seh, options);
10302 expected_errors[0]->SetErrMsg(
"Submission citation date has error - BAD_SEASON");
10303 eval = validator.Validate(seh, options);
10309 gen->SetGen().SetAuthors().SetNames().SetStd().push_back(author);
10310 gen->SetGen().SetTitle(
"gen title");
10313 expected_errors[0]->SetErrMsg(
"Publication date has error - BAD_SEASON");
10314 eval = validator.Validate(seh, options);
10319 eval = validator.Validate(seh, options);
10324 entry->
SetDescr().Set().push_back(desc);
10326 expected_errors[0]->SetErrMsg(
"Create date has error - BAD_SEASON");
10327 eval = validator.Validate(seh, options);
10331 expected_errors[0]->SetErrMsg(
"Update date has error - BAD_SEASON");
10332 eval = validator.Validate(seh, options);
10347 pub->
SetGen().
SetCit(
"submitted something Title=foo");
10350 entry->
SetDescr().Set().push_back(desc);
10355 "Unpublished citation has embedded Title"));
10357 eval = validator.Validate(seh, options);
10360 pub->
SetGen().
SetCit(
"submitted something Journal=bar");
10361 expected_errors[0]->SetErrMsg(
"Unpublished citation has embedded Journal");
10362 eval = validator.Validate(seh, options);
10377 entry->
SetDescr().Set().push_back(desc);
10380 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
10388 "Multiple publications have serial number 1234"));
10390 eval = validator.Validate(seh, options);
10401 author->
SetName().SetName().SetLast(
"foo<script");
10406 entry->
SetDescr().Set().push_back(desc);
10413 "Bad characters in author foo<script"));
10415 "Script tag found in item"));
10417 eval = validator.Validate(seh, options);
10420 author->
SetName().SetName().SetLast(
"Last");
10421 delete expected_errors[0];
10422 expected_errors[0] =
nullptr;
10425 eval = validator.Validate(seh, options);
10428 feat->
SetComment(
"misc_feature needs a comment");
10431 eval = validator.Validate(seh, options);
10436 eval = validator.Validate(seh, options);
10441 eval = validator.Validate(seh, options);
10446 eval = validator.Validate(seh, options);
10451 eval = validator.Validate(seh, options);
10470 "Ahead-of-print without in-press"));
10472 eval = validator.Validate(seh, options);
10479 "In-press is not expected to have page numbers"));
10481 "Electronic-only publication should not also be in-press"));
10483 eval = validator.Validate(seh, options);
10490 "Empty consortium"));
10493 consortium->
SetName().SetConsortium(
"");
10495 eval = validator.Validate(seh, options);
10498 consortium->
SetName().SetConsortium(
"duplicate");
10500 consortium2->
SetName().SetConsortium(
"duplicate");
10502 expected_errors[0]->SetErrMsg(
"Duplicate consortium 'duplicate'");
10503 eval = validator.Validate(seh, options);
10508 "In-press is not expected to have page numbers"));
10510 "Duplicate consortium 'duplicate'"));
10514 eval = validator.Validate(seh, options);
10524 vector<CExpectedError*>& expected_errors,
10525 const string& valtype,
// Entity references injected into taxnames, subsources, orgmods, dbxrefs,
// gene/RNA/protein fields, comments and qualifiers further down; each one is
// expected to yield a "... has SGML" report.
//
// Every element is a complete "&name;" reference. The first three are the
// markup-character entities; the remainder are Greek-letter entities in
// lower/upper-case pairs ("&sfgr;" has no upper-case partner).
// NOTE(review): the first three entries were bare ">", "<" and "&" in this
// view, which looks like an HTML-decoding artefact -- confirm against the
// validator's SGML table.
std::vector<std::string> sgml_tags = {
    "&gt;",   "&lt;",   "&amp;",
    "&agr;",  "&Agr;",
    "&bgr;",  "&Bgr;",
    "&ggr;",  "&Ggr;",
    "&dgr;",  "&Dgr;",
    "&egr;",  "&Egr;",
    "&zgr;",  "&Zgr;",
    "&eegr;", "&EEgr;",
    "&thgr;", "&THgr;",
    "&igr;",  "&Igr;",
    "&kgr;",  "&Kgr;",
    "&lgr;",  "&Lgr;",
    "&mgr;",  "&Mgr;",
    "&ngr;",  "&Ngr;",
    "&xgr;",  "&Xgr;",
    "&ogr;",  "&Ogr;",
    "&pgr;",  "&Pgr;",
    "&rgr;",  "&Rgr;",
    "&sgr;",  "&Sgr;",
    "&sfgr;",
    "&tgr;",  "&Tgr;",
    "&ugr;",  "&Ugr;",
    "&phgr;", "&PHgr;",
    "&khgr;", "&KHgr;",
    "&psgr;", "&PSgr;",
    "&ohgr;", "&OHgr;"
};
10593 "taxname %s has SGML"));
10595 "Organism not found in taxonomy database"));
10597 for (
const string& it : sgml_tags) {
10598 string taxname =
"a" + it +
"b";
10600 expected_errors[0]->SetErrMsg(
"taxname " + taxname +
" has SGML");
10601 eval = validator.Validate(seh, options);
10606 delete expected_errors[1];
10607 expected_errors[1] =
nullptr;
10609 size_t tag_num = 0;
10612 expected_errors[0]->SetErrMsg(
"subsource " + sgml_tags[tag_num] +
" has SGML");
10613 eval = validator.Validate(seh, options);
10619 expected_errors[0]->SetErrMsg(
"orgmod " + sgml_tags[tag_num] +
" has SGML");
10620 eval = validator.Validate(seh, options);
10627 "dbxref database " + sgml_tags[tag_num] +
" has SGML"));
10629 "Illegal db_xref type " + sgml_tags[tag_num] +
" (1234)"));
10633 eval = validator.Validate(seh, options);
10640 AddSgmlError(expected_errors,
"dbxref value", sgml_tags[tag_num]);
10643 eval = validator.Validate(seh, options);
10649 scope.RemoveTopLevelSeqEntry(seh);
10651 seh = scope.AddTopLevelSeqEntry(*entry);
10652 AddSgmlError(expected_errors,
"dbxref database", sgml_tags[tag_num]);
10655 "Illegal db_xref type " + sgml_tags[tag_num] +
" (1234)"));
10657 eval = validator.Validate(seh, options);
10665 AddSgmlError(expected_errors,
"dbxref value", sgml_tags[tag_num]);
10668 eval = validator.Validate(seh, options);
10675 scope.RemoveTopLevelSeqEntry(seh);
10676 string foo = sgml_tags[tag_num] +
"foo";
10677 feat->
SetData().SetGene().SetLocus(foo);
10678 seh = scope.AddTopLevelSeqEntry(*entry);
10681 eval = validator.Validate(seh, options);
10683 feat->
SetData().SetGene().SetLocus(
"good locus");
10687 feat->
SetData().SetGene().SetLocus_tag(sgml_tags[tag_num]);
10688 AddSgmlError(expected_errors,
"gene locus_tag", sgml_tags[tag_num]);
10690 eval = validator.Validate(seh, options);
10692 feat->
SetData().SetGene().ResetLocus_tag();
10696 feat->
SetData().SetGene().SetDesc(sgml_tags[tag_num]);
10697 AddSgmlError(expected_errors,
"gene description", sgml_tags[tag_num]);
10699 eval = validator.Validate(seh, options);
10701 feat->
SetData().SetGene().ResetDesc();
10705 feat->
SetData().SetGene().SetSyn().push_back(sgml_tags[tag_num]);
10706 AddSgmlError(expected_errors,
"gene synonym", sgml_tags[tag_num]);
10708 eval = validator.Validate(seh, options);
10710 feat->
SetData().SetGene().ResetDesc();
10715 scope.RemoveTopLevelSeqEntry(seh);
10717 foo = sgml_tags[tag_num] +
"foo";
10718 feat->
SetData().SetRna().SetExt().SetName(foo);
10719 seh = scope.AddTopLevelSeqEntry(*entry);
10721 "No CDS location match for 1 mRNA"));
10724 "mRNA name " + foo +
" has SGML"));
10726 eval = validator.Validate(seh, options);
10732 scope.RemoveTopLevelSeqEntry(seh);
10734 foo = sgml_tags[tag_num] +
"foo";
10735 feat->
SetData().SetRna().SetExt().SetName(foo);
10736 seh = scope.AddTopLevelSeqEntry(*entry);
10738 "rRNA name " + foo +
" has SGML"));
10740 eval = validator.Validate(seh, options);
10742 feat->
SetData().SetRna().SetExt().SetName(
"good name");
10746 expected_errors[0]->SetErrMsg(
"feature comment " + sgml_tags[tag_num] +
" has SGML");
10747 eval = validator.Validate(seh, options);
10753 qual->
SetQual(
"standard_name");
10754 qual->
SetVal(sgml_tags[tag_num]);
10755 feat->
SetQual().push_back(qual);
10756 expected_errors[0]->SetErrMsg(
"feature qualifier " + sgml_tags[tag_num] +
" has SGML");
10757 eval = validator.Validate(seh, options);
10762 scope.RemoveTopLevelSeqEntry(seh);
10764 feat = entry->
SetSet().
SetSeq_set().back()->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
10765 foo = sgml_tags[tag_num] +
"foo";
10766 feat->
SetData().SetProt().SetName().front().assign(foo);
10767 seh = scope.AddTopLevelSeqEntry(*entry);
10768 expected_errors[0]->SetAccession(
"lcl|prot");
10769 expected_errors[0]->SetErrMsg(
"protein name " + foo +
" has SGML");
10770 eval = validator.Validate(seh, options);
10772 feat->
SetData().SetProt().SetName().pop_back();
10773 feat->
SetData().SetProt().SetName().push_back(
"bar");
10777 feat->
SetData().SetProt().SetDesc(sgml_tags[tag_num]);
10778 expected_errors[0]->SetErrMsg(
"protein description " + sgml_tags[tag_num] +
" has SGML");
10779 eval = validator.Validate(seh, options);
10781 feat->
SetData().SetProt().ResetDesc();
10798 "Publication status is in comment for pmid 0"));
10800 eval = validator.Validate(seh, options);
10804 eval = validator.Validate(seh, options);
10809 "In-press is not expected to have page numbers"));
10811 "Publication status is in comment for pmid 0"));
10816 eval = validator.Validate(seh, options);
10820 eval = validator.Validate(seh, options);
10824 eval = validator.Validate(seh, options);
10845 BOOST_CHECK(orphans.empty());
10848 "No CdRegion in nuc-prot set points to this protein"));
10850 eval = validator.Validate(seh, options);
10864 entry->
SetSet().
SetAnnot().front()->SetData().SetFtable().pop_front();
10869 "No CdRegion in nuc-prot set points to this protein"));
10871 "No nucleotides in nuc-prot set"));
10873 eval = validator.Validate(seh, options);
10876 scope.RemoveTopLevelSeqEntry(seh);
10880 entry->
SetSet().
SetAnnot().front()->SetData().SetFtable().push_back(cds);
10881 seh = scope.AddTopLevelSeqEntry(*entry);
10882 delete expected_errors[0];
10883 expected_errors[0] =
nullptr;
10884 expected_errors[1]->SetErrMsg(
"No proteins in nuc-prot set");
10885 expected_errors[1]->SetAccession(
"lcl|nuc");
10887 "Unable to find product Bioseq from CDS feature"));
10888 eval = validator.Validate(seh, options);
10891 scope.RemoveTopLevelSeqEntry(seh);
10900 seh = scope.AddTopLevelSeqEntry(*entry);
10902 expected_errors[1]->SetErrMsg(
"Multiple unsegmented nucleotides in nuc-prot set");
10903 delete expected_errors[2];
10904 expected_errors.pop_back();
10905 eval = validator.Validate(seh, options);
10918 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetId().front()->SetLocal().SetStr(
"good2");
10923 "No segmented Bioseq in segset"));
10925 eval = validator.Validate(seh, options);
10944 "No Bioseqs in this set"));
10946 eval = validator.Validate(seh, options);
10950 eval = validator.Validate(seh, options);
10954 eval = validator.Validate(seh, options);
10958 eval = validator.Validate(seh, options);
10962 eval = validator.Validate(seh, options);
10966 eval = validator.Validate(seh, options);
10970 eval = validator.Validate(seh, options);
10986 "Pop/Phy/Mut/Eco set has no components"));
10988 "Nuc-prot Bioseq-set contains wrong Bioseq-set, its class is \"eco-set\"."));
10990 eval = validator.Validate(seh, options);
11004 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATAAACTAA");
11005 nuc->SetSeq().SetInst().SetLength(27);
11012 cds->
SetData().SetCdregion();
11015 cds->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
11016 cds->
SetProduct().SetWhole().SetLocal().SetStr(
"prot2");
11022 "Nucleotide bioseq should be product of mRNA feature on contig, but is not"));
11028 eval = validator.Validate(seh, options);
11032 scope.RemoveTopLevelSeqEntry(seh);
11034 contig->
SetSeq().
SetAnnot().front()->SetData().SetFtable().pop_back();
11038 mrna->
SetData().SetRna().SetExt().SetName(
"fake protein name");
11041 mrna->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
11042 mrna->
SetProduct().SetWhole().SetLocal().SetStr(
"nuc2");
11044 seh = scope.AddTopLevelSeqEntry(*entry);
11046 "No CDS location match for 1 mRNA"));
11048 "Protein bioseq should be product of CDS feature on contig, but is not"));
11051 eval = validator.Validate(seh, options);
11057 scope.RemoveTopLevelSeqEntry(seh);
11058 contig->
SetSeq().
SetAnnot().front()->SetData().SetFtable().push_back(cds);
11062 gene->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
11063 gene->
SetData().SetGene().SetLocus(
"gene locus");
11065 seh = scope.AddTopLevelSeqEntry(*entry);
11068 "Seq-annot packaged directly on genomic product set"));
11070 eval = validator.Validate(seh, options);
11074 scope.RemoveTopLevelSeqEntry(seh);
11078 mrna2->
SetData().SetRna().SetExt().SetName(
"second protein name");
11081 mrna2->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
11082 mrna2->
SetProduct().SetWhole().SetLocal().SetStr(
"nuc3");
11084 seh = scope.AddTopLevelSeqEntry(*entry);
11089 "Unable to fetch mRNA transcript 'lcl|nuc3'"));
11091 "Product Bioseq of mRNA feature is not packaged in the record"));
11093 "Product of mRNA feature (lcl|nuc3) not packaged in genomic product set"));
11095 eval = validator.Validate(seh, options);
11099 scope.RemoveTopLevelSeqEntry(seh);
11103 contig->
SetSeq().
SetAnnot().front()->SetData().SetFtable().pop_back();
11104 seh = scope.AddTopLevelSeqEntry(*entry);
11105 eval = validator.Validate(seh, options);
11107 "2 mRNA features have 1 product references"));
11109 "Nucleotide bioseq should be product of mRNA feature on contig, but is not"));
11111 "Product of mRNA feature (?) not packaged in genomic product set"));
11115 scope.RemoveTopLevelSeqEntry(seh);
11117 seh = scope.AddTopLevelSeqEntry(*entry);
11118 eval = validator.Validate(seh, options);
11122 "Nucleotide bioseq should be product of mRNA feature on contig, but is not"));
// Validates `seh` with `entry`'s set class switched, in turn, to pop_set,
// phy_set, mut_set and eco_set, passing each result to CheckErrors() together
// with expected_errors. It then switches the class to small_genome_set,
// removes the entry from `scope`, strips its Title descriptors, adds it back
// (rebinding `seh`) and validates/checks one final time.
//
// Uses these names from the expansion site: eval, validator, options, scope,
// expected_errors. `seh` must be an assignable lvalue. On exit `entry` is left
// as a small_genome_set without a Title descriptor.
//
// The expansion is a single brace-enclosed compound statement so that it
// behaves as one statement under an unbraced if/for. Braces are used instead
// of do { } while (0) so that call sites compile with or without a trailing
// semicolon.
#define TESTPOPPHYMUTECO(seh, entry) \
    { \
        (entry)->SetSet().SetClass(CBioseq_set::eClass_pop_set); \
        eval = validator.Validate(seh, options); \
        CheckErrors(*eval, expected_errors); \
        (entry)->SetSet().SetClass(CBioseq_set::eClass_phy_set); \
        eval = validator.Validate(seh, options); \
        CheckErrors(*eval, expected_errors); \
        (entry)->SetSet().SetClass(CBioseq_set::eClass_mut_set); \
        eval = validator.Validate(seh, options); \
        CheckErrors(*eval, expected_errors); \
        (entry)->SetSet().SetClass(CBioseq_set::eClass_eco_set); \
        eval = validator.Validate(seh, options); \
        CheckErrors(*eval, expected_errors); \
        (entry)->SetSet().SetClass(CBioseq_set::eClass_small_genome_set); \
        scope.RemoveTopLevelSeqEntry(seh); \
        unit_test_util::RemoveDescriptorType(entry, CSeqdesc::e_Title); \
        (seh) = scope.AddTopLevelSeqEntry(*(entry)); \
        eval = validator.Validate(seh, options); \
        CheckErrors(*eval, expected_errors); \
    }
// Switches `entry`'s set class to wgs_set, validates `seh` and passes the
// result to CheckErrors() together with expected_errors.
//
// Uses these names from the expansion site: eval, validator, options,
// expected_errors.
//
// The expansion is a single brace-enclosed compound statement so that it
// behaves as one statement under an unbraced if/for, and compiles with or
// without a trailing semicolon at the call site.
#define TESTWGS(seh, entry) \
    { \
        (entry)->SetSet().SetClass(CBioseq_set::eClass_wgs_set); \
        eval = validator.Validate(seh, options); \
        CheckErrors(*eval, expected_errors); \
    }
11164 "Molecule type (DNA) does not match biomol (RNA)"));
11166 "Pop/phy/mut/eco set contains inconsistent moltype"));
11171 scope.RemoveTopLevelSeqEntry(seh);
11173 seh = scope.AddTopLevelSeqEntry(*entry);
11189 "There is 1 mispackaged graph in this record."));
11191 eval = validator.Validate(seh, options);
11195 expected_errors[0]->SetErrMsg(
"There are 2 mispackaged graphs in this record.");
11196 eval = validator.Validate(seh, options);
11213 "Nested sets within Pop/Phy/Mut/Eco/Wgs set"));
11220 "Nested sets within Pop/Phy/Mut/Eco/Wgs set"));
11222 scope.RemoveTopLevelSeqEntry(seh);
11224 seh = scope.AddTopLevelSeqEntry(*entry);
11241 "Set class should not be conset"));
11243 eval = validator.Validate(seh, options);
11258 "No Bioseqs in this entire record."));
11260 eval = validator.Validate(seh, options);
11270 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetId().front()->SetEmbl().SetAccession(
"EA123456");
11271 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetId().front()->SetOther().SetAccession(
"NC_123456");
11276 "INSD and RefSeq records should not be present in the same set"));
11278 "RefSeq nucleotide title does not start with organism name"));
11280 "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title"));
11282 "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title"));
11284 "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title"));
11287 eval = validator.Validate(seh, options);
11305 "Genomic product set and mut/pop/phy/eco set records should not be present in the same set"));
11307 "Pop/phy/mut/eco set contains inconsistent moltype"));
11309 "Nested sets within Pop/Phy/Mut/Eco/Wgs set"));
11311 "Nested sets within Pop/Phy/Mut/Eco/Wgs set"));
11319 "Genomic product set and mut/pop/phy/eco set records should not be present in the same set"));
11321 "Pop/phy/mut/eco set contains inconsistent moltype"));
11323 "Nested sets within Pop/Phy/Mut/Eco/Wgs set"));
11325 "Nested sets within Pop/Phy/Mut/Eco/Wgs set"));
11338 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetId().front()->SetOther().SetAccession(
"NC_123456");
11343 "RefSeq nucleotide title does not start with organism name"));
11345 "RefSeq record should not be a Pop-set"));
11347 "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title"));
11349 "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title"));
11351 "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title"));
11353 eval = validator.Validate(seh, options);
11369 "Bioseq_set class not set"));
11371 eval = validator.Validate(seh, options);
11385 eval = validator.Validate(seh, options);
11388 BOOST_CHECK_EQUAL(orphans.size(), 0);
11390 scope.RemoveTopLevelSeqEntry(seh);
11391 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AYZ12345");
11392 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetGenbank().SetAccession(
"AYZ12345");
11395 "Orphaned stand-alone protein"));
11396 seh = scope.AddTopLevelSeqEntry(*entry);
11398 eval = validator.Validate(seh, options);
11401 BOOST_CHECK_EQUAL(orphans.size(), 1);
11403 scope.RemoveTopLevelSeqEntry(seh);
11404 entry->
SetSeq().
SetId().front()->SetEmbl().SetAccession(
"AQZ12345");
11405 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetEmbl().SetAccession(
"AQZ12345");
11406 seh = scope.AddTopLevelSeqEntry(*entry);
11407 eval = validator.Validate(seh, options);
11411 scope.RemoveTopLevelSeqEntry(seh);
11412 entry->
SetSeq().
SetId().front()->SetDdbj().SetAccession(
"ARZ12345");
11413 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetDdbj().SetAccession(
"ARZ12345");
11414 seh = scope.AddTopLevelSeqEntry(*entry);
11415 eval = validator.Validate(seh, options);
11419 scope.RemoveTopLevelSeqEntry(seh);
11420 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
11421 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetOther().SetAccession(
"NC_123456");
11422 seh = scope.AddTopLevelSeqEntry(*entry);
11423 eval = validator.Validate(seh, options);
11427 scope.RemoveTopLevelSeqEntry(seh);
11431 idlist.push_back(id1);
11434 idlist.push_back(id2);
11439 idlist.push_back(id_pat);
11440 seh = scope.AddTopLevelSeqEntry(*entry);
11441 eval = validator.Validate(seh, options);
11442 expected_errors.clear();
11459 "Nuc-prot set has MolInfo on set"));
11461 "HTGS/STS/GSS/WGS sequence should be genomic"));
11463 "Protein with nucleic acid sequence method"));
11465 "HTGS/STS/GSS/WGS sequence should be genomic"));
11467 "Inconsistent Molinfo-completeness [1] and [0]"));
11469 "Molinfo-biomol unknown used"));
11472 eval = validator.Validate(seh, options);
11488 eval = validator.Validate(seh, options);
11492 scope.RemoveTopLevelSeqEntry(seh);
11495 seh = scope.AddTopLevelSeqEntry(*entry);
11498 "Pop/Phy/Mut/Eco set has only one component and no alignments"));
11500 "Nested sets within Pop/Phy/Mut/Eco/Wgs set"));
11501 eval = validator.Validate(seh, options);
11514 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"prot");
11515 feat->
SetData().SetCdregion();
11522 "Invalid feature for a protein Bioseq."));
11524 eval = validator.Validate(seh, options);
11527 scope.RemoveTopLevelSeqEntry(seh);
11530 seh = scope.AddTopLevelSeqEntry(*entry);
11531 eval = validator.Validate(seh, options);
11534 scope.RemoveTopLevelSeqEntry(seh);
11536 seh = scope.AddTopLevelSeqEntry(*entry);
11537 eval = validator.Validate(seh, options);
11540 scope.RemoveTopLevelSeqEntry(seh);
11542 seh = scope.AddTopLevelSeqEntry(*entry);
11543 eval = validator.Validate(seh, options);
11546 scope.RemoveTopLevelSeqEntry(seh);
11547 feat->
SetData().SetGene().SetLocus(
"good locus");
11548 seh = scope.AddTopLevelSeqEntry(*entry);
11549 eval = validator.Validate(seh, options);
11553 scope.RemoveTopLevelSeqEntry(seh);
11554 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetAnnot().front()->SetData().SetFtable().pop_back();
11555 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"nuc");
11556 feat->
SetData().SetProt().SetName().push_back(
"prot name");
11558 seh = scope.AddTopLevelSeqEntry(*entry);
11560 "InvalidFeatureForNucleotide",
"Invalid feature for a nucleotide Bioseq."));
11562 eval = validator.Validate(seh, options);
11565 scope.RemoveTopLevelSeqEntry(seh);
11566 feat->
SetData().SetPsec_str();
11567 seh = scope.AddTopLevelSeqEntry(*entry);
11568 eval = validator.Validate(seh, options);
11572 scope.RemoveTopLevelSeqEntry(seh);
11577 loc1->
SetInt().SetFrom(0);
11578 loc1->
SetInt().SetTo(10);
11581 loc2->
SetInt().SetFrom(21);
11582 loc2->
SetInt().SetTo(35);
11585 cds->
SetLocation().SetMix().Set().push_back(loc1);
11586 cds->
SetLocation().SetMix().Set().push_back(loc2);
11587 cds->
SetData().SetCdregion();
11590 seh = scope.AddTopLevelSeqEntry(*entry);
11592 "InvalidForType",
"Multi-interval CDS feature is invalid on an mRNA (cDNA) Bioseq."));
11594 eval = validator.Validate(seh, options);
11598 scope.RemoveTopLevelSeqEntry(seh);
11602 seh = scope.AddTopLevelSeqEntry(*entry);
11605 eval = validator.Validate(seh, options);
11608 scope.RemoveTopLevelSeqEntry(seh);
11613 seh = scope.AddTopLevelSeqEntry(*entry);
11615 expected_errors[0]->SetErrCode(
"CDSmRNAMismatchLocation");
11617 expected_errors[0]->SetErrMsg(
"No CDS location match for 1 mRNA");
11620 "mRNA feature is invalid on an mRNA (cDNA) Bioseq."));
11621 eval = validator.Validate(seh, options);
11625 scope.RemoveTopLevelSeqEntry(seh);
11626 cds->
SetData().SetImp().SetKey(
"intron");
11630 seh = scope.AddTopLevelSeqEntry(*entry);
11632 "Invalid feature for an mRNA Bioseq."));
11634 "NotSpliceConsensusDonorTerminalIntron",
11635 "Splice donor consensus (GT) not found at start of terminal intron, position 1 of lcl|good"));
11637 "NotSpliceConsensusAcceptorTerminalIntron",
11638 "Splice acceptor consensus (AG) not found at end of terminal intron, position 60 of lcl|good, but at end of sequence"));
11640 eval = validator.Validate(seh, options);
// Import-feature keys iterated by the loop below; each is expected to be
// reported as a peptide processing feature that should be converted to the
// appropriate protein feature subtype.
std::vector<std::string> peptide_feat = {
    "mat_peptide",
    "sig_peptide",
    "transit_peptide",
    "preprotein",
    "proprotein"
};
11651 scope.RemoveTopLevelSeqEntry(seh);
11656 imp->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"prot");
11658 seh = scope.AddTopLevelSeqEntry(*entry);
11661 "Peptide processing feature should be converted to the appropriate protein feature subtype"));
11665 for (
const string&
key : peptide_feat) {
11666 scope.RemoveTopLevelSeqEntry(seh);
11669 seh = scope.AddTopLevelSeqEntry(*entry);
11670 expected_errors[0]->SetAccession(
"lcl|good");
11672 eval = validator.Validate(seh, options);
11675 scope.RemoveTopLevelSeqEntry(seh);
11678 seh = scope.AddTopLevelSeqEntry(*entry);
11679 expected_errors[0]->SetAccession(
"ref|NY_123456|");
11681 eval = validator.Validate(seh, options);
// Import-feature keys iterated by the loop below; each is set as the Imp key
// of the test feature and is expected to raise InvalidRNAFeature.
std::vector<std::string> rna_feat = {
    "mRNA",
    "tRNA",
    "rRNA",
    "snRNA",
    "scRNA",
    "snoRNA",
    "misc_RNA",
    "precursor_RNA"
};
11695 scope.RemoveTopLevelSeqEntry(seh);
11697 seh = scope.AddTopLevelSeqEntry(*entry);
11699 expected_errors[0]->SetErrCode(
"InvalidRNAFeature");
11700 expected_errors[0]->SetErrMsg(
"RNA feature should be converted to the appropriate RNA feature subtype, location should be converted manually");
11703 for (
const string&
key : rna_feat) {
11704 scope.RemoveTopLevelSeqEntry(seh);
11707 rna->SetData().SetImp().SetKey(
key);
11708 seh = scope.AddTopLevelSeqEntry(*entry);
11709 eval = validator.Validate(seh, options);
11713 vector<CProt_ref::TProcessed> prot_types;
11721 prot->SetLocation().SetInt().SetFrom(0);
11722 prot->SetLocation().SetInt().SetTo(10);
11723 prot->SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
11724 prot->SetData().SetProt().SetName().push_back(
"unnamed");
11729 "InvalidFeatureForNucleotide",
"Invalid feature for a nucleotide Bioseq."));
11731 "Peptide processing feature should be remapped to the appropriate protein bioseq"));
11734 scope.RemoveTopLevelSeqEntry(seh);
11736 prot->SetData().SetProt().SetProcessed(
key);
11737 seh = scope.AddTopLevelSeqEntry(*entry);
11740 eval = validator.Validate(seh, options);
11743 scope.RemoveTopLevelSeqEntry(seh);
11745 prot->SetData().SetProt().SetProcessed(
key);
11746 seh = scope.AddTopLevelSeqEntry(*entry);
11750 "Uninformative protein name 'unnamed'"));
11751 eval = validator.Validate(seh, options);
11753 delete expected_errors[2];
11754 expected_errors.pop_back();
11775 gene->
SetData().SetGene().SetLocus(
"x");
11786 eval = validator.Validate(seh, options);
11798 gene->
SetData().SetGene().SetLocus_tag(
"x");
11808 eval = validator.Validate(seh, options);
11811 "Genes on protein sequences with PGAP annotation should not have locus tags."));
11826 nseq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGGTATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
11831 loc1->
SetInt().SetId().SetLocal().SetStr(
"nuc");
11832 loc1->
SetInt().SetFrom(0);
11833 loc1->
SetInt().SetTo(15);
11836 loc2->
SetInt().SetId().SetLocal().SetStr(
"nuc");
11837 loc2->
SetInt().SetFrom(46);
11838 loc2->
SetInt().SetTo(56);
11840 cds->
SetLocation().SetMix().Set().push_back(loc1);
11841 cds->
SetLocation().SetMix().Set().push_back(loc2);
11861 "Coding region and protein feature partials conflict"));
11863 "Inconsistent: Product= complete, Location= partial, Feature.partial= TRUE"));
11865 "CDS is partial but protein is complete"));
11868 eval = validator.Validate(seh, options);
11877 "Inconsistent: Product= partial, Location= complete, Feature.partial= FALSE"));
11879 "CDS is 5' complete but protein is NH2 partial"));
11881 eval = validator.Validate(seh, options);
11890 "Coding region and protein feature partials conflict"));
11892 "Got stop codon, but 3'end is labeled partial"));
11894 "CDS is 3' complete but protein is CO2 partial"));
11896 "CDS is 5' partial but protein is CO2 partial"));
11898 eval = validator.Validate(seh, options);
11907 "Coding region and protein feature partials conflict"));
11909 "3' partial is not at end of sequence, gap, or consensus splice site"));
11911 "Got stop codon, but 3'end is labeled partial"));
11913 "CDS is 5' complete but protein is NH2 partial"));
11915 "CDS is 3' partial but protein is NH2 partial"));
11917 eval = validator.Validate(seh, options);
11926 "Coding region and protein feature partials conflict"));
11928 "Got stop codon, but 3'end is labeled partial"));
11930 "CDS is 5' partial but protein has neither end"));
11932 eval = validator.Validate(seh, options);
11941 "Coding region and protein feature partials conflict"));
11943 "3' partial is not at end of sequence, gap, or consensus splice site"));
11945 "Got stop codon, but 3'end is labeled partial"));
11947 "CDS is 3' partial but protein has neither end"));
11949 eval = validator.Validate(seh, options);
11959 "Inconsistent: Product= partial, Location= complete, Feature.partial= FALSE"));
11961 "Got stop codon, but 3'end is labeled partial"));
11963 "CDS is complete but protein has neither end"));
11965 eval = validator.Validate(seh, options);
11970 scope.RemoveTopLevelSeqEntry(seh);
11974 misc_feat->
SetLocation().SetWhole().SetLocal().SetStr(
"nuc");
11975 seh = scope.AddTopLevelSeqEntry(*entry);
11977 "Strand 'other' in location"));
11979 "Feature may not have whole location"));
11981 "On partial Bioseq, SeqFeat.partial should be TRUE"));
11983 eval = validator.Validate(seh, options);
11987 scope.RemoveTopLevelSeqEntry(seh);
11993 nuc_seq->
SetSeq().
SetAnnot().front()->SetData().SetFtable().pop_back();
11996 misc_feat->
SetProduct().SetWhole().SetLocal().SetStr(
"prot");
11997 seh = scope.AddTopLevelSeqEntry(*entry);
11999 "Coding region and protein feature partials conflict"));
12001 "When SeqFeat.product is a partial Bioseq, SeqFeat.location should also be partial"));
12003 eval = validator.Validate(seh, options);
12007 scope.RemoveTopLevelSeqEntry(seh);
12010 first->SetInt().SetId().SetLocal().SetStr(
"nuc");
12011 first->SetInt().SetFrom(0);
12012 first->SetInt().SetTo(5);
12016 last->SetInt().SetId().SetLocal().SetStr(
"nuc");
12017 last->SetInt().SetFrom(7);
12018 last->SetInt().SetTo(10);
12021 gene_feat->
SetData().SetGene().SetLocus(
"locus value");
12023 gene_feat->
SetLocation().SetMix().Set().push_back(middle);
12026 seh = scope.AddTopLevelSeqEntry(*entry);
12028 "Gene feature on non-segmented sequence should not have multiple intervals"));
12030 "gene [locus value:[lcl|nuc:1-6, ~, 8-11]] overlaps CDS but does not completely contain it"));
12032 "Coding region and protein feature partials conflict"));
12034 "Gene of 'order' with otherwise complete location should have partial flag set"));
12036 eval = validator.Validate(seh, options);
12040 scope.RemoveTopLevelSeqEntry(seh);
12046 seh = scope.AddTopLevelSeqEntry(*entry);
12049 "Coding region and protein feature partials conflict"));
12051 "5' or 3' partial location should not have unclassified partial in product molinfo descriptor"));
12053 eval = validator.Validate(seh, options);
12057 scope.RemoveTopLevelSeqEntry(seh);
12062 seh = scope.AddTopLevelSeqEntry(*entry);
12065 "PartialProblem3Prime",
12066 "Stop does not include first/last residue of sequence"));
12068 eval = validator.Validate(seh, options);
12072 scope.RemoveTopLevelSeqEntry(seh);
12077 seh = scope.AddTopLevelSeqEntry(*entry);
12079 "PartialProblem5Prime",
12080 "Start does not include first/last residue of sequence"));
12082 eval = validator.Validate(seh, options);
12086 scope.RemoveTopLevelSeqEntry(seh);
12094 cds->
SetData().SetCdregion();
12095 cds->
SetProduct().SetWhole().SetLocal().SetStr(
"prot");
12096 cds->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"nuc");
12103 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"MPRKT");
12110 seh = scope.AddTopLevelSeqEntry(*entry);
12113 "Stop does not include first/last residue of sequence (but is at consensus splice site)"));
12114 eval = validator.Validate(seh, options);
12123 "Stop does not include first/last residue of mRNA sequence"));
12124 eval = validator.Validate(seh, options);
12129 scope.RemoveTopLevelSeqEntry(seh);
12136 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[2] =
'#';
12138 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"PRKTEIN");
12140 prot_feat = prot_seq->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front();
12143 seh = scope.AddTopLevelSeqEntry(*entry);
12146 "Coding region and protein feature partials conflict"));
12148 "PartialLocation: Start does not include first/last residue of sequence (and is at bad sequence)"));
12150 eval = validator.Validate(seh, options);
12153 scope.RemoveTopLevelSeqEntry(seh);
12160 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[24] =
'#';
12163 seh = scope.AddTopLevelSeqEntry(*entry);
12164 expected_errors[0]->SetErrMsg(
"Invalid residue '#' at position [25]");
12165 expected_errors[2]->SetErrMsg(
"PartialLocation: Stop does not include first/last residue of sequence (and is at bad sequence)");
12166 eval = validator.Validate(seh, options);
12171 scope.RemoveTopLevelSeqEntry(seh);
12178 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"PRKTEIN");
12180 prot_feat = prot_seq->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front();
12183 seh = scope.AddTopLevelSeqEntry(*entry);
12186 "Coding region and protein feature partials conflict"));
12188 "5' partial is not at beginning of sequence, gap, or consensus splice site"));
12190 eval = validator.Validate(seh, options);
12195 scope.RemoveTopLevelSeqEntry(seh);
12203 seh = scope.AddTopLevelSeqEntry(*entry);
12206 "Coding region and protein feature partials conflict"));
12208 "3' partial is not at end of sequence, gap, or consensus splice site"));
12210 eval = validator.Validate(seh, options);
12215 scope.RemoveTopLevelSeqEntry(seh);
12221 seh = scope.AddTopLevelSeqEntry(*entry);
12223 "Start does not include first/last residue of sequence"));
12225 eval = validator.Validate(seh, options);
12230 expected_errors[0]->SetErrCode(
"PartialProblem3Prime");
12231 expected_errors[0]->SetErrMsg(
"Stop does not include first/last residue of sequence");
12232 eval = validator.Validate(seh, options);
12239 "PartialLocation: Internal partial intervals do not include first/last residue of sequence"));
12241 eval = validator.Validate(seh, options);
12246 scope.RemoveTopLevelSeqEntry(seh);
12250 seh = scope.AddTopLevelSeqEntry(*entry);
12252 eval = validator.Validate(seh, options);
12257 scope.RemoveTopLevelSeqEntry(seh);
12262 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"KPRKTEIN");
12263 seh = scope.AddTopLevelSeqEntry(*entry);
12266 "Inconsistent: Product= complete, Location= complete, Feature.partial= TRUE"));
12268 "Start of location should probably be partial"));
12270 "This SeqFeat should not be partial"));
12273 eval = validator.Validate(seh, options);
12278 scope.RemoveTopLevelSeqEntry(seh);
12283 seh = scope.AddTopLevelSeqEntry(*entry);
12286 "Inconsistent: Product= complete, Location= complete, Feature.partial= TRUE"));
12288 "End of location should probably be partial"));
12290 eval = validator.Validate(seh, options);
12293 scope.RemoveTopLevelSeqEntry(seh);
12297 seh = scope.AddTopLevelSeqEntry(*entry);
12299 expected_errors[1]->SetErrMsg(
"This SeqFeat should not be partial");
12300 eval = validator.Validate(seh, options);
12307 "Coding region and protein feature partials conflict"));
12309 "3' partial is not at end of sequence, gap, or consensus splice site"));
12311 "Inconsistent: Product= complete, Location= partial, Feature.partial= TRUE"));
12313 "Got stop codon, but 3'end is labeled partial"));
12315 "CDS is partial but protein is complete"));
12317 eval = validator.Validate(seh, options);
12340 eval = validator.Validate(seh, options);
12341 if (expect_bad_5) {
12343 "PartialProblem5Prime",
12344 "Start does not include first/last residue of sequence"));
12346 if (expect_bad_3) {
12348 "PartialProblem3Prime",
12349 "Stop does not include first/last residue of sequence"));
12353 "CDSmRNAMismatchLocation",
"No CDS location match for 1 mRNA"));
12367 misc->
SetData().SetRna().SetExt().SetName(
"fake mRNA name");
12413 gap1->SetLiteral().SetSeq_data().SetGap();
12414 gap1->SetLiteral().SetLength(10);
12415 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap1);
12418 gap2->SetLiteral().SetSeq_data().SetGap();
12419 gap2->SetLiteral().SetLength(10);
12420 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap2);
12428 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AGTTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCGT");
12445 "Invalid SeqFeat type [0]"));
12447 eval = validator.Validate(seh, options);
12457 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(14);
12463 "Anticodon is not 3 bases in length"));
12465 "Anticodon location not in tRNA"));
12467 "Anticodon location [lcl|good:15-14] out of range"));
12469 eval = validator.Validate(seh, options);
12471 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(100);
12472 expected_errors[2]->SetErrMsg(
"Anticodon location [lcl|good:15-101] out of range");
12473 eval = validator.Validate(seh, options);
12475 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(50);
12477 expected_errors[2]->SetErrMsg(
"Anticodon location [lcl|good:0-51] out of range");
12478 eval = validator.Validate(seh, options);
12482 scope.RemoveTopLevelSeqEntry(seh);
12487 codebreak->
SetLoc().SetInt().SetId().SetLocal().SetStr(
"nuc");
12488 codebreak->
SetLoc().SetInt().SetFrom(27);
12489 codebreak->
SetLoc().SetInt().SetTo(29);
12490 cds->
SetData().SetCdregion().SetCode_break().push_back(codebreak);
12491 seh = scope.AddTopLevelSeqEntry(*entry);
12494 "Code-break location not in coding region"));
12496 eval = validator.Validate(seh, options);
12501 codebreak->
SetLoc().SetInt().SetFrom(0);
12502 codebreak->
SetLoc().SetInt().SetTo(1);
12508 scope.RemoveTopLevelSeqEntry(seh);
12509 seh = scope.AddTopLevelSeqEntry(*entry);
12512 "Code-break location not in coding region - may be frame problem"));
12515 eval = validator.Validate(seh, options);
12521 scope.RemoveTopLevelSeqEntry(seh);
12525 misc->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'N');
12526 misc->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetId().SetLocal().SetStr(
"good");
12527 misc->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(11);
12528 misc->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(13);
12529 seh = scope.AddTopLevelSeqEntry(*entry);
12531 "Anticodon location not in tRNA"));
12533 eval = validator.Validate(seh, options);
12536 misc->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(6);
12537 misc->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(10);
12539 expected_errors[0]->SetErrMsg(
"Anticodon is not 3 bases in length");
12540 eval = validator.Validate(seh, options);
12543 scope.RemoveTopLevelSeqEntry(seh);
12548 seh = scope.AddTopLevelSeqEntry(*entry);
12550 expected_errors[0]->SetErrCode(
"Range");
12552 expected_errors[0]->SetErrMsg(
"Location: SeqLoc [lcl|good:12-11] out of range");
12553 eval = validator.Validate(seh, options);
12555 scope.RemoveTopLevelSeqEntry(seh);
12559 seh = scope.AddTopLevelSeqEntry(*entry);
12560 expected_errors[0]->SetErrMsg(
"Location: SeqLoc [lcl|good:1-101] out of range");
12561 eval = validator.Validate(seh, options);
12573 anticodon_loc->
SetMix().Set().front()->SetInt().SetFrom(0);
12574 anticodon_loc->
SetMix().Set().front()->SetInt().SetTo(0);
12576 anticodon_loc->
SetMix().Set().back()->SetInt().SetFrom(2);
12577 anticodon_loc->
SetMix().Set().back()->SetInt().SetTo(3);
12579 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().Assign(*anticodon_loc);
12584 eval = validator.Validate(seh, options);
12597 anticodon_loc->
SetMix().Set().front()->SetInt().SetFrom(0);
12598 anticodon_loc->
SetMix().Set().front()->SetInt().SetTo(0);
12600 anticodon_loc->
SetMix().Set().back()->SetInt().SetFrom(9);
12601 anticodon_loc->
SetMix().Set().back()->SetInt().SetTo(10);
12602 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().Assign(*anticodon_loc);
12608 "Mixed strands in Anticodon [[lcl|good:c1-1, 10-11]]"));
12610 "Codons predicted from anticodon (UAA) cannot produce amino acid (N/Asn)"));
12612 eval = validator.Validate(seh, options);
12615 scope.RemoveTopLevelSeqEntry(seh);
12619 anticodon_loc->
SetMix().Set().front()->SetInt().SetFrom(0);
12620 anticodon_loc->
SetMix().Set().front()->SetInt().SetTo(0);
12622 anticodon_loc->
SetMix().Set().back()->SetInt().SetFrom(9);
12623 anticodon_loc->
SetMix().Set().back()->SetInt().SetTo(10);
12624 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().Assign(*anticodon_loc);
12626 seh = scope.AddTopLevelSeqEntry(*entry);
12628 expected_errors[0]->SetErrCode(
"AnticodonMixedStrand");
12629 expected_errors[0]->SetErrMsg(
"Mixed plus and unknown strands in Anticodon [[lcl|good:1-1, 10-11]]");
12630 expected_errors[1]->SetErrMsg(
"Codons predicted from anticodon (AAA) cannot produce amino acid (N/Asn)");
12631 eval = validator.Validate(seh, options);
12636 scope.RemoveTopLevelSeqEntry(seh);
12640 gene_loc->
SetMix().Set().front()->SetInt().SetFrom(0);
12641 gene_loc->
SetMix().Set().front()->SetInt().SetTo(0);
12643 gene_loc->
SetMix().Set().back()->SetInt().SetFrom(9);
12644 gene_loc->
SetMix().Set().back()->SetInt().SetTo(10);
12646 seh = scope.AddTopLevelSeqEntry(*entry);
12648 "Location: Mixed strands in SeqLoc [(lcl|good:c1-1, 10-11)]"));
12650 eval = validator.Validate(seh, options);
12654 scope.RemoveTopLevelSeqEntry(seh);
12656 seh = scope.AddTopLevelSeqEntry(*entry);
12658 eval = validator.Validate(seh, options);
12669 anticodon_loc->
SetMix().Set().front()->SetInt().SetFrom(9);
12670 anticodon_loc->
SetMix().Set().front()->SetInt().SetTo(10);
12671 anticodon_loc->
SetMix().Set().back()->SetInt().SetFrom(0);
12672 anticodon_loc->
SetMix().Set().back()->SetInt().SetTo(0);
12673 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().Assign(*anticodon_loc);
12679 "Intervals out of order in Anticodon [[lcl|good:10-11, 1-1]]"));
12681 "Codons predicted from anticodon (AAA) cannot produce amino acid (N/Asn)"));
12683 eval = validator.Validate(seh, options);
12688 scope.RemoveTopLevelSeqEntry(seh);
12692 seh = scope.AddTopLevelSeqEntry(*entry);
12694 "Location: Intervals out of order in SeqLoc [(lcl|good:10-11, 1-1)]"));
12696 eval = validator.Validate(seh, options);
12714 "Location: SeqLoc [lcl|nuc:28-27] out of range"));
12716 "Protein product length [8] is more than 120% of the translation length [0]"));
12718 "Given protein length [8] does not match translation length [0]"));
12720 "Missing stop codon"));
12722 eval = validator.Validate(seh, options);
// Shorthand macros: each expansion appends one CExpectedError for accession
// "lcl|nuc" to a vector named `expected_errors`, which must be in scope where
// the macro is used. Every expansion already ends with ';'.

// Error "StartCodon": illegal start codon together with one internal stop.
12730 #define START_CODON_AND_INT_STOP_ERR \
12731 expected_errors.push_back(new CExpectedError("lcl|nuc", eDiag_Error, "StartCodon",\
12732 "Illegal start codon (and 1 internal stops). Probably wrong genetic code [0]"));
// Error "InternalStop": one internal stop together with an illegal start codon.
12733 #define INTERNAL_STOP_ERR \
12734 expected_errors.push_back(new CExpectedError("lcl|nuc", eDiag_Error, "InternalStop",\
12735 "1 internal stops (and illegal start codon). Genetic code [0]"));
// Error "NoStop": missing stop codon.
12736 #define NO_STOP_ERR \
12737 expected_errors.push_back(new CExpectedError("lcl|nuc", eDiag_Error, "NoStop", "Missing stop codon"));
// Error "NoPubFound": no publications on the record.
12738 #define NO_PUB_ERR \
12739 expected_errors.push_back(new CExpectedError("lcl|nuc", eDiag_Error, "NoPubFound", "No publications anywhere on this entire record."));
// Error "TransLen": given protein length 8 versus translation length 9.
12740 #define PROT_LEN_ERR \
12741 expected_errors.push_back(new CExpectedError("lcl|nuc", eDiag_Error, "TransLen",\
12742 "Given protein length [8] does not match translation length [9]"));
// Info-level (not Error) "MissingPubRequirement": no submission citation.
12743 #define NO_SUB_ERR \
12744 expected_errors.push_back(new CExpectedError("lcl|nuc", eDiag_Info, "MissingPubRequirement", "No submission citation anywhere on this entire record."));
// Error "ExceptionProblem": illegal exception explanation text.
12745 #define EXCEPTION_PROBLEM_ERR \
12746 expected_errors.push_back(new CExpectedError("lcl|nuc", eDiag_Error, "ExceptionProblem", "unclassified translation discrepancy is not a legal exception explanation"));
// Error "NoSourceDescriptor": no source information on the record.
12747 #define NO_SRC_ERR \
12748 expected_errors.push_back(new CExpectedError("lcl|nuc", eDiag_Error, "NoSourceDescriptor", "No source information included on this record."));
12761 nuc_only_cds->
Assign(*cds);
12776 eval = validator.Validate(seh, options);
12780 scope.RemoveTopLevelSeqEntry(seh);
12781 seh = scope.AddTopLevelSeqEntry(*
nuc);
12782 eval = validator.Validate(seh, options);
12794 scope.RemoveTopLevelSeqEntry(seh);
12795 seh = scope.AddTopLevelSeqEntry(*entry);
12804 "CDS has unnecessary translated product replaced exception"));
12808 eval = validator.Validate(seh, options);
12812 scope.RemoveTopLevelSeqEntry(seh);
12813 nuc_only_cds->
Assign(*cds);
12814 seh = scope.AddTopLevelSeqEntry(*
nuc);
12815 eval = validator.Validate(seh, options);
12825 scope.RemoveTopLevelSeqEntry(seh);
12831 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[0] =
'C';
12832 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[1] =
'C';
12833 seh = scope.AddTopLevelSeqEntry(*entry);
12835 "Illegal start codon used. Wrong genetic code [0] or protein should be partial"));
12837 eval = validator.Validate(seh, options);
12846 "CDS has unnecessary translated product replaced exception"));
12849 eval = validator.Validate(seh, options);
12869 "Illegal start codon (and 1 internal stops). Probably wrong genetic code [0]"));
12871 "1 internal stops (and illegal start codon). Genetic code [0]"));
12873 "Given protein length [8] does not match translation length [9]"));
12875 "Missing stop codon"));
12878 eval = validator.Validate(seh, options);
12883 scope.RemoveTopLevelSeqEntry(seh);
12887 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[9] =
'T';
12888 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MPR*TEIN");
12889 seh = scope.AddTopLevelSeqEntry(*entry);
12895 "[1] termination symbols in protein sequence (gene? - fake protein name)"));
12897 "1 internal stops. Genetic code [0]"));
12899 eval = validator.Validate(seh, options);
12904 BOOST_CHECK_EQUAL(rval,
"InternalStop\nlcl|nuc:CDS\t fake protein name\tlcl|nuc:1-27\t\n");
12907 scope.RemoveTopLevelSeqEntry(seh);
12909 gene->
SetData().SetGene().SetLocus_tag(
"a_locus_tag");
12911 seh = scope.AddTopLevelSeqEntry(*entry);
12912 eval = validator.Validate(seh, options);
12914 BOOST_CHECK_EQUAL(rval,
"InternalStop\nlcl|nuc:CDS\t fake protein name\tlcl|nuc:1-27\ta_locus_tag\n");
12929 "No proteins in nuc-prot set"));
12931 "No protein Bioseq given"));
12933 "Expected CDS product absent"));
12937 eval = validator.Validate(seh, options);
12949 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set()[0] =
'A';
12954 "Residue 1 in protein [A] != translation [M] at lcl|nuc:1-3"));
12957 eval = validator.Validate(seh, options);
12960 for (
int i = 0;
i < 11;
i++) {
12961 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set()[
i] =
'A';
12965 expected_errors[0]->SetErrMsg(
"11 mismatches found. First mismatch at 1, residue in protein [A] != translation [M] at lcl|nuc:1-3. Last mismatch at 11, residue in protein [A] != translation [M] at lcl|nuc:31-33. Genetic code [0]");
12967 eval = validator.Validate(seh, options);
12978 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"MPRKTEI");
12985 "Given protein length [7] does not match translation length [9]"));
12988 eval = validator.Validate(seh, options);
12993 scope.RemoveTopLevelSeqEntry(seh);
12996 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[27] =
'A';
12997 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[28] =
'T';
13000 seh = scope.AddTopLevelSeqEntry(*entry);
13002 "Coding region extends 2 base(s) past stop codon"));
13005 eval = validator.Validate(seh, options);
13010 scope.RemoveTopLevelSeqEntry(seh);
13014 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"MPRKTEINQQLLLLLLLLLLQQQQQQQQQQ");
13017 seh = scope.AddTopLevelSeqEntry(*entry);
13019 "Protein product length [30] is more than 120% of the translation length [9]"));
13021 "Given protein length [30] does not match translation length [9]"));
13024 eval = validator.Validate(seh, options);
13031 cds->
SetExcept_text(
"annotated by transcript or proteomic data");
13033 cds->
AddQualifier(
"inference",
"similar to DNA sequence:INSD:AY123456.1");
13037 eval = validator.Validate(seh, options);
13055 "Missing stop codon"));
13058 eval = validator.Validate(seh, options);
13071 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set()[4] =
'E';
13076 "Residue 5 in protein [E] != translation [T] at lcl|nuc:13-15"));
13078 "Unparsed transl_except qual. Skipped"));
13081 eval = validator.Validate(seh, options);
13085 scope.RemoveTopLevelSeqEntry(seh);
13089 seh = scope.AddTopLevelSeqEntry(*entry);
13092 "Unparsed transl_except qual (but protein is okay). Skipped"));
13095 eval = validator.Validate(seh, options);
13113 "The product name is missing from this protein."));
13116 eval = validator.Validate(seh, options);
13119 scope.RemoveTopLevelSeqEntry(seh);
13121 seh = scope.AddTopLevelSeqEntry(*entry);
13123 eval = validator.Validate(seh, options);
13134 cds->
SetData().SetCdregion().SetOrf(
true);
13139 "An ORF coding region should not have a product"));
13142 eval = validator.Validate(seh, options);
13160 "There is a gene feature where all fields are empty"));
13163 eval = validator.Validate(seh, options);
13172 string except_text =
"trans-splicing";
13180 "Exception flag should be set in coding region"));
13183 eval = validator.Validate(seh, options);
13191 "Exception text is present, but exception flag is not set"));
13194 eval = validator.Validate(seh, options);
13203 "Exception flag is set, but exception text is empty"));
13206 eval = validator.Validate(seh, options);
13216 prot_feat->
SetData().SetProt().Reset();
13221 "There is a protein feature where all fields are empty"));
13223 "Protein feature has no name"));
13226 eval = validator.Validate(seh, options);
13240 cds->
SetData().SetCdregion().SetCode().Set().push_back(ce);
13244 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set()[6] =
'M';
13248 "Genetic code conflict between CDS (code 3) and BioSource.genome biological context (apicoplast) (uses code 11)"));
13251 eval = validator.Validate(seh, options);
13256 expected_errors[0]->SetErrMsg(
"Genetic code conflict between CDS (code 3) and BioSource (code 2)");
13258 eval = validator.Validate(seh, options);
13267 eval = validator.Validate(seh, options);
13282 "RNA type 0 (unknown) not supported"));
13285 eval = validator.Validate(seh, options);
13296 misc->
SetData().SetImp().SetKey(
"bad value");
13300 "Unknown feature key bad value"));
13303 eval = validator.Validate(seh, options);
13306 scope.RemoveTopLevelSeqEntry(seh);
13307 misc->
SetData().SetImp().SetKey(
"");
13308 seh = scope.AddTopLevelSeqEntry(*entry);
13309 expected_errors[0]->SetErrMsg(
"NULL feature key");
13310 eval = validator.Validate(seh, options);
// Import-feature keys for which the loop over this list expects the message
// "Feature key <key> is no longer legal". Iteration order is the order listed.
std::vector<std::string> illegal_keys{
    "virion",
    "mutation",
    "allele",
    "Import",
};
13320 for (
const string& it : illegal_keys) {
13321 scope.RemoveTopLevelSeqEntry(seh);
13322 misc->
SetData().SetImp().SetKey(it);
13323 seh = scope.AddTopLevelSeqEntry(*entry);
13324 expected_errors[0]->SetErrMsg(
"Feature key " + it +
" is no longer legal");
13325 eval = validator.Validate(seh, options);
13341 "Unknown qualifier bad name"));
13344 eval = validator.Validate(seh, options);
13347 misc->
SetQual().front()->SetQual(
"");
13348 expected_errors[0]->SetErrMsg(
"NULL qualifier");
13349 eval = validator.Validate(seh, options);
13365 scope.RemoveTopLevelSeqEntry(seh);
13368 misc_feat->
SetData().SetImp().SetKey(
"conflict");
13369 seh = scope.AddTopLevelSeqEntry(*entry);
13371 "Missing qualifier citation for feature conflict"));
13373 eval = validator.Validate(seh, options);
13376 scope.RemoveTopLevelSeqEntry(seh);
13379 misc_feat->
SetData().SetImp().SetKey(
"misc_binding");
13380 seh = scope.AddTopLevelSeqEntry(*entry);
13382 "Missing qualifier bound_moiety for feature misc_binding"));
13384 eval = validator.Validate(seh, options);
13387 scope.RemoveTopLevelSeqEntry(seh);
13390 misc_feat->
SetData().SetImp().SetKey(
"modified_base");
13391 seh = scope.AddTopLevelSeqEntry(*entry);
13393 "Missing qualifier mod_base for feature modified_base"));
13395 eval = validator.Validate(seh, options);
13398 scope.RemoveTopLevelSeqEntry(seh);
13401 misc_feat->
SetData().SetImp().SetKey(
"old_sequence");
13402 seh = scope.AddTopLevelSeqEntry(*entry);
13404 "Missing qualifier citation for feature old_sequence"));
13406 eval = validator.Validate(seh, options);
13409 scope.RemoveTopLevelSeqEntry(seh);
13412 misc_feat->
SetData().SetImp().SetKey(
"operon");
13413 seh = scope.AddTopLevelSeqEntry(*entry);
13415 "Missing qualifier operon for feature operon"));
13417 eval = validator.Validate(seh, options);
13420 scope.RemoveTopLevelSeqEntry(seh);
13423 misc_feat->
SetData().SetImp().SetKey(
"protein_bind");
13424 seh = scope.AddTopLevelSeqEntry(*entry);
13426 "Missing qualifier bound_moiety for feature protein_bind"));
13428 eval = validator.Validate(seh, options);
13431 scope.RemoveTopLevelSeqEntry(seh);
13434 misc_feat->
SetData().SetImp().SetKey(
"source");
13435 seh = scope.AddTopLevelSeqEntry(*entry);
13437 "Missing qualifier organism for feature source"));
13439 eval = validator.Validate(seh, options);
13458 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"PseudoCdsHasProduct",
"A pseudo coding region should not have a product"));
13460 eval = validator.Validate(seh, options);
13464 eval = validator.Validate(seh, options);
13467 scope.RemoveTopLevelSeqEntry(seh);
13471 seh = scope.AddTopLevelSeqEntry(*entry);
13472 eval = validator.Validate(seh, options);
13482 for (
char c :
str) {
// db_xref database names with their expected capitalization. The loops over
// this list build "Illegal db_xref type ... legal capitalization is <name>"
// and "db_xref type <name> (1234) should not be used on an OrgRef" messages
// from each entry, so both spelling and order are significant.
std::vector<std::string> legal_strings{
    "AceView/WormGenes",
    "AFTOL",
    "AntWeb",
    "APHIDBASE",
    "ApiDB",
    "ApiDB_CryptoDB",
    "ApiDB_PlasmoDB",
    "ApiDB_ToxoDB",
    "ASAP",
    "ATCC",
    "ATCC(in host)",
    "ATCC(dna)",
    "Axeldb",
    "BDGP_EST",
    "BDGP_INS",
    "BEETLEBASE",
    "BOLD",
    "CDD",
    "CK",
    "COG",
    "dbClone",
    "dbCloneLib",
    "dbEST",
    "dbProbe",
    "dbSNP",
    "dbSTS",
    "dictyBase",
    "DDBJ",
    "EcoGene",
    "EMBL",
    "Ensembl",
    "ESTLIB",
    "FANTOM_DB",
    "FLYBASE",
    "GABI",
    "GDB",
    "GeneDB",
    "GeneID",
    "GO",
    "GOA",
    "Greengenes",
    "GRIN",
    "H-InvDB",
    "HGNC",
    "HMP",
    "HOMD",
    "HSSP",
    "IMGT/GENE-DB",
    "IMGT/HLA",
    "IMGT/LIGM",
    "InterimID",
    "InterPro",
    "IRD",
    "ISD",
    "ISFinder",
    "JCM",
    "JGIDB",
    "LocusID",
    "MaizeGDB",
    "MGI",
    "MIM",
    "miRBase",
    "MycoBank",
    "NBRC",
    "NextDB",
    "niaEST",
    "NMPDR",
    "NRESTdb",
    "Osa1",
    "Pathema",
    "PBmice",
    "PDB",
    "PFAM",
    "PGN",
    "PIR",
    "PSEUDO",
    "PseudoCAP",
    "RAP-DB",
    "RATMAP",
    "RFAM",
    "RGD",
    "RiceGenes",
    "RZPD",
    "SEED",
    "SGD",
    "SGN",
    "SoyBase",
    "SubtiList",
    "TAIR",
    "taxon",
    "TIGRFAM",
    "UniGene",
    "UNILIB",
    "UniProtKB/Swiss-Prot",
    "UniProtKB/TrEMBL",
    "UniSTS",
    "UNITE",
    "VBASE2",
    "VectorBase",
    "WorfDB",
    "WormBase",
    "Xenbase",
    "ZFIN",
};
// db_xref database names iterated by the first db_xref loop, which expects
// only a capitalization complaint ("legal capitalization is <name>") for them.
// The legal_strings loop also scans this list (its `found` flag).
// NOTE(review): presumably these are the names permitted on a source/OrgRef -
// confirm against the validator's db_xref tables.
std::vector<std::string> src_strings{
    "AFTOL",
    "AntWeb",
    "ATCC",
    "ATCC(dna)",
    "ATCC(in host)",
    "BOLD",
    "FANTOM_DB",
    "FLYBASE",
    "Greengenes",
    "GRIN",
    "HMP",
    "HOMD",
    "IMGT/HLA",
    "IMGT/LIGM",
    "JCM",
    "MGI",
    "MycoBank",
    "NBRC",
    "RZPD",
    "taxon",
    "UNILIB",
    "UNITE",
};
// db_xref database names for which the loops over this list expect
// "RefSeq-specific db_xref type <name> (1234) should not be used on ..."
// messages. Iteration order is the order listed.
std::vector<std::string> refseq_strings{
    "CCDS",
    "CGNC",
    "CloneID",
    "HPRD",
    "LRG",
    "PBR",
    "REBASE",
    "SK-FST",
    "VBRC",
};
13641 "db_xref type %s (1234) should not be used on an OrgRef"));
13645 for (
const string& sit : src_strings) {
13651 expected_errors[0]->SetErrMsg(
"Illegal db_xref type " + bad +
" (1234), legal capitalization is " + sit);
13652 eval = validator.Validate(seh, options);
13660 for (
const string& sit : legal_strings) {
13661 bool found =
false;
13662 for (
const string& ss : src_strings) {
13673 expected_errors[0]->SetErrMsg(
"Illegal db_xref type " + bad +
" (1234), legal capitalization is " + sit
13674 +
", but should not be used on an OrgRef");
13675 eval = validator.Validate(seh, options);
13680 expected_errors[0]->SetErrMsg(
"db_xref type " + sit +
" (1234) should not be used on an OrgRef");
13681 eval = validator.Validate(seh, options);
13686 for (
const string& sit : refseq_strings) {
13688 expected_errors[0]->SetErrMsg(
"RefSeq-specific db_xref type " + sit +
" (1234) should not be used on a non-RefSeq OrgRef");
13689 eval = validator.Validate(seh, options);
13695 expected_errors[0]->SetErrMsg(
"Illegal db_xref type unrecognized (1234)");
13696 eval = validator.Validate(seh, options);
13700 scope.RemoveTopLevelSeqEntry(seh);
13701 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
13702 seh = scope.AddTopLevelSeqEntry(*entry);
13704 for (
const string& sit : refseq_strings) {
13706 expected_errors[0]->SetErrMsg(
"RefSeq-specific db_xref type " + sit +
" (1234) should not be used on an OrgRef");
13707 eval = validator.Validate(seh, options);
13712 scope.RemoveTopLevelSeqEntry(seh);
13713 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
13715 seh = scope.AddTopLevelSeqEntry(*entry);
13718 for (
const string& sit : legal_strings) {
13722 expected_errors[0]->SetErrMsg(
"Illegal db_xref type TAXON (1234), legal capitalization is taxon, but should only be used on an OrgRef");
13724 expected_errors[0]->SetErrMsg(
"Illegal db_xref type " + bad +
" (1234), legal capitalization is " + sit);
13726 eval = validator.Validate(seh, options);
13731 for (
const string& sit : refseq_strings) {
13733 expected_errors[0]->SetErrMsg(
"db_xref type " + sit +
" (1234) is only legal for RefSeq");
13734 eval = validator.Validate(seh, options);
13740 expected_errors[0]->SetErrMsg(
"db_xref type taxon (1234) should only be used on an OrgRef");
13741 eval = validator.Validate(seh, options);
13746 expected_errors[0]->SetErrMsg(
"Illegal db_xref type unrecognized (1234)");
13747 eval = validator.Validate(seh, options);
13760 misc->
SetLocation().SetMix().Set().back()->SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
13764 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"FarLocation",
"Feature has 'far' location - accession not packaged in record"));
13765 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadLocation",
"Feature location intervals should all be on the same sequence"));
13767 eval = validator.Validate(seh, options);
13782 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"DuplicateFeat",
"Features have identical intervals, but labels differ"));
13784 eval = validator.Validate(seh, options);
13788 scope.RemoveTopLevelSeqEntry(seh);
13790 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
13792 feat1->
SetData().SetGene().SetLocus(
"locus1");
13794 feat2->
SetData().SetGene().SetLocus(
"locus2");
13795 seh = scope.AddTopLevelSeqEntry(*entry);
13797 eval = validator.Validate(seh, options);
13802 eval = validator.Validate(seh, options);
13812 eval = validator.Validate(seh, options);
13823 eval = validator.Validate(seh, options);
13827 scope.RemoveTopLevelSeqEntry(seh);
13829 entry->
SetSeq().
SetId().front()->SetGeneral().SetDb(
"abc");
13830 entry->
SetSeq().
SetId().front()->SetGeneral().SetTag().SetId(123456);
13832 feat1->
SetData().SetGene().SetLocus(
"locus1");
13834 feat2->
SetData().SetGene().SetLocus(
"locus2");
13835 seh = scope.AddTopLevelSeqEntry(*entry);
13837 eval = validator.Validate(seh, options);
13842 eval = validator.Validate(seh, options);
13852 eval = validator.Validate(seh, options);
13863 eval = validator.Validate(seh, options);
13868 scope.RemoveTopLevelSeqEntry(seh);
13874 annot2->
SetData().SetFtable().push_back(feat2);
13876 seh = scope.AddTopLevelSeqEntry(*entry);
13879 expected_errors[0]->SetErrMsg(
"Features have identical intervals, but labels differ (packaged in different feature table)");
13880 eval = validator.Validate(seh, options);
13892 gene->
SetData().SetGene().SetLocus(
"foo");
13897 eval = validator.Validate(seh, options);
13902 scope.RemoveTopLevelSeqEntry(seh);
13906 gene2->
SetData().SetGene().SetLocus(
"bar");
13907 seh = scope.AddTopLevelSeqEntry(*entry);
13908 eval = validator.Validate(seh, options);
13916 "Gene feature has gene cross-reference"));
13917 eval = validator.Validate(seh, options);
13930 gene->
SetData().SetGene().SetLocus_tag(
"xyz");
13934 codebreak->
SetLoc().SetInt().SetId().SetLocal().SetStr(
"nuc");
13935 codebreak->
SetLoc().SetInt().SetFrom(4);
13936 codebreak->
SetLoc().SetInt().SetTo(6);
13937 cds->
SetData().SetCdregion().SetCode_break().push_back(codebreak);
13942 "Code-break location not in coding region - may be frame problem"));
13944 "transl_except qual out of frame."));
13946 eval = validator.Validate(seh, options);
13952 expected.push_back(
"TranslExceptPhase");
13953 expected.push_back(
"lcl|nuc:CDS\t fake protein name\tlcl|nuc:1-27\txyz");
13956 expected.push_back(
"lcl|nuc:CDS\t fake protein name\tlcl|nuc:1-27\txyz");
13958 vector<string> seen;
13959 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
13960 for (
const string& it : cat_list) {
13961 vector<string> sublist;
13963 for (
const string& sit : sublist) {
13964 seen.push_back(sit);
13972 for (
auto it : eval->GetErrs()) {
13973 BOOST_CHECK_EQUAL(it->IsSetLocus_tag(),
false);
13978 for (
const auto& it : eval->GetErrs()) {
13979 if (!
NStr::Equal(it->GetErrCode(),
"ChromosomeWithoutLocation")) {
13980 BOOST_CHECK_EQUAL(it->IsSetLocus_tag(),
true);
13981 BOOST_CHECK_EQUAL(it->GetLocus_tag(),
"xyz");
13986 expected.push_back(
"TranslExceptPhase");
13987 expected.push_back(
"lcl|nuc:CDS\t fake protein name\tlcl|nuc:1-27\txyz");
13990 expected.push_back(
"lcl|nuc:CDS\t fake protein name\tlcl|nuc:1-27\txyz");
13993 cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
13995 for (
const string& it : cat_list) {
13996 vector<string> sublist;
13998 for (
const string& sit : sublist) {
13999 seen.push_back(sit);
14014 trna->
SetData().SetRna().SetExt().SetTRNA().SetCodon().push_back(0);
14015 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'A');
14020 "Codon recognized by tRNA (UUU) does not match amino acid (A/Ala) specified by genetic code (1/Standard)"));
14022 eval = validator.Validate(seh, options);
14026 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'U');
14028 expected_errors[0]->SetErrMsg(
"Codon recognized by tRNA (UUU) does not match amino acid (U/Sec) specified by genetic code (1/Standard)");
14029 eval = validator.Validate(seh, options);
14032 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'O');
14033 expected_errors[0]->SetErrMsg(
"Codon recognized by tRNA (UUU) does not match amino acid (O/Pyl) specified by genetic code (1/Standard)");
14034 eval = validator.Validate(seh, options);
14045 feat->
SetData().SetGene().SetLocus(
"X");
14051 "gene may not be on both (forward) strands"));
14053 eval = validator.Validate(seh, options);
14056 scope.RemoveTopLevelSeqEntry(seh);
14059 feat->
SetData().SetGene().SetLocus(
"X");
14066 seh = scope.AddTopLevelSeqEntry(*entry);
14067 expected_errors[0]->SetErrMsg(
"gene may not be on both (forward and reverse) strands");
14068 eval = validator.Validate(seh, options);
14071 scope.RemoveTopLevelSeqEntry(seh);
14074 feat->
SetData().SetGene().SetLocus(
"X");
14078 seh = scope.AddTopLevelSeqEntry(*entry);
14079 expected_errors[0]->SetErrMsg(
"gene may not be on both (reverse) strands");
14081 "Gene feature on non-segmented sequence should not have multiple intervals"));
14082 eval = validator.Validate(seh, options);
14087 scope.RemoveTopLevelSeqEntry(seh);
14094 feat->
SetData().SetRna().SetExt().SetName(
"mRNA product");
14102 seh = scope.AddTopLevelSeqEntry(*entry);
14105 "No CDS location match for 1 mRNA"));
14107 "mRNA may not be on both (forward and reverse) strands"));
14109 "gene [gene locus:lcl|good:1-57] overlaps mRNA but does not completely contain it"));
14111 "Strand 'other' in location"));
14114 eval = validator.Validate(seh, options);
14119 scope.RemoveTopLevelSeqEntry(seh);
14126 feat->
SetData().SetCdregion();
14127 seh = scope.AddTopLevelSeqEntry(*entry);
14129 "CDS may not be on both (reverse) strands"));
14131 eval = validator.Validate(seh, options);
14147 mrna->
SetData().SetRna().SetExt().SetName(
"mRNA product");
14148 mrna->
SetLocation().SetMix().Set().front()->SetInt().SetTo(17);
14149 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[18] =
'G';
14150 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[19] =
'T';
14158 "No CDS location match for 1 mRNA"));
14160 "mRNA contains CDS but internal intron-exon boundaries do not match"));
14162 eval = validator.Validate(seh, options);
14167 scope.RemoveTopLevelSeqEntry(seh);
14170 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[16] =
'A';
14171 seh = scope.AddTopLevelSeqEntry(*entry);
14173 eval = validator.Validate(seh, options);
14177 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[16] =
'G';
14178 eval = validator.Validate(seh, options);
14182 "No CDS location match for 1 mRNA"));
14186 scope.RemoveTopLevelSeqEntry(seh);
14195 mrna->
SetData().SetRna().SetExt().SetName(
"mRNA product");
14196 mrna->
SetLocation().SetMix().Set().front()->SetInt().SetTo(12);
14197 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[13] =
'G';
14198 nuc_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set()[14] =
'T';
14201 prot_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set()[4] =
'S';
14202 seh = scope.AddTopLevelSeqEntry(*entry);
14204 "mRNA overlaps or contains CDS but does not completely contain intervals"));
14205 eval = validator.Validate(seh, options);
14216 p1->
SetData().SetProt().SetName().push_back(
"unnamed");
14219 p2->
SetData().SetProt().SetName().push_back(
"unnamed");
14224 "Signal, Transit, or Mature peptide features overlap (parent CDS is on lcl|nuc)"));
14226 "Signal, Transit, or Mature peptide features overlap (parent CDS is on lcl|nuc)"));
14228 eval = validator.Validate(seh, options);
14233 scope.RemoveTopLevelSeqEntry(seh);
14237 p1->
SetData().SetProt().SetName().push_back(
"unnamed");
14240 p2->
SetData().SetProt().SetName().push_back(
"unnamed");
14241 seh = scope.AddTopLevelSeqEntry(*entry);
14243 "Signal, Transit, or Mature peptide features overlap"));
14245 "Signal, Transit, or Mature peptide features overlap"));
14247 eval = validator.Validate(seh, options);
14256 eval = validator.Validate(seh, options);
14272 "Feature comment may refer to reference by serial number - attach reference specific comments to the reference REMARK instead."));
14274 eval = validator.Validate(seh, options);
14285 cds2->
SetData().SetCdregion();
14293 "Same product Bioseq from multiple CDS features"));
14295 eval = validator.Validate(seh, options);
14306 src->
SetData().SetBiosrc().SetIs_focus();
14312 "Focus must be on BioSource descriptor, not BioSource feature."));
14314 eval = validator.Validate(seh, options);
14326 peptide->
SetData().SetImp().SetKey(
"sig_peptide");
14331 "Peptide processing feature should be converted to the appropriate protein feature subtype"));
14333 "Stop of sig_peptide is out of frame with CDS codons"));
14335 eval = validator.Validate(seh, options);
14338 scope.RemoveTopLevelSeqEntry(seh);
14343 peptide->
SetData().SetImp().SetKey(
"sig_peptide");
14344 seh = scope.AddTopLevelSeqEntry(*entry);
14345 expected_errors[1]->SetErrMsg(
"Start of sig_peptide is out of frame with CDS codons");
14346 eval = validator.Validate(seh, options);
14349 scope.RemoveTopLevelSeqEntry(seh);
14354 peptide->
SetData().SetImp().SetKey(
"sig_peptide");
14355 seh = scope.AddTopLevelSeqEntry(*entry);
14356 expected_errors[1]->SetErrMsg(
"Start and stop of sig_peptide are out of frame with CDS codons");
14357 eval = validator.Validate(seh, options);
14375 "gene [gene locus:lcl|nuc:2-27] overlaps CDS but does not completely contain it"));
14377 eval = validator.Validate(seh, options);
14382 scope.RemoveTopLevelSeqEntry(seh);
14383 gene->
SetId().SetLocal().SetId(1);
14384 cds->
SetId().SetLocal().SetId(2);
14386 gene_xref->SetId().SetLocal().SetId(1);
14387 cds->
SetXref().push_back(gene_xref);
14389 cds_xref->SetId().SetLocal().SetId(2);
14390 gene->
SetXref().push_back(cds_xref);
14392 seh = scope.AddTopLevelSeqEntry(*entry);
14395 "gene [gene locus:lcl|nuc:2-27] overlaps CDS but does not completely contain it"));
14397 eval = validator.Validate(seh, options);
14403 scope.RemoveTopLevelSeqEntry(seh);
14408 cl1->
SetInt().SetFrom(0);
14410 cl1->
SetInt().SetId().Assign(*(
nuc->GetSeq().GetId().front()));
14412 cl2->
SetInt().SetFrom(21);
14413 cl2->
SetInt().SetTo(26);
14414 cl2->
SetInt().SetId().Assign(*(
nuc->GetSeq().GetId().front()));
14420 cds->
SetLocation().SetMix().Set().push_back(cl1);
14421 cds->
SetLocation().SetMix().Set().push_back(cl2);
14423 gene->
SetLocation().SetMix().Set().push_back(gl1);
14424 gene->
SetLocation().SetMix().Set().push_back(gl2);
14426 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAGTAACAGAGAAGAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
14428 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MPRN");
14429 prot->SetSeq().SetInst().SetLength(4);
14433 seh = scope.AddTopLevelSeqEntry(*entry);
14436 "MultiIntervalGene",
"Gene feature on non-segmented sequence should not have multiple intervals"));
14438 "CDSgeneRange",
"gene [gene locus:[lcl|nuc:22-27, 1-9]] overlaps CDS but does not completely contain it"));
14440 "SeqLocOrder",
"Location: Intervals out of order in SeqLoc [(lcl|nuc:22-27, 1-9)]"));
14442 eval = validator.Validate(seh, options);
14452 "MultiIntervalGene",
"Gene feature on non-segmented sequence should not have multiple intervals"));
14454 "SeqLocOrder",
"Location: Intervals out of order in SeqLoc [(lcl|nuc:22-27, 1-9)]"));
14456 eval = validator.Validate(seh, options);
14469 feat->
SetData().SetRna().SetExt().SetName(
"fake protein name");
14470 feat->
SetProduct().SetWhole().SetLocal().SetStr(
"nuc");
14475 "mRNA products are not unique"));
14477 "No CDS location match for 1 mRNA"));
14479 "Transcript length [11] less than product length [27], and tail < 95% polyA"));
14481 "Identical transcript IDs found on multiple mRNAs"));
14483 eval = validator.Validate(seh, options);
14494 gene->
SetData().SetGene().SetLocus(
"locus");
14503 "gene [locus:lcl|good:6-11] overlaps mRNA but does not completely contain it"));
14505 "No CDS location match for 1 mRNA"));
14507 eval = validator.Validate(seh, options);
14513 scope.RemoveTopLevelSeqEntry(seh);
14515 overlap->
SetData().SetGene().SetLocus(
"locus2");
14517 seh = scope.AddTopLevelSeqEntry(*entry);
14518 eval = validator.Validate(seh, options);
14520 "No CDS location match for 1 mRNA"));
14524 scope.RemoveTopLevelSeqEntry(seh);
14525 overlap->
SetData().SetImp().SetKey(
"operon");
14527 seh = scope.AddTopLevelSeqEntry(*entry);
14528 eval = validator.Validate(seh, options);
14545 "No CDS location match for 1 mRNA"));
14547 "mRNA overlaps or contains CDS but does not completely contain intervals"));
14549 "Transcript length [11] less than product length [27], and tail < 95% polyA"));
14551 eval = validator.Validate(seh, options);
14554 scope.RemoveTopLevelSeqEntry(seh);
14556 seh = scope.AddTopLevelSeqEntry(*entry);
14557 expected_errors[3]->SetErrCode(
"PolyATail");
14558 expected_errors[3]->SetSeverity(
eDiag_Info);
14559 expected_errors[3]->SetErrMsg(
"Transcript length [26] less than product length [27], but tail is 100% polyA");
14560 eval = validator.Validate(seh, options);
14565 scope.RemoveTopLevelSeqEntry(seh);
14567 seh = scope.AddTopLevelSeqEntry(*entry);
14569 "Transcript length [38] greater than product length [27]"));
14571 eval = validator.Validate(seh, options);
14581 mrna_seq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATAAATTAA");
14586 "There are 1 mismatches out of 27 bases between the transcript and product sequence"));
14588 eval = validator.Validate(seh, options);
14594 scope.RemoveTopLevelSeqEntry(seh);
14599 seh = scope.AddTopLevelSeqEntry(*entry);
14601 eval = validator.Validate(seh, options);
14616 "Protein product not packaged in nuc-prot set with nucleotide"));
14618 "There is 1 mispackaged feature in this record."));
14620 eval = validator.Validate(seh, options);
14633 anticodon_loc->
SetMix().Set().front()->SetInt().SetFrom(8);
14634 anticodon_loc->
SetMix().Set().front()->SetInt().SetTo(10);
14635 anticodon_loc->
SetMix().Set().back()->SetInt().SetFrom(8);
14636 anticodon_loc->
SetMix().Set().back()->SetInt().SetTo(10);
14637 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().Assign(*anticodon_loc);
14643 "Anticodon is not 3 bases in length"));
14645 "Duplicate anticodon exons in location"));
14647 eval = validator.Validate(seh, options);
14653 scope.RemoveTopLevelSeqEntry(seh);
14657 loc->
SetMix().Set().back()->SetInt().SetFrom(0);
14658 loc->
SetMix().Set().back()->SetInt().SetTo(15);
14661 seh = scope.AddTopLevelSeqEntry(*entry);
14663 "Duplicate exons in location"));
14665 eval = validator.Validate(seh, options);
14677 feat->
SetData().SetImp().SetKey(
"polyA_site");
14682 "PolyA_site should be a single point"));
14684 eval = validator.Validate(seh, options);
14689 feat->
SetLocation().SetPnt().SetId().SetLocal().SetStr(
"good");
14693 eval = validator.Validate(seh, options);
14704 feat->
SetData().SetImp().SetLoc(
"one-of three");
14709 "ImpFeat loc one-of three has obsolete 'one-of' text for feature misc_feature"));
14711 eval = validator.Validate(seh, options);
14714 feat->
SetData().SetImp().SetLoc(
"5..12");
14715 expected_errors[0]->SetErrMsg(
"ImpFeat loc 5..12 does not equal feature location 1..11 for feature misc_feature");
14716 eval = validator.Validate(seh, options);
14729 feat->
SetCit().SetPub().push_back(pub);
14732 feat->
SetCit().SetPub().push_back(pub2);
14736 "Citation on feature has unexpected internal Pub-equiv"));
14738 eval = validator.Validate(seh, options);
14750 feat->
SetData().SetImp().SetKey(
"CDS");
14751 feat->
AddQualifier(
"translation",
"unexpected translation");
14756 "ImpFeat CDS with /translation found"));
14758 eval = validator.Validate(seh, options);
14769 feat->
SetData().SetImp().SetKey(
"CDS");
14774 "ImpFeat CDS should be pseudo"));
14776 eval = validator.Validate(seh, options);
14782 scope.RemoveTopLevelSeqEntry(seh);
14786 seh = scope.AddTopLevelSeqEntry(*entry);
14789 eval = validator.Validate(seh, options);
14802 feat->
SetData().SetRna().SetExt().SetName(
"fake protein name");
14803 feat->
SetProduct().SetWhole().SetLocal().SetStr(
"not_present_ever");
14808 "No CDS location match for 1 mRNA"));
14810 "Unable to fetch mRNA transcript 'lcl|not_present_ever'"));
14812 "Product Bioseq of mRNA feature is not packaged in the record"));
14814 "Product of mRNA feature (lcl|not_present_ever) not packaged in genomic product set"));
14816 eval = validator.Validate(seh, options);
14829 anticodon_loc->
SetMix().Set().front()->SetInt().SetFrom(8);
14830 anticodon_loc->
SetMix().Set().front()->SetInt().SetTo(8);
14831 anticodon_loc->
SetMix().Set().back()->SetInt().SetFrom(9);
14832 anticodon_loc->
SetMix().Set().back()->SetInt().SetTo(10);
14833 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().Assign(*anticodon_loc);
14834 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'F');
14840 "Adjacent intervals in Anticodon"));
14842 eval = validator.Validate(seh, options);
14848 scope.RemoveTopLevelSeqEntry(seh);
14852 loc->
SetMix().Set().front()->SetInt().SetFrom(0);
14853 loc->
SetMix().Set().front()->SetInt().SetTo(7);
14854 loc->
SetMix().Set().back()->SetInt().SetFrom(8);
14855 loc->
SetMix().Set().back()->SetInt().SetTo(15);
14858 seh = scope.AddTopLevelSeqEntry(*entry);
14860 "Location: Adjacent intervals in SeqLoc [(lcl|good:1-8, 9-16)]"));
14862 eval = validator.Validate(seh, options);
14876 gene1->
SetData().SetGene().SetLocus(
"see_it_twice");
14881 gene2->
SetData().SetGene().SetLocus(
"see_it_twice");
14886 eval = validator.Validate(seh, options);
14889 scope.RemoveTopLevelSeqEntry(seh);
14890 gene2->
SetData().SetGene().SetLocus(
"See_It_Twice");
14891 seh = scope.AddTopLevelSeqEntry(*entry);
14892 eval = validator.Validate(seh, options);
14897 scope.RemoveTopLevelSeqEntry(seh);
14900 seh = scope.AddTopLevelSeqEntry(*entry);
14902 "Features have identical intervals, but labels differ"));
14904 "Colliding names (with different capitalization) in gene features, but feature locations are identical"));
14906 eval = validator.Validate(seh, options);
14911 scope.RemoveTopLevelSeqEntry(seh);
14915 seh = scope.AddTopLevelSeqEntry(*entry);
14917 eval = validator.Validate(seh, options);
14928 gene->
SetData().SetGene().SetLocus(
"multi-interval");
14934 "Gene feature on non-segmented sequence should not have multiple intervals"));
14937 eval = validator.Validate(seh, options);
14952 "Duplicate feature"));
14954 eval = validator.Validate(seh, options);
14957 BOOST_CHECK_EQUAL(dups.
size(), 1);
14962 scope.RemoveTopLevelSeqEntry(seh);
14963 feat1->
SetData().SetRegion(
"region");
14964 feat2->
SetData().SetRegion(
"region");
14965 seh = scope.AddTopLevelSeqEntry(*entry);
14966 eval = validator.Validate(seh, options);
14970 BOOST_CHECK_EQUAL(dups.
size(), 1);
14974 scope.RemoveTopLevelSeqEntry(seh);
14977 seh = scope.AddTopLevelSeqEntry(*entry);
14979 eval = validator.Validate(seh, options);
14983 BOOST_CHECK_EQUAL(dups.
size(), 0);
14986 scope.RemoveTopLevelSeqEntry(seh);
14987 feat1->
SetData().SetImp().SetKey(
"variation");
14988 feat2->
SetData().SetImp().SetKey(
"variation");
14989 seh = scope.AddTopLevelSeqEntry(*entry);
14991 "Duplicate feature"));
14992 eval = validator.Validate(seh, options);
14996 BOOST_CHECK_EQUAL(dups.
size(), 1);
15000 scope.RemoveTopLevelSeqEntry(seh);
15003 seh = scope.AddTopLevelSeqEntry(*entry);
15005 eval = validator.Validate(seh, options);
15010 BOOST_CHECK_EQUAL(dups.
size(), 0);
15013 scope.RemoveTopLevelSeqEntry(seh);
15029 seh = scope.AddTopLevelSeqEntry(*entry);
15037 "Duplicate feature"));
15039 "Duplicate feature"));
15042 eval = validator.Validate(seh, options);
15046 BOOST_CHECK_EQUAL(dups.
size(), 2);
15050 scope.RemoveTopLevelSeqEntry(seh);
15051 cds1->
SetId().SetLocal().SetId(1);
15052 cds2->
SetId().SetLocal().SetId(2);
15053 mrna1->
SetId().SetLocal().SetId(3);
15054 mrna2->
SetId().SetLocal().SetId(4);
15060 seh = scope.AddTopLevelSeqEntry(*entry);
15062 eval = validator.Validate(seh, options);
15066 BOOST_CHECK_EQUAL(dups.
size(), 0);
15079 cds->
SetProduct().SetWhole().SetGenbank().SetName(name);
15080 prot_seq->
SetSeq().
SetId().front()->SetGenbank().SetName(name);
15081 prot_feat->
SetLocation().SetInt().SetId().SetGenbank().SetName(name);
15093 "Feature product should not put an accession in the Textseq-id 'name' slot"));
15095 "Protein bioseq has Textseq-id 'name' that looks like it is derived from a nucleotide accession"));
15097 eval = validator.Validate(seh, options);
15102 scope.RemoveTopLevelSeqEntry(seh);
15104 seh = scope.AddTopLevelSeqEntry(*entry);
15106 "Feature product should not use Textseq-id 'name' slot"));
15108 "Protein bioseq has Textseq-id 'name' and no accession"));
15110 eval = validator.Validate(seh, options);
15116 scope.RemoveTopLevelSeqEntry(seh);
15120 seh = scope.AddTopLevelSeqEntry(*entry);
15122 "Capitalization change from product location on feature to product sequence"));
15124 eval = validator.Validate(seh, options);
15137 rna_feat->
SetProduct().SetWhole().SetLocal().SetStr(
"rna");
15140 rna_seq->
SetSeq().
SetId().front()->SetLocal().SetStr(
"rna");
15150 "No CDS location match for 1 mRNA"));
15152 "Type of RNA does not match MolInfo of product Bioseq"));
15154 eval = validator.Validate(seh, options);
15159 scope.RemoveTopLevelSeqEntry(seh);
15162 seh = scope.AddTopLevelSeqEntry(*entry);
15163 eval = validator.Validate(seh, options);
15165 "No CDS location match for 1 mRNA"));
15171 scope.RemoveTopLevelSeqEntry(seh);
15173 rna_feat->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'N');
15174 rna_feat->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetId().SetLocal().SetStr(
"good");
15175 rna_feat->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(11);
15176 rna_feat->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(13);
15177 seh = scope.AddTopLevelSeqEntry(*entry);
15179 "Type of RNA does not match MolInfo of product Bioseq"));
15181 eval = validator.Validate(seh, options);
15186 scope.RemoveTopLevelSeqEntry(seh);
15188 seh = scope.AddTopLevelSeqEntry(*entry);
15190 eval = validator.Validate(seh, options);
15194 scope.RemoveTopLevelSeqEntry(seh);
15196 rna_feat->
SetData().SetRna().SetExt().SetName(
"a ribosomal RNA");
15197 seh = scope.AddTopLevelSeqEntry(*entry);
15199 "Type of RNA does not match MolInfo of product Bioseq"));
15200 eval = validator.Validate(seh, options);
15205 scope.RemoveTopLevelSeqEntry(seh);
15207 seh = scope.AddTopLevelSeqEntry(*entry);
15209 eval = validator.Validate(seh, options);
15222 cds->
SetData().SetCdregion();
15223 cds->
SetProduct().SetWhole().SetLocal().SetStr(
"not_present_ever");
15231 "Illegal start codon used. Wrong genetic code [0] or protein should be partial"));
15233 "Missing stop codon"));
15235 "Unable to find product Bioseq from CDS feature"));
15237 eval = validator.Validate(seh, options);
15241 expected_errors[2]->SetErrMsg(
"Expected CDS product absent");
15243 eval = validator.Validate(seh, options);
15250 eval = validator.Validate(seh, options);
15257 eval = validator.Validate(seh, options);
15261 scope.RemoveTopLevelSeqEntry(seh);
15264 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"AATAAGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
15269 seh = scope.AddTopLevelSeqEntry(*entry);
15270 eval = validator.Validate(seh, options);
15281 trna->
SetData().SetRna().SetExt().SetTRNA().SetCodon().push_back(64);
15286 "tRNA codon value 64 is greater than maximum 63"));
15288 eval = validator.Validate(seh, options);
15299 trna->
SetData().SetRna().SetExt().SetTRNA().ResetAa();
15304 "Missing tRNA amino acid"));
15306 eval = validator.Validate(seh, options);
15310 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(29);
15313 "Codons predicted from anticodon (AAA) cannot produce amino acid ( /OTHER)"));
15315 "Invalid tRNA amino acid"));
15317 eval = validator.Validate(seh, options);
15332 "Feature has gene locus cross-reference but no equivalent gene feature exists"));
15334 "There are 1 gene xrefs and no gene features in this record."));
15336 eval = validator.Validate(seh, options);
15349 nseq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"CCCATGAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
15350 pseq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"MRKTEIN");
15355 utr5->
SetData().SetImp().SetKey(
"5'UTR");
15359 utr3->
SetData().SetImp().SetKey(
"3'UTR");
15366 "5'UTR does not abut CDS"));
15368 "CDS does not abut 3'UTR"));
15371 eval = validator.Validate(seh, options);
15374 scope.RemoveTopLevelSeqEntry(seh);
15379 seh = scope.AddTopLevelSeqEntry(*entry);
15381 expected_errors[0]->SetErrMsg(
"5'UTR is not on plus strand");
15382 expected_errors[1]->SetErrMsg(
"3'UTR is not on plus strand");
15383 eval = validator.Validate(seh, options);
15386 scope.RemoveTopLevelSeqEntry(seh);
15388 seh = scope.AddTopLevelSeqEntry(*entry);
15389 expected_errors[0]->SetErrMsg(
"3'UTR is not on minus strand");
15390 expected_errors[1]->SetErrMsg(
"5'UTR is not on minus strand");
15391 eval = validator.Validate(seh, options);
15405 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"ExceptionProblem",
"Exception explanation text is also found in feature comment"));
15413 eval = validator.Validate(seh, options);
15417 feat->
SetExcept_text(
"RNA editing, rearrangement required for product");
15418 eval = validator.Validate(seh, options);
15423 expected_errors[0]->SetErrMsg(
"Reasons given in citation exception does not have the required citation");
15424 eval = validator.Validate(seh, options);
15428 feat->
SetExcept_text(
"annotated by transcript or proteomic data");
15429 expected_errors[0]->SetErrMsg(
"Annotated by transcript or proteomic data exception does not have the required inference qualifier");
15430 eval = validator.Validate(seh, options);
15435 expected_errors[0]->SetErrMsg(
"not a legal exception is not a legal exception explanation");
15437 eval = validator.Validate(seh, options);
15441 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
15442 scope.RemoveTopLevelSeqEntry(seh);
15443 seh = scope.AddTopLevelSeqEntry(*entry);
15444 feat->
SetLocation().SetInt().SetId().SetOther().SetAccession(
"NC_123456");
15448 feat->
SetExcept_text(
"unclassified transcription discrepancy, RNA editing");
15449 feat->
SetComment(
"misc_feature needs a comment");
15450 expected_errors[0]->SetErrMsg(
"Genome processing exception should not be combined with other explanations");
15453 eval = validator.Validate(seh, options);
15459 expected_errors.push_back(
new CExpectedError(
"ref|NC_123456|",
eDiag_Warning,
"ExceptionProblem",
"not a legal exception is not a legal exception explanation"));
15461 eval = validator.Validate(seh, options);
15468 feat->
SetData().SetRna().SetExt().SetName(
"23S ribosomal RNA");
15470 feat->
SetExcept_text(
"23S ribosomal RNA and 5S ribosomal RNA overlap");
15472 eval = validator.Validate(seh, options);
15474 feat->
SetExcept_text(
"5S ribosomal RNA and 16S ribosomal RNA overlap");
15475 eval = validator.Validate(seh, options);
15477 feat->
SetExcept_text(
"5S ribosomal RNA and 23S ribosomal RNA overlap");
15478 eval = validator.Validate(seh, options);
15480 feat->
SetExcept_text(
"23S ribosomal RNA and 16S ribosomal RNA overlap");
15481 eval = validator.Validate(seh, options);
15512 vector<CExpectedError*> expected_errors;
15520 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"SeqDataLenWrong",
"Bioseq.seq_data too short [60] for given length [65]"));
15522 eval = validator.
Validate(seh, options);
15526 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data is larger [60] than given length [55]");
15527 eval = validator.
Validate(seh, options);
15532 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'A');
15533 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'T');
15534 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'G');
15535 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'C');
15543 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data is larger [64] than given length [60]");
15544 eval = validator.
Validate(seh, options);
15547 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbi2na().Set().pop_back();
15548 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbi2na().Set().pop_back();
15549 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data too short [56] for given length [60]");
15550 eval = validator.
Validate(seh, options);
15557 eval = validator.
Validate(seh, options);
// Continue the length checks with NCBI4na data and with segmented and
// reference extensions.
// Append four more elements to the NCBI4na data.
15560 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbi4na().Set().push_back(
'1');
15561 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbi4na().Set().push_back(
'8');
15562 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbi4na().Set().push_back(
'1');
15563 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbi4na().Set().push_back(
'8');
15564 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data is larger [64] than given length [60]");
15565 eval = validator.
Validate(seh, options);
// Segmented extension: add loc as a segment and expect the too-short message.
15575 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(loc);
15576 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data too short [56] for given length [60]");
15577 eval = validator.
Validate(seh, options);
// Move the end of loc to 63 and expect the larger-than-length message.
15580 loc->
SetInt().SetTo(63);
15581 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data is larger [64] than given length [60]");
15582 eval = validator.
Validate(seh, options);
// Reference extension: interval 0..55 on id.
15586 entry->
SetSeq().
SetInst().SetExt().SetRef().SetInt().SetId(*
id);
15587 entry->
SetSeq().
SetInst().SetExt().SetRef().SetInt().SetFrom(0);
15588 entry->
SetSeq().
SetInst().SetExt().SetRef().SetInt().SetTo(55);
15589 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data too short [56] for given length [60]");
15590 eval = validator.
Validate(seh, options);
// Stretch the reference interval to end at 63.
15593 entry->
SetSeq().
SetInst().SetExt().SetRef().SetInt().SetTo(63);
15594 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data is larger [64] than given length [60]");
15595 eval = validator.
Validate(seh, options);
// Delta-sequence variants of the length checks. The line below is the tail of
// a statement whose beginning is not visible here.
15602 "Bioseq.seq_data too short [56] for given length [60]"));
// A single delta range 0..55 on id.
15606 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*
id, 0, 55);
15607 eval = validator.
Validate(seh, options);
// Two delta ranges, 0..30 and 40..72, paired with the larger-than-length message.
15610 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*
id, 0, 30);
15611 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*
id, 40, 72);
15612 expected_errors[0]->SetErrMsg(
"Bioseq.seq_data is larger [64] than given length [60]");
15613 eval = validator.
Validate(seh, options);
// Range 0..59 followed by delta_seq as a second segment; the expected
// message refers to segment 2.
15617 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*
id, 0, 59);
15619 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(delta_seq);
15620 expected_errors[0]->SetErrMsg(
"NULL pointer in delta seq_ext valnode (segment 2)");
15622 eval = validator.
Validate(seh, options);
// delta_seq2 covers 0..485 on id; the expected message reports an extent of
// 486 against a length of 485.
15627 delta_seq2->SetLoc().SetInt().SetId(*
id);
15628 delta_seq2->SetLoc().SetInt().SetFrom(0);
15629 delta_seq2->SetLoc().SetInt().SetTo(485);
15631 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(delta_seq2);
15632 expected_errors[0]->SetErrMsg(
"Seq-loc extent (486) greater than length of gb|AY123456| (485)");
15634 eval = validator.
Validate(seh, options);
// Coding-region conflict flag checks.
// Set the conflict flag on the CDS and validate.
15645 cds_feat->
SetData().SetCdregion().SetConflict(
true);
15650 "Coding region conflict flag should not be set"));
15652 eval = validator.Validate(seh, options);
// Second scenario: conflict flag set and the protein data replaced by a
// nine-residue sequence.
15663 cds_feat->
SetData().SetCdregion().SetConflict(
true);
15665 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MPRKTEIXX");
15666 prot->SetSeq().SetInst().SetLength(9);
// First feature of the first annotation on the protein sequence.
15667 CRef<CSeq_feat> prot_feat =
prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
15673 "Coding region conflict flag is set"));
15675 eval = validator.Validate(seh, options);
// Gene locus_tag checks.
// A locus_tag containing spaces.
15686 gene->
SetData().SetGene().SetLocus_tag(
"a b c");
15691 "Gene locus_tag 'a b c' should be a single word without any spaces"));
15693 eval = validator.Validate(seh, options);
// Expected messages for an old_locus_tag equal to the locus_tag; the code
// that adds the qualifier is not visible here.
15698 "old_locus_tag has same value as gene locus_tag"));
15700 "Gene locus_tag and old_locus_tag 'a b c' match"));
15701 eval = validator.Validate(seh, options);
// Locus and locus_tag set to the same value.
15706 gene->
SetData().SetGene().SetLocus_tag(
"abc");
15707 gene->
SetData().SetGene().SetLocus(
"abc");
15709 "Gene locus and locus_tag 'abc' match"));
15711 eval = validator.Validate(seh, options);
// Clear the locus before the old_locus_tag comma check.
15715 gene->
SetData().SetGene().ResetLocus();
15718 "old_locus_tag has comma, multiple old_locus_tags should be split into separate qualifiers"));
15720 eval = validator.Validate(seh, options);
// Alternative start codon exception check.
// Validation run whose setup is not visible here.
15739 eval = validator.Validate(seh, options);
// Take the entry out of the scope, switch the nucleotide ID and the CDS
// location ID to accession NM_123456, then register the entry again.
15745 scope.RemoveTopLevelSeqEntry(seh);
15746 nseq->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NM_123456");
15747 cds->
SetLocation().SetInt().SetId().SetOther().SetAccession(
"NM_123456");
15748 seh = scope.AddTopLevelSeqEntry(*entry);
15750 "Unnecessary alternative start codon exception"));
15752 eval = validator.Validate(seh, options);
// Give the two gene features (mgene and cgene) different locus values and
// expect the mRNA-versus-genomic gene mismatch message.
15766 mgene->
SetData().SetGene().SetLocus(
"locus1");
15771 cgene->
SetData().SetGene().SetLocus(
"locus2");
15776 "Gene on mRNA bioseq does not match gene on genomic bioseq"));
15778 eval = validator.Validate(seh, options);
// Code-break (transl_except) checks on the CDS.
// Two code-breaks at the same interval 24..26 on lcl|nuc; the expected
// message reports the location as 25-27.
15790 codebreak1->
SetLoc().SetInt().SetId().SetLocal().SetStr(
"nuc");
15791 codebreak1->
SetLoc().SetInt().SetFrom(24);
15792 codebreak1->
SetLoc().SetInt().SetTo(26);
15793 cds->
SetData().SetCdregion().SetCode_break().push_back(codebreak1);
15795 codebreak2->
SetLoc().SetInt().SetId().SetLocal().SetStr(
"nuc");
15796 codebreak2->
SetLoc().SetInt().SetFrom(24);
15797 codebreak2->
SetLoc().SetInt().SetTo(26);
15798 cds->
SetData().SetCdregion().SetCode_break().push_back(codebreak2);
15803 "Multiple code-breaks at same location [lcl|nuc:25-27]"));
15805 eval = validator.Validate(seh, options);
// Second scenario: a single code-break at 24..26; the expected messages
// concern an RNA editing exception (the exception setup is not visible here).
15817 codebreak1->
SetLoc().SetInt().SetId().SetLocal().SetStr(
"nuc");
15818 codebreak1->
SetLoc().SetInt().SetFrom(24);
15819 codebreak1->
SetLoc().SetInt().SetTo(26);
15820 cds->
SetData().SetCdregion().SetCode_break().push_back(codebreak1);
15827 "CDS has both RNA editing /exception and /transl_except qualifiers"));
15829 "CDS has exception but passes translation test"));
15831 eval = validator.Validate(seh, options);
// Protein feature without a name: try a description, an activity and an EC
// number in turn, updating the expected message each time.
15844 prot_feat->
SetData().SetProt().ResetName();
15845 prot_feat->
SetData().SetProt().SetDesc(
"protein description");
15849 "Protein feature has description but no name"));
15851 eval = validator.Validate(seh, options);
// Replace the description with an activity.
15854 prot_feat->
SetData().SetProt().ResetDesc();
15855 prot_feat->
SetData().SetProt().SetActivity().push_back(
"activity");
15856 expected_errors[0]->SetErrMsg(
"Protein feature has function but no name");
15857 eval = validator.Validate(seh, options);
// Replace the activity with an EC number.
15860 prot_feat->
SetData().SetProt().ResetActivity();
15861 prot_feat->
SetData().SetProt().SetEc().push_back(
"1.2.3.4");
15862 expected_errors[0]->SetErrMsg(
"Protein feature has EC number but no name");
15863 eval = validator.Validate(seh, options);
// Finally clear the EC numbers as well, paired with the all-fields-empty
// messages.
15868 "There is a protein feature where all fields are empty"));
15870 "Protein feature has no name"));
15873 prot_feat->
SetData().SetProt().ResetEc();
15874 eval = validator.Validate(seh, options);
// Two mRNAs with distinct product names, paired with the mRNA/CDS count and
// matching messages below.
15889 mrna1->
SetData().SetRna().SetExt().SetName(
"product 1");
15893 mrna2->
SetData().SetRna().SetExt().SetName(
"product 2");
15899 "mRNA count (2) does not match CDS (1) count for gene"));
15901 "CDS matches 2 mRNAs"));
15903 "No CDS location match for 1 mRNA"));
15906 eval = validator.Validate(seh, options);
// Expected messages for exceptions that are not needed; the feature setup
// is not visible here.
15925 "CDS has exception but passes translation test"));
15927 "mRNA has exception but passes transcription test"));
15930 eval = validator.Validate(seh, options);
// Take the entry out of the scope, mark the imp feature as an exon, register
// the entry again, and expect three splice-site exception messages.
15937 scope.RemoveTopLevelSeqEntry(seh);
15946 exon->
SetData().SetImp().SetKey(
"exon");
15954 seh = scope.AddTopLevelSeqEntry(*entry);
15957 "feature has exception but passes splice site test"));
15959 "feature has exception but passes splice site test"));
15961 "feature has exception but passes splice site test"));
15965 eval = validator.Validate(seh, options);
// General ID with db a and tag good; the gene locus_tag is set to a different
// value (something).
15982 id->SetGeneral().SetDb(
"a");
15983 id->SetGeneral().SetTag().SetStr(
"good");
// Also attach lcl_id to the protein sequence.
15987 prot->SetSeq().SetId().push_back(lcl_id);
15993 gene->
SetData().SetGene().SetLocus_tag(
"something");
16000 "Gene locus_tag does not match general ID of product"));
16003 eval = validator.Validate(seh, options);
// Expected message for a CDS with a product under a pseudogene; the setup is
// not visible here.
16021 "A coding region overlapped by a pseudogene should not have a product"));
16024 eval = validator.Validate(seh, options);
// Two genes with different locus values, paired with the
// identical-length-genes message.
16037 gene1->
SetData().SetGene().SetLocus(
"first");
16041 gene2->
SetData().SetGene().SetLocus(
"second");
16048 "Feature overlapped by 2 identical-length genes but has no cross-reference"));
16051 eval = validator.Validate(seh, options);
// Add a citation to the misc feature; the expected message names uid 2 as
// missing from the record.
16063 misc->
SetCit().SetPub().push_back(pub);
16068 "Citation on feature refers to uid [2] not on a publication in the record"));
16071 eval = validator.Validate(seh, options);
// Build a mix location that contains another mix: loc1, then loc4, where
// loc4 itself holds loc2 and loc3. All intervals are on lcl|good.
16082 loc1->
SetInt().SetId().SetLocal().SetStr(
"good");
16083 loc1->
SetInt().SetFrom(0);
16084 loc1->
SetInt().SetTo(10);
16086 loc2->
SetInt().SetId().SetLocal().SetStr(
"good");
16087 loc2->
SetInt().SetFrom(20);
16088 loc2->
SetInt().SetTo(30);
16090 loc3->
SetInt().SetId().SetLocal().SetStr(
"good");
16091 loc3->
SetInt().SetFrom(40);
16092 loc3->
SetInt().SetTo(50);
// loc4 is the inner mix.
16094 loc4->
SetMix().Set().push_back(loc2);
16095 loc4->
SetMix().Set().push_back(loc3);
// The feature location is the outer mix.
16097 misc->
SetLocation().SetMix().Set().push_back(loc1);
16098 misc->
SetLocation().SetMix().Set().push_back(loc4);
// Expected messages print the intervals one-based (1-11, 21-31, 41-51).
16104 "Location: SeqLoc [[lcl|good:1-11, [21-31, 41-51]]] has nested SEQLOC_MIX elements"));
16106 "Product: SeqLoc [[lcl|good:1-11, [21-31, 41-51]]] has nested SEQLOC_MIX elements"));
16108 "Self-referential feature product"));
16110 "Feature products should be entire sequences."));
16112 eval = validator.Validate(seh, options);
// Expected message about the genetic code; the setup is not visible here.
16127 "Use the proper genetic code, if available, or set transl_excepts on specific codons"));
16130 eval = validator.Validate(seh, options);
// Author first name containing a digit.
16142 auth->
SetName().SetName().SetFirst(
"F1rst");
16150 "Bad characters in author F1rst"));
16153 eval = validator.Validate(seh, options);
// Transcript shorter than its product; the setup is not visible here.
16170 "No CDS location match for 1 mRNA"));
16172 "mRNA overlaps or contains CDS but does not completely contain intervals"));
16174 "Transcript length [26] less than product length [27], but tail is 100% polyA"));
16176 eval = validator.Validate(seh, options);
// Replace the transcript sequence while the entry is out of the scope, then
// update the message in slot 3 to match the new product length.
16179 scope.RemoveTopLevelSeqEntry(seh);
16182 transcript->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATAAACTAAAAAAAAAAAAAAAAAATAA");
16184 seh = scope.AddTopLevelSeqEntry(*entry);
16185 expected_errors[3]->SetErrMsg(
"Transcript length [26] less than product length [46], but tail >= 95% polyA");
16186 eval = validator.Validate(seh, options);
// Point the second mRNA at the local product nuc; the expected messages
// below report non-unique mRNA products.
16197 second_mrna->
SetProduct().SetWhole().SetLocal().SetStr(
"nuc");
16202 "mRNA products are not unique"));
16204 "CDS matches 2 mRNAs"));
16206 "No CDS location match for 1 mRNA"));
16208 "Duplicate feature"));
16210 "Identical transcript IDs found on multiple mRNAs"));
16213 eval = validator.Validate(seh, options);
// Re-point the second mRNA product at nuc_id while the entry is out of the
// scope; the expected messages now describe unique product locations.
16216 scope.RemoveTopLevelSeqEntry(seh);
16223 second_mrna->
SetProduct().SetWhole().Assign(*nuc_id);
16224 seh = scope.AddTopLevelSeqEntry(*entry);
16229 "CDS matches 2 mRNAs, but product locations are unique"));
16231 "No CDS location match for 1 mRNA"));
16234 "Protein bioseq should be product of CDS feature on contig, but is not"));
16237 eval = validator.Validate(seh, options);
// Two source features with the same taxname and lineage; the second one is
// placed on a ten-base interval starting at second_start.
16247 src1->
SetData().SetBiosrc().SetOrg().SetTaxname(
"Homo sapiens");
16248 src1->
SetData().SetBiosrc().SetOrg().SetOrgname().SetLineage(lineage);
16251 src2->
SetData().SetBiosrc().SetOrg().SetTaxname(
"Homo sapiens");
16252 src2->
SetData().SetBiosrc().SetOrg().SetOrgname().SetLineage(lineage);
16253 src2->
SetLocation().SetInt().SetFrom(second_start);
16254 src2->
SetLocation().SetInt().SetTo(second_start + 9);
16261 "Multiple equivalent source features should be combined into one multi-interval feature"));
16266 eval = validator.Validate(seh, options);
// Same idea for publication features: feat1 and feat2 each receive a pub.
16298 feat1->
SetData().SetPub().SetPub().Set().push_back(pub1);
16302 feat2->
SetData().SetPub().SetPub().Set().push_back(pub2);
16308 "Multiple equivalent publication features should be combined into one multi-interval feature"));
16311 eval = validator.Validate(seh, options);
// A source feature and a publication feature, with the expected message that
// a full-length publication feature should be a descriptor.
16322 src1->
SetData().SetBiosrc().SetOrg().SetTaxname(
"Homo sapiens");
16323 src1->
SetData().SetBiosrc().SetOrg().SetOrgname().SetLineage(
"some lineage");
16328 feat1->
SetData().SetPub().SetPub().Set().push_back(pub1);
16336 "Publication feature is full length, should be descriptor"));
16339 eval = validator.Validate(seh, options);
// While the entry is out of the scope, configure a second source feature
// with a different taxname, then register the entry again.
16344 scope.RemoveTopLevelSeqEntry(seh);
16346 src2->
SetData().SetBiosrc().SetOrg().SetTaxname(
"Drosophila melanogaster");
16347 src2->
SetData().SetBiosrc().SetOrg().SetOrgname().SetLineage(
"some lineage");
16349 seh = scope.AddTopLevelSeqEntry(*entry);
16352 "Features have identical intervals, but labels differ"));
16356 "Multiple full-length source features, should only be one if descriptor is transgenic"));
16358 "Publication feature is full length, should be descriptor"));
16360 eval = validator.Validate(seh, options);
// Feature comments that repeat a field value.
// Gene locus equal to the comment; the comment is presumably set on a line
// not visible here.
16374 gene->
SetData().SetGene().SetLocus(
"redundant_g");
16380 "Comment has same value as gene locus"));
16382 eval = validator.Validate(seh, options);
// Move the value from locus to locus_tag.
16386 gene->
SetData().SetGene().ResetLocus();
16387 gene->
SetData().SetGene().SetLocus_tag(
"redundant_g");
16389 "Comment has same value as gene locus_tag"));
16391 eval = validator.Validate(seh, options);
// Expected messages for an old_locus_tag equal to the locus_tag.
16399 "old_locus_tag has same value as gene locus_tag"));
16401 "Gene locus_tag and old_locus_tag 'redundant_g' match"));
16404 eval = validator.Validate(seh, options);
// Protein name, comment and description all set to the same string.
16412 prot->SetData().SetProt().SetName().front().assign(
"redundant_p");
16413 prot->SetComment(
"redundant_p");
16414 prot->SetData().SetProt().SetDesc(
"redundant_p");
16417 "Comment has same value as protein name"));
16419 "Comment has same value as protein description"));
16421 eval = validator.Validate(seh, options);
// Make the feature a coding region whose product is the local ID prot_id and
// whose location is an interval on the local ID nuc_id.
16431 cds->
SetData().SetCdregion();
16432 cds->
SetProduct().SetWhole().SetLocal().SetStr(prot_id);
16433 cds->
SetLocation().SetInt().SetId().SetLocal().SetStr(nuc_id);
16454 nseq->
SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
// Finish the nucleotide sequence: declared length 360, local ID nuc, and the
// descriptor mdesc; then add the entry to the set.
16455 nseq->
SetInst().SetLength(360);
16458 id->SetLocal().SetStr(
"nuc");
16459 nseq->
SetId().push_back(
id);
16463 nseq->
SetDescr().Set().push_back(mdesc);
16468 set->SetSeq_set().push_back(nentry);
// Iterator over the feature table of the first annotation on the set.
16491 CSeq_annot::TData::TFtable::iterator cds_it = entry->
SetSet().
SetAnnot().front()->SetData().SetFtable().begin();
16502 "11 out of 12 CDSs unmatched"));
16505 eval = validator.Validate(seh, options);
// The bodies of the two loops below are not visible here; the first runs
// while the entry is out of the scope, the second after it is registered again.
16508 scope.RemoveTopLevelSeqEntry(seh);
16509 for (
int i = 0;
i < 3;
i++) {
16514 seh = scope.AddTopLevelSeqEntry(*entry);
16515 for (
int i = 0;
i < 8;
i++) {
16521 eval = validator.Validate(seh, options);
// A second coding region (bad_cds), initially with no product assigned in
// the visible lines.
16536 bad_cds->
SetData().SetCdregion();
16545 "2 CDS features have 1 product references"));
// Expect an error-level MissingCDSproduct diagnostic on lcl|nuc.
16546 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"MissingCDSproduct",
"Expected CDS product absent"));
16549 eval = validator.Validate(seh, options);
// Give bad_cds the local product prot while the entry is out of the scope;
// the expected messages then report a shared product.
16554 scope.RemoveTopLevelSeqEntry(seh);
16555 bad_cds->
SetProduct().SetWhole().SetLocal().SetStr(
"prot");
16556 seh = scope.AddTopLevelSeqEntry(*entry);
16559 "CDS products are not unique"));
16561 "Same product Bioseq from multiple CDS features"));
16563 eval = validator.Validate(seh, options);
// Switch to mRNA features: drop the annotations on nuc and give the mRNA a
// GenBank product accession; the expected messages call this a far product.
16568 scope.RemoveTopLevelSeqEntry(seh);
16569 nuc->SetSeq().ResetAnnot();
16574 mrna->
SetProduct().SetWhole().SetGenbank().SetAccession(
"AY123456");
16577 seh = scope.AddTopLevelSeqEntry(*entry);
16579 "2 mRNA features have 1 product references"));
16581 "Transcript length [27] less than (far) product length [485], and tail < 95% polyA"));
16583 "There are 16 mismatches out of 27 bases between the transcript and (far) product sequence"));
16585 "Inconsistent: Product= partial, Location= complete, Feature.partial= FALSE"));
16587 "Type of RNA does not match MolInfo of product Bioseq"));
16590 eval = validator.Validate(seh, options);
// Give bad_mrna the same product accession; the per-mRNA messages then
// appear once for each of the two mRNAs.
16595 scope.RemoveTopLevelSeqEntry(seh);
16596 bad_mrna->
SetProduct().SetWhole().SetGenbank().SetAccession(
"AY123456");
16597 seh = scope.AddTopLevelSeqEntry(*entry);
16599 "mRNA products are not unique"));
16601 "Inconsistent: Product= partial, Location= complete, Feature.partial= FALSE"));
16603 "Identical transcript IDs found on multiple mRNAs"));
16605 "Transcript length [27] less than (far) product length [485], and tail < 95% polyA"));
16607 "There are 16 mismatches out of 27 bases between the transcript and (far) product sequence"));
16609 "Type of RNA does not match MolInfo of product Bioseq"));
16611 "Transcript length [27] less than (far) product length [485], and tail < 95% polyA"));
16613 "There are 16 mismatches out of 27 bases between the transcript and (far) product sequence"));
16615 "Inconsistent: Product= partial, Location= complete, Feature.partial= FALSE"));
16617 "Type of RNA does not match MolInfo of product Bioseq"));
16620 eval = validator.Validate(seh, options);
// Give the feature a bond location between points 0 and 5 on the local ID id.
16629 feat->
SetLocation().SetBond().SetA().SetId().SetLocal().SetStr(
id);
16630 feat->
SetLocation().SetBond().SetA().SetPoint(0);
16631 feat->
SetLocation().SetBond().SetB().SetId().SetLocal().SetStr(
id);
16632 feat->
SetLocation().SetBond().SetB().SetPoint(5);
// Expected messages; the features they belong to are set up on lines not
// visible here.
16657 "Strand 'other' in location"));
16659 "Bond location should only be on bond features"));
16661 "Strand 'other' in location"));
16663 "Bond location should only be on bond features"));
16665 "Strand 'other' in location"));
16667 "Bond location should only be on bond features"));
16669 "Strand 'other' in location"));
16671 eval = validator.Validate(seh, options);
// Gene cross-reference by locus with no gene feature in the record.
16687 "Feature has gene locus cross-reference but no equivalent gene feature exists"));
16689 "There are 1 gene xrefs and no gene features in this record."));
16691 eval = validator.Validate(seh, options);
// Same check for a cross-reference by locus_tag.
16699 "Feature has gene locus_tag cross-reference but no equivalent gene feature exists"));
16701 "There are 1 gene xrefs and no gene features in this record."));
16703 eval = validator.Validate(seh, options);
// Feature ID cross-reference checks. Assign local feature IDs 1..4 to the
// CDS, mRNA, gene and misc features.
16723 cds->
SetId().SetLocal().SetId(1);
16727 mrna->
SetId().SetLocal().SetId(2);
16732 gene->
SetId().SetLocal().SetId(3);
16737 misc->
SetId().SetLocal().SetId(4);
// Attach x1 to the CDS, paired with the no-id-or-data message.
16743 cds->
SetXref().push_back(x1);
16746 "SeqFeatXref with no id or data field"));
16748 eval = validator.Validate(seh, options);
// Each step below takes the entry out of the scope, changes cross-references
// on lines not visible here, registers the entry again and validates.
16756 scope.RemoveTopLevelSeqEntry(seh);
16758 seh = scope.AddTopLevelSeqEntry(*entry);
16763 "Cross-references are not between CDS and mRNA pair or between a gene and a CDS or mRNA (misc_feature,CDS)"));
16765 "Cross-references are not between CDS and mRNA pair or between a gene and a CDS or mRNA (CDS,misc_feature)"));
16767 eval = validator.Validate(seh, options);
16773 scope.RemoveTopLevelSeqEntry(seh);
16775 seh = scope.AddTopLevelSeqEntry(*entry);
16779 "Cross-referenced feature does not have its own cross-reference"));
16781 eval = validator.Validate(seh, options);
16789 scope.RemoveTopLevelSeqEntry(seh);
16791 seh = scope.AddTopLevelSeqEntry(*entry);
16794 eval = validator.Validate(seh, options);
16798 scope.RemoveTopLevelSeqEntry(seh);
16800 seh = scope.AddTopLevelSeqEntry(*entry);
16802 eval = validator.Validate(seh, options);
16806 scope.RemoveTopLevelSeqEntry(seh);
16808 seh = scope.AddTopLevelSeqEntry(*entry);
16810 eval = validator.Validate(seh, options);
16814 scope.RemoveTopLevelSeqEntry(seh);
16821 seh = scope.AddTopLevelSeqEntry(*entry);
16823 eval = validator.Validate(seh, options);
// Another gene and a gene xref on the CDS both use the locus mismatch.
16828 scope.RemoveTopLevelSeqEntry(seh);
16830 other_gene->
SetData().SetGene().SetLocus(
"mismatch");
16836 seh = scope.AddTopLevelSeqEntry(*entry);
16839 gene_xref->SetData().SetGene().SetLocus(
"mismatch");
16840 cds->
SetXref().push_back(gene_xref);
16843 "Feature gene xref does not match Feature ID cross-referenced gene feature"));
16844 eval = validator.Validate(seh, options);
// Change the gene xref locus to gene locus and validate again.
16850 scope.RemoveTopLevelSeqEntry(seh);
16851 gene_xref->SetData().SetGene().SetLocus(
"gene locus");
16854 seh = scope.AddTopLevelSeqEntry(*entry);
16857 eval = validator.Validate(seh, options);
// A series of short checks; most of the feature setup is not visible here.
// tRNA without an encoded amino acid.
16872 "Missing encoded amino acid qualifier in tRNA"));
16875 eval = validator.Validate(seh, options);
// Two features sharing local feature ID 1.
16886 feat->
SetId().SetLocal().SetId(1);
16888 gene->
SetId().SetLocal().SetId(1);
16893 "Colliding feature ID 1"));
16895 "Colliding feature ID 1"));
16897 eval = validator.Validate(seh, options);
// polyA_signal feature, paired with the should-be-a-range message.
16908 feat->
SetData().SetImp().SetKey(
"polyA_signal");
16913 "PolyA_signal should be a range"));
16915 eval = validator.Validate(seh, options);
// Old locus tag mismatch between feature and gene.
16936 "Old locus tag on feature (one value) does not match that on gene (another value)"));
16938 "old_locus_tag without inherited locus_tag"));
16940 eval = validator.Validate(seh, options);
// Label the GO term field, paired with the duplicate GO term message.
16950 go_term->
SetLabel().SetStr(
"a go term");
16972 "Duplicate GO term on feature"));
16974 eval = validator.Validate(seh, options);
// Validation run whose setup is not visible here.
16995 eval = validator.Validate(seh, options);
// Inference qualifier checks: the value of the first qualifier on a
// misc_feature is replaced repeatedly and the expected message in slot 0 is
// updated to match.
17012 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
17013 feat->
SetData().SetImp().SetKey(
"misc_feature");
// Value with an unrecognized prefix.
17074 feat->
SetQual().front()->SetVal(
"bad");
17076 "Inference qualifier problem - bad inference prefix (bad)"));
17078 eval = validator.Validate(seh, options);
// Value with nothing after the prefix.
17081 feat->
SetQual().front()->SetVal(
"similar to sequence");
17082 expected_errors[0]->SetErrMsg(
"Inference qualifier problem - bad inference body (similar to sequence)");
17083 eval = validator.Validate(seh, options);
// Value using the same species marker with a profile prefix.
17086 feat->
SetQual().front()->SetVal(
"profile(same species): INSD:AY123456.1");
17087 expected_errors[0]->SetErrMsg(
"Inference qualifier problem - same species misused (profile(same species): INSD:AY123456.1)");
17088 eval = validator.Validate(seh, options);
// Value with two accessions separated by a space.
17091 feat->
SetQual().front()->SetVal(
"similar to RNA sequence: INSD:AY123456.1 INSD:AY123457");
17092 expected_errors[0]->SetErrMsg(
"Inference qualifier problem - spaces in inference (similar to RNA sequence: INSD:AY123456.1 INSD:AY123457)");
17093 eval = validator.Validate(seh, options);
// Accession without a version suffix.
17096 feat->
SetQual().front()->SetVal(
"similar to RNA sequence: INSD:AY123456");
17097 expected_errors[0]->SetErrMsg(
"Inference qualifier problem - bad inference accession version (similar to RNA sequence: INSD:AY123456)");
17098 eval = validator.Validate(seh, options);
// Values with the database prefixes RefSeq and BLAST, each paired with the
// bad accession type message.
17101 feat->
SetQual().front()->SetVal(
"similar to RNA sequence: RefSeq:AY123456.1");
17102 expected_errors[0]->SetErrMsg(
"Inference qualifier problem - bad accession type (similar to RNA sequence: RefSeq:AY123456.1)");
17103 eval = validator.Validate(seh, options);
17106 feat->
SetQual().front()->SetVal(
"similar to RNA sequence: BLAST:AY123456.1");
17107 expected_errors[0]->SetErrMsg(
"Inference qualifier problem - bad accession type (similar to RNA sequence: BLAST:AY123456.1)");
17108 eval = validator.Validate(seh, options);
// Accession field given as a full gi|...|ref|...| identifier.
// NOTE(review): here the expected message is updated after Validate rather
// than before it; this is harmless only if the comparison against
// expected_errors happens on a later line not visible here - confirm.
17111 feat->
SetQual().front()->SetVal(
"similar to AA sequence:RefSeq:gi|21240850|ref|NP_640432.1|");
17112 eval = validator.Validate(seh, options);
17113 expected_errors[0]->SetErrMsg(
"Inference qualifier problem - the value in the accession field is not legal. The only allowed value is accession.version, eg AF123456.1. Problem = (similar to AA sequence:RefSeq:gi|21240850|ref|NP_640432.1|)");
// Two further values are validated without a visible change to the expected
// messages.
17120 feat->
SetQual().front()->SetVal(
"similar to RNA sequence:INSD:ERP003431");
17122 eval = validator.Validate(seh, options);
17126 feat->
SetQual().front()->SetVal(
"similar to RNA sequence:GeneDB:LmjF.01.0090");
17127 eval = validator.Validate(seh, options);
// Replace the protein name with a hypothetical-protein name that embeds an
// accession-like token (XP_123).
17141 prot->SetData().SetProt().ResetName();
17142 prot->SetData().SetProt().SetName().push_back(
"hypothetical protein XP_123");
17147 "Hypothetical protein reference does not match accession"));
17149 eval = validator.Validate(seh, options);
// A coding region whose expected messages describe a self-referential
// product; the product and location setup is not visible here.
17160 cds->
SetData().SetCdregion();
17170 "Self-referential feature product"));
17172 "Inconsistent: Product= complete, Location= partial, Feature.partial= TRUE"));
17174 "Protein product not packaged in nuc-prot set with nucleotide"));
17176 eval = validator.Validate(seh, options);
// ITS and rRNA adjacency: name the rRNA and the spacer, then validate.
17188 rrna->
SetData().SetRna().SetExt().SetName(
"18s ribosomal subunit");
17192 its->
SetData().SetRna().SetExt().SetName(
"internal transcribed spacer 1");
17199 "ITS does not abut adjacent rRNA component"));
17201 eval = validator.Validate(seh, options);
// Re-register the entry after a change not visible here and validate again.
17204 scope.RemoveTopLevelSeqEntry(seh);
17206 seh = scope.AddTopLevelSeqEntry(*entry);
17207 eval = validator.Validate(seh, options);
// Repeat with the 5.8S subunit and spacer 2 names.
17210 rrna->
SetData().SetRna().SetExt().SetName(
"5.8S ribosomal subunit");
17211 its->
SetData().SetRna().SetExt().SetName(
"internal transcribed spacer 2");
17212 eval = validator.Validate(seh, options);
17215 scope.RemoveTopLevelSeqEntry(seh);
17217 seh = scope.AddTopLevelSeqEntry(*entry);
17218 eval = validator.Validate(seh, options);
// Feature location uses the local ID Good (capital G); the expected message
// reports a capitalization difference from the Bioseq identifier.
17229 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"Good");
17233 "Sequence identifier in feature location differs in capitalization with identifier on Bioseq"));
17235 eval = validator.Validate(seh, options);
// Expected messages for a gi of 0; the ID setup is not visible here.
17251 "Invalid GI number"));
17253 "No accession on sequence with gi number"));
17255 "Feature has 1 gi|0 location on Bioseq gi|0"));
17257 eval = validator.Validate(seh, options);
// Gap feature checks. Replace the data of the last delta literal with a
// twelve-character sequence that contains one N.
17267 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CNCATGATGATG");
17270 gap->
SetData().SetImp().SetKey(
"gap");
17276 "Gap feature over 11 real bases"));
17278 eval = validator.Validate(seh, options);
// The gap location changes between runs on lines not visible here; the
// messages track how many real bases and Ns it covers.
17284 "Gap feature location does not match delta gap coordinates"));
17286 "Gap feature over 2 real bases"));
17290 eval = validator.Validate(seh, options);
17295 expected_errors[1]->SetErrMsg(
"Gap feature over 8 real bases and 1 Ns");
17296 eval = validator.Validate(seh, options);
17302 "Gap feature estimated_length 11 does not match 10 feature length"));
17305 eval = validator.Validate(seh, options);
17316 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATAA");
17317 nuc->SetSeq().SetInst().SetLength(366);
17321 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MFFFFFFFFFFPPPPPPPPPPGGGGGGGGGGKKKKKKKKKKFFFFFFFFFFPPPPPPPPPPGGGGGGGGGGKKKKKKKKKKFFFFFFFFFFPPPPPPPPPPGGGGGGGGGGKKKKKKKKKK");
17322 prot->SetSeq().SetInst().SetLength(121);
17326 cds->
SetLocation().SetInt().SetTo(
nuc->GetSeq().GetInst().GetLength()-1);
17343 "ATGTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATAAGGGCCCTTT"
17356 cds->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
17359 mrna->
SetProduct().SetWhole().Assign(*nuc_id);
17376 genomic->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGTTTCTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATAAGGGCCCTTT");
// Expected messages for unclassified exceptions where only one residue or
// base differs.
17381 "CDS has unclassified exception but only difference is 1 mismatches out of 121 residues"));
17383 "mRNA has unclassified exception but only difference is 1 mismatches out of 366 bases"));
17385 eval = validator.Validate(seh, options);
// Whole-location checks; the locations are set on lines not visible here.
17398 cds->
SetData().SetCdregion();
17411 "Strand 'other' in location"));
17413 "Feature may not have whole location"));
17415 "Strand 'other' in location"));
17417 "CDS may not have whole location"));
17419 "Strand 'other' in location"));
17421 "mRNA may not have whole location"));
17423 eval = validator.Validate(seh, options);
// EC number text in the wrong places: appended to the protein name, placed
// in the comment, and an empty EC number entry.
17436 prot->SetData().SetProt().SetName().front().append(
"; EC:1.1.1.10");
17437 prot->SetComment(
"EC:1.1.1.10");
17438 prot->SetData().SetProt().SetEc().push_back(
"");
17442 exon->
SetData().SetImp().SetKey(
"exon");
17448 "Qualifier other than replace has just quotation marks"));
17450 "EC number should not be empty"));
17452 "Apparent EC number in protein title"));
17454 "Apparent EC number in protein comment"));
17456 "EC number should not be empty"));
17458 eval = validator.Validate(seh, options);
// Clear the EC list and register a new set of expected messages.
17463 prot->SetData().SetProt().ResetEc();
17465 "Qualifier other than replace has just quotation marks"));
17467 "EC number should not be empty"));
17469 "Apparent EC number in protein title"));
17471 "Apparent EC number in protein comment"));
17473 "Apparent EC number in CDS comment"));
17476 eval = validator.Validate(seh, options);
// misc feature carrying a standard_name qualifier that marks vector
// contamination.
17487 misc->
AddQualifier(
"standard_name",
"Vector Contamination");
17492 "Vector Contamination region should be trimmed from sequence"));
17495 eval = validator.Validate(seh, options);
// Expected message for a negative-strand feature on a protein; the setup is
// not visible here.
17512 "Feature on protein indicates negative strand"));
17515 eval = validator.Validate(seh, options);
// A protein named as hypothetical or unknown that also has an EC number. The
// name is replaced four times (two spellings, each in two capitalizations)
// and validated after each change.
17526 prot->SetData().SetProt().ResetName();
17527 prot->SetData().SetProt().SetName().push_back(
"Hypothetical protein");
17528 prot->SetData().SetProt().SetEc().push_back(
"1.1.1.20");
17533 "Unknown or hypothetical protein should not have EC number"));
17535 eval = validator.Validate(seh, options);
17538 prot->SetData().SetProt().ResetName();
17539 prot->SetData().SetProt().SetName().push_back(
"hypothetical protein");
17540 eval = validator.Validate(seh, options);
17543 prot->SetData().SetProt().ResetName();
17544 prot->SetData().SetProt().SetName().push_back(
"Unknown protein");
17545 eval = validator.Validate(seh, options);
17548 prot->SetData().SetProt().ResetName();
17549 prot->SetData().SetProt().SetName().push_back(
"unknown protein");
17550 eval = validator.Validate(seh, options);
// gene2 has both a locus_tag and a locus; the xref x added to the misc
// feature carries only the locus_tag.
17564 gene2->
SetData().SetGene().SetLocus_tag(
"locus_tag");
17565 gene2->
SetData().SetGene().SetLocus(
"second locus");
17569 x->SetData().SetGene().SetLocus_tag(
"locus_tag");
17570 misc->
SetXref().push_back(x);
17575 "Feature has Gene Xref with locus_tag but no locus, gene with locus_tag and locus exists"));
17577 eval = validator.Validate(seh, options);
// 3-prime UTR check: replace the nucleotide data with a 36-base sequence and
// mark the imp feature as a 3-prime UTR.
17588 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATAAACTAAAAAGGGAAA");
17589 nuc->SetSeq().SetInst().SetLength(36);
17594 utr3->
SetData().SetImp().SetKey(
"3'UTR");
17601 "3'UTR does not extend to end of mRNA"));
17603 eval = validator.Validate(seh, options);
// N-content check: nucleotide data with a run of Ns near the start and a
// protein made mostly of X residues.
17614 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGNNNNNNNNNNNNNNNATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
17616 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MXXXXXIN");
17621 "Feature contains more than 50% Ns"));
17623 "CDS translation consists of more than 50% X residues"));
17625 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
17628 eval = validator.Validate(seh, options);
// Reading-frame checks: the protein data is replaced by tmp (built on lines
// not visible here) while the entry is out of the scope.
17644 scope.RemoveTopLevelSeqEntry(seh);
17646 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
tmp);
17647 prot->SetSeq().SetInst().SetLength(
tmp.length());
17650 seh = scope.AddTopLevelSeqEntry(*entry);
17653 "Suspicious CDS location - reading frame > 1 but not 5' partial"));
17655 eval = validator.Validate(seh, options);
// Same replacement for the next scenario, which is paired with the
// five-prime-partial messages.
17665 scope.RemoveTopLevelSeqEntry(seh);
17666 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
tmp);
17667 prot->SetSeq().SetInst().SetLength(
tmp.length());
17672 seh = scope.AddTopLevelSeqEntry(*entry);
17676 "5' partial is not at beginning of sequence, gap, or consensus splice site"));
17678 "Suspicious CDS location - reading frame > 1 and not at consensus splice site"));
17680 eval = validator.Validate(seh, options);
// Terminal X check: the nucleotide data has an N near its end, the CDS is
// extended to the last base, and the protein ends in two X residues.
17691 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACNAAGGG");
17696 cds->
SetLocation().SetInt().SetTo(
nuc->GetSeq().GetInst().GetLength() - 1);
17698 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MPRKTEINXX");
17699 prot->SetSeq().SetInst().SetLength(10);
17708 "Given protein length [8] does not match translation length [10]"));
17710 "Terminal X count for CDS translation (0) and protein product sequence (2) are not equal"));
17712 eval = validator.Validate(seh, options);
// transl_except checks: a code-break for amino acid P at 3..5 on lcl|nuc;
// the expected message refers to it as position 2.
17724 codebreak->
SetLoc().SetInt().SetId().SetLocal().SetStr(
"nuc");
17725 codebreak->
SetLoc().SetInt().SetFrom(3);
17726 codebreak->
SetLoc().SetInt().SetTo(5);
17727 codebreak->
SetAa().SetNcbieaa(
'P');
17728 cds->
SetData().SetCdregion().SetCode_break().push_back(codebreak);
17733 "Unnecessary transl_except P at position 2"));
17735 eval = validator.Validate(seh, options);
// Move the code-break to the first codon (0..2).
17739 codebreak->
SetLoc().SetInt().SetFrom(0);
17740 codebreak->
SetLoc().SetInt().SetTo(2);
17742 "Suspicious transl_except P at first codon of complete CDS"));
17744 "Residue 1 in protein [M] != translation [P] at lcl|nuc:1-3"));
17746 eval = validator.Validate(seh, options);
// misc_difference feature whose replace qualifier equals the underlying
// sequence; the qualifier is added on a line not visible here.
17757 feat->
SetData().SetImp().SetKey(
"misc_difference");
17763 "/replace already matches underlying sequence (aattggccaaa)"));
17765 eval = validator.Validate(seh, options);
17777 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[44] =
'A';
17778 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[45] =
'G';
17785 "Splice donor consensus (GT) not found at start of intron, position 17 of lcl|nuc"));
17787 "Splice donor consensus (GT) not found after exon ending at position 16 of lcl|nuc"));
17789 eval = validator.Validate(seh, options);
17792 scope.RemoveTopLevelSeqEntry(seh);
17794 seh = scope.AddTopLevelSeqEntry(*entry);
17795 expected_errors[0]->SetErrMsg(
"Splice donor consensus (GT) not found at start of intron, position 44 of lcl|nuc");
17796 expected_errors[1]->SetErrMsg(
"Splice donor consensus (GT) not found after exon ending at position 45 of lcl|nuc");
17797 eval = validator.Validate(seh, options);
17800 scope.RemoveTopLevelSeqEntry(seh);
17802 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[16] =
'\xFB';
17803 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[17] =
'\xFB';
17804 seh = scope.AddTopLevelSeqEntry(*entry);
17808 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusDonor",
"Splice donor consensus (GT) not found at start of intron, position 17 of lcl|nuc"));
17809 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusDonor",
"Bad sequence at splice donor after exon ending at position 16 of lcl|nuc"));
17810 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Fatal,
"NonAsciiAsn",
"Non-ASCII character '251' found in item"));
17812 eval = validator.Validate(seh, options);
17815 scope.RemoveTopLevelSeqEntry(seh);
17817 seh = scope.AddTopLevelSeqEntry(*entry);
17821 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusDonor",
"Splice donor consensus (GT) not found at start of intron, position 44 of lcl|nuc"));
17822 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusDonor",
"Bad sequence at splice donor after exon ending at position 45 of lcl|nuc"));
17823 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Fatal,
"NonAsciiAsn",
"Non-ASCII character '251' found in item"));
17826 eval = validator.Validate(seh, options);
17831 scope.RemoveTopLevelSeqEntry(seh);
17834 intron->
SetData().SetImp().SetKey(
"intron");
17835 seh = scope.AddTopLevelSeqEntry(*entry);
17837 "Splice donor consensus (GT) not found at start of terminal intron, position 1 of lcl|good"));
17839 "Splice acceptor consensus (AG) not found at end of intron, position 11 of lcl|good"));
17841 eval = validator.Validate(seh, options);
17844 scope.RemoveTopLevelSeqEntry(seh);
17846 seh = scope.AddTopLevelSeqEntry(*entry);
17849 "Splice donor consensus (GT) not found at start of terminal intron, position 60 of lcl|good"));
17851 "Splice acceptor consensus (AG) not found at end of intron, position 50 of lcl|good"));
17853 eval = validator.Validate(seh, options);
17866 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[16] =
'G';
17867 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[17] =
'T';
17868 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[44] =
'T';
17869 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[45] =
'C';
17876 "Splice acceptor consensus (AG) not found at end of intron, position 46 of lcl|nuc"));
17878 "Splice acceptor consensus (AG) not found before exon starting at position 47 of lcl|nuc"));
17880 eval = validator.Validate(seh, options);
17883 scope.RemoveTopLevelSeqEntry(seh);
17885 seh = scope.AddTopLevelSeqEntry(*entry);
17886 expected_errors[0]->SetErrMsg(
"Splice acceptor consensus (AG) not found at end of intron, position 15 of lcl|nuc");
17887 expected_errors[1]->SetErrMsg(
"Splice acceptor consensus (AG) not found before exon starting at position 14 of lcl|nuc");
17888 eval = validator.Validate(seh, options);
17891 scope.RemoveTopLevelSeqEntry(seh);
17893 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[44] =
'\xFB';
17894 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[45] =
'\xFB';
17895 seh = scope.AddTopLevelSeqEntry(*entry);
17898 "Invalid residue [251] at position [45]"));
17900 "Invalid residue [251] at position [46]"));
17902 "Splice acceptor consensus (AG) not found at end of intron, position 46 of lcl|nuc"));
17904 "Bad sequence at splice acceptor before exon starting at position 47 of lcl|nuc"));
17905 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Fatal,
"NonAsciiAsn",
"Non-ASCII character '251' found in item"));
17907 eval = validator.Validate(seh, options);
17910 scope.RemoveTopLevelSeqEntry(seh);
17912 seh = scope.AddTopLevelSeqEntry(*entry);
17915 "InvalidResidue",
"Invalid residue [251] at position [15]"));
17917 "Invalid residue [251] at position [16]"));
17919 "Splice acceptor consensus (AG) not found at end of intron, position 15 of lcl|nuc"));
17921 "Bad sequence at splice acceptor before exon starting at position 14 of lcl|nuc"));
17922 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Fatal,
"NonAsciiAsn",
"Non-ASCII character '251' found in item"));
17925 eval = validator.Validate(seh, options);
17930 scope.RemoveTopLevelSeqEntry(seh);
17933 intron->
SetData().SetImp().SetKey(
"intron");
17934 seh = scope.AddTopLevelSeqEntry(*entry);
17936 "Splice donor consensus (GT) not found at start of terminal intron, position 1 of lcl|good"));
17938 "Splice acceptor consensus (AG) not found at end of intron, position 11 of lcl|good"));
17940 eval = validator.Validate(seh, options);
17943 scope.RemoveTopLevelSeqEntry(seh);
17945 seh = scope.AddTopLevelSeqEntry(*entry);
17948 "Splice donor consensus (GT) not found at start of terminal intron, position 60 of lcl|good"));
17950 "Splice acceptor consensus (AG) not found at end of intron, position 50 of lcl|good"));
17952 eval = validator.Validate(seh, options);
17965 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[16] =
'G';
17966 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[17] =
'C';
17967 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[44] =
'A';
17968 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[45] =
'G';
17975 eval = validator.Validate(seh, options);
17978 scope.RemoveTopLevelSeqEntry(seh);
17980 seh = scope.AddTopLevelSeqEntry(*entry);
17984 eval = validator.Validate(seh, options);
17996 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[16] =
'A';
17997 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[17] =
'T';
17998 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[44] =
'A';
17999 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[45] =
'C';
18006 eval = validator.Validate(seh, options);
18010 scope.RemoveTopLevelSeqEntry(seh);
18012 seh = scope.AddTopLevelSeqEntry(*entry);
18016 eval = validator.Validate(seh, options);
18027 cds->
SetId().SetLocal().SetId(1);
18029 mrna->
SetId().SetLocal().SetId(2);
18033 gene->
SetId().SetLocal().SetId(3);
18041 "Cross-referenced feature does not link reciprocally"));
18044 eval = validator.Validate(seh, options);
18055 cds->
SetId().SetLocal().SetId(1);
18057 x1->SetId().SetLocal().SetId(2);
18058 cds->
SetXref().push_back(x1);
18061 "Cross-referenced feature cannot be found"));
18064 eval = validator.Validate(seh, options);
18080 "Feature inside sequence gap"));
18083 eval = validator.Validate(seh, options);
18087 scope.RemoveTopLevelSeqEntry(seh);
18089 gap_seg->SetLiteral().SetSeq_data().SetGap();
18090 gap_seg->SetLiteral().SetLength(10);
18091 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
18092 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(
"CCCANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTGATGATG",
CSeq_inst::eMol_dna);
18096 seh = scope.AddTopLevelSeqEntry(*entry);
18098 "Sequence contains 51 percent Ns"));
18106 "Feature contains more than 50% Ns"));
18109 eval = validator.Validate(seh, options);
18119 for (
auto& it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
18120 if (it->IsLiteral() && it->GetLiteral().GetSeq_data().IsGap()) {
18132 "No CDS location match for 1 mRNA"));
18134 "Feature crosses gap of unknown length"));
18137 eval = validator.Validate(seh, options);
18142 scope.RemoveTopLevelSeqEntry(seh);
18144 int1->
SetInt().SetFrom(3);
18145 int1->
SetInt().SetTo(15);
18146 int1->
SetInt().SetId().SetLocal().SetStr(
"good");
18148 int2->
SetInt().SetFrom(22);
18149 int2->
SetInt().SetTo(30);
18150 int2->
SetInt().SetId().SetLocal().SetStr(
"good");
18151 misc->
SetLocation().SetMix().Set().push_back(int1);
18152 misc->
SetLocation().SetMix().Set().push_back(int2);
18153 seh = scope.AddTopLevelSeqEntry(*entry);
18155 "No CDS location match for 1 mRNA"));
18157 "Internal interval begins or ends in gap"));
18160 eval = validator.Validate(seh, options);
18173 entry->
SetDescr().Set().push_back(desc);
18174 pub->
SetArticle().
SetAuthors().SetNames().SetStd().front()->SetName().SetName().SetSuffix(
"foo");
18178 "Bad author suffix foo"));
18181 eval = validator.Validate(seh, options);
18187 pub->
SetArticle().
SetAuthors().SetNames().SetStd().front()->SetName().SetName().SetSuffix(
"3rd");
18189 eval = validator.Validate(seh, options);
18200 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(8);
18201 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(10);
18202 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'S');
18207 "Codons predicted from anticodon (AAA) cannot produce amino acid (S/Ser)"));
18210 eval = validator.Validate(seh, options);
18221 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(8);
18222 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(10);
18223 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'K');
18224 trna->
SetData().SetRna().SetExt().SetTRNA().SetCodon().push_back(42);
18229 "Codons predicted from anticodon (AAA) cannot produce amino acid (K/Lys)"));
18231 "Codon recognized cannot be produced from anticodon (AAA)"));
18234 eval = validator.Validate(seh, options);
18245 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetFrom(8);
18246 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().SetTo(10);
18248 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'K');
18253 "Anticodon strand and tRNA strand do not match."));
18256 eval = validator.Validate(seh, options);
18259 scope.RemoveTopLevelSeqEntry(seh);
18260 trna->
SetData().SetRna().SetExt().SetTRNA().SetAnticodon().SetInt().ResetStrand();
18262 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'F');
18263 seh = scope.AddTopLevelSeqEntry(*entry);
18264 eval = validator.Validate(seh, options);
18271 #define test_gene_syn(name) \
18272 gene->SetData().SetGene().ResetSyn(); \
18273 gene->SetData().SetGene().SetSyn().push_back(name); \
18274 msg = "Uninformative gene synonym '"; \
18275 msg.append(name); \
18277 expected_errors[0]->SetErrMsg(msg); \
18278 eval = validator.Validate(seh, options); \
18279 CheckErrors(*eval, expected_errors);
18285 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
18287 gene->
SetData().SetGene().SetLocus(
"something");
18322 gene->
SetData().SetGene().ResetSyn();
18323 gene->
SetData().SetGene().SetSyn().push_back(
"same_as");
18324 gene->
SetData().SetGene().SetLocus(
"same_as");
18325 expected_errors[0]->SetErrMsg(
"gene synonym has same value as gene locus");
18326 eval = validator.Validate(seh, options);
18329 gene->
SetData().SetGene().ResetSyn();
18330 gene->
SetData().SetGene().SetDesc(
"same_as");
18331 expected_errors[0]->SetErrMsg(
"gene description has same value as gene locus");
18332 eval = validator.Validate(seh, options);
18335 gene->
SetData().SetGene().ResetDesc();
18336 gene->
SetData().SetGene().ResetLocus();
18337 gene->
SetData().SetGene().SetSyn().push_back(
"only_syn");
18338 expected_errors[0]->SetErrMsg(
"gene synonym without gene locus or description");
18339 eval = validator.Validate(seh, options);
18347 #define test_undesired_protein_name(name) \
18348 prot->SetData().SetProt().ResetName(); \
18349 prot->SetData().SetProt().SetName().push_back(name); \
18350 msg = "Uninformative protein name '"; \
18351 msg.append(name); \
18353 expected_errors[0]->SetErrMsg(msg); \
18354 eval = validator.Validate(seh, options); \
18355 CheckErrors(*eval, expected_errors);
18361 id->SetOther().SetAccession(
"NC_123456");
18374 "Protein name contains undesired character"));
18376 delete expected_errors[1];
18377 expected_errors.pop_back();
18394 for (
auto& it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
18395 if (it->IsLiteral() && it->GetLiteral().GetSeq_data().IsGap()) {
18406 "Feature begins or ends in gap starting at 13"));
18409 eval = validator.Validate(seh, options);
18412 scope.RemoveTopLevelSeqEntry(seh);
18414 seh = scope.AddTopLevelSeqEntry(*entry);
18415 eval = validator.Validate(seh, options);
18418 scope.RemoveTopLevelSeqEntry(seh);
18421 seh = scope.AddTopLevelSeqEntry(*entry);
18423 eval = validator.Validate(seh, options);
18426 scope.RemoveTopLevelSeqEntry(seh);
18428 seh = scope.AddTopLevelSeqEntry(*entry);
18429 eval = validator.Validate(seh, options);
18440 feat->
SetExt().SetType().SetStr(
"GeneOntology");
18442 go_list->
SetLabel().SetStr(
"Process");
18444 go_term->
SetLabel().SetStr(
"a go term");
18447 pmid->
SetLabel().SetStr(
"pubmed id");
18449 go_term->
SetData().SetFields().push_back(pmid);
18452 term->
SetLabel().SetStr(
"text string");
18453 term->
SetData().SetStr(
"something");
18454 go_term->
SetData().SetFields().push_back(term);
18457 ev->
SetLabel().SetStr(
"evidence");
18458 ev->
SetData().SetStr(
"some evidence");
18459 go_term->
SetData().SetFields().push_back(ev);
18461 go_list->
SetData().SetFields().push_back(go_term);
18462 feat->
SetExt().SetData().push_back(go_list);
18467 "GO term does not have GO identifier"));
18469 eval = validator.Validate(seh, options);
18481 rna->ResetComment();
18483 rna->SetPseudo(
true);
18484 rna->SetProduct().SetWhole().SetGenbank().SetAccession(
"AY123456");
18489 "A pseudo RNA should not have a product"));
18491 eval = validator.Validate(seh, options);
18495 rna->SetExcept(
true);
18496 rna->SetExcept_text(
"transcribed pseudogene");
18499 eval = validator.Validate(seh, options);
18503 scope.RemoveTopLevelSeqEntry(seh);
18504 rna->ResetExcept();
18505 rna->ResetExcept_text();
18509 seh = scope.AddTopLevelSeqEntry(*entry);
18511 "A pseudo RNA should not have a product"));
18512 eval = validator.Validate(seh, options);
18517 rna->ResetPseudo();
18520 "An RNA overlapped by a pseudogene should not have a product"));
18522 eval = validator.Validate(seh, options);
18536 rna->ResetComment();
18538 rna->SetPseudo(
true);
18539 rna->SetProduct().SetWhole().SetGenbank().SetAccession(
"AY123456");
18546 eval = validator.Validate(seh, options);
18550 scope.RemoveTopLevelSeqEntry(seh);
18554 seh = scope.AddTopLevelSeqEntry(*entry);
18555 eval = validator.Validate(seh, options);
18560 rna->ResetPseudo();
18561 eval = validator.Validate(seh, options);
18573 r1->
SetData().SetRna().SetExt().SetName(
"26S ribosomal RNA");
18580 r2->
SetData().SetRna().SetExt().SetName(
"internal transcribed spacer 2");
18587 r3->
SetData().SetRna().SetExt().SetName(
"16S ribosomal RNA");
18597 "Problem with order of abutting rRNA components"));
18599 "Problem with order of abutting rRNA components"));
18601 eval = validator.Validate(seh, options);
18604 scope.RemoveTopLevelSeqEntry(seh);
18606 seh = scope.AddTopLevelSeqEntry(*entry);
18608 eval = validator.Validate(seh, options);
18616 eval = validator.Validate(seh, options);
18626 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
18629 gene1->
SetData().SetGene().SetLocus(
"a");
18630 gene1->
SetData().SetGene().SetLocus_tag(
"tag1");
18633 gene2->
SetData().SetGene().SetLocus(
"b");
18640 "Missing gene locus tag"));
18642 eval = validator.Validate(seh, options);
18654 prot2->
SetData().SetProt().SetName().push_back(
"a second protein name");
18659 "2 full-length protein features present on protein"));
18661 "Features have identical intervals, but labels differ"));
18663 "Protein sequence has multiple unprocessed protein features"));
18665 "Protein sequence has multiple unprocessed protein features"));
18667 eval = validator.Validate(seh, options);
18679 prot->SetData().SetProt().ResetName();
18680 prot->SetData().SetProt().SetName().push_back(
"name~something");
18684 mrna->
SetData().SetRna().SetExt().SetName(
"name~something");
18687 gene->
SetData().SetGene().SetLocus(
"gene?something");
18692 rrna->
SetData().SetRna().SetExt().SetName(
"rna!something");
18696 "mRNA name contains undesired character"));
18698 "Gene locus contains undesired character"));
18700 "rRNA name contains undesired character"));
18702 "Protein name contains undesired character"));
18704 eval = validator.Validate(seh, options);
18717 gene->
SetData().SetGene().SetLocus(
"gene|synonym");
18722 "Gene locus contains undesired character"));
18725 eval = validator.Validate(seh, options);
18737 prot->SetData().SetProt().ResetName();
18738 prot->SetData().SetProt().SetName().push_back(
"name something,");
18742 mrna->
SetData().SetRna().SetExt().SetName(
"name something_");
18745 gene->
SetData().SetGene().SetLocus(
"gene something;");
18750 rrna->
SetData().SetRna().SetExt().SetName(
"rna something:");
18754 "mRNA name ends with undesired character"));
18756 "Gene locus ends with undesired character"));
18758 "rRNA name ends with undesired character"));
18760 "Protein name ends with undesired character"));
18762 eval = validator.Validate(seh, options);
18774 prot->SetData().SetProt().ResetName();
18775 prot->SetData().SetProt().SetName().push_back(
"name something-");
18779 mrna->
SetData().SetRna().SetExt().SetName(
"name something-");
18782 gene->
SetData().SetGene().SetLocus(
"gene something-");
18787 rrna->
SetData().SetRna().SetExt().SetName(
"rna something-");
18791 "mRNA name ends with hyphen"));
18793 "Gene locus ends with hyphen"));
18795 "rRNA name ends with hyphen"));
18797 "Protein name ends with hyphen"));
18799 eval = validator.Validate(seh, options);
18810 gene1->
SetData().SetGene().SetLocus(
"a");
18814 gene2->
SetData().SetGene().SetLocus(
"b");
18816 gene3->
SetData().SetGene().SetLocus(
"c");
18823 eval = validator.Validate(seh, options);
18826 scope.RemoveTopLevelSeqEntry(seh);
18828 gene4->
SetData().SetGene().SetLocus(
"d");
18832 gene5->
SetData().SetGene().SetLocus(
"e");
18836 gene6->
SetData().SetGene().SetLocus(
"f");
18839 seh = scope.AddTopLevelSeqEntry(*entry);
18842 "Gene contains 5 other genes"));
18843 eval = validator.Validate(seh, options);
18854 author->
SetName().SetName().SetLast(
"Gr@nt");
18858 art_title->SetName(
"article title");
18862 entry->
SetDescr().Set().push_back(desc);
18866 "Bad characters in author Gr@nt"));
18868 eval = validator.Validate(seh, options);
18879 cds->
SetData().SetCdregion();
18883 mrna->
SetLocation().SetMix().Set().front()->SetInt().SetTo(16);
18889 "No CDS location match for 1 mRNA"));
18891 "mRNA contains CDS but internal intron-exon boundaries do not match"));
18893 eval = validator.Validate(seh, options);
18896 scope.RemoveTopLevelSeqEntry(seh);
18897 mrna->
SetLocation().SetMix().Set().back()->SetInt().SetTo(55);
18898 seh = scope.AddTopLevelSeqEntry(*entry);
18899 expected_errors[1]->SetErrMsg(
"mRNA overlaps or contains CDS but does not completely contain intervals");
18900 eval = validator.Validate(seh, options);
18913 gene1->
SetData().SetGene().SetLocus(
"a1");
18914 gene1->
SetData().SetGene().SetAllele(
"x");
18917 gene2->
SetData().SetGene().SetLocus(
"a1");
18918 gene2->
SetData().SetGene().SetAllele(
"y");
18924 "Feature overlapped by 2 identical-length equivalent genes but has no cross-reference"));
18926 eval = validator.Validate(seh, options);
18937 prot->SetData().SetProt().SetName().pop_back();
18938 prot->SetData().SetProt().SetName().push_back(
"ribulose bisphosphate");
18941 "Nonstandard ribulose bisphosphate protein name"));
18944 eval = validator.Validate(seh, options);
18961 genomic->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGGGGAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
18969 "mRNA has transcribed product replaced exception"));
18971 eval = validator.Validate(seh, options);
18982 prot->SetData().SetProt().SetName().front().assign(
"(PMID 1234)");
18985 "Protein name has internal PMID"));
18987 eval = validator.Validate(seh, options);
18998 feat->
SetExt().SetType().SetStr(
"GeneOntology");
19000 go_list->
SetData().SetStr(
"something");
19001 feat->
SetExt().SetData().push_back(go_list);
19005 "Bad data format for GO term"));
19007 eval = validator.Validate(seh, options);
19011 go_list->
SetData().SetFields().push_back(go_term);
19012 expected_errors[0]->SetErrMsg(
"Unrecognized GO term label [blank]");
19013 eval = validator.Validate(seh, options);
19016 go_list->
SetLabel().SetStr(
"something");
19017 expected_errors[0]->SetErrMsg(
"Unrecognized GO term label something");
19018 eval = validator.Validate(seh, options);
19021 go_list->
SetLabel().SetStr(
"Process");
19022 expected_errors[0]->SetErrMsg(
"Bad GO term format");
19023 eval = validator.Validate(seh, options);
19027 go_term->
SetData().SetFields().push_back(go_field);
19028 expected_errors[0]->SetErrMsg(
"Unrecognized label on GO term qualifier field [blank]");
19030 "GO term does not have GO identifier"));
19031 eval = validator.Validate(seh, options);
19034 go_field->
SetLabel().SetStr(
"notlabel");
19035 expected_errors[0]->SetErrMsg(
"Unrecognized label on GO term qualifier field notlabel");
19036 eval = validator.Validate(seh, options);
19039 go_field->
SetLabel().SetStr(
"go id");
19040 expected_errors[0]->SetErrMsg(
"Bad data format for GO term qualifier GO ID");
19041 eval = validator.Validate(seh, options);
19044 go_field->
SetData().SetInt(123);
19046 go_field2->
SetLabel().SetStr(
"text string");
19047 go_field2->
SetData().SetInt(123);
19048 go_term->
SetData().SetFields().push_back(go_field2);
19049 expected_errors[0]->SetErrMsg(
"Bad data format for GO term qualifier term");
19050 delete expected_errors[1];
19051 expected_errors.pop_back();
19052 eval = validator.Validate(seh, options);
19055 go_field2->
SetData().SetStr(
"some text");
19057 go_field3->
SetLabel().SetStr(
"pubmed id");
19058 go_field3->
SetData().SetStr(
"some text");
19059 go_term->
SetData().SetFields().push_back(go_field3);
19060 expected_errors[0]->SetErrMsg(
"Bad data format for GO term qualifier PMID");
19061 eval = validator.Validate(seh, options);
19064 go_field3->
SetData().SetInt(123);
19066 go_field4->
SetLabel().SetStr(
"evidence");
19067 go_field4->
SetData().SetInt(123);
19068 go_term->
SetData().SetFields().push_back(go_field4);
19069 expected_errors[0]->SetErrMsg(
"Bad data format for GO term qualifier evidence");
19070 eval = validator.Validate(seh, options);
19088 "Inconsistent GO terms for GO ID 123"));
19090 eval = validator.Validate(seh, options);
19101 gene1->
SetData().SetGene().SetLocus(
"gene1");
19103 gene2->
SetData().SetGene().SetLocus(
"gene1");
19108 "Duplicate feature"));
19110 "Colliding names in gene features, but feature locations are identical"));
19112 eval = validator.Validate(seh, options);
19117 gene2->
SetData().SetGene().SetLocus(
"GENE1");
19119 "Features have identical intervals, but labels differ"));
19121 "Colliding names (with different capitalization) in gene features, but feature locations are identical"));
19123 eval = validator.Validate(seh, options);
19134 gene1->
SetData().SetGene().SetLocus(
"gene1");
19136 gene2->
SetData().SetGene().SetLocus(
"gene1");
19143 eval = validator.Validate(seh, options);
19146 gene2->
SetData().SetGene().SetLocus(
"GENE1");
19147 eval = validator.Validate(seh, options);
19161 gene->
SetData().SetGene().SetLocus(
"gene locus");
19166 "Gene cross-reference is not on expected strand"));
19168 eval = validator.Validate(seh, options);
19171 scope.RemoveTopLevelSeqEntry(seh);
19173 seh = scope.AddTopLevelSeqEntry(*entry);
19174 eval = validator.Validate(seh, options);
19179 gene->
SetData().SetGene().ResetLocus();
19180 gene->
SetData().SetGene().SetLocus_tag(
"LOCUSTAG");
19181 eval = validator.Validate(seh, options);
19184 scope.RemoveTopLevelSeqEntry(seh);
19186 seh = scope.AddTopLevelSeqEntry(*entry);
19187 eval = validator.Validate(seh, options);
19199 cds->
SetId().SetLocal().SetId(1);
19201 x1->SetId().SetLocal().SetId(2);
19202 cds->
SetXref().push_back(x1);
19205 mrna->
SetId().SetLocal().SetId(2);
19207 x2->SetId().SetLocal().SetId(1);
19208 mrna->
SetXref().push_back(x2);
19215 "CDS not contained within cross-referenced mRNA"));
19217 "mRNA overlaps or contains CDS but does not completely contain intervals"));
19219 eval = validator.Validate(seh, options);
19230 gene1->
SetData().SetGene().SetLocus(
"gene1");
19237 gene2->
SetData().SetGene().SetLocus(
"gene2");
19238 gene2->
SetData().SetGene().SetSyn().push_back(
"gene1");
19247 "gene synonym has same value (gene1) as locus of another gene feature"));
19249 eval = validator.Validate(seh, options);
19261 CRef<CSeq_feat> prot_feat =
prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
19268 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"PRKTEIN");
19269 prot->SetSeq().SetInst().SetLength(7);
19278 "Coding region and protein feature partials conflict"));
19280 "5' partial is not at beginning of sequence, gap, or consensus splice site"));
19282 "Inconsistent: Product= partial, Location= partial, Feature.partial= FALSE"));
19283 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"PartialProblemHasStop",
"Got stop codon, but 3'end is labeled partial"));
19284 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"PartialProblem",
"CDS is 3' complete but protein is CO2 partial"));
19285 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"PartialProblem",
"CDS is 5' partial but protein is CO2 partial"));
19287 eval = validator.Validate(seh, options);
19292 delete expected_errors[2];
19293 expected_errors[2] =
nullptr;
19294 eval = validator.Validate(seh, options);
19308 "Protein name ends with bracket and may contain organism name"));
19310 eval = validator.Validate(seh, options);
19315 eval = validator.Validate(seh, options);
19323 eval = validator.Validate(seh, options);
19337 gene->
SetData().SetGene().SetLocus(
"locus");
19340 gene->
SetLocation().SetInt().SetId().Assign(*
id);
19345 cds->
SetData().SetCdregion();
19348 loc1->
SetInt().SetFrom(0);
19349 loc1->
SetInt().SetTo(15);
19350 loc1->
SetInt().SetId().Assign(*
id);
19353 loc2->
SetInt().SetFrom(19);
19354 loc2->
SetInt().SetTo(59);
19355 loc2->
SetInt().SetId().Assign(*
id);
19357 cds->
SetLocation().SetMix().Set().push_back(loc1);
19358 cds->
SetLocation().SetMix().Set().push_back(loc2);
19363 intron->
SetData().SetImp().SetKey(
"intron");
19366 intron->
SetLocation().SetInt().SetId().Assign(*
id);
19375 "Illegal start codon used. Wrong genetic code [0] or protein should be partial"));
19377 "Missing stop codon"));
19379 "Splice donor consensus (GT) not found after exon ending at position 16 of lcl|good"));
19381 "Splice acceptor consensus (AG) not found before exon starting at position 20 of lcl|good"));
19383 "Expected CDS product absent"));
19385 "Introns should be at least 10 nt long"));
19387 "Introns at positions 16-20 should be at least 10 nt long"));
19389 "Splice donor consensus (GT) not found at start of intron, position 17 of lcl|good"));
19391 "Splice acceptor consensus (AG) not found at end of intron, position 19 of lcl|good"));
19394 eval = validator.Validate(seh, options);
19401 "Introns should be at least 10 nt long"));
19403 "Splice donor consensus (GT) not found at start of intron, position 17 of lcl|good"));
19405 "Splice acceptor consensus (AG) not found at end of intron, position 19 of lcl|good"));
19409 eval = validator.Validate(seh, options);
19421 "Illegal start codon used. Wrong genetic code [0] or protein should be partial"));
19423 "Missing stop codon"));
19425 "Splice donor consensus (GT) not found after exon ending at position 16 of lcl|good"));
19427 "Splice acceptor consensus (AG) not found before exon starting at position 20 of lcl|good"));
19429 "Expected CDS product absent"));
19431 "Introns at positions 16-20 should be at least 10 nt long"));
19433 "Splice donor consensus (GT) not found at start of intron, position 17 of lcl|good"));
19435 "Splice acceptor consensus (AG) not found at end of intron, position 19 of lcl|good"));
19438 eval = validator.Validate(seh, options);
19447 "Splice donor consensus (GT) not found at start of intron, position 17 of lcl|good"));
19449 "Splice acceptor consensus (AG) not found at end of intron, position 19 of lcl|good"));
19452 eval = validator.Validate(seh, options);
19459 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGGCCAAAATTGGTAAAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
19462 "No protein Bioseq given"));
19464 "Triplet intron encodes stop codon"));
19466 "Illegal start codon used. Wrong genetic code [0] or protein should be partial"));
19468 "Missing stop codon"));
19470 "Expected CDS product absent"));
19472 "Introns should be at least 10 nt long"));
19474 "Splice donor consensus (GT) not found after exon ending at position 16 of lcl|good"));
19476 "Splice acceptor consensus (AG) not found before exon starting at position 20 of lcl|good"));
19478 "Splice donor consensus (GT) not found at start of intron, position 17 of lcl|good"));
19480 "Splice acceptor consensus (AG) not found at end of intron, position 19 of lcl|good"));
19483 eval = validator.Validate(seh, options);
19498 "A note or other qualifier is required for a misc_feature"));
19500 eval = validator.Validate(seh, options);
19511 misc->
SetData().SetImp().SetKey(
"repeat_region");
19513 qual->
SetQual(
"rpt_unit_range");
19515 misc->
SetQual().push_back(qual);
19520 "/rpt_unit_range is not within sequence length"));
19522 eval = validator.Validate(seh, options);
19533 for (
int i = 0;
i < 50;
i++) {
19535 for (
int j = 0; j < 10; j++) {
19538 string val =
"similar to DNA sequence:";
19539 for (
int k = 0; k < 10; k++) {
19546 misc->
SetQual().push_back(qual);
19552 "Skipping validation of 500 /inference qualifiers with 5000 accessions"));
19565 align->
SetSegs().SetDenseg().SetNumseg(1);
19573 id->Assign(*((*s)->GetSeq().GetId().front()));
19574 align->
SetSegs().SetDenseg().SetIds().push_back(
id);
19575 align->
SetSegs().SetDenseg().SetStarts().push_back(0);
19577 len = (*s)->GetSeq().GetInst().GetLength();
19580 align->
SetSegs().SetDenseg().SetDim(dim);
19581 align->
SetSegs().SetDenseg().SetLens().push_back(
len);
19592 align->
SetSegs().SetDenseg().SetIds().back()->SetLocal().SetStr(
"good4");
19593 annot->
SetData().SetAlign().push_back(align);
19600 "Fasta: This may be a fasta-like alignment for SeqId: lcl|good1 in the context of good1"));
19602 "SeqId: The sequence corresponding to SeqId lcl|good4 could not be found."));
19604 "PercentIdentity: This alignment has a percent identity of 0%"));
19607 eval = validator.Validate(seh, options);
19619 align->
SetSegs().SetDenseg().SetNumseg(2);
19626 id->Assign(*((*s)->GetSeq().GetId().front()));
19627 align->
SetSegs().SetDenseg().SetIds().push_back(
id);
19628 align->
SetSegs().SetDenseg().SetStarts().push_back(0);
19631 align->
SetSegs().SetDenseg().SetDim(dim);
19633 align->
SetSegs().SetDenseg().SetLens().push_back(5);
19634 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19635 align->
SetSegs().SetDenseg().SetStarts().push_back(6);
19636 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19637 align->
SetSegs().SetDenseg().SetLens().push_back(10);
19640 annot->
SetData().SetAlign().push_back(align);
19646 "Start/Length: There is a problem with sequence lcl|good2, in segment 1 (near sequence position 0), context good1: the segment is too long or short or the next segment has an incorrect start position"));
19649 eval = validator.Validate(seh, options);
19659 align->
SetSegs().SetDenseg().SetNumseg(2);
19660 align->
SetSegs().SetDenseg().SetLens()[0] = 5;
19661 align->
SetSegs().SetDenseg().SetLens().push_back(60);
19663 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19664 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19665 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19668 annot->
SetData().SetAlign().push_back(align);
19674 "Start: In sequence lcl|good1, segment 2 (near sequence position 5) context good1, the alignment claims to contain residue coordinates that are past the end of the sequence. Either the sequence is too short, or there are extra characters or formatting errors in the alignment"));
19676 "Start: In sequence lcl|good2, segment 2 (near sequence position 5) context good1, the alignment claims to contain residue coordinates that are past the end of the sequence. Either the sequence is too short, or there are extra characters or formatting errors in the alignment"));
19678 "Start: In sequence lcl|good3, segment 2 (near sequence position 5) context good1, the alignment claims to contain residue coordinates that are past the end of the sequence. Either the sequence is too short, or there are extra characters or formatting errors in the alignment"));
19681 eval = validator.Validate(seh, options);
19691 align->
SetSegs().SetDenseg().SetDim(4);
19694 annot->
SetData().SetAlign().push_back(align);
19702 "SeqId: The Seqalign has more or fewer ids than the number of rows in the alignment (context good1). Look for possible formatting errors in the ids."));
19704 "The number of Starts (3) does not match the expected size of dim * numseg (4)"));
19707 eval = validator.Validate(seh, options);
19720 annot->
SetData().SetAlign().push_back(align);
19726 "Fasta: This may be a fasta-like alignment for SeqId: lcl|good1 in the context of good1"));
19728 "PercentIdentity: This alignment has a percent identity of 0%"));
19731 eval = validator.Validate(seh, options);
19735 align->
SetSegs().SetDenseg().SetNumseg(2);
19736 align->
SetSegs().SetDenseg().SetLens()[0] = 5;
19737 align->
SetSegs().SetDenseg().SetLens().push_back(55);
19738 align->
SetSegs().SetDenseg().SetStarts()[2] = -1;
19739 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19740 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19741 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19745 "PercentIdentity: This alignment has a percent identity of 0%"));
19748 eval = validator.Validate(seh, options);
19762 annot->
SetData().SetAlign().push_back(align);
19768 "Segs: This alignment is missing all segments. This is a non-correctable error -- look for serious formatting problems."));
19771 eval = validator.Validate(seh, options);
19782 align->
SetSegs().SetDenseg().SetNumseg(3);
19783 align->
SetSegs().SetDenseg().SetLens()[0] = 5;
19784 align->
SetSegs().SetDenseg().SetLens().push_back(10);
19785 align->
SetSegs().SetDenseg().SetLens().push_back(55);
19786 align->
SetSegs().SetDenseg().SetStarts().push_back(-1);
19787 align->
SetSegs().SetDenseg().SetStarts().push_back(-1);
19788 align->
SetSegs().SetDenseg().SetStarts().push_back(-1);
19789 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19790 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19791 align->
SetSegs().SetDenseg().SetStarts().push_back(5);
19794 annot->
SetData().SetAlign().push_back(align);
19800 "Segs: Segment 2 (near alignment position 5) in the context of good1 contains only gaps. Each segment must contain at least one actual sequence -- look for columns with all gaps and delete them."));
19803 eval = validator.Validate(seh, options);
19814 align->
SetSegs().SetDenseg().SetDim(1);
19815 align->
SetSegs().SetDenseg().SetIds().pop_back();
19816 align->
SetSegs().SetDenseg().SetIds().pop_back();
19817 align->
SetSegs().SetDenseg().SetStarts().pop_back();
19818 align->
SetSegs().SetDenseg().SetStarts().pop_back();
19821 annot->
SetData().SetAlign().push_back(align);
19827 "Dim: This seqalign apparently has only one sequence. Each alignment must have at least two sequences. context lcl|good1"));
19830 eval = validator.Validate(seh, options);
19841 align->
SetSegs().SetSparse();
19844 annot->
SetData().SetAlign().push_back(align);
19850 "Segs: This alignment has an undefined or unsupported Seqalign segtype 7 (alignment number 1)"));
19853 eval = validator.Validate(seh, options);
19856 align->
SetSegs().SetSpliced();
19857 expected_errors[0]->SetErrMsg(
"Segs: This alignment has an undefined or unsupported Seqalign segtype 6 (alignment number 1)");
19858 eval = validator.Validate(seh, options);
19871 annot->
SetData().SetAlign().push_back(align);
19875 annot->
SetDesc().Set().push_back(ad);
19881 "Record contains BLAST alignments"));
19884 eval = validator.Validate(seh, options);
19894 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCTTGGCCAAAATTGGCCAA");
19898 annot->
SetData().SetAlign().push_back(align);
19905 "Fasta: This may be a fasta-like alignment for SeqId: lcl|good1 in the context of good1"));
19907 "PercentIdentity: This alignment has a percent identity of 43%"));
19910 eval = validator.Validate(seh, options);
19931 id->Assign(*((*s)->GetSeq().GetId().front()));
19932 diag->SetIds().push_back(
id);
19933 diag->SetStarts().push_back(0);
19935 len = (*s)->GetSeq().GetInst().GetLength();
19940 align->
SetSegs().SetDendiag().push_back(diag);
19952 annot->
SetData().SetAlign().push_back(align);
19959 "UnexpectedAlignmentType: This is not a DenseSeg alignment."));
19961 "PercentIdentity: This alignment has a percent identity of 0%"));
19964 eval = validator.Validate(seh, options);
19983 for (
size_t pos = 0; pos <
len; pos++) {
19984 graph->
SetGraph().SetByte().SetValues().push_back(40);
19988 graph->
SetGraph().SetByte().SetMax(40);
19989 graph->
SetGraph().SetByte().SetMin(40);
19990 graph->
SetGraph().SetByte().SetAxis(40);
19999 graph->
SetGraph().SetByte().SetMin(-1);
20001 annot->
SetData().SetGraph().push_back(graph);
20007 "Graph min (-1) out of range"));
20009 eval = validator.Validate(seh, options);
20012 graph->
SetGraph().SetByte().SetMin(101);
20013 expected_errors[0]->SetErrMsg(
"Graph min (101) out of range");
20015 "60 quality scores have values below the reported minimum or 0"));
20016 eval = validator.Validate(seh, options);
20027 graph->
SetGraph().SetByte().SetMax(-1);
20029 annot->
SetData().SetGraph().push_back(graph);
20035 "Graph max (-1) out of range"));
20037 "60 quality scores have values above the reported maximum or 100"));
20039 eval = validator.Validate(seh, options);
20042 delete expected_errors[1];
20043 expected_errors[1] =
nullptr;
20045 graph->
SetGraph().SetByte().SetMax(101);
20046 expected_errors[0]->SetErrMsg(
"Graph max (101) out of range");
20048 eval = validator.Validate(seh, options);
20061 annot->
SetData().SetGraph().push_back(graph);
20067 "SeqGraph (40) and ByteStore (60) length mismatch"));
20069 "SeqGraph (40) and Bioseq (60) length mismatch"));
20071 eval = validator.Validate(seh, options);
20090 "Graph components are out of order - may be a software bug"));
20092 eval = validator.Validate(seh, options);
20110 "SeqGraph (23) and Bioseq (24) length mismatch"));
20112 "SeqGraph (11) and SeqLit (12) length mismatch"));
20114 "SeqGraph (10) and SeqLit (11) stop do not coincide"));
20116 eval = validator.Validate(seh, options);
20126 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
20127 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
20128 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(11);
20138 "1 gap bases have positive score value"));
20140 "SeqGraph (25) and Bioseq (24) length mismatch"));
20142 "SeqGraph (13) and SeqLoc (12) length mismatch"));
20144 eval = validator.Validate(seh, options);
20162 "1 gap bases have positive score value"));
20164 "SeqGraph (25) and Bioseq (24) length mismatch"));
20166 "SeqGraph (13) and SeqLit (12) length mismatch"));
20168 "SeqGraph (21) and SeqLit (22) start do not coincide"));
20170 eval = validator.Validate(seh, options);
20192 "SeqGraph (6) and SeqLit (12) length mismatch"));
20194 "SeqGraph (5) and SeqLit (11) stop do not coincide"));
20196 "SeqGraph (6) and SeqLit (12) length mismatch"));
20198 "SeqGraph (6) and SeqLit (22) start do not coincide"));
20200 "SeqGraph (11) and SeqLit (33) stop do not coincide"));
20202 "Different number of SeqGraph (3) and SeqLit (2) components"));
20204 eval = validator.Validate(seh, options);
20217 graph->
SetGraph().SetByte().SetValues().pop_back();
20218 graph->
SetGraph().SetByte().SetValues().push_back(0);
20219 graph->
SetGraph().SetByte().SetMin(0);
20220 annot->
SetData().SetGraph().push_back(graph);
20227 "1 ACGT bases have zero score value - first one at position 34"));
20229 eval = validator.Validate(seh, options);
20239 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCCATNATGATG");
20250 "1 N bases have positive score value - first one at position 28"));
20252 eval = validator.Validate(seh, options);
20273 "10 gap bases have positive score value"));
20275 "SeqGraph (10) and SeqLit (12) length mismatch"));
20277 "SeqGraph (12) and SeqLit (22) start do not coincide"));
20279 "SeqGraph (21) and SeqLit (33) stop do not coincide"));
20281 "Different number of SeqGraph (3) and SeqLit (2) components"));
20283 eval = validator.Validate(seh, options);
20303 "Graph components overlap, with multiple scores for a single base"));
20305 "SeqGraph (61) and Bioseq (60) length mismatch"));
20307 eval = validator.Validate(seh, options);
20320 graph->
SetLoc().SetInt().SetId().SetLocal().SetStr(
"good2");
20321 annot->
SetData().SetGraph().push_back(graph);
20327 "Bioseq not found for Graph location good2"));
20329 "There is 1 mispackaged graph in this record."));
20331 eval = validator.Validate(seh, options);
20344 graph->
SetGraph().SetByte().ResetValues();
20346 graph->
SetGraph().SetByte().SetValues().push_back(0);
20348 graph->
SetGraph().SetByte().SetMin(0);
20349 annot->
SetData().SetGraph().push_back(graph);
20356 "12 ACGT bases (50.00%) have zero score value - first one at position 23"));
20358 eval = validator.Validate(seh, options);
20368 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"ANNNNNNTGATG");
20379 "6 N bases (25.00%) have positive score value - first one at position 24"));
20388 eval = validator.Validate(seh, options);
20395 scope.RemoveTopLevelSeqEntry(seh);
20397 first_part.
SetSeq_data().SetIupacna().Set(
"AAAAAAAAAAAAAAAAAAAANNNNNNNNNNNNNNNNNNNNTTTTTTTTTTTTTTTTTTTT");
20404 for (
size_t pos = 0; pos < 20; pos++) {
20407 for (
size_t pos = 20; pos < 40; pos++) {
20410 for (
size_t pos = 40; pos < 70; pos++) {
20417 annot2->
SetData().SetGraph().push_back(bad_graph);
20420 seh = scope.AddTopLevelSeqEntry(*entry);
20422 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GraphBioseqLen",
"SeqGraph(79) and Bioseq(72) length mismatch"));
20424 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GraphByteLen",
"SeqGraph(79) and ByteStore(70) length mismatch"));
20425 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"GraphACGTScoreMany",
"23 ACGT bases(29.11%) have zero score value - first one at position 1"));
20426 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"GraphNScoreMany",
"20 N bases(25.32%) have positive score value - first one at position 21"));
20427 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GraphGapScore",
"10 gap bases have positive score value"));
20428 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"GraphAbove",
"79 quality scores have values above the reported maximum or 100"));
20429 eval = validator.Validate(seh, options);
20443 graph->
SetLoc().SetInt().SetTo(61);
20444 annot->
SetData().SetGraph().push_back(graph);
20450 "SeqGraph location (lcl|good:1-62) is invalid"));
20452 eval = validator.Validate(seh, options);
20465 annot->
SetData().SetGraph().push_back(graph);
20471 "SeqGraph location (Unknown) is invalid"));
20473 "There is 1 mispackaged graph in this record."));
20475 eval = validator.Validate(seh, options);
20492 "Record contains Seq-annot.data.ids"));
20494 eval = validator.Validate(seh, options);
20511 "Record contains Seq-annot.data.locs"));
20513 eval = validator.Validate(seh, options);
20525 cds->
SetQual().push_back(qual);
20530 "gene_synonym should not be a gbqual on a CDS feature"));
20532 eval = validator.Validate(seh, options);
20545 bool format_correct;
20546 bool precision_correct;
20553 lat_in_range, lon_in_range,
20554 lat_value, lon_value);
20555 BOOST_CHECK(!format_correct);
20567 latlon =
"35 N 80 E";
20571 BOOST_CHECK_EQUAL(
error,
"Longitude should be set to W (western hemisphere)");
20572 BOOST_CHECK_EQUAL(latlon,
"35.00 N 80.00 W");
20574 latlon =
"25 N 47 E";
20575 country =
"Madagascar";
20578 BOOST_CHECK_EQUAL(
error,
"Latitude should be set to S (southern hemisphere)");
20579 BOOST_CHECK_EQUAL(latlon,
"25.00 S 47.00 E");
20581 latlon =
"15 N 47 E";
20582 country =
"Austria";
20585 BOOST_CHECK_EQUAL(
error,
"Latitude and longitude values appear to be exchanged");
20586 BOOST_CHECK_EQUAL(latlon,
"47.00 N 15.00 E");
20599 string start =
"ATG";
20600 string stop =
"TAA";
20601 string splice_left =
"GT";
20602 string splice_right =
"AG";
20603 string fifteen =
"CCCAGAAAAACAGGT";
20605 string first_exon = start + fifteen;
20606 string intron = splice_left + fifteen + splice_right;
20607 string second_exon = fifteen;
20608 string third_exon = fifteen + stop;
20610 string nuc_str = first_exon + intron + second_exon + intron + third_exon;
20611 nseq->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(nuc_str);
20615 loc1->
SetInt().SetId().SetLocal().SetStr(
"nuc");
20616 loc1->
SetInt().SetFrom(0);
20620 offset += intron.length();
20622 loc2->
SetInt().SetId().SetLocal().SetStr(
"nuc");
20624 offset += second_exon.length();
20628 offset += intron.length();
20630 loc3->
SetInt().SetId().SetLocal().SetStr(
"nuc");
20632 offset += third_exon.length();
20635 cds->
SetLocation().SetMix().Set().push_back(loc1);
20636 cds->
SetLocation().SetMix().Set().push_back(loc2);
20637 cds->
SetLocation().SetMix().Set().push_back(loc3);
20639 string loc_str = first_exon + second_exon + third_exon;
20643 prot_str = prot_str.substr(0, prot_str.length() - 1);
20645 pseq->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(prot_str);
20648 prot->SetLocation().SetInt().SetTo(prot_str.length() - 1);
20653 "Internal coding region exon is too short at position 38-52"));
20655 eval = validator.Validate(seh, options);
20667 second_prot->
SetData().SetProt().SetName().front() =
"different name";
20673 "Protein sequence has multiple unprocessed protein features"));
20675 "Protein sequence has multiple unprocessed protein features"));
20677 eval = validator.Validate(seh, options);
20695 bool bad_format =
false;
20696 bool in_future =
false;
20698 BOOST_CHECK_EQUAL(
true, bad_format);
20910 BOOST_CHECK_EQUAL(ambiguous,
true);
20913 BOOST_CHECK_EQUAL(ambiguous,
true);
20916 BOOST_CHECK_EQUAL(ambiguous,
false);
20917 BOOST_CHECK_EQUAL(day_first,
false);
20920 BOOST_CHECK_EQUAL(ambiguous,
false);
20921 BOOST_CHECK_EQUAL(day_first,
false);
20924 BOOST_CHECK_EQUAL(ambiguous,
false);
20925 BOOST_CHECK_EQUAL(day_first,
true);
20934 BOOST_CHECK_EQUAL(
result, after);
20951 s_USAStateTest(
"USA: Napa, Solano, Yolo, Marin Counties, CA",
"USA: California, Napa, Solano, Yolo, Marin Counties",
CCountries::e_Corrected );
20972 exm[
"USA: Washington, Arkansas"] =
"USA: Arkansas, Washington";
20974 exm[
"USA: Arkansas, Washington"] =
"USA: Arkansas, Washington";
20975 exm[
"USA: Puerto Rico, Florida"] =
"USA: Puerto Rico, Florida";
20976 exm[
"USA: Florida, Puerto Rico"] =
"USA: Puerto Rico, Florida";
20977 exm[
"USA: Los Angeles"] =
"USA: California, Los Angeles";
20978 exm[
"USA:Hayward"] =
"USA: California, Hayward";
21009 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"Egypt: Red Sea, Ras Mohamed, Sinai"),
"Egypt: Red Sea, Ras Mohamed, Sinai");
21012 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"\"United Kingdom: Scotland, Edinburgh\""),
"United Kingdom: Scotland, Edinburgh");
21018 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"Australia: south-western australia"),
"Australia: south-western australia");
21028 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"France: North East France Nievre-Morvan Breuil Chenue forest"),
"France: North East France Nievre-Morvan Breuil Chenue forest");
21030 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"Greenland: Saqqaq Culture site Qeqertasussuk, north-western Greenland"),
"Greenland: Saqqaq Culture site Qeqertasussuk, north-western Greenland");
21042 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"Mexico. Loreto Bay, Gulf of California."),
"Mexico: Loreto Bay, Gulf of California");
21052 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"Roosendaal, De Moeren, the Netherlands"),
"Netherlands: Roosendaal, De Moeren");
21057 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"USA: Boqueron National Wildlife Refuge, Puerto Rico"),
"USA: Boqueron National Wildlife Refuge, Puerto Rico");
21058 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"USA: hypersaline sediment collected at Bitter Lake, New Mexico"),
"USA: hypersaline sediment collected at Bitter Lake, New Mexico");
21062 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"Wissenkerke, Keihoogteweg, the Netherlands"),
"Netherlands: Wissenkerke, Keihoogteweg");
21068 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"UK: Whiteford Burrows, Gower, Wales"),
"United Kingdom: Whiteford Burrows, Gower, Wales");
21069 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"Whiteford Burrows, Gower, Wales"),
"United Kingdom: Wales, Whiteford Burrows, Gower");
21074 BOOST_CHECK_EQUAL(
CCountries::NewFixCountry(
"UK: Whiteford Burrows: Gower: Wales"),
"United Kingdom: Whiteford Burrows, Gower, Wales");
21101 string val =
"USNM<USA>:12345";
21103 BOOST_CHECK_EQUAL(
val,
"USNM<USA>:12345");
21106 val =
"ABS<CHN>:12345";
21108 BOOST_CHECK_EQUAL(
val,
"ABS<CHN>:12345");
21112 val =
"AMNH 12345";
21114 BOOST_CHECK_EQUAL(
val,
"AMNH:12345");
21119 BOOST_CHECK_EQUAL(
val,
"ABB:666");
21122 val =
"CNWGRGL123";
21124 BOOST_CHECK_EQUAL(
val,
"CNWGRGL:123");
21129 BOOST_CHECK_EQUAL(
val,
"A12345");
21134 val =
"M.Riewe 182 (CAS)";
21136 BOOST_CHECK_EQUAL(
val,
"CAS:M.Riewe 182");
21139 val =
"L.R. Xu 0081 (WUG)";
21141 BOOST_CHECK_EQUAL(
val,
"L.R. Xu 0081 (WUG)");
21150 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNNAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCCAANNNNNNNNNN");
21161 bool begin_ambig =
false, end_ambig =
false;
21169 BOOST_CHECK_EQUAL(begin_ambig,
true);
21170 BOOST_CHECK_EQUAL(end_ambig,
true);
21173 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCCAANNNNNNNNN");
21183 BOOST_CHECK_EQUAL(begin_ambig,
true);
21184 BOOST_CHECK_EQUAL(end_ambig,
true);
21187 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCCAA");
21197 BOOST_CHECK_EQUAL(begin_ambig,
false);
21198 BOOST_CHECK_EQUAL(end_ambig,
false);
21201 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ANANTNNNCAAAATTGGCCAAAATTGGCCAAAANTNNCNCNA");
21211 BOOST_CHECK_EQUAL(begin_ambig,
true);
21212 BOOST_CHECK_EQUAL(end_ambig,
true);
21215 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"GTGTGANANTNNNCNNNNNTGGCCAAAATTGGCCAAAANTNNCNCNAGTGTG");
21225 BOOST_CHECK_EQUAL(begin_ambig,
true);
21226 BOOST_CHECK_EQUAL(end_ambig,
true);
21239 eval = validator.Validate(seh, options);
21263 nuc->SetSeq().SetId().push_front(gi_id);
21265 nuc->SetSeq().SetId().push_front(accv_id);
21270 "Inconsistent create_date [Jun 12, 1998] and update_date [Jun 11, 1998]"));
21272 "Inconsistent create_date [Jun 12, 1998] and update_date [Jun 11, 1998]"));
21275 eval = validator.Validate(seh, options);
21286 edit::CGenomeAssemblyComment gac1;
21287 gac1.SetAssemblyMethodProgram(
"a");
21288 gac1.SetAssemblyMethodVersion(
"1");
21289 gac1.SetGenomeCoverage(
"3x");
21290 gac1.SetSequencingTechnology(
"foo");
21293 sd1->
SetUser(*(gac1.MakeUserObject()));
21297 sd2->
SetUser(*(gac1.MakeUserObject()));
21303 "Multiple structured comments with prefix ##Genome-Assembly-Data-START##"));
21306 eval = validator.Validate(seh, options);
21326 "Coding region on TSA transcribed RNA should not be on the minus strand"));
21329 eval = validator.Validate(seh, options);
21335 "Coding region on TSA transcribed RNA should not be on the minus strand"));
21336 eval = validator.GetTSACDSOnMinusStrandErrors(seh);
21351 cds->SetComment(
"ambiguity in stop codon");
21354 "Feature comment indicates ambiguity in stop codon but no ambiguities are present in stop codon."));
21357 eval = validator.Validate(seh, options);
21363 scope.RemoveTopLevelSeqEntry(seh);
21364 seh = scope.AddTopLevelSeqEntry(*entry);
21366 eval = validator.Validate(seh, options);
21369 scope.RemoveTopLevelSeqEntry(seh);
21370 nentry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATAAACTNAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
21371 seh = scope.AddTopLevelSeqEntry(*entry);
21378 eval = validator.Validate(seh, options);
21408 misc3->
SetLocation().SetPacked_int().Set().push_back(int1);
21409 misc3->
SetLocation().SetPacked_int().Set().push_back(int2);
21414 "Should not specify 'space to left' for both ends of interval"));
21416 "Should not specify 'space to right' for both ends of interval"));
21418 "Should not specify 'space to left' for both ends of interval"));
21420 "Should not specify 'space to right' for both ends of interval"));
21422 "Should not specify 'space to left' at first position of non-circular sequence"));
21424 "Should not specify 'space to left' at first position of non-circular sequence"));
21427 eval = validator.Validate(seh, options);
21474 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNNNNNNNNNNNNNNNNNNNNNN");
21480 "Sequence is all Ns"));
21483 eval = validator.Validate(seh, options);
21495 ss->
SetName(
"Maryland, USA");
21497 BOOST_CHECK_EQUAL(ss->
GetName(),
"USA: Maryland");
21502 BOOST_CHECK_EQUAL(ss->
GetName(),
"14-Jan-1997");
21505 ss->
SetName(
"Lattitude: 25.790544; longitude: -80.214930");
21507 BOOST_CHECK_EQUAL(ss->
GetName(),
"25.790544 N 80.214930 W");
21512 BOOST_CHECK_EQUAL(ss->
GetName(),
"male, female, and neuter");
21517 BOOST_CHECK_EQUAL(ss->
GetName(),
"37 m");
21526 om->SetSubname(
"ATCC1234");
21528 BOOST_CHECK_EQUAL(
om->GetSubname(),
"ATCC 1234");
21529 om->SetSubname(
"DSM 567");
21531 BOOST_CHECK_EQUAL(
om->GetSubname(),
"DSM 567");
21534 om->SetSubname(
"human");
21536 BOOST_CHECK_EQUAL(
om->GetSubname(),
"Homo sapiens");
21544 ss->
SetName(
"a; [mixed bacterial source]; b");
21546 BOOST_CHECK_EQUAL(ss->
GetName(),
"a; b");
21547 ss->
SetName(
"[uncultured (using species-specific primers) bacterial source]");
21549 BOOST_CHECK_EQUAL(ss->
GetName(),
"amplified with species-specific primers");
21550 ss->
SetName(
"[BankIt_uncultured16S_wizard]; [universal primers]; [tgge]");
21552 BOOST_CHECK_EQUAL(ss->
IsSetName(),
false);
21553 ss->
SetName(
"[BankIt_uncultured16S_wizard]; [species_specific primers]; [dgge]");
21555 BOOST_CHECK_EQUAL(ss->
GetName(),
"amplified with species-specific primers");
21558 ss->
SetName(
"a; [mixed bacterial source]; b");
21561 BOOST_CHECK_EQUAL(ss->
GetName(),
"a; b");
21562 ss->
SetName(
"[BankIt_uncultured16S_wizard]; [universal primers]; [tgge]");
21576 "Non-viral source feature should not have a segment qualifier"));
21578 "Non-viral source feature should not have a segment qualifier"));
21582 "MultipleSourceQualifiers",
21583 "Multiple segment qualifiers present"));
21590 eval = validator.Validate(seh, options);
21598 "MultipleSourceQualifiers",
21599 "Multiple segment qualifiers present"));
21604 expected_errors[0]->SetErrMsg(
"Multiple collected_by qualifiers present");
21606 eval = validator.Validate(seh, options);
21613 expected_errors[0]->SetErrMsg(
"Multiple identified_by qualifiers present");
21615 eval = validator.Validate(seh, options);
21622 expected_errors[0]->SetErrMsg(
"Multiple collection_date qualifiers present");
21624 eval = validator.Validate(seh, options);
21638 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MP*K*E*N");
21639 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"GTGCCCTAAAAATAAGAGTAAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
21648 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
21649 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"ExceptionProblem",
"unclassified translation discrepancy is not a legal exception explanation"));
21650 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"InternalStop",
"3 internal stops (and illegal start codon). Genetic code [0]"));
21652 "CDS has unnecessary translated product replaced exception"));
21655 eval = validator.Validate(seh, options);
21667 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
21668 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"ExceptionProblem",
"unclassified translation discrepancy is not a legal exception explanation"));
21670 "CDS has unnecessary translated product replaced exception"));
21673 eval = validator.Validate(seh, options);
21682 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
21684 "CDS has unnecessary translated product replaced exception"));
21687 eval = validator.Validate(seh, options);
21697 "CDS has unnecessary translated product replaced exception"));
21700 eval = validator.Validate(seh, options);
21709 bsrc->
SetOrg().SetTaxname(
"Influenza A virus");
21710 bsrc->
SetOrg().SetOrgname().SetLineage(
"Viruses; ssRNA negative-strand viruses; Orthomyxoviridae; Influenzavirus A");
21716 bsrc->
SetOrg().SetOrgname().SetMod().push_back(mod_a);
21717 bsrc->
SetOrg().SetOrgname().SetMod().push_back(mod_b);
21720 BOOST_CHECK_EQUAL(removed,
false);
21724 BOOST_CHECK_EQUAL(removed,
true);
21727 if ((*orgmod)->IsSetSubtype()) {
21734 BOOST_CHECK_EQUAL(removed,
false);
21746 BOOST_CHECK_EQUAL(
orig,
"experiment");
21753 BOOST_CHECK_EQUAL(category,
"");
21754 BOOST_CHECK_EQUAL(experiment,
"experiment");
21755 BOOST_CHECK_EQUAL(doi,
"");
21758 BOOST_CHECK_EQUAL(
orig,
"experiment2[DOI]");
21760 BOOST_CHECK_EQUAL(category,
"");
21761 BOOST_CHECK_EQUAL(experiment,
"experiment2");
21762 BOOST_CHECK_EQUAL(doi,
"DOI");
21765 BOOST_CHECK_EQUAL(
orig,
"COORDINATES:experiment3");
21767 BOOST_CHECK_EQUAL(category,
"COORDINATES");
21768 BOOST_CHECK_EQUAL(experiment,
"experiment3");
21769 BOOST_CHECK_EQUAL(doi,
"");
21772 BOOST_CHECK_EQUAL(
orig,
"EXISTENCE:experiment4[DOI2]");
21774 BOOST_CHECK_EQUAL(category,
"EXISTENCE");
21775 BOOST_CHECK_EQUAL(experiment,
"experiment4");
21776 BOOST_CHECK_EQUAL(doi,
"DOI2");
21784 BOOST_CHECK_EQUAL(msg,
"The International Cell Line Authentication Committee database indicates that 222 from Homo sapiens is known to be contaminated by PA1 from Human. Please see http://iclac.org/databases/cross-contaminations/ for more information and references.");
21787 BOOST_CHECK_EQUAL(msg,
"");
21790 BOOST_CHECK_EQUAL(msg,
"");
21803 "SuspectedContaminatedCellLine",
21804 "The International Cell Line Authentication Committee database indicates that GPS-M from Cavia porcellus is known to be contaminated by Strain L-M from Mouse. Please see http://iclac.org/databases/cross-contaminations/ for more information and references."));
21807 eval = validator.Validate(seh, options);
21829 "No publications anywhere on this entire record."));
21832 "MissingPubRequirement",
21833 "No submission citation anywhere on this entire record."));
21836 eval = validator.Validate(seh, options);
21840 eval = validator.Validate(seh, options);
21847 eval = validator.Validate(seh, options);
21858 repeat_region->
SetData().SetImp().SetKey(
"repeat_region");
21867 "MiscFeatureNeedsNote",
21868 "A note or other qualifier is required for a misc_feature"));
21871 "RepeatRegionNeedsNote",
21872 "repeat_region has no qualifiers"));
21875 eval = validator.Validate(seh, options);
21881 eval = validator.Validate(seh, options);
21885 scope.RemoveTopLevelSeqEntry(seh);
21890 seh = scope.AddTopLevelSeqEntry(*entry);
21894 eval = validator.Validate(seh, options);
21954 reply->SetReply().push_back(t3reply);
21965 for (
size_t i = 0;
i < 50;
i++) {
21967 replies.push_back(reply);
21972 eval = validator.Validate(seh, options);
21976 "TaxonomyServiceProblem",
21977 "Taxonomy service connection failure"));
21989 string id_str =
"ABCD123456789";
21991 id->SetGenbank().SetAccession(id_str);
21997 expected_errors.push_back(
new CExpectedError(
"gb|"+id_str+
"|",
eDiag_Error,
"InconsistentMolInfoTechnique",
"WGS accession should have Mol-info.tech of wgs"));
21999 eval = validator.Validate(seh, options);
22006 eval = validator.Validate(seh, options);
22017 defline->
SetTitle(
"This title contains RefSeq");
22019 nuc->SetSeq().SetDescr().Set().push_back(defline);
22023 expected_errors.push_back(
new CExpectedError(
"lcl|prot",
eDiag_Error,
"RefSeqInText",
"Protein name contains 'RefSeq'"));
22024 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"RefSeqInText",
"Definition line contains 'RefSeq'"));
22026 eval = validator.Validate(seh, options);
// Register a single expected OrgModValueInvalid error on lcl|good for the strain value yes.
22040 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"OrgModValueInvalid",
"Orgmod.strain should not be 'yes'"));
22042 eval = validator.Validate(seh, options);
// The same expected-error object (index 0) is reused for the following runs: only its
// message text is rewritten before each re-validation (NO, -, microbial).
// NOTE(review): the statements that change the strain value between runs are not visible here.
22047 expected_errors[0]->SetErrMsg(
"Orgmod.strain should not be 'NO'");
22048 eval = validator.Validate(seh, options);
22053 expected_errors[0]->SetErrMsg(
"Orgmod.strain should not be '-'");
22054 eval = validator.Validate(seh, options);
22059 expected_errors[0]->SetErrMsg(
"Orgmod.strain should not be 'microbial'");
22060 eval = validator.Validate(seh, options);
22071 gene->
SetData().SetGene().SetLocus(
"badguy");
22073 loc1->
SetInt().SetFrom(0);
22074 loc1->
SetInt().SetTo(10);
22075 loc1->
SetInt().SetId().SetLocal().SetStr(
"good1");
22077 loc2->
SetInt().SetFrom(0);
22078 loc2->
SetInt().SetTo(10);
22079 loc2->
SetInt().SetId().SetLocal().SetStr(
"good2");
22081 loc3->
SetInt().SetFrom(0);
22082 loc3->
SetInt().SetTo(10);
22083 loc3->
SetInt().SetId().SetLocal().SetStr(
"good3");
22085 gene->
SetLocation().SetMix().Set().push_back(loc1);
22086 gene->
SetLocation().SetMix().Set().push_back(loc2);
22087 gene->
SetLocation().SetMix().Set().push_back(loc3);
22094 "Feature location intervals should all be on the same sequence"));
22096 eval = validator.Validate(seh, options);
22109 eval = validator.Validate(seh, options);
22123 mrna->
SetData().SetRna().SetExt().SetName(
prot->GetData().GetProt().GetName().front());
22130 eval = validator.Validate(seh, options);
22133 scope.RemoveTopLevelSeqEntry(seh);
22135 seh = scope.AddTopLevelSeqEntry(*entry);
22138 "gene should not be 5' complete if coding region is 5' partial"));
22140 "mRNA should not be 5' complete if coding region is 5' partial"));
22141 eval = validator.Validate(seh, options);
22145 scope.RemoveTopLevelSeqEntry(seh);
22147 seh = scope.AddTopLevelSeqEntry(*entry);
22150 "gene should not be 3' complete if coding region is 3' partial"));
22152 "mRNA should not be 3' complete if coding region is 3' partial"));
22154 "3' partial is not at end of sequence, gap, or consensus splice site"));
22156 "Got stop codon, but 3'end is labeled partial"));
22159 eval = validator.Validate(seh, options);
22164 scope.RemoveTopLevelSeqEntry(seh);
22166 seh = scope.AddTopLevelSeqEntry(*entry);
22169 "gene should not be 5' complete if coding region is 5' partial"));
22171 "mRNA should not be 5' complete if coding region is 5' partial"));
22173 "gene should not be 3' complete if coding region is 3' partial"));
22175 "mRNA should not be 3' complete if coding region is 3' partial"));
22177 "3' partial is not at end of sequence, gap, or consensus splice site"));
22179 "Got stop codon, but 3'end is labeled partial"));
22181 eval = validator.Validate(seh, options);
// Host string used by this test case.
22190 string host =
"Atlantic white-sided dolphin";
// FixSpecificHost is expected to return this host name unchanged:
// the expected value and the argument are the same literal.
22194 BOOST_CHECK_EQUAL(
"Atlantic white-sided dolphin",
FixSpecificHost(
"Atlantic white-sided dolphin"));
22204 eval = validator.Validate(seh, options);
22218 eval = validator.Validate(seh, options);
22220 "Only Pop/Phy/Mut/Eco sets should have titles"));
22232 edit::CGenomeAssemblyComment::SetAssemblyMethod(*user,
"x v. y");
22234 assembly_name->
SetLabel().SetStr(
"Assembly Name");
22235 assembly_name->
SetData().SetStr(
"valid value");
22236 user->
SetData().push_back(assembly_name);
22237 edit::CGenomeAssemblyComment::SetGenomeCoverage(*user,
"2x");
22238 edit::CGenomeAssemblyComment::SetSequencingTechnology(*user,
"z");
22246 eval = validator.Validate(seh, options);
22249 assembly_name->
SetData().SetStr(
"not,valid");
22251 eval = validator.Validate(seh, options);
22253 "BadStrucCommInvalidFieldValue",
22254 "Structured Comment invalid; the field value and/or name are incorrect"));
22256 "BadStrucCommInvalidFieldValue",
22257 "not,valid is not a valid value for Assembly Name"));
22263 assembly_name->
SetData().SetStr(
"Ec2009C-3227");
22265 eval = validator.Validate(seh, options);
22268 assembly_name->
SetData().SetStr(
"Anop_step_SDA-500_V1");
22270 eval = validator.Validate(seh, options);
22281 gene->
SetData().SetGene().SetLocus(
"a");
22284 int1->
SetInt().SetFrom(0);
22285 int1->
SetInt().SetTo(5);
22288 int2->
SetInt().SetFrom(10);
22289 int2->
SetInt().SetTo(15);
22290 gene->
SetLocation().SetMix().Set().push_back(int1);
22291 gene->
SetLocation().SetMix().Set().push_back(int2);
22295 mobile_element->
SetData().SetImp().SetKey(
"mobile_element");
22297 mobile_element->
SetLocation().SetInt().SetFrom(6);
22300 mobile_element->
SetQual().push_back(qual);
22306 eval = validator.Validate(seh, options);
22317 gene->
SetData().SetGene().SetLocus(
"X");
22323 eval = validator.Validate(seh, options);
22325 "BadTranssplicedInterval",
22326 "Trans-spliced feature should have multiple intervals"));
22338 recomb->
SetData().SetImp().SetKey(
"misc_recomb");
22340 recomb->
SetQual().push_back(qual);
22346 eval = validator.Validate(seh, options);
22351 eval = validator.Validate(seh, options);
22353 "RecombinationClassOtherNeedsNote",
22354 "The recombination_class 'other' is missing the required /note"));
22368 qual->
SetVal(
"mitotic");
22370 eval = validator.Validate(seh, options);
22390 entry->
SetDescr().Set().push_back(src_desc);
22396 src_feat->
SetData().SetBiosrc().SetOrg().SetTaxname(
"Influenza virus A");
// Two lists filled inside the loop over test_values: to_adjust receives org,
// original receives cpy.
// NOTE(review): how org and cpy are built is not visible in this span; cpy is presumably
// an untouched copy of org - confirm.
22407 vector<CRef<COrg_ref>> original;
22408 vector<CRef<COrg_ref>> to_adjust;
22410 for (
const auto& it : test_values) {
22417 to_adjust.push_back(org);
22420 original.push_back(cpy);
// The reply must carry exactly one entry per request in org_rq_list.
22429 BOOST_CHECK_EQUAL(reply->GetReply().size(), org_rq_list.size());
// Iterate over to_adjust with a second iterator into original.
// NOTE(review): the iterator increments are not visible; presumably both advance together.
22433 vector<CRef<COrg_ref>>::const_iterator org = to_adjust.begin();
22434 vector<CRef<COrg_ref>>::const_iterator cpy = original.begin();
22435 while (org != to_adjust.cend()) {
// before: subname of the first modifier on the entry from original.
// after:  subname of the first modifier on the entry from to_adjust.
22436 const string& before = (*cpy)->GetOrgname().GetMod().front()->GetSubname();
22437 const string& after = (*org)->GetOrgname().GetMod().front()->GetSubname();
// Search test_values for the pair whose first member equals before ...
22438 THostStringsVector::const_iterator tvit = test_values.cbegin();
22439 while (tvit != test_values.cend() && !
NStr::Equal(tvit->first, before)) {
// ... and require that after equals the second member of that pair.
22443 BOOST_CHECK_EQUAL(after, tvit->second);
// Pair of two organism names, first Zymomonas anaerobia, second Zymomonas mobilis.
// NOTE(review): presumably (input host value, expected value after lookup) - the consumer
// of test_values is not visible in this span; confirm.
22453 test_values.push_back(make_pair(
"Zymomonas anaerobia",
"Zymomonas mobilis"));
// Start over with an empty list, then add a pair whose first member is spelled
// Zymononas (with an n) and whose second member is Zymomonas mobilis.
22456 test_values.clear();
22457 test_values.push_back(make_pair(
"Zymononas mobilis",
"Zymomonas mobilis"));
// Table of host-name pairs. The verification loop at the end of this span looks a pair up
// by its first member (matched against the subname of the entry from original) and compares
// the subname of the entry from to_adjust with the second member.
// Several pairs have identical members; the others differ by capitalization, spelling,
// a leading space, or a different name altogether.
22467 test_values.push_back(make_pair(
"Homo supiens",
"Homo supiens"));
22468 test_values.push_back(make_pair(
"HUMAN",
"Homo sapiens"));
22470 test_values.push_back(make_pair(
"Homo sapiens",
"Homo sapiens"));
22472 test_values.push_back(make_pair(
"Gallus Gallus",
"Gallus gallus"));
22474 test_values.push_back(make_pair(
"Conservemos nuestros",
"Conservemos nuestros"));
22476 test_values.push_back(make_pair(
"Pinus sp.",
"Pinus sp."));
22478 test_values.push_back(make_pair(
"Eschericia coli",
"Escherichia coli"));
22480 test_values.push_back(make_pair(
"Avian",
"Avian"));
22482 test_values.push_back(make_pair(
"Bovine",
"Bovine"));
22484 test_values.push_back(make_pair(
"Pig",
"Pig"));
22486 test_values.push_back(make_pair(
" Chicken",
"Chicken"));
22488 test_values.push_back(make_pair(
"Homo sapiens; sex: female",
"Homo sapiens; sex: female"));
22490 test_values.push_back(make_pair(
"Atlantic white-sided dolphin",
"Atlantic white-sided dolphin"));
22492 test_values.push_back(make_pair(
"Zymomonas anaerobia",
"Zymomonas mobilis"));
// Lists filled in the loop over test_values: org goes to to_adjust, cpy goes to original.
// NOTE(review): construction of org and cpy is not visible in this span.
22495 vector<CRef<COrg_ref>> to_adjust;
22496 vector<CRef<COrg_ref>> original;
22498 for (
const auto& it : test_values) {
22505 to_adjust.push_back(org);
22508 original.push_back(cpy);
22510 string error_message;
// The request list is expected to be six entries shorter than test_values.
// NOTE(review): which six values produce no request is not visible here - confirm.
22517 BOOST_CHECK_EQUAL(org_rq_list.size(), test_values.size() - 6);
// Verification: for each entry, read the first modifier subname from original (before)
// and from to_adjust (after).
// NOTE(review): iterator increments are not visible; presumably both advance together.
22524 vector<CRef<COrg_ref>>::const_iterator org = to_adjust.begin();
22525 vector<CRef<COrg_ref>>::const_iterator cpy = original.begin();
22526 while (org != to_adjust.cend()) {
22527 const string& before = (*cpy)->GetOrgname().GetMod().front()->GetSubname();
22528 const string& after = (*org)->GetOrgname().GetMod().front()->GetSubname();
// Find the table pair whose first member equals before ...
22529 THostStringsVector::const_iterator tvit = test_values.cbegin();
22530 while (tvit != test_values.cend() && !
NStr::Equal(tvit->first, before)) {
// ... and require after to equal the second member of that pair.
22534 BOOST_CHECK_EQUAL(after, tvit->second);
22546 to_adjust.push_back(test_src);
22548 COrgName::TMod::const_iterator m = test_src->
GetOrgname().
GetMod().begin();
22549 BOOST_CHECK_EQUAL((*m)->GetSubname(),
"Conservemos nuestros");
22551 BOOST_CHECK_EQUAL((*m)->GetSubname(),
"Pinus sp.");
22553 BOOST_CHECK_EQUAL((*m)->GetSubname(),
"Escherichia coli");
22557 BOOST_CHECK_EQUAL((*m)->GetSubname(),
"Conservemos nuestros");
22559 BOOST_CHECK_EQUAL((*m)->GetSubname(),
"Pinus sp.");
22561 BOOST_CHECK_EQUAL((*m)->GetSubname(),
"Escherichia coli");
22566 BOOST_CHECK_EQUAL(lookup_reply->GetReply().size(), original_orgs.size());
// Number of descriptors reported by tval, and a counter for descriptors found changed.
22576 size_t num_descs = tval.
NumDescs();
22577 size_t num_updated_descs = 0;
// Compare original and edited orgs at the same index n for every descriptor position.
22578 for (
size_t n = 0;
n < num_descs;
n++) {
22579 if (!original_orgs[
n]->
Equals(*(edited_orgs[
n]))) {
// A difference was found: new_desc is assigned from desc and the counter is bumped.
// NOTE(review): creation of new_desc and lookup of desc are not visible in this span.
22582 new_desc->
Assign(*desc);
22584 num_updated_descs++;
// Every descriptor is expected to have been updated.
22588 BOOST_CHECK_EQUAL(num_updated_descs, num_descs);
// Same counting for features; their orgs are compared at index n + num_descs.
// NOTE(review): the loop header for this part is not visible in this span.
22590 size_t num_updated_feats = 0;
22592 if (!original_orgs[
n + num_descs]->
Equals(*edited_orgs[
n + num_descs])) {
22595 new_feat->
Assign(*feat);
// NOTE(review): the comparison above reads edited_orgs at n + num_descs, but this
// assignment reads edited_orgs at n - confirm which index is intended.
22596 new_feat->
SetData().SetBiosrc().SetOrg().Assign(*(edited_orgs[
n]));
22597 num_updated_feats++;
// Exactly five features are expected to have been updated.
22602 BOOST_CHECK_EQUAL(num_updated_feats, (
size_t)5);
22610 org->
SetTaxname(
"Dickeya dadantii subsp. dieffenbachiae");
22612 dbtag->
SetDb(
"taxon");
22613 dbtag->
SetTag().SetId(204040);
22614 org->
SetDb().push_back(dbtag);
22615 org->
SetOrgname().SetName().SetBinomial().SetGenus(
"Dickeya");
22616 org->
SetOrgname().SetName().SetBinomial().SetSpecies(
"dadantii");
22617 org->
SetOrgname().SetName().SetBinomial().SetSubspecies(
"dieffenbachiae");
22621 org->
SetOrgname().SetLineage(
"Bacteria; Proteobacteria; Gammaproteobacteria");
22625 vector<CRef<COrg_ref>> org_rq;
22626 org_rq.push_back(org);
22628 vector<CRef<COrg_ref>> edited_orgs;
22631 edited_orgs.push_back(cpy);
22638 string error_message;
22640 BOOST_CHECK_EQUAL(cpy->
GetTaxname(),
"Dickeya fangzhongdai");
22644 dbtag->
SetTag().SetId(109058);
22645 org->
SetDb().push_back(dbtag);
22646 org->
SetOrgname().SetName().SetBinomial().SetGenus(
"Alnus");
22647 org->
SetOrgname().SetName().SetBinomial().SetSpecies(
"cordata");
22651 org->
SetOrgname().SetLineage(
"Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; Pentapetalae; rosids; fabids; Fagales; Betulaceae; Alnus");
22662 BOOST_CHECK_EQUAL(cpy->
GetTaxname(),
"Alnus cordata subsp. Alnus cordata AZ12-2 chloroplast, complete genome");
// Table of host-name pairs. The final check in this span compares the first modifier
// subname of each entry in to_adjust against the second member of a pair.
22672 test_values.push_back(make_pair(
"Homo supiens",
"Homo supiens"));
22673 test_values.push_back(make_pair(
"HUMAN",
"Homo sapiens"));
22674 test_values.push_back(make_pair(
"Homo sapiens",
"Homo sapiens"));
22675 test_values.push_back(make_pair(
"Pinus sp.",
"Pinus sp."));
22676 test_values.push_back(make_pair(
"Gallus Gallus",
"Gallus gallus"));
22677 test_values.push_back(make_pair(
"Eschericia coli",
"Escherichia coli"));
22678 test_values.push_back(make_pair(
"Avian",
"Avian"));
22679 test_values.push_back(make_pair(
"Bovine",
"Bovine"));
22680 test_values.push_back(make_pair(
"Pig",
"Pig"));
22681 test_values.push_back(make_pair(
" Chicken",
"Chicken"));
22682 test_values.push_back(make_pair(
"Homo sapiens; sex: female",
"Homo sapiens; sex: female"));
22683 test_values.push_back(make_pair(
"Atlantic white-sided dolphin",
"Atlantic white-sided dolphin"));
// One org is pushed to to_adjust inside the loop over test_values.
// NOTE(review): construction of org is not visible in this span.
22685 vector<CRef<COrg_ref>> to_adjust;
22687 for (
const auto& it : test_values) {
22693 to_adjust.push_back(org);
22695 string error_message;
// The request list is expected to be six entries shorter than test_values.
// NOTE(review): which six values produce no request is not visible here - confirm.
22702 BOOST_CHECK_EQUAL(spec_host_rq.size(), test_values.size() - 6);
// The requests are consumed in slices: tmp_rq copies the range [i, i + len) of spec_host_rq.
// NOTE(review): how i and len are initialized and advanced is not visible in this span.
22708 while (
i < spec_host_rq.size()) {
22710 vector<CRef<COrg_ref>> tmp_rq(spec_host_rq.begin() +
i, spec_host_rq.begin() +
i +
len);
// Final check: the first modifier subname of each org must equal the second member of
// the table pair referenced by tvit.
// NOTE(review): iterator increments are not visible; presumably org and tvit advance together.
22720 vector<CRef<COrg_ref>>::iterator org = to_adjust.begin();
22721 THostStringsVector::iterator tvit = test_values.begin();
22722 while (org != to_adjust.end()) {
22723 BOOST_CHECK_EQUAL((*org)->GetOrgname().GetMod().front()->GetSubname(), tvit->second);
22741 entry.
SetDescr().Set().push_back(src_desc);
// Helper that validates one strain value and checks the reported errors.
// Parameters: taxname, strain and lineage are strings, taxID is a TTaxId, and expect_err
// is a flag.
// NOTE(review): most of the body is not visible here. Presumably the parameters populate a
// BioSource and expect_err selects whether the strain message below is expected - confirm.
22745 void TestOneStrain(
const string& taxname,
const string& strain,
const string& lineage,
TTaxId taxID,
bool expect_err)
// Tail of an expected-error registration for a missing taxon ID.
// NOTE(review): the condition under which it is registered is not visible.
22756 "BioSource is missing taxon ID"));
// Tail of an expected-error registration whose message embeds the strain argument.
22760 "Strain '" + strain +
"' contains taxonomic name information"));
22763 eval = validator.Validate(seh, options);
22780 string error_message;
22786 BOOST_CHECK_EQUAL(strain_rq.size(), (
size_t)9);
22792 while (
i < strain_rq.size()) {
22794 vector<CRef<COrg_ref>> tmp_rq(strain_rq.begin() +
i, strain_rq.begin() +
i +
len);
22829 codebreak->
SetLoc().SetInt().SetId().SetLocal().SetStr(
"nuc");
22830 codebreak->
SetLoc().SetInt().SetFrom(24);
22831 codebreak->
SetLoc().SetInt().SetTo(26);
22833 cds->
SetData().SetCdregion().SetCode_break().push_back(codebreak);
22838 "Translation exception locations should not be partial"));
22840 eval = validator.Validate(seh, options);
22850 exon->
SetData().SetImp().SetKey(
"exon");
22856 "Number qualifiers should not contain spaces"));
22858 eval = validator.Validate(seh, options);
22875 "Should not specify 'space to left' at first position of non-circular sequence"));
22877 eval = validator.Validate(seh, options);
22882 scope.RemoveTopLevelSeqEntry(seh);
22884 seh = scope.AddTopLevelSeqEntry(*entry);
22887 eval = validator.Validate(seh, options);
22892 scope.RemoveTopLevelSeqEntry(seh);
22895 seh = scope.AddTopLevelSeqEntry(*entry);
22897 "Should not specify 'space to right' at last position of non-circular sequence"));
22899 eval = validator.Validate(seh, options);
22904 scope.RemoveTopLevelSeqEntry(seh);
22906 seh = scope.AddTopLevelSeqEntry(*entry);
22907 eval = validator.Validate(seh, options);
22908 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"CompleteCircleProblem",
"Circular topology without complete flag set"));
22913 scope.RemoveTopLevelSeqEntry(seh);
22917 seh = scope.AddTopLevelSeqEntry(*entry);
22918 eval = validator.Validate(seh, options);
22922 scope.RemoveTopLevelSeqEntry(seh);
22924 seh = scope.AddTopLevelSeqEntry(*entry);
22925 eval = validator.Validate(seh, options);
22927 "Should not specify 'space to left' at first position of non-circular sequence"));
22933 scope.RemoveTopLevelSeqEntry(seh);
22936 seh = scope.AddTopLevelSeqEntry(*entry);
22937 eval = validator.Validate(seh, options);
22939 "Should not specify 'space to right' at last position of non-circular sequence"));
22950 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
22951 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
22952 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(11);
22958 "FarLocationExcludesFeatures",
22959 "Scaffold points to some but not all of gb|AY123456|, excluded portion contains features"));
22961 eval = validator.Validate(seh, options);
22966 scope.RemoveTopLevelSeqEntry(seh);
22967 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_00000001");
22971 f->SetLabel().SetStr(
"BioSample");
22972 f->SetData().SetStr(
"SAME0001");
22975 f2->
SetLabel().SetStr(
"BioProject");
22976 f2->
SetData().SetStrs().push_back(
"PRJNA12345");
22980 seh = scope.AddTopLevelSeqEntry(*entry);
22982 eval = validator.Validate(seh, options);
22999 "BadPlasmidChromosomeLinkageName",
23000 "Problematic plasmid/chromosome/linkage group name '_abc'"));
23002 "BadPlasmidChromosomeLinkageName",
23003 "Problematic plasmid/chromosome/linkage group name '*123'"));
23004 eval = validator.Validate(seh, options);
// Host string used by this test case.
23012 string host =
"Rhesus monkey";
// FixSpecificHost is expected to return this host name unchanged:
// the expected value and the argument are the same literal.
23015 BOOST_CHECK_EQUAL(
"Rhesus monkey",
FixSpecificHost(
"Rhesus monkey"));
23025 eval = validator.Validate(seh, options);
23038 if (it->IsSource()) {
23039 src.
Reset(&(it->SetSource()));
23048 eval = validator.Validate(seh, options);
23053 "BioSourceInconsistency",
23054 "Taxname does not match orgname ('Sebaea microphylla', 'x microphylla')"));
23055 eval = validator.Validate(seh, options);
23059 expected_errors[0]->SetErrMsg(
"Taxname does not match orgname ('Sebaea microphylla', 'x y')");
23060 eval = validator.Validate(seh, options);
23064 expected_errors[0]->SetErrMsg(
"Taxname does not match orgname ('Sebaea microphylla', 'x y subsp. z')");
23065 eval = validator.Validate(seh, options);
23070 expected_errors[0]->SetErrMsg(
"Taxname does not match orgname ('Sebaea microphylla', 'x')");
23071 eval = validator.Validate(seh, options);
23075 orgname.
SetVirus(
"Sebaea microphylla");
23076 eval = validator.Validate(seh, options);
23081 org1->SetName().SetBinomial().SetSpecies(
"z");
23082 org1->SetName().SetBinomial().SetGenus(
"x");
23084 org2->SetName().SetBinomial().SetGenus(
"y");
23085 org2->SetName().SetBinomial().SetSpecies(
"z");
23089 "BioSourceInconsistency",
23090 "Taxname does not match orgname ('Sebaea microphylla', 'x z')"));
23091 eval = validator.Validate(seh, options);
23094 org2->SetName().SetBinomial().SetGenus(
"Sebaea");
23095 org2->SetName().SetBinomial().SetSpecies(
"microphylla");
23097 eval = validator.Validate(seh, options);
23104 "BioSourceInconsistency",
23105 "Taxname does not match orgname ('Sebaea microphylla', 'Sebaea x microphylla')"));
23107 eval = validator.Validate(seh, options);
23113 "BioSourceInconsistency",
23114 "Taxname does not match orgname ('Sebaea microphylla', 'x x microphylla')"));
23115 eval = validator.Validate(seh, options);
23121 elem1->SetName(
"x");
23123 expected_errors[0]->SetErrMsg(
"Taxname does not match orgname ('Sebaea microphylla', 'x')");
23124 eval = validator.Validate(seh, options);
23129 elem2->SetName(
"Sebaea microphylla");
23132 eval = validator.Validate(seh, options);
23141 entry->
SetSeq().
SetId().front()->SetGeneral().SetDb(
"NCBIFILE");
23142 entry->
SetSeq().
SetId().front()->SetGeneral().SetTag().SetStr(
"x");
23148 "The only ids on this Bioseq will be stripped during ID load"));
23150 eval = validator.Validate(seh, options);
23154 scope.RemoveTopLevelSeqEntry(seh);
23158 seh = scope.AddTopLevelSeqEntry(*entry);
23160 eval = validator.Validate(seh, options);
23165 scope.RemoveTopLevelSeqEntry(seh);
23171 misc->
SetLocation().SetInt().SetId().Assign(*bankit);
23172 seh = scope.AddTopLevelSeqEntry(*entry);
23176 "Feature locations should not use Seq-ids that will be stripped during ID load"));
23179 eval = validator.Validate(seh, options);
23195 eval = validator.Validate(seh, options);
23199 scope.RemoveTopLevelSeqEntry(seh);
23200 f->SetData().SetImp().SetKey(
"exon");
23202 seh = scope.AddTopLevelSeqEntry(*entry);
23205 "exon may not be on both (forward) strands"));
23207 eval = validator.Validate(seh, options);
23221 if (expect_error) {
23223 "Problematic plasmid/chromosome/linkage group name '" + plasmid_name +
"'"));
23226 eval = validator.Validate(seh, options);
23251 BOOST_CHECK_EQUAL(
IsLikelyTaxname(
"Atlantic white-sided dolphin"),
false);
// Replace the nucleotide sequence data. In this literal, zero-based positions 9-11 and
// 21-23 both read TAA, which are the positions checked on nonsense further down.
23261 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGATAAACAGAGATATAATAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
// The coding region location is a mix of four intervals.
// NOTE(review): the interval coordinates are set on lines not visible in this span.
23268 cds->
SetLocation().SetMix().Set().push_back(int1);
23269 cds->
SetLocation().SetMix().Set().push_back(int2);
23270 cds->
SetLocation().SetMix().Set().push_back(int3);
23271 cds->
SetLocation().SetMix().Set().push_back(int4);
// nonsense is expected to hold exactly two locations: 9-11 first and 21-23 last.
// NOTE(review): the call that fills nonsense is not visible in this span.
23276 BOOST_CHECK_EQUAL(nonsense.size(), (
size_t)2);
23277 BOOST_CHECK_EQUAL(nonsense.front()->GetInt().GetFrom(), (
size_t)9);
23278 BOOST_CHECK_EQUAL(nonsense.front()->GetInt().GetTo(), (
size_t)11);
23279 BOOST_CHECK_EQUAL(nonsense.back()->GetInt().GetFrom(), (
size_t)21);
23280 BOOST_CHECK_EQUAL(nonsense.back()->GetInt().GetTo(), (
size_t)23);
// Tails of two expected-error registrations carrying the same message, matching the
// two locations checked above.
23283 "Triplet intron encodes stop codon"));
23285 "Triplet intron encodes stop codon"));
// Further errors expected on lcl|nuc: a short internal exon, internal stops, a protein
// length mismatch, and missing splice donor / acceptor consensus at the listed positions.
23286 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"ShortExon",
"Internal coding region exon is too short at position 13-21"));
23287 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"InternalStop",
"2 internal stops. Genetic code [0]"));
23289 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"TransLen",
"Given protein length [8] does not match translation length [17]"));
23290 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusDonor",
"Splice donor consensus (GT) not found after exon ending at position 9 of lcl|nuc"));
23291 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusDonor",
"Splice donor consensus (GT) not found after exon ending at position 21 of lcl|nuc"));
23292 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusDonor",
"Splice donor consensus (GT) not found after exon ending at position 45 of lcl|nuc"));
23293 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusAcceptor",
"Splice acceptor consensus (AG) not found before exon starting at position 13 of lcl|nuc"));
23294 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"NotSpliceConsensusAcceptor",
"Splice acceptor consensus (AG) not found before exon starting at position 25 of lcl|nuc"));
// Run the validator on the entry handle; the result is kept in eval.
23296 eval = validator.Validate(seh, options);
// Build a delta segment that is a gap literal of length 10 and append it to the
// delta extension of pseq.
23309 gap_seg->SetLiteral().SetSeq_data().SetGap();
23310 gap_seg->SetLiteral().SetLength(10);
23311 pseq->
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
// Overall length of pseq is set to 18.
// NOTE(review): the other delta segments that make up the remaining length are not visible here.
23313 pseq->
SetInst().SetLength(18);
// Attach the id and the descriptor prepared on lines not visible in this span.
23317 pseq->
SetId().push_back(pid);
23322 pseq->
SetDescr().Set().push_back(mpdesc);
// Protein feature with a placeholder name, located on the local id prot.
23331 feat->
SetData().SetProt().SetName().push_back(
"fake protein name");
23332 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"prot");
// The validator is expected to report the gap inside the protein sequence on lcl|prot.
23339 expected_errors.push_back(
new CExpectedError(
"lcl|prot",
eDiag_Error,
"ProteinShouldNotHaveGaps",
"Protein sequences should not have gaps"));
23341 eval = validator.Validate(seh, options);
23351 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
id);
23355 "Bad character '" + badchar +
"' in local ID '" +
id +
"'"));
23357 eval = validator.Validate(seh, options);
23378 if (curr_day < 28) {
23382 if (curr_day > 1) {
23390 if (curr_month < 11) {
23394 if (curr_month != 0) {
23414 add_date.
Assign(new_date);
23423 add_date.
Assign(new_date);
23432 add_date.
Assign(new_date);
23444 if (it->GetPub().GetPub().Get().front()->IsSub()) {
23445 subpub = it->SetPub().SetPub().Set().front();
23452 time_t time_now = time(
NULL);
23453 CDate today(time_now);
23454 CDate future(time_now);
23459 "Submission citation date is in the future"));
23461 eval = validator.Validate(seh, options);
23467 eval = validator.Validate(seh, options);
23473 eval = validator.Validate(seh, options);
23479 eval = validator.Validate(seh, options);
23492 cds->
SetData().SetCdregion();
23502 gene->
SetData().SetGene().SetLocus(
"x");
23509 eval = validator.Validate(seh, options);
23515 eval = validator.Validate(seh, options);
23520 eval = validator.Validate(seh, options);
23525 eval = validator.Validate(seh, options);
23531 "InconsistentPseudogeneValue",
23532 "CDS has pseudogene qualifier, gene does not"));
23533 eval = validator.Validate(seh, options);
23539 "InconsistentPseudogeneValue",
23540 "mRNA has pseudogene qualifier, gene does not"));
23541 eval = validator.Validate(seh, options);
23548 mrna->
SetQual().front()->SetVal(
"processed");
23550 "InconsistentPseudogeneValue",
23551 "Different pseudogene values on CDS (unitary) and gene (allelic)"));
23553 "InconsistentPseudogeneValue",
23554 "Different pseudogene values on mRNA (processed) and gene (allelic)"));
23557 eval = validator.Validate(seh, options);
23569 gene->
SetData().SetGene().SetLocus(
"x");
23576 "InvalidPseudoQualifier",
23577 "/pseudogene value should not be empty"));
23579 "InvalidPunctuation",
23580 "Qualifier other than replace has just quotation marks"));
23582 eval = validator.Validate(seh, options);
23587 gene->
SetQual().front()->SetVal(
"abc");
23589 "InvalidPseudoQualifier",
23590 "/pseudogene value should not be 'abc'"));
23592 eval = validator.Validate(seh, options);
23604 rpt->
SetData().SetImp().SetKey(
"repeat_region");
23610 "InvalidRptUnitRange",
23611 "/rpt_unit_range is not a base range"));
23613 eval = validator.Validate(seh, options);
23616 rpt->
SetQual().front()->SetVal(
"a..b");
23617 eval = validator.Validate(seh, options);
23622 rpt->
SetQual().front()->SetVal(
"1..5");
23624 eval = validator.Validate(seh, options);
23636 rpt->
SetData().SetImp().SetKey(
"repeat_region");
23642 "InvalidRptUnitSeqCharacters",
23643 "/rpt_unit_seq has illegal characters"));
23645 eval = validator.Validate(seh, options);
23650 rpt->
SetQual().front()->SetVal(
"(atgc)");
23652 eval = validator.Validate(seh, options);
23665 rna->SetData().SetRna().SetExt().SetName(
"16S ribosomal RNA");
23669 gene1->
SetData().SetGene().SetAllele(
"y");
23674 "MismatchedAllele",
23675 "Mismatched allele qualifier on gene (y) and feature (x)"));
23677 eval = validator.Validate(seh, options);
23690 rna->SetData().SetRna().SetExt().SetName(
"16S ribosomal RNA");
23694 gene1->
SetData().SetGene().SetAllele(
"x");
23699 "InvalidAlleleDuplicates",
23700 "Redundant allele qualifier (x) on gene and feature"));
23702 eval = validator.Validate(seh, options);
23713 operon->
SetData().SetImp().SetKey(
"operon");
23718 gene->
SetData().SetGene().SetLocus(
"x");
23723 "InvalidOperonMatchesGene",
23724 "Operon is same as gene - x"));
23726 eval = validator.Validate(seh, options);
23738 var->
SetData().SetImp().SetKey(
"variation");
23744 "InvalidCompareRefSeqAccession",
23745 "RefSeq accession NC_000001.1 cannot be used for qualifier compare"));
23747 eval = validator.Validate(seh, options);
23759 var->
SetData().SetImp().SetKey(
"variation");
23765 "InvalidCompareMissingVersion",
23766 "NC_000001 accession missing version for qualifier compare"));
23768 eval = validator.Validate(seh, options);
23780 var->
SetData().SetImp().SetKey(
"variation");
23786 "InvalidCompareBadAccession",
23787 "x_y is not a legal accession for qualifier compare"));
23789 eval = validator.Validate(seh, options);
23800 reg->
SetData().SetImp().SetKey(
"regulatory");
23802 reg->
SetQual().push_back(qual);
23808 eval = validator.Validate(seh, options);
23813 eval = validator.Validate(seh, options);
23815 "RegulatoryClassOtherNeedsNote",
23816 "The regulatory_class 'other' is missing the required /note"));
23828 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetNcbieaa(
'A');
23834 "UnparsedtRNAAnticodon",
23835 "Unparsed anticodon qualifier in tRNA"));
23837 eval = validator.Validate(seh, options);
23849 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetNcbieaa(
'A');
23855 "UnparsedtRNAProduct",
23856 "Unparsed product qualifier in tRNA"));
23858 eval = validator.Validate(seh, options);
23874 "rRNADoesNotHaveProduct",
23875 "rRNA has no name"));
23877 eval = validator.Validate(seh, options);
23888 misc->
SetData().SetImp().SetKey(
"repeat_region");
23894 "foo is not a legal value for qualifier mobile_element"));
23896 eval = validator.Validate(seh, options);
23901 misc->
SetQual().front()->SetVal(
"integron");
23903 eval = validator.Validate(seh, options);
23914 misc->
SetData().SetImp().SetKey(
"misc_difference");
23920 "123 is not a legal value for qualifier replace - should only be composed of acgtmrwsykvhdbn nucleotide bases"));
23922 eval = validator.Validate(seh, options);
23927 misc->
SetQual().front()->SetVal(
"aaccttgg");
23928 eval = validator.Validate(seh, options);
23934 scope.RemoveTopLevelSeqEntry(seh);
23939 misc->
SetData().SetImp().SetKey(
"misc_difference");
23941 seh = scope.AddTopLevelSeqEntry(*entry);
23944 "123 is not a legal value for qualifier replace - should only be composed of acdefghiklmnpqrstuvwy* amino acids"));
23946 eval = validator.Validate(seh, options);
23957 misc->
SetData().SetImp().SetKey(
"variation");
23963 "123 is not a legal value for qualifier replace - should only be composed of acgt unambiguous nucleotide bases"));
23965 eval = validator.Validate(seh, options);
23970 misc->
SetQual().front()->SetVal(
"aaccttgg");
23972 eval = validator.Validate(seh, options);
23983 gene->
SetData().SetGene().SetLocus(
"x");
23984 gene->
AddQualifier(
"product",
"hypothetical protein");
23989 "A product qualifier is not used on a gene feature"));
23991 eval = validator.Validate(seh, options);
24006 "codon_start value should be 1, 2, or 3"));
24008 eval = validator.Validate(seh, options);
24019 CSeq_loc& l1 = entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc();
24020 l1.SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
24021 l1.SetInt().SetFrom(0);
24022 l1.SetInt().SetTo(99);
24023 CSeq_loc& l2 = entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLoc();
24024 l2.SetInt().SetId().SetGenbank().SetAccession(
"AY123457");
24025 l2.SetInt().SetFrom(0);
24026 l2.SetInt().SetTo(99);
24033 "Genome difference between parent and component"));
24035 eval = validator.Validate(seh, options);
24047 rrna->
SetData().SetRna().SetExt().SetName(
"16S ribosomal RNA");
24052 trna->
SetData().SetRna().SetExt().SetTRNA().SetAa().SetIupacaa(
'A');
24059 "tRNA-rRNA overlap"));
24062 eval = validator.Validate(seh, options);
24072 loc1->
SetInt().SetId().SetLocal().SetStr(
"good");
24073 loc1->
SetInt().SetFrom(0);
24074 loc1->
SetInt().SetTo(10);
24081 loc2->
SetInt().SetFrom(6);
24082 loc2->
SetInt().SetTo(16);
24085 loc2->
SetInt().SetFrom(7);
24086 loc2->
SetInt().SetTo(17);
24089 loc2->
SetInt().SetFrom(11);
24090 loc2->
SetInt().SetTo(17);
// Message text kept in a constant; it is passed to the MitoMetazoanTooLong
// registration below.
24098 const string cMitoMezoMsg =
"Mitochondrial Metazoan sequences should be less than 65000 bp";
// First run: tails of two expected-error registrations. The SeqDataLenWrong message
// states 60 residues of data against a given length of 110000.
24108 "MitoMetazoanTooLong", cMitoMezoMsg));
24110 "SeqDataLenWrong",
"Bioseq.seq_data too short [60] for given length [110000]"));
24111 eval = validator.Validate(seh, options);
// Second run: the entry is removed from the scope and added again, and the
// SeqDataLenWrong message now states a given length of 64000.
// NOTE(review): the statement changing the length and the handling of the
// MitoMetazoanTooLong expectation for this run are not visible here.
24118 scope.RemoveTopLevelSeqEntry(seh);
24120 seh = scope.AddTopLevelSeqEntry(*entry);
24122 "SeqDataLenWrong",
"Bioseq.seq_data too short [60] for given length [64000]"));
24123 eval = validator.Validate(seh, options);
24142 ce->SetId(code_id);
24144 code->Set().push_back(ce);
24154 for (
size_t i = 0;
i < num_np;
i++) {
24175 loc1->
SetInt().SetFrom(0);
24176 loc1->
SetInt().SetTo(10);
24178 loc1->
SetInt().SetId().Assign(*n1);
24180 loc2->
SetInt().SetFrom(11);
24181 loc2->
SetInt().SetTo(26);
24183 loc2->
SetInt().SetId().Assign(*n2);
24185 cdregion->
SetLocation().SetMix().Set().push_back(loc1);
24186 cdregion->
SetLocation().SetMix().Set().push_back(loc2);
24189 cdregion->
SetProduct().SetWhole().Assign(*(
prot->GetSeq().GetId().front()));
24194 if (
offset == cdr_pos) {
24195 (*it)->SetSet().SetAnnot().front()->SetData().SetFtable().push_back(cdregion);
24198 (*it)->SetSet().SetSeq_set().push_back(
prot);
24218 eval = validator.Validate(seh, options);
24223 scope.RemoveTopLevelSeqEntry(seh);
24225 seh = scope.AddTopLevelSeqEntry(*entry);
24226 eval = validator.Validate(seh, options);
24230 scope.RemoveTopLevelSeqEntry(seh);
24233 seh = scope.AddTopLevelSeqEntry(*entry);
24234 eval = validator.Validate(seh, options);
24239 scope.RemoveTopLevelSeqEntry(seh);
24242 seh = scope.AddTopLevelSeqEntry(*entry);
24245 "CDSproductPackagingProblem",
24246 "Protein product not packaged in nuc-prot set with nucleotide in small genome set"));
24248 eval = validator.Validate(seh, options);
24268 "BadKeywordUnverified",
24269 "Sequence has both BARCODE and UNVERIFIED keywords"));
24272 eval = validator.Validate(seh, options);
24283 CRef<CUser_object> sc = edit::CStructuredCommentField::MakeUserObject(
"International Barcode of Life (iBOL)Data");
24288 uf->
SetLabel().SetStr(
"Barcode Index Number");
24297 "OrganismNotFound",
"Organism not found in taxonomy database"));
24300 eval = validator.Validate(seh, options);
24312 user.
SetData().push_back(uf);
24321 edit::CDBLink::SetAssembly(db1->
SetUser(),
"ZZZ");
24322 edit::CDBLink::SetBioProject(db1->
SetUser(),
"XXX");
24332 edit::CDBLink::SetAssembly(db2->
SetUser(),
"YYY");
24342 "MultipleDBLinkObjects",
"3 DBLink user objects apply to a Bioseq"));
24344 "DBLinkBadAssembly",
24345 "Assembly entries appear in 2 DBLink user objects"));
24348 "Unrecognized entries appear in 1 DBLink user object"));
24350 "DBLinkBadBioProject",
"Bad BioProject format - XXX"));
24352 "DBLinkBadSRAaccession",
"Bad Sequence Read Archive format - AAA"));
24354 "DBLinkBadCapitalization",
"Bad DBLink capitalization - Sequence read archive"));
24356 "DBLinkMissingUserObject",
"DBLink user object descriptor is empty"));
24358 "UserObjectNoData",
"User object with no data"));
24361 eval = validator.Validate(seh, options);
24373 edit::CDBLink::SetBioSample(db1->
SetUser(),
"SAMN1234");
24380 "DBLinkOnSet",
"DBLink user object should not be on this set"));
24382 eval = validator.Validate(seh, options);
24393 assembly_gap->
SetData().SetImp().SetKey(
"assembly_gap");
24394 assembly_gap->
SetLocation().SetInt().SetFrom(12);
24402 "AssemblyGapFeatureProblem",
"An assembly_gap feature should only be on a contig record"));
24404 eval = validator.Validate(seh, options);
24413 loc.SetInt().SetFrom(0);
24414 loc.SetInt().SetTo(5);
24419 loc.SetInt().SetFrom(stop - 6);
24420 loc.SetInt().SetTo(stop - 1);
24430 gene->
SetData().SetGene().SetLocus(
"x");
24439 utr5->
SetData().SetImp().SetKey(
"5'UTR");
24449 utr3->
SetData().SetImp().SetKey(
"3'UTR");
24462 "NoCDSbetweenUTRs",
"CDS not between 5'UTR and 3'UTR on minus strand"));
24465 "NoCDSbetweenUTRs",
"CDS not between 5'UTR and 3'UTR on plus strand"));
24470 eval = validator.Validate(seh, options);
24492 "Specific host value is alternate name: Gromphadorina portentosa should be Gromphadorhina portentosa"));
24495 eval = validator.Validate(seh, options);
24499 string val =
format.FormatForSubmitterReport(*(eval->GetErrs().back()), scope);
24500 BOOST_CHECK_EQUAL(
val,
"lcl|good\tGromphadorina portentosa should be Gromphadorhina portentosa");
24510 rna->SetData().SetRna().SetExt().SetName(
"23S ribosomal RNA");
24511 rna->SetProduct().SetWhole().SetGi(
GI_CONST(507148189));
24516 "Transcript length [11] less than (far) product length [3132], and tail < 95% polyA"));
24518 "There are 7 mismatches out of 11 bases between the transcript and (far) product sequence"));
24520 "Type of RNA does not match MolInfo of product Bioseq"));
24523 eval = validator.Validate(seh, options);
24533 gene->
SetData().SetGene().SetLocus(
"x");
24540 "ExceptionRequiresLocusTag",
24541 "Gene has split exception but no locus_tag"));
24543 eval = validator.Validate(seh, options);
24552 ss->
SetSub().SetTool(
"Geneious");
24554 ss->
SetSub().SetCit().SetAuthors().SetNames().SetStd().push_back(author);
24555 ss->
SetSub().SetCit().SetAuthors().SetAffil().SetStd().SetAffil(
"some affiliation");
24556 ss->
SetSub().SetCit().SetAuthors().SetAffil().SetStd().SetCountry(
"Russia");
24558 ss->
SetSub().SetCit().SetDate().SetStd().SetYear(2009);
24559 ss->
SetSub().SetCit().SetDate().SetStd().SetMonth(12);
24560 ss->
SetSub().SetCit().SetDate().SetStd().SetDay(31);
24573 gene_loc->
SetMix().Set().front()->SetInt().SetFrom(0);
24574 gene_loc->
SetMix().Set().front()->SetInt().SetTo(0);
24576 gene_loc->
SetMix().Set().back()->SetInt().SetFrom(9);
24577 gene_loc->
SetMix().Set().back()->SetInt().SetTo(10);
24580 ss->
SetData().SetEntrys().push_back(entry);
24585 "Location: Mixed strands in SeqLoc [(lcl|good:c1-1, 10-11)]"));
24589 eval = validator.Validate(*ss, &scope, options);
// Only source descriptors are examined.
24627 if (it->IsSource()) {
// found starts as false; the loop below scans the source subtypes for one whose
// subtype equals the requested subtype.
// NOTE(review): what happens on a match is not visible in this span.
24628 bool found =
false;
24629 for (
auto sit : it->SetSource().SetSubtype()) {
24630 if (sit->GetSubtype() == subtype) {
// Append ss to the subtype list of the source.
// NOTE(review): presumably done only when no match was found - the guarding
// condition is not visible here; confirm.
24638 it->SetSource().SetSubtype().push_back(ss);
24649 expected_errors.push_back(
new CExpectedError(
"lcl|good", sev, err_code, msg));
24654 "Non-viral source feature should not have a segment qualifier"));
24657 eval = validator.Validate(seh, options);
24668 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" +
val +
"'");
24670 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" +
val +
"'");
24672 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" +
val +
"'");
24691 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24692 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'Sebaea microphylla'");
24695 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24696 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'Sebaea'");
24699 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24700 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'microphylla'");
24707 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24708 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'some CHROMOSOME'");
24711 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24712 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'linkage group x'");
24715 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24716 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'linkage-group x'");
24719 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24720 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'linkage_group x'");
24723 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24724 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'chry'");
24727 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24728 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'chrm'");
24731 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24732 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'CHROM'");
24735 expect_errs ?
"BadPlasmidChromosomeLinkageName" :
"",
24736 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'PLASMID'");
24761 const string kMoreThan240 =
"A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z";
24762 const string kMoreThan32 =
"A B C D E F G H I J K L M N O P Q R S T U V W X Y Z";
24778 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" + kMoreThan32 +
"'");
24780 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" + kMoreThan240 +
"'");
24782 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name 'LG 123'");
24788 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" + kMoreThan32 +
"'");
24790 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" + kMoreThan240 +
"'");
24797 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" + kMoreThan32 +
"'");
24799 eDiag_Error,
"Problematic plasmid/chromosome/linkage group name '" + kMoreThan240 +
"'");
24810 bool found_host =
false;
24816 if (d->IsSource() && d->GetSource().IsSetOrgMod()) {
24817 for (
auto om : d->GetSource().GetOrg().GetOrgname().GetMod()) {
24819 BOOST_CHECK_EQUAL(host,
om->IsSetSubname() ?
om->GetSubname() :
kEmptyStr);
24825 BOOST_CHECK_EQUAL(found_host,
true);
24838 validator::CTaxValidationAndCleanup tval;
24840 BOOST_CHECK_EQUAL(tval.DoTaxonomyUpdate(seh,
true),
true);
24848 CheckOneSpecificHost(
"Canis familiaris; some other information",
"Canis familiaris; some other information");
24855 BOOST_CHECK_EQUAL(
"Acropora valida",
FixSpecificHost(
"Acropora tumida"));
24856 BOOST_CHECK_EQUAL(
"Leuzea repens",
FixSpecificHost(
"Acroptilon repens"));
24869 "Viroid has unexpected tissue-type qualifier"));
24872 eval = validator.Validate(seh, options);
24889 eval = validator.Validate(seh, options);
24894 string latlon =
"56.1033 N 10.4578 E";
24910 eval = validator.Validate(seh, options);
24927 string acc_str =
"gb|" + accession +
"|";
24930 "InconsistentMolInfoTechnique",
24931 "WGS accession should have Mol-info.tech of wgs"));
24937 eval = validator.Validate(seh, options);
24942 scope.RemoveTopLevelSeqEntry(seh);
24944 seh = scope.AddTopLevelSeqEntry(*entry);
24949 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"InconsistentWGSFlags",
"Mol-info.tech of wgs should have WGS accession"));
24952 eval = validator.Validate(seh, options);
24968 string acc_str =
"gb|" + accession +
"|";
24969 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"OrphanedProtein",
"Orphaned stand-alone protein"));
24974 eval = validator.Validate(seh, options);
24993 expected_errors.push_back(
new CExpectedError(
"gb|" + n_acc +
"|",
eDiag_Error,
"InconsistentMolInfoTechnique",
"WGS accession should have Mol-info.tech of wgs"));
24995 eval = validator.Validate(seh, options);
25011 eval = validator.Validate(seh, options);
25059 eval = validator.Validate(seh, options);
25066 eval = validator.Validate(seh, options);
25080 eval = validator.Validate(seh, options);
25088 "Organism not found in taxonomy database"));
25089 eval = validator.Validate(seh, options);
25095 eval = validator.Validate(seh, options);
25098 "TaxonomyLookupProblem",
"Organism name is 'Salmonella', taxonomy ID should be '590' but is '592768'"));
25100 "TaxonomyIsSpeciesProblem",
"Taxonomy lookup reports is_species_level FALSE"));
25101 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadOrgMod",
"Salmonella organisms should use serovar instead of serotype."));
25107 eval = validator.Validate(seh, options);
25109 "Organism not found in taxonomy database"));
25110 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadOrgMod",
"Salmonella organisms should use serovar instead of serotype."));
25115 scope.RemoveTopLevelSeqEntry(seh);
25117 seh = scope.AddTopLevelSeqEntry(*entry);
25118 eval = validator.Validate(seh, options);
25120 "Organism not found in taxonomy database"));
25121 expected_errors.push_back(
new CExpectedError(
"ref|NC_123456|",
eDiag_Error,
"BadOrgMod",
"Salmonella organisms should use serovar instead of serotype."));
25128 eval = validator.Validate(seh, options);
25130 "Organism not found in taxonomy database"));
25131 expected_errors.push_back(
new CExpectedError(
"ref|NC_123456|",
eDiag_Error,
"BadOrgMod",
"Salmonella organisms should use serovar instead of serotype."));
25132 expected_errors.push_back(
new CExpectedError(
"ref|NC_123456|",
eDiag_Warning,
"BadOrgMod",
"Salmonella organism name should contain the serovar value."));
25138 eval = validator.Validate(seh, options);
25140 "Organism not found in taxonomy database"));
25141 expected_errors.push_back(
new CExpectedError(
"ref|NC_123456|",
eDiag_Error,
"BadOrgMod",
"Salmonella organisms should use serovar instead of serotype."));
25158 eval = validator.Validate(seh, options);
25165 "Organism not found in taxonomy database"));
25166 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadOrgMod",
"Salmonella organism name should contain the serovar value."));
25167 eval = validator.Validate(seh, options);
25184 vector<CExpectedError*> expected_errors;
25186 "Record contains Seq-annot.data.locs"));
25188 unsigned int options{0};
25200 env.Set(
"NI_SERVICE_NAME_TAXON3",
"TaxService3v4test");
25207 "OrganismNotFound",
25208 "Organism not found. Possible matches|Salmonella enterica|Salmonella enterica V|Salmonella enterica subsp. V"));
25210 eval = validator.Validate(seh, options);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eErr_SEQ_FEAT_NotSpliceConsensusDonor
@ eErr_SEQ_FEAT_InternalStop
@ eErr_SEQ_INST_StopInProtein
@ eErr_SEQ_FEAT_ExceptionProblem
bool AddTerminalCodeBreak(CSeq_feat &cds, CScope &scope)
bool RemoveLineageSourceNotes()
bool GetDisableStrainForwarding() const
void RemoveCultureNotes(bool is_species_level=true)
void SetDisableStrainForwarding(bool val)
TSeqPos GetLength(void) const
static vector< CRef< CSeq_loc > > GetNonsenseIntrons(const CSeq_feat &feat, CScope &scope)
static string NewFixCountry(const string &input, bool us_territories=false)
static string USAStateCleanup(const string &country)
static void LoadUSAExceptionMap(const TUsaExceptionMap &exceptions)
bool Match(const CValidErrItem &err_item, bool ignore_severity=false)
CExpectedError(string accession, EDiagSev severity, string err_code, string err_msg)
static void PrintSeenError(const CValidErrItem &err_item)
void Test(const CValidErrItem &err_item)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
@Gb_qual.hpp User-defined methods of the data storage class.
static void ParseExperiment(const string &orig, string &category, string &experiment, string &doi)
static string BuildExperiment(const string &category, const string &experiment, const string &doi)
list< CRef< CTaxon3_reply > > TReplies
static CNcbiApplication * Instance(void)
Singleton method.
@OrgMod.hpp User-defined methods of the data storage class.
static bool FixStructuredVoucher(string &val, const string &voucher_type)
static string IsCultureCollectionValid(const string &culture_collection)
static bool TrimJunk(string &seq)
static bool IsValid(const string &seq, char &bad_ch)
static bool Fixi(string &seq)
void SetDescr(CSeq_descr &value)
bool IsSetDescr(void) const
namespace ncbi::objects::
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
void SetGeneXref(CGene_ref &value)
bool AddSeqFeatXref(const CSeqFeatXref::TId &id)
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static string GetCollectionDateProblem(const string &date_string)
static bool NCBI_UseGeoLocNameForCountry(void)
static string FixSexQualifierValue(const string &value)
static bool IsValidSexQualifierValue(const string &value)
@ eLatLonCountryErr_Value
static string AutoFix(TSubtype subtype, const string &value)
static string ValidateLatLonCountry(const string &countryname, string &lat_lon, bool check_state, ELatLonCountryErr &errcode)
static string FixDateFormat(const string &orig_date)
Attempt to fix the format of the date Returns a blank if the format of the date cannot be determined.
static string CheckCellLine(const string &cell_line, const string &organism)
static void IsCorrectLatLonFormat(string lat_lon, bool &format_correct, bool &precision_correct, bool &lat_in_range, bool &lon_in_range, double &lat_value, double &lon_value)
static string FixAltitude(const string &value)
static void RemoveCultureNotes(string &value, bool is_species_level=true)
static void IsCorrectDateFormat(const string &date_string, bool &bad_format, bool &in_future)
static void DetectDateFormat(const string &orig_date, bool &ambiguous, bool &day_first)
CConstRef< CSeq_feat > GetFeat(size_t num) const
vector< CRef< COrg_ref > > GetTaxonomyLookupRequest() const
bool AdjustOrgRefsForSpecificHosts(vector< CRef< COrg_ref > > org_refs)
bool IsSpecificHostMapUpdateComplete() const
CConstRef< CSeqdesc > GetDesc(size_t num) const
bool IsStrainMapUpdateComplete() const
bool AdjustOrgRefsWithSpecificHostReply(vector< CRef< COrg_ref >> requests, const CTaxon3_reply &reply, vector< CRef< COrg_ref >> org_refs)
bool AdjustOrgRefsWithTaxLookupReply(const CTaxon3_reply &reply, vector< CRef< COrg_ref > > org_refs, string &error_message, bool use_error_orgrefs=false) const
vector< CRef< COrg_ref > > GetStrainLookupRequest()
string IncrementalSpecificHostMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply)
string IncrementalStrainMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply, TTaxId descTaxID=ZERO_TAX_ID)
vector< CRef< COrg_ref > > GetSpecificHostLookupRequest(bool for_fix)
void Init(const CSeq_entry &se)
virtual CRef< CTaxon3_reply > SendOrgRefList(const vector< CRef< COrg_ref > > &list, COrg_ref::fOrgref_parts result_parts=COrg_ref::eOrgref_default, fT3reply_parts t3result_parts=eT3reply_default)
void SetObjectType(EObjectType obj_type)
@ eRefGeneTrackingStatus_INFERRED
void SetRefGeneTrackingStatus(ERefGeneTrackingStatus status)
@ eObjectType_RefGeneTracking
@ eObjectType_StructuredComment
@ eObjectType_ValidationSuppression
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
@ eParse_String
Add string even if all numbers.
static const string & ConvertSeverity(EDiagSev sev)
static const string & ConvertErrCode(unsigned int)
const string GetErrCode() const
EDiagSev GetSeverity() const
CRef< CValidError > Validate(const CSeq_entry &se, CScope *scope=nullptr, Uint4 options=0)
@ eVal_collect_locus_tags
@ eVal_locus_tag_general_match
@ eVal_far_fetch_cds_products
@ eVal_latlon_check_state
@ eVal_far_fetch_mrna_products
static const int chunk_size
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
set< CBioseq_Handle > ListOrphanProteins(CSeq_entry_Handle seh, bool force_refseq=false)
set< CSeq_feat_Handle > GetDuplicateFeaturesForRemoval(CSeq_entry_Handle seh)
void SetGoTermPMID(CUser_field &field, int pmid)
size_t CountProcessGoTerms(const CSeq_feat &feat)
void ClearGoTermPMID(CUser_field &field)
void SetGoTermText(CUser_field &field, const string &val)
void AddProcessGoTerm(CSeq_feat &feat, CRef< CUser_field > field)
void ClearGoTermEvidence(CUser_field &field)
void SetGoTermId(CUser_field &field, const string &val)
void AddGoTermEvidence(CUser_field &field, const string &val)
bool RemoveDuplicateGoTerms(CSeq_feat &feat)
#define ENTREZ_ID_CONST(id)
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
SStrictId_Tax::TId TTaxId
Taxon id type.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
void SetDiagFilter(EDiagFilter what, const char *filter_str)
Set diagnostic filter.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error – guarantees exit(or abort)
@ eDiag_Critical
Critical error message.
@ eDiagFilter_All
for all non-FATAL
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators –.
ESerialDataFormat
Data file format.
@ eSerial_AsnText
ASN.1 text.
const string AsFastaString(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
void SetPartialStop(bool val, ESeqLocExtremes ext)
void SetNull(void)
Override all setters to incorporate cache invalidation.
TSeqPos GetStop(ESeqLocExtremes ext) const
static CObjectOStream * Open(ESerialDataFormat format, CNcbiOstream &outStream, bool deleteOutStream)
Create serial object writer and attach it to an output stream.
ELocationInFrame IsLocationInFrame(const CSeq_feat_Handle &cds, const CSeq_loc &loc)
Determines whether location loc is in frame with coding region cds.
@ eLocationInFrame_InFrame
@ eLocationInFrame_BadStart
@ eLocationInFrame_BadStop
@ eLocationInFrame_BadStartAndStop
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
void RemoveTopLevelSeqEntry(const CTSE_Handle &entry)
Revoke TSE previously added using AddTopLevelSeqEntry() or AddBioseq().
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
#define NCBITEST_DISABLE(test_name)
Unconditionally disable test case.
@ eCurrent
Use current time. See also CCurrentTime.
static const char label[]
TKeywords & SetKeywords(void)
Assign a value to Keywords data member.
void SetBook(TBook &value)
Assign a value to Book data member.
void SetTitle(TTitle &value)
Assign a value to Title data member.
void SetDate(TDate &value)
Assign a value to Date data member.
void SetTitle(TTitle &value)
Assign a value to Title data member.
void SetName(TName &value)
Assign a value to Name data member.
void SetFrom(TFrom &value)
Assign a value to From data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
void SetDate(TDate &value)
Assign a value to Date data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
void SetCit(TCit &value)
Assign a value to Cit data member.
void SetCit(const TCit &value)
Assign a value to Cit data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
void ResetDate(void)
Reset Date data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetAuthors(TAuthors &value)
Assign a value to Authors data member.
@ ePubStatus_ppublish
published in print by publisher
@ ePubStatus_aheadofprint
epublish, but will be followed by print
@ ePubStatus_epublish
published electronically by publisher
@ ePrepub_in_press
accepted, not published
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
void SetSeq(const TSeq &value)
Assign a value to Seq data member.
const TOrg & GetOrg(void) const
Get the Org member data.
TSubtype GetSubtype(void) const
Get the Subtype member data.
void SetForward(TForward &value)
Assign a value to Forward data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
void SetPcr_primers(TPcr_primers &value)
Assign a value to Pcr_primers data member.
void SetReverse(TReverse &value)
Assign a value to Reverse data member.
void SetName(const TName &value)
Assign a value to Name data member.
void SetOrg(TOrg &value)
Assign a value to Org data member.
void SetName(const TName &value)
Assign a value to Name data member.
const TName & GetName(void) const
Get the Name member data.
Tdata & Set(void)
Assign a value to data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
@ eSubtype_collection_date
DD-MMM-YYYY format.
@ eSubtype_insertion_seq_name
@ eSubtype_transposon_name
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_lat_lon
+/- decimal degrees
@ eSubtype_rev_primer_name
@ eSubtype_collected_by
name of person who collected the sample
@ eSubtype_fwd_primer_name
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_isolation_source
@ eSubtype_environmental_sample
@ eSubtype_endogenous_virus_name
@ eSubtype_identified_by
name of person who identified the sample
@ eOrigin_synthetic
purely synthetic
@ eOrigin_mut
artificially mutagenized
@ eOrigin_artificial
artificially engineered
void SetSeason(const TSeason &value)
Assign a value to Season data member.
void SetYear(TYear value)
Assign a value to Year data member.
void SetTag(TTag &value)
Assign a value to Tag data member.
void SetMonth(TMonth value)
Assign a value to Month data member.
TStd & SetStd(void)
Select the variant.
void SetDay(TDay value)
Assign a value to Day data member.
TData & SetData(void)
Assign a value to Data data member.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
void ResetMonth(void)
Reset Month data member.
void ResetDay(void)
Reset Day data member.
void SetType(TType &value)
Assign a value to Type data member.
void ResetData(void)
Reset Data data member.
void SetData(TData &value)
Assign a value to Data data member.
TYear GetYear(void) const
Get the Year member data.
TMonth GetMonth(void) const
Get the Month member data.
void SetDb(const TDb &value)
Assign a value to Db data member.
TDay GetDay(void) const
Get the Day member data.
const TStd & GetStd(void) const
Get the variant data.
@ eLim_tl
space to left of position
@ eLim_tr
space to right of position
const TMod & GetMod(void) const
Get the Mod member data.
THybrid & SetHybrid(void)
Select the variant.
TNamedhybrid & SetNamedhybrid(void)
Select the variant.
TVirus & SetVirus(void)
Select the variant.
TDb & SetDb(void)
Assign a value to Db data member.
virtual void Reset(void)
Reset the whole object.
Tdata & Set(void)
Assign a value to data member.
void SetGenus(const TGenus &value)
Assign a value to Genus data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
void SetSubspecies(const TSubspecies &value)
Assign a value to Subspecies data member.
void SetSpecies(const TSpecies &value)
Assign a value to Species data member.
Tdata & Set(void)
Assign a value to data member.
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
TPartial & SetPartial(void)
Select the variant.
TBinomial & SetBinomial(void)
Select the variant.
const TAttrib & GetAttrib(void) const
Get the Attrib member data.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
@ eSubtype_gb_synonym
used by taxonomy database
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
@ eSubtype_nat_host
natural host of this specimen
@ eSubtype_metagenome_source
@ eSubtype_specimen_voucher
@ eSubtype_culture_collection
@ eSubtype_forma_specialis
EProcessed
processing status
@ eProcessed_signal_peptide
@ eProcessed_transit_peptide
TProc & SetProc(void)
Select the variant.
TPmid & SetPmid(void)
Select the variant.
TMuid & SetMuid(void)
Select the variant.
TBook & SetBook(void)
Select the variant.
TEquiv & SetEquiv(void)
Select the variant.
TMan & SetMan(void)
Select the variant.
TSub & SetSub(void)
Select the variant.
TGen & SetGen(void)
Select the variant.
TMedline & SetMedline(void)
Select the variant.
TArticle & SetArticle(void)
Select the variant.
void ResetSegs(void)
Reset Segs data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
void SetDim(TDim value)
Assign a value to Dim data member.
void SetType(TType value)
Assign a value to Type data member.
void SetAa(TAa &value)
Assign a value to Aa data member.
TXref & SetXref(void)
Assign a value to Xref data member.
void SetQual(const TQual &value)
Assign a value to Qual data member.
void ResetTitle(void)
Reset Title data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void ResetExcept(void)
Reset Except data member.
void SetCit(TCit &value)
Assign a value to Cit data member.
void SetPartial(TPartial value)
Assign a value to Partial data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
const TId & GetId(void) const
Get the Id member data.
void ResetExcept_text(void)
Reset Except_text data member.
const TLocation & GetLocation(void) const
Get the Location member data.
void SetExcept(TExcept value)
Assign a value to Except data member.
void SetExt(TExt &value)
Assign a value to Ext data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void ResetPseudo(void)
Reset Pseudo data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetData(TData &value)
Assign a value to Data data member.
void ResetComment(void)
Reset Comment data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
void ResetProduct(void)
Reset Product data member.
void ResetXref(void)
Reset Xref data member.
TQual & SetQual(void)
Assign a value to Qual data member.
void ResetQual(void)
Reset Qual data member.
@ eFrame_three
reading frame
void SetSeqid(TSeqid value)
Assign a value to Seqid data member.
TGeneral & SetGeneral(void)
Select the variant.
void SetMol(const TMol &value)
Assign a value to Mol data member.
TPatent & SetPatent(void)
Select the variant.
TEmbl & SetEmbl(void)
Select the variant.
TOther & SetOther(void)
Select the variant.
const TId & GetId(void) const
Get the Id member data.
void SetId(TId value)
Assign a value to Id data member.
void SetAccession(const TAccession &value)
Assign a value to Accession data member.
TFrom GetFrom(void) const
Get the From member data.
TTpe & SetTpe(void)
Select the variant.
TTpg & SetTpg(void)
Select the variant.
TPir & SetPir(void)
Select the variant.
TGi & SetGi(void)
Select the variant.
TTpd & SetTpd(void)
Select the variant.
TGibbmt & SetGibbmt(void)
Select the variant.
TGpipe & SetGpipe(void)
Select the variant.
TDdbj & SetDdbj(void)
Select the variant.
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
TLocal & SetLocal(void)
Select the variant.
TGiim & SetGiim(void)
Select the variant.
void SetDb(const TDb &value)
Assign a value to Db data member.
TPrf & SetPrf(void)
Select the variant.
TTo GetTo(void) const
Get the To member data.
TGibbsq & SetGibbsq(void)
Select the variant.
TGenbank & SetGenbank(void)
Select the variant.
const TInt & GetInt(void) const
Get the variant data.
TSwissprot & SetSwissprot(void)
Select the variant.
void SetCit(TCit &value)
Assign a value to Cit data member.
void SetVersion(TVersion value)
Assign a value to Version data member.
TPdb & SetPdb(void)
Select the variant.
@ eNa_strand_both_rev
in reverse orientation
@ eNa_strand_both
in forward orientation
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetNumval(TNumval value)
Assign a value to Numval data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
void SetMax(TMax value)
Assign a value to Max data member.
void ResetLoc(void)
Reset Loc data member.
void ResetValues(void)
Reset Values data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
TNumval GetNumval(void) const
Get the Numval member data.
void SetAxis(TAxis value)
Assign a value to Axis data member.
const TSeq & GetSeq(void) const
Get the variant data.
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
TClass GetClass(void) const
Get the Class member data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
void ResetAnnot(void)
Reset Annot data member.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void SetClass(TClass value)
Assign a value to Class data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_equiv
a set of equivalent maps or seqs
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_conset
constructed sequence + parts
@ eClass_pir
converted pir
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_gibb
geninfo backbone
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_pdb_entry
a complete PDB entry
@ eClass_genbank
converted genbank
@ eClass_swissprot
converted SWISSPROT
@ eClass_segset
segmented sequence + parts
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
@ eClass_pub_set
all the seqs from a single publication
TModif & SetModif(void)
Select the variant.
virtual void Reset(void)
Reset the whole object.
void SetCompleteness(TCompleteness value)
Assign a value to Completeness data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetData(TData &value)
Assign a value to Data data member.
list< CRef< CSeqdesc > > Tdata
TMethod & SetMethod(void)
Select the variant.
TId & SetId(void)
Assign a value to Id data member.
TPir & SetPir(void)
Select the variant.
void ResetId(void)
Reset Id data member.
void SetPub(TPub &value)
Assign a value to Pub data member.
const TInst & GetInst(void) const
Get the Inst member data.
TTitle & SetTitle(void)
Select the variant.
TMol_type & SetMol_type(void)
Select the variant.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
TPub & SetPub(void)
Select the variant.
TPrf & SetPrf(void)
Select the variant.
TOrg & SetOrg(void)
Select the variant.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
bool IsSource(void) const
Check if variant Source is selected.
TGenbank & SetGenbank(void)
Select the variant.
TPdb & SetPdb(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
void ResetAnnot(void)
Reset Annot data member.
TUser & SetUser(void)
Select the variant.
TSp & SetSp(void)
Select the variant.
const Tdata & Get(void) const
Get the member data.
void SetType(TType value)
Assign a value to Type data member.
TLength GetLength(void) const
Get the Length member data.
TName & SetName(void)
Select the variant.
TComment & SetComment(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
void ResetLinkage(void)
Reset Linkage data member.
TSource & SetSource(void)
Select the variant.
bool IsSetDescr(void) const
Descriptors: check if a value has been assigned to Descr data member.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
bool IsPub(void) const
Check if variant Pub is selected.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
void ResetLinkage_evidence(void)
Reset Linkage_evidence data member.
TUser & SetUser(void)
Select the variant.
TEmbl & SetEmbl(void)
Select the variant.
void SetComment(const TComment &value)
Assign a value to Comment data member.
TLinkage_evidence & SetLinkage_evidence(void)
Assign a value to Linkage_evidence data member.
void SetLinkage(TLinkage value)
Assign a value to Linkage data member.
void ResetType(void)
Reset Type data member.
void SetTech(TTech value)
Assign a value to Tech data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TMolinfo & SetMolinfo(void)
Select the variant.
TCreate_date & SetCreate_date(void)
Select the variant.
TUpdate_date & SetUpdate_date(void)
Select the variant.
TRegion & SetRegion(void)
Select the variant.
@ eRepr_const
constructed sequence
@ eRepr_ref
reference to another sequence
@ eRepr_seg
segmented sequence
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_map
ordered map of any kind
@ eRepr_consen
consensus sequence or pattern
@ eRepr_raw
continuous sequence
@ eRepr_virtual
no seq data
@ eCompleteness_complete
complete biological entity
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_physmap
from physical mapping techniques
@ eTech_htc
high throughput cDNA
@ eTech_both
concept transl. w/ partial pept. seq.
@ eTech_targeted
targeted locus sets/studies
@ eTech_seq_pept_homol
sequenced peptide, ordered by homology
@ eTech_composite_wgs_htgs
composite of WGS and HTGS
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_seq_pept_overlap
sequenced peptide, ordered by overlap
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_concept_trans
conceptual translation
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_standard
standard sequencing
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_seq_pept
peptide was sequenced
@ eTech_survey
one-pass genomic sequence
@ eTech_barcode
barcode of life project
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_derived
derived from other data, not a primary entity
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
@ eTech_concept_trans_a
conceptual translation supplied by author
@ eTech_genemap
from genetic mapping techniques
@ e_Ncbi2na
2 bit nucleic acid code
@ e_Ncbi4na
4 bit nucleic acid code
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_genomic_mRNA
reported a mix of genomic and cdna sequence
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
@ eBiomol_other_genetic
other genetic material
@ eGIBB_mod_no_right
missing right end (3' or COOH)
@ eGIBB_mod_mitochondrial
@ eGIBB_mod_no_left
missing left end (5' for na, NH2 for aa)
@ e_Org
if all from one organism
@ e_Pub
a reference to the publication
@ e_Mol_type
type of molecule
@ e_Method
sequencing method
@ e_Molinfo
info on the molecule and techniques
@ e_Title
a title for this sequence
@ e_Source
source of materials, includes Org-ref
@ eType_fragment
Deprecated. Used only for AGP 1.1.
@ eTopology_tandem
some part of tandem repeat
@ eMol_not_set
> cdna = rna
@ eMol_na
just a nucleic acid
@ eStrand_other
default ds for DNA, ss for RNA, pept
@ eStrand_ds
double strand
@ eStrand_ss
single strand
@ eGIBB_mol_pre_mRNA
precursor RNA of any sort really
@ eGIBB_mol_genomic_mRNA
reported a mix of genomic and cdna sequence
@ eGIBB_mol_other_genetic
other genetic material
void SetSub(TSub &value)
Assign a value to Sub data member.
void SetData(TData &value)
Assign a value to Data data member.
void SetLevel(TLevel value)
Assign a value to Level data member.
void SetMessage(const TMessage &value)
Assign a value to Message data member.
TError & SetError(void)
Select the variant.
const TAccnver & GetAccnver(void) const
Get the Accnver member data.
const TMsg & GetMsg(void) const
Get the Msg member data.
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
static void text(MDB_val *v)
const struct ncbi::grid::netcache::search::fields::KEY key
static bool Equals(const CVariation::TPlacements &p1, const CVariation::TPlacements &p2)
Defines to provide correct exporting from DLLs in some configurations.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
double f(double x_, const double &y_)
static const char * expected[]
#define FOR_EACH_ORGMOD_ON_BIOSOURCE(Itr, Var)
FOR_EACH_ORGMOD_ON_BIOSOURCE EDIT_EACH_ORGMOD_ON_BIOSOURCE.
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
CRef< objects::CObjectManager > om
static const char * str(char *buf, int n)
Utilities for more convenient use of the Boost.Test library.
void SetSynthetic_construct(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > BuildGenProdSetNucProtSet(CRef< objects::CSeq_id > nuc_id, CRef< objects::CSeq_id > prot_id)
CRef< objects::CPub > BuildGoodCitGenPub(CRef< objects::CAuthor > author, int serial_number)
CRef< objects::CSeq_feat > MakeGeneForFeature(CRef< objects::CSeq_feat > feat)
CRef< objects::CAuthor > BuildGoodAuthor()
void SetDrosophila_melanogaster(CRef< objects::CSeq_entry > entry)
void SetTaxon(objects::CBioSource &src, size_t taxon)
void SetSubSource(objects::CBioSource &src, objects::CSubSource::TSubtype subtype, string val)
CRef< objects::CSeq_annot > BuildGoodGraphAnnot(string id)
void SetChromosome(objects::CBioSource &src, string chromosome)
void SetTech(CRef< objects::CSeq_entry > entry, objects::CMolInfo::TTech tech)
void ChangeNucId(CRef< objects::CSeq_entry > np_set, CRef< objects::CSeq_id > id)
CRef< objects::CSeq_feat > AddProtFeat(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_id > BuildRefSeqId(void)
CRef< objects::CPub > BuildGoodArticlePub()
void SetFocus(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > GetNucProtSetFromGenProdSet(CRef< objects::CSeq_entry > entry)
void SetGenome(CRef< objects::CSeq_entry > entry, objects::CBioSource::TGenome genome)
void AddToDeltaSeq(CRef< objects::CSeq_entry > entry, string seq)
void SetDiv(CRef< objects::CSeq_entry > entry, string div)
void ChangeProtId(CRef< objects::CSeq_entry > np_set, CRef< objects::CSeq_id > id)
CRef< objects::CSeq_entry > MakeProteinForGoodNucProtSet(string id)
CRef< objects::CSeq_feat > GetCDSFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_align > BuildGoodAlign()
void SetSebaea_microphylla(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeqdesc > BuildGoodPubSeqdesc()
CRef< objects::CPub > BuildGoodCitSubPub()
CRef< objects::CSeq_entry > BuildGoodProtSeq(void)
void ResetOrgname(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_feat > GetCDSFromGenProdSet(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_annot > AddFeat(CRef< objects::CSeq_feat > feat, CRef< objects::CSeq_entry > entry)
void SetTaxname(CRef< objects::CSeq_entry > entry, string taxname)
CRef< objects::CSeq_feat > GetmRNAFromGenProdSet(CRef< objects::CSeq_entry > entry)
void RetranslateCdsForNucProtSet(CRef< objects::CSeq_entry > entry, objects::CScope &scope)
void ChangeNucProtSetProteinId(CRef< objects::CSeq_entry > entry, CRef< objects::CSeq_id > id)
void SetCompleteness(CRef< objects::CSeq_entry > entry, objects::CMolInfo::TCompleteness completeness)
void SetNucProtSetPartials(CRef< objects::CSeq_entry > entry, bool partial5, bool partial3)
void SetOrigin(CRef< objects::CSeq_entry > entry, objects::CBioSource::TOrigin origin)
CRef< objects::CSeq_entry > BuildGoodSeq(void)
CRef< objects::CSeq_feat > MakeIntronForMixLoc(CRef< objects::CSeq_id > id)
void SetSpliceForMixLoc(objects::CBioseq &seq)
CRef< objects::CSeq_feat > AddMiscFeature(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > BuildGoodEcoSet()
void SetTransgenic(objects::CBioSource &src, bool do_set)
void ChangeId(CRef< objects::CSeq_annot > annot, CRef< objects::CSeq_id > id)
void MakeNucProtSet3Partial(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_feat > GetProtFeatFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > GetProteinSequenceFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
void RemoveDescriptorType(CRef< objects::CSeq_entry > entry, objects::CSeqdesc::E_Choice desc_choice)
void SetBiomol(CRef< objects::CSeq_entry > entry, objects::CMolInfo::TBiomol biomol)
CRef< objects::CSeq_entry > BuildGoodNucProtSet(void)
CRef< objects::CSeq_feat > MakemRNAForCDS(CRef< objects::CSeq_feat > feat)
void SetOrgMod(objects::CBioSource &src, objects::COrgMod::TSubtype subtype, string val)
CRef< objects::CSeq_entry > GetNucleotideSequenceFromGoodNucProtSet(CRef< objects::CSeq_entry > entry)
void ClearFocus(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_loc > MakeMixLoc(CRef< objects::CSeq_id > id)
void SetNucProtSetProductName(CRef< objects::CSeq_entry > entry, string new_name)
void AddGoodPub(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_feat > MakeCDSForGoodNucProtSet(const string &nuc_id, const string &prot_id)
void AddGoodSource(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_feat > BuildtRNA(CRef< objects::CSeq_id > id)
void SetCommon(CRef< objects::CSeq_entry > entry, string common)
void RevComp(objects::CBioseq &bioseq)
void SetGcode(CRef< objects::CSeq_entry > entry, objects::COrgName::TGcode gcode)
CRef< objects::CSeq_feat > BuildGoodtRNA(CRef< objects::CSeq_id > id)
void ChangeNucProtSetNucId(CRef< objects::CSeq_entry > entry, CRef< objects::CSeq_id > id)
void AdjustProtFeatForNucProtSet(CRef< objects::CSeq_entry > entry)
void RemoveDeltaSeqGaps(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > BuildGoodDeltaSeq(void)
CRef< objects::CSeq_feat > AddGoodSourceFeature(CRef< objects::CSeq_entry > entry)
CRef< objects::CSeq_entry > GetGenomicFromGenProdSet(CRef< objects::CSeq_entry > entry)
void SetDbxref(objects::CBioSource &src, string db, objects::CObject_id::TId id)
CRef< objects::CSeq_entry > BuildGoodGenProdSet()
void SetLineage(CRef< objects::CSeq_entry > entry, string lineage)
void RemoveDbxref(objects::CBioSource &src, string db, objects::CObject_id::TId id)
void TestRepliconForbiddenWords(CSubSource::ESubtype subtype, bool expect_errs)
static CRef< CSeq_align > BuildSetAlign(CRef< CSeq_entry > entry)
void AddYear(CDate &add_date)
void TestOneReplicon(CSubSource::ESubtype subtype, const string &val, const string &err_code, EDiagSev sev, const string &msg)
void TestSpecificHostNoError(const string &host)
void TestUTRPair(bool add_gene, bool is_minus)
#define INTERNAL_STOP_ERR
void TestOneLatLonCountry(const string &country, const string &lat_lon, const string &error, bool use_state=false, const string &err_code="LatLonCountry")
void AddOrgmodDescriptor(CRef< CSeq_entry > entry, const string &val, COrgMod::ESubtype subtype)
void TestNewAccessionOnNucProt(const string &n_acc, const string &p_acc, bool is_wgs)
static void SetErrorsAccessions(vector< CExpectedError * > &expected_errors, string accession)
CRef< CSeq_submit > MakeGeneious()
static CRef< CSeq_entry > BuildGenProdSetBigNucProtSet(CRef< CSeq_id > nuc_id, CRef< CSeq_id > prot_id)
void TestOnePlasmid(const string &plasmid_name, bool expect_error)
static void AddCDSAndProtForBigGoodNucProtSet(CRef< CSeq_entry > entry, string nuc_id, string prot_id, TSeqPos offset)
void CheckUnbalancedParenthesesSubSource(CSubSource::TSubtype subtype, const string &val)
bool s_ArePrimersUnique(const CPCRReactionSet &rset)
CRef< CSeq_entry > s_BuildBadEcNumberEntry()
void CheckStrings(const vector< string > &seen, const vector< string > &expected)
NCBITEST_INIT_CMDLINE(arg_desc)
void AddOrgmodFeat(CRef< CSeq_entry > entry, const string &val, COrgMod::ESubtype subtype)
void TestAlwaysGoodReplicon(const string &val)
#define START_CODON_AND_INT_STOP_ERR
void AddOrgmod(COrg_ref &org, const string &val, COrgMod::ESubtype subtype)
CRef< CSeq_feat > MakeGeneOntologyFeat(CRef< CUser_field > term1, CRef< CUser_field > term2)
void AddSgmlError(vector< CExpectedError * > &expected_errors, const string &valtype, const string &val)
static bool OrgModHasOtherRules(COrgMod::TSubtype subtype)
static void s_USAStateTest(string before, string after, CCountries::EStateCleanup expected)
void TestOneMiscPartial(CRef< CSeq_entry > entry, TSeqPos good_start, TSeqPos bad_start, TSeqPos good_stop, TSeqPos bad_stop, bool is_mrna)
void s_AddGeneralAndLocal(CBioseq &seq)
void CheckLocalId(const string &id, const string &badchar)
void SetUpMiscForPartialTest(CSeq_feat &feat, TSeqPos start, TSeqPos stop, bool pseudo)
void TestGoodNucId(const string &id_str)
static void AddRefGeneTrackingUserObject(CRef< CSeq_entry > entry)
void AddStrsField(CUser_object &user, const string &label, const string &val)
static CRef< CSeq_entry > BuildBigGoodNucProtSet(void)
void AddMonth(CDate &add_date)
static void MakeBadSeasonDate(CDate &date)
vector< pair< string, string > > THostStringsVector
CRef< CUser_field > MkField(const string &label, const string &val)
void TestOneOtherAcc(CRef< CSeq_id > other_acc, bool id_change, bool conflict, bool need_hist=false)
void WriteErrors(const CValidError &eval, bool debug_mode)
void AddCdregionToSmallGenomeSet(CRef< CSeq_entry > entry, size_t cdr1_num, size_t cdr2_num, size_t cdr_pos, size_t p_pos)
void ChangeErrorAcc(vector< CExpectedError * > expected_errors, const string &acc)
static CRef< CSeq_entry > BuildGapFuzz100DeltaSeq(void)
#define test_undesired_protein_name(name)
void AddDay(CDate &add_date)
void TestAlwaysBadReplicon(const string &val)
void TestMultipleEquivBioSources(const string &lineage, TSeqPos first_end, TSeqPos second_start, bool expected)
void CheckGeneOntologyTermNotDuplicate(CRef< CSeq_feat > feat)
void CheckOneSpecificHost(const string &orig, const string &newval)
static void SetRefGeneTrackingStatus(CRef< CSeq_entry > entry, string status)
void TestDeltaTechAllowed(CMolInfo::TTech tech)
static void SetFeatureLocationBond(CRef< CSeq_feat > feat, string id, TSeqPos pt1, TSeqPos pt2)
void ShowOrgRef(const COrg_ref &org)
void TestConsultRequired(const string &taxname)
void TestNewAccessionOnStandaloneProt(const string &accession, bool is_nuc_acc, bool is_wgs)
static void AddGenbankKeyword(CRef< CSeq_entry > entry, string keyword)
void AdjustGap(CSeq_gap &gap, CSeq_gap::EType gap_type, bool is_linked, vector< CLinkage_evidence::EType > linkage_evidence)
static void AddTpaAssemblyUserObject(CRef< CSeq_entry > entry)
static CRef< CSeq_graph > BuildGoodByteGraph(CRef< CSeq_entry > entry, TSeqPos offset=0, TSeqPos len=kInvalidSeqPos)
static CRef< CSeq_entry > BuildGoodSpliceNucProtSet()
void TestGoodProtId(const string &id_str)
void CheckMiscPartialErrors(CRef< CSeq_entry > entry, bool expect_bad_5, bool expect_bad_3)
void TestStartGapSeg(CMolInfo::TTech tech)
void TestDeltaTechNotAllowed(CMolInfo::TTech tech)
static CRef< CSeq_entry > BuildGenProdSetWithBigProduct()
void CheckHost(const CBioseq &seq, const string &host)
#define TESTWGS(seh, entry)
#define EXCEPTION_PROBLEM_ERR
static string MakeWrongCap(const string &str)
void TestBulkSpecificHostFixList(const THostStringsVector &test_values)
void AddChromosomeNoLocation(vector< CExpectedError * > &expected_errors, const string &id)
static CRef< CSeq_entry > MakeGps(CRef< CSeq_entry > member)
BOOST_FIXTURE_TEST_CASE(Test_SEQ_INST_BadSeqIdFormat, CGenBankFixture)
void CheckErrors(const CValidError &eval, vector< CExpectedError * > &expected_errors)
void MakeLeft(CSeq_loc &loc)
void AddGeneticCode(CSeq_feat &cds, CGenetic_code::C_E::TId code_id)
void TestOneStrain(const string &taxname, const string &strain, const string &lineage, TTaxId taxID, bool expect_err)
void TestOneGeneralSeqId(const string &db, const string &tag, const string &errmsg)
void TestOverlappingRNAFeatures(const CSeq_loc &loc1, const CSeq_loc &loc2, bool expect_err)
void TestNewAccessionAsInference(const string &acc)
void TestOneLongGeneral(bool emb, bool err)
void TestBadProtId(const string &id_str)
CRef< CTaxon3_reply > s_CreateReplyWithMessage(const string &message)
static CRef< CSeq_align > BuildSetDendiagAlign(CRef< CSeq_entry > entry)
CRef< CSeq_id > MakeSmallGenomeSetNucId(size_t num)
#define TESTPOPPHYMUTECO(seh, entry)
static void SetTitle(CRef< CSeq_entry > entry, string title)
#define test_gene_syn(name)
void CheckUnbalancedParenthesesOrgMod(COrgMod::TSubtype subtype, const string &val)
CRef< CSeq_entry > BuildSmallGenomeSet(size_t num_np)
void TestRepliconTaxname(CSubSource::ESubtype subtype, bool expect_errs)
static void ChangeGoodNucProtSetIdToGenbankName(CRef< CSeq_entry > entry, string name)
void CheckGeneOntologyTermDuplicate(CRef< CSeq_feat > feat)
void AddStrainDescriptor(CSeq_entry &entry, const string &taxname, const string &strain, const string &lineage, TTaxId taxID)
BOOST_AUTO_TEST_CASE(Test_Descr_MissingKeyword)
static bool SubSourceHasOtherRules(CSubSource::TSubtype subtype)
void MakeRight(CSeq_loc &loc, TSeqPos stop)
static bool IsProteinTech(CMolInfo::TTech tech)
const std::string sc_TestEntryCollidingLocusTags
static NCBI_UNUSED string ToAsn1(const CRef< CSeq_entry > &entry)
CRef< CUser_field > MakeStructuredCommentField(const string &label, const string &value)
void CreateReciprocalLinks(CSeq_feat &f1, CSeq_feat &f2)
static CRef< CUser_field > MakeGoTerm(string text="something", string evidence="some evidence")
void TestNewAccessionOnNuc(const string &accession, bool is_prot_acc, bool is_wgs)
#define STANDARD_SETUP_WITH_MOCK_TAXON(replies)
#define STANDARD_SETUP_NAME(entry_name)
void g_IgnoreDataFile(const string &pattern, bool do_ignore=true)
Ignore (or stop ignoring, depending on do_ignore) NCBI application data files matching the given patt...
static bool ambig(char c)