132 using namespace validator;
133 using namespace unit_test_util;
137 : m_Accession (accession), m_Severity (severity), m_ErrCode(err_code), m_ErrMsg(err_msg)
155 string msg = err_item.
GetMsg();
156 size_t pos =
NStr::Find(msg,
" EXCEPTION: NCBI C++ Exception:");
157 if (pos != string::npos) {
158 msg = msg.substr(0, pos);
178 string msg = err_item.
GetMsg();
179 size_t pos =
NStr::Find(msg,
" EXCEPTION: NCBI C++ Exception:");
180 if (pos != string::npos) {
181 msg = msg.substr(0, pos);
189 string description = err_item.
GetAccnver() +
":"
193 printf(
"%s\n", description.c_str());
204 printf(
"%s\n", description.c_str());
227 vector< CExpectedError* >& expected_errors)
235 bool problem_found =
false;
242 vector<bool> expected_found;
243 for (
size_t i = 0;
i < expected_errors.size();
i++) {
244 if (expected_errors[
i]) {
245 expected_found.push_back(
false);
247 expected_found.push_back(
true);
253 for (
size_t i = 0;
i < expected_errors.size();
i++) {
254 if (!expected_found[
i] && expected_errors[
i]->Match(*vit)) {
255 expected_found[
i] =
true;
261 for (
size_t i = 0;
i < expected_errors.size();
i++) {
262 if (!expected_found[
i] && expected_errors[
i]->Match(*vit,
true)) {
263 printf(
"Problem with ");
265 expected_errors[
i]->Test(*vit);
266 expected_found[
i] =
true;
268 problem_found =
true;
274 BOOST_CHECK_EQUAL(
"Unexpected error",
"Error not found");
276 problem_found =
true;
280 for (
size_t i = 0;
i < expected_errors.size();
i++) {
281 if (!expected_found[
i]) {
282 BOOST_CHECK_EQUAL(expected_errors[
i]->GetErrMsg(),
"Expected error not found");
283 problem_found =
true;
290 printf(
"Expected:\n");
291 for (
auto it : expected_errors) {
302 auto it1 = seen.begin();
305 while (it1 != seen.end() && it2 !=
expected.end()) {
306 BOOST_CHECK_EQUAL(*it1, *it2);
313 while (it1 != seen.end()) {
314 BOOST_CHECK_EQUAL(*it1,
"Unexpected string");
319 BOOST_CHECK_EQUAL(
"Missing string", *it2);
326 auto it1 = seen.begin();
327 while (it1 != seen.end()) {
328 printf(
"%s\n", (*it1).c_str());
331 printf(
"Expected:\n");
334 printf(
"%s\n", (*it2).c_str());
345 static void SetCountryOnSrc(
CBioSource& src,
string country)
385 size_t i,
len = expected_errors.size();
386 for (
i = 0;
i <
len;
i++) {
387 expected_errors[
i]->SetAccession(accession);
396 arg_desc->AddFlag(
"debug_mode",
397 "Debugging mode writes errors seen for each test" );
405 if (args[
"debug_mode"]) {
414 "INDEXER_ONLY - source contains chromosome value '1' but the BioSource location is not set to chromosome"));
419 if (entry->
IsSeq()) {
422 }
else if (entry->
IsSet()) {
470 "Structured Comment is non-compliant, keyword should be removed"));
472 "Required field finishing_strategy is missing when investigation_type has value 'eukaryote'"));
474 "Structured Comment invalid; the field value and/or name are incorrect"));
476 eval = validator.Validate(seh, options);
481 delete expected_errors[0];
482 expected_errors[0] =
nullptr;
483 eval = validator.Validate(seh, options);
491 eval = validator.Validate(seh, options);
498 eval = validator.Validate(seh, options);
523 "Longitude should be set to W (western hemisphere)"));
524 eval = validator.Validate(seh, options);
531 expected_errors[0]->SetErrMsg(
"Latitude should be set to S (southern hemisphere)");
532 eval = validator.Validate(seh, options);
550 "Latitude and longitude values appear to be exchanged"));
551 eval = validator.Validate(seh, options);
558 void TestOneLatLonCountry(
const string& country,
const string& lat_lon,
const string&
error,
bool use_state =
false,
const string& err_code =
"LatLonCountry")
571 if (!
error.empty()) {
574 eval = validator.Validate(seh, options);
577 if (!
error.empty()) {
580 expected.push_back(
"LatLonCountry Errors");
585 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
586 for (
const string& it : cat_list) {
587 vector<string> sublist;
589 for (
const string& sit : sublist) {
611 "Lat_lon '46.5 N 20 E' maps to 'Hungary' instead of 'Romania' - claimed region 'Romania' is at distance 45 km");
612 TestOneLatLonCountry(
"Romania",
"34 N 65 E",
"Lat_lon '34 N 65 E' maps to 'Afghanistan' instead of 'Romania'");
613 TestOneLatLonCountry(
"Romania",
"48 N 15 E",
"Lat_lon '48 N 15 E' maps to 'Austria' instead of 'Romania'");
614 TestOneLatLonCountry(
"Romania",
"48 N 15 W",
"Lat_lon '48 N 15 W' is in water 'Atlantic Ocean'",
false,
"LatLonWater");
634 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[44] =
'A';
635 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[45] =
'G';
640 other_intron->
SetData().SetImp().SetKey(
"intron");
642 gene->
SetData().SetGene().SetLocus_tag(
"fake_locustag");
647 prot->SetData().SetProt().SetEc().push_back(
"1.2.3.10");
648 prot->SetData().SetProt().SetEc().push_back(
"1.1.3.22");
649 prot->SetData().SetProt().SetEc().push_back(
"1.1.99.n");
650 prot->SetData().SetProt().SetEc().push_back(
"1.1.1.17");
651 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.44");
652 prot->SetData().SetProt().SetEc().push_back(
"11.22.n33.44");
653 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.n44");
667 eval = validator.Validate(seh, options);
672 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
673 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
674 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
675 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
676 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
677 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
678 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
679 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
680 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
681 expected.push_back(
"lcl|nuc:Lat_lon '30 N 30 E' maps to 'Egypt' instead of 'Panama'");
682 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
683 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
684 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
688 string val =
format.FormatForSubmitterReport(*vit, scope);
696 seen.push_back(vit->GetErrCode());
698 expected.push_back(
"NotSpliceConsensusDonor");
699 expected.push_back(
"NotSpliceConsensusDonorTerminalIntron");
700 expected.push_back(
"NotSpliceConsensusAcceptor");
701 expected.push_back(
"DeletedEcNumber");
702 expected.push_back(
"ReplacedEcNumber");
703 expected.push_back(
"BadEcNumberValue");
704 expected.push_back(
"BadEcNumberFormat");
705 expected.push_back(
"BadEcNumberValue");
706 expected.push_back(
"NotSpliceConsensusDonor");
707 expected.push_back(
"LatLonCountry");
708 expected.push_back(
"BadInstitutionCode");
709 expected.push_back(
"BadInstitutionCode");
710 expected.push_back(
"BadInstitutionCode");
715 vector<unsigned int> codes =
format.GetListOfErrorCodes(*eval);
716 for (
unsigned int it : codes) {
720 expected.push_back(
"LatLonCountry");
721 expected.push_back(
"BadInstitutionCode");
722 expected.push_back(
"BadEcNumberFormat");
723 expected.push_back(
"BadEcNumberValue");
724 expected.push_back(
"NotSpliceConsensusDonor");
725 expected.push_back(
"NotSpliceConsensusAcceptor");
726 expected.push_back(
"DeletedEcNumber");
727 expected.push_back(
"ReplacedEcNumber");
728 expected.push_back(
"NotSpliceConsensusDonorTerminalIntron");
735 expected.push_back(
"Not Splice Consensus");
736 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
737 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
745 expected.push_back(
"Not Splice Consensus");
746 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
747 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
748 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
749 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
755 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
756 for (
const string& it : cat_list) {
757 vector<string> sublist;
759 for (
const string& sit : sublist) {
763 expected.push_back(
"Not Splice Consensus");
764 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
765 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
766 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
767 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
769 expected.push_back(
"EC Number Format");
770 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
772 expected.push_back(
"EC Number Value");
773 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
774 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
775 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
776 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
778 expected.push_back(
"Bad Institution Codes");
779 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
780 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
781 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
783 expected.push_back(
"LatLonCountry Errors");
784 expected.push_back(
"lcl|nuc:Lat_lon '30 N 30 E' maps to 'Egypt' instead of 'Panama'");
799 eval = validator.Validate(seh, options);
805 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
806 for (
const string& it : cat_list) {
807 vector<string> sublist;
809 for (
const string& sit : sublist) {
814 expected.push_back(
"lcl|good:Sebaea microphylla");
831 "Lat_lon '36 N 80 W' maps to 'USA: North Carolina' instead of 'USA: South Carolina' - claimed region 'USA: South Carolina' is at distance 130 km"));
834 eval = validator.Validate(seh, options);
845 prot->SetData().SetProt().SetEc().push_back(
"1.2.3.10");
846 prot->SetData().SetProt().SetEc().push_back(
"1.1.3.22");
847 prot->SetData().SetProt().SetEc().push_back(
"1.1.99.n");
848 prot->SetData().SetProt().SetEc().push_back(
"1.1.1.17");
849 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.44");
850 prot->SetData().SetProt().SetEc().push_back(
"11.22.n33.44");
851 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.n44");
864 "EC_number 1.2.3.10 was deleted"));
866 "EC_number 1.1.3.22 was transferred and is no longer valid"));
868 "11.22.33.44 is not a legal value for qualifier EC_number"));
870 "11.22.n33.44 is not in proper EC_number format"));
872 "11.22.33.n44 is not a legal preliminary value for qualifier EC_number"));
874 eval = validator.Validate(seh, options);
877 scope.RemoveTopLevelSeqEntry(seh);
878 prot->SetData().SetProt().ResetEc();
880 misc->
SetData().SetImp().SetKey(
"exon");
889 expected_errors[1]->SetErrMsg(
"EC_number 1.1.3.22 was replaced");
890 seh = scope.AddTopLevelSeqEntry(*entry);
891 eval = validator.Validate(seh, options);
902 misc->
SetData().SetImp().SetKey(
"repeat_region");
908 "repeat_region /rpt_unit and underlying sequence do not match"));
910 eval = validator.Validate(seh, options);
913 scope.RemoveTopLevelSeqEntry(seh);
916 misc->
SetData().SetImp().SetKey(
"repeat_region");
918 seh = scope.AddTopLevelSeqEntry(*entry);
919 expected_errors[0]->SetErrCode(
"InvalidRepeatUnitLength");
920 expected_errors[0]->SetErrMsg(
"Length of rpt_unit_seq is greater than feature length");
922 eval = validator.Validate(seh, options);
942 eval = validator.Validate(seh, options);
947 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
948 expected_errors[0]->SetErrMsg(
"Bioseq-ext not allowed on raw Bioseq");
949 eval = validator.Validate(seh, options);
954 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
955 expected_errors[0]->SetErrMsg(
"Missing Seq-data on raw Bioseq");
957 eval = validator.Validate(seh, options);
961 eval = validator.Validate(seh, options);
966 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
968 expected_errors[0]->SetErrCode(
"ExtNotAllowed");
969 expected_errors[0]->SetErrMsg(
"Bioseq-ext not allowed on constructed Bioseq");
970 eval = validator.Validate(seh, options);
975 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
976 expected_errors[0]->SetErrMsg(
"Missing Seq-data on constructed Bioseq");
978 eval = validator.Validate(seh, options);
982 eval = validator.Validate(seh, options);
988 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
989 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on map Bioseq");
991 eval = validator.Validate(seh, options);
995 eval = validator.Validate(seh, options);
999 eval = validator.Validate(seh, options);
1003 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
1004 expected_errors[0]->SetErrCode(
"SeqDataNotAllowed");
1005 expected_errors[0]->SetErrMsg(
"Seq-data not allowed on map Bioseq");
1006 eval = validator.Validate(seh, options);
1014 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
1015 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on reference Bioseq");
1016 eval = validator.Validate(seh, options);
1030 expected_errors[0]->SetErrCode(
"ReprInvalid");
1031 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 6");
1032 eval = validator.Validate(seh, options);
1037 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 0");
1038 eval = validator.Validate(seh, options);
1043 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 255");
1044 eval = validator.Validate(seh, options);
1050 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
1052 expected_errors[0]->SetErrCode(
"SeqDataNotAllowed");
1053 expected_errors[0]->SetErrMsg(
"Seq-data not allowed on delta Bioseq");
1054 eval = validator.Validate(seh, options);
1060 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
1061 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on delta Bioseq");
1062 eval = validator.Validate(seh, options);
1078 eval = validator.Validate(seh, options);
1081 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 255");
1083 eval = validator.Validate(seh, options);
1086 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 6");
1088 eval = validator.Validate(seh, options);
1116 vector< CExpectedError *> expected_errors;
1117 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Warning,
"TerminalNs",
"N at end of sequence"));
1118 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Warning,
"GeneLocusCollidesWithLocusTag",
"locus collides with locus_tag in another gene"));
1119 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"CollidingLocusTags",
"Colliding locus_tags in gene features"));
1120 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"CollidingLocusTags",
"Colliding locus_tags in gene features"));
1121 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoMolInfoFound",
"No Mol-info applies to this Bioseq"));
1122 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"LocusTagGeneLocusMatch",
"Gene locus and locus_tag 'foo' match"));
1123 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoPubFound",
"No publications anywhere on this entire record."));
1124 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Info,
"MissingPubRequirement",
"No submission citation anywhere on this entire record."));
1125 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoSourceDescriptor",
"No source information included on this record."));
1136 local str \"LocusCollidesWithLocusTag\" } ,\
1142 iupacna \"AATTGGCCAANNAATTGGCCAANN\" } ,\
1151 locus-tag \"foo\" } ,\
1158 local str \"LocusCollidesWithLocusTag\" } } ,\
1163 locus-tag \"foo\" } ,\
1170 local str \"LocusCollidesWithLocusTag\" } } ,\
1175 locus-tag \"baz\" } ,\
1182 local str \"LocusCollidesWithLocusTag\" } } ,\
1187 locus-tag \"baz\" } ,\
1194 local str \"LocusCollidesWithLocusTag\" } } } } } }\
1204 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"CircularProtein",
"Non-linear topology set on protein"));
1210 eval = validator.Validate(seh, options);
1214 eval = validator.Validate(seh, options);
1218 eval = validator.Validate(seh, options);
1225 eval = validator.Validate(seh, options);
1230 eval = validator.Validate(seh, options);
1243 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadProteinMoltype",
"Protein not single stranded"));
1247 eval = validator.Validate(seh, options);
1251 eval = validator.Validate(seh, options);
1255 eval = validator.Validate(seh, options);
1264 eval = validator.Validate(seh, options);
1268 eval = validator.Validate(seh, options);
1285 eval = validator.Validate(seh, options);
1288 expected_errors[0]->SetErrCode(
"MolOther");
1289 expected_errors[0]->SetErrMsg(
"Bioseq.mol is type other");
1291 eval = validator.Validate(seh, options);
1294 expected_errors[0]->SetErrCode(
"MolNuclAcid");
1295 expected_errors[0]->SetErrMsg(
"Bioseq.mol is type nucleic acid");
1297 eval = validator.Validate(seh, options);
1315 eval = validator.Validate(seh, options);
1318 expected_errors[0]->SetErrMsg(
"Fuzzy length on const Bioseq");
1320 eval = validator.Validate(seh, options);
1324 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
1325 expected_errors[0]->SetErrMsg(
"Missing Seq-data on constructed Bioseq");
1328 eval = validator.Validate(seh, options);
1353 vector< CExpectedError *> expected_errors;
1354 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidAlphabet",
"Using a nucleic acid alphabet on a protein sequence"));
1361 eval = validator.
Validate(prot_seh, options);
1365 eval = validator.
Validate(prot_seh, options);
1369 eval = validator.
Validate(prot_seh, options);
1373 eval = validator.
Validate(prot_seh, options);
1382 expected_errors[0]->SetErrMsg(
"Using a protein alphabet on a nucleic acid");
1384 eval = validator.
Validate(seh, options);
1388 eval = validator.
Validate(seh, options);
1392 eval = validator.
Validate(seh, options);
1396 eval = validator.
Validate(seh, options);
1400 eval = validator.
Validate(seh, options);
1413 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1414 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1415 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1416 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1417 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1418 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1419 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1420 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1421 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1422 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1423 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFE');
1424 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFE');
1425 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFF');
1426 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFF');
1428 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'E' at position [5]"));
1429 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'F' at position [6]"));
1430 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'I' at position [9]"));
1431 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'J' at position [10]"));
1432 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'L' at position [12]"));
1433 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'O' at position [15]"));
1434 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'P' at position [16]"));
1435 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Q' at position [17]"));
1436 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'U' at position [21]"));
1437 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'X' at position [24]"));
1438 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Z' at position [26]"));
1439 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'E' at position [31]"));
1440 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'F' at position [32]"));
1441 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'I' at position [35]"));
1442 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'J' at position [36]"));
1443 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'L' at position [38]"));
1444 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'O' at position [41]"));
1445 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'P' at position [42]"));
1446 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Q' at position [43]"));
1447 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'U' at position [47]"));
1448 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'X' at position [50]"));
1449 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Z' at position [52]"));
1460 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"More than 10 invalid residues. Checking stopped"));
1461 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Fatal,
"NonAsciiAsn",
"Non-ASCII character '251' found in item"));
1464 eval = validator.Validate(seh, options);
1469 delete expected_errors[8];
1470 expected_errors[8] =
nullptr;
1471 delete expected_errors[19];
1472 expected_errors[19] =
nullptr;
1473 eval = validator.Validate(seh, options);
1479 if (it->IsMolinfo()) {
1483 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1484 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1485 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1486 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1487 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1488 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1489 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1490 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1491 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1492 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1493 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFE');
1494 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFE');
1495 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFF');
1496 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFF');
1499 feat->
SetData().SetProt().SetName().push_back(
"fake protein name");
1500 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
1504 scope.RemoveEntry (*entry);
1505 seh = scope.AddTopLevelSeqEntry(*entry);
1507 for (
int j = 0; j < 22; j++) {
1508 if (expected_errors[j]) {
1509 delete expected_errors[j];
1510 expected_errors[j] =
nullptr;
1513 eval = validator.Validate(seh, options);
1519 scope.RemoveEntry (*entry);
1521 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"abcdefghijklmnopqrstuvwxyz");
1523 seh = scope.AddTopLevelSeqEntry(*entry);
1524 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Sequence contains lower-case characters"));
1526 eval = validator.Validate(seh, options);
1529 scope.RemoveEntry (*entry);
1531 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"protein");
1532 seh = scope.AddTopLevelSeqEntry(*entry);
1533 eval = validator.Validate(seh, options);
1540 scope.RemoveEntry (*entry);
1545 seg->SetLiteral().SetSeq_data().SetIupacna().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1546 seg->SetLiteral().SetLength(52);
1547 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(seg);
1549 seh = scope.AddTopLevelSeqEntry(*entry);
1574 eval = validator.Validate(seh, options);
1580 scope.RemoveEntry (*entry);
1585 seg2->SetLiteral().SetSeq_data().SetIupacaa().Set(
"1234567");
1586 seg2->SetLiteral().SetLength(7);
1587 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(seg2);
1589 seh = scope.AddTopLevelSeqEntry(*entry);
1600 eval = validator.Validate(seh, options);
1645 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MP*K*E*N");
1646 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"GTGCCCTAAAAATAAGAGTAAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1655 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
1656 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"ExceptionProblem",
"unclassified translation discrepancy is not a legal exception explanation"));
1657 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"InternalStop",
"3 internal stops (and illegal start codon). Genetic code [0]"));
1659 "CDS has unnecessary translated product replaced exception"));
1662 eval = validator.Validate(seh, options);
1673 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
1674 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StartCodon",
"Illegal start codon (and 3 internal stops). Probably wrong genetic code [0]"));
1675 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"InternalStop",
"3 internal stops (and illegal start codon). Genetic code [0]"));
1678 eval = validator.Validate(seh, options);
1683 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCTAAAAATAAGAGTAAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1688 delete expected_errors[1];
1689 expected_errors[1] =
nullptr;
1690 expected_errors[2]->SetErrMsg(
"3 internal stops. Genetic code [0]");
1691 eval = validator.Validate(seh, options);
1710 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(loc1);
1713 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(loc2);
1716 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"PartialInconsistent",
"Partial segmented sequence without MolInfo partial"));
1721 eval = validator.Validate(seh, options);
1725 eval = validator.Validate(seh, options);
1729 eval = validator.Validate(seh, options);
1737 eval = validator.Validate(seh, options);
1741 eval = validator.Validate(seh, options);
1745 eval = validator.Validate(seh, options);
1753 eval = validator.Validate(seh, options);
1757 eval = validator.Validate(seh, options);
1761 eval = validator.Validate(seh, options);
1769 expected_errors[0]->SetErrMsg(
"Complete segmented sequence with MolInfo partial");
1770 eval = validator.Validate(seh, options);
1778 expected_errors[0]->SetErrMsg(
"No-left inconsistent with segmented SeqLoc");
1779 eval = validator.Validate(seh, options);
1783 eval = validator.Validate(seh, options);
1787 eval = validator.Validate(seh, options);
1795 expected_errors[0]->SetErrMsg(
"No-right inconsistent with segmented SeqLoc");
1796 eval = validator.Validate(seh, options);
1800 eval = validator.Validate(seh, options);
1804 eval = validator.Validate(seh, options);
1810 expected_errors[0]->SetErrMsg(
"No-ends inconsistent with segmented SeqLoc");
1813 eval = validator.Validate(seh, options);
1817 eval = validator.Validate(seh, options);
1821 eval = validator.Validate(seh, options);
1835 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"MPR");
1837 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetTo(2);
1841 pdb_id->SetMol().Set(
"foo");
1843 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetPdb(*pdb_id);
1844 scope.RemoveTopLevelSeqEntry(seh);
1845 seh = scope.AddTopLevelSeqEntry(*entry);
1846 eval = validator.Validate(seh, options);
1851 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"PartialsInconsistent",
"Molinfo completeness and protein feature partials conflict"));
1852 expected_errors[0]->SetAccession(
"lcl|good");
1853 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
1854 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
1855 scope.RemoveTopLevelSeqEntry(seh);
1856 seh = scope.AddTopLevelSeqEntry(*entry);
1859 eval = validator.Validate(seh, options);
1862 eval = validator.Validate(seh, options);
1865 eval = validator.Validate(seh, options);
1868 eval = validator.Validate(seh, options);
1877 if (it->IsMolinfo()) {
1878 it->SetMolinfo().ResetCompleteness();
1881 eval = validator.Validate(seh, options);
1884 eval = validator.Validate(seh, options);
1887 eval = validator.Validate(seh, options);
1890 eval = validator.Validate(seh, options);
1894 scope.RemoveTopLevelSeqEntry(seh);
1896 seh = scope.AddTopLevelSeqEntry(*entry);
1897 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCTTT");
1899 expected_errors[0]->SetErrMsg(
"Sequence only 9 residues");
1900 eval = validator.Validate(seh, options);
1907 scope.RemoveTopLevelSeqEntry(seh);
1908 seh = scope.AddTopLevelSeqEntry(*entry);
1909 eval = validator.Validate(seh, options);
1942 if (entry->
IsSeq()) {
1944 }
else if (entry->
IsSet()) {
1952 if (entry->
IsSeq()) {
1954 if (it->IsUser() && it->GetUser().IsRefGeneTracking()) {
1955 it->SetUser().SetData().front()->SetData().SetStr(status);
1958 }
else if (entry->
IsSet()) {
1960 if (it->IsUser() && it->GetUser().IsRefGeneTracking()) {
1961 it->SetUser().SetData().front()->SetData().SetStr(status);
1973 auto& cont = entry->
SetDescr().Set();
1974 for (
auto it = cont.begin(); it != cont.end();) {
1975 if ((*it)->IsTitle()) {
1978 it = cont.erase(it);
1981 (*it)->SetTitle(title);
2001 if (it->IsGenbank()) {
2002 it->SetGenbank().SetKeywords().push_back(keyword);
2020 eval = validator.Validate(seh, options);
2022 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"NoKeywordHasTechnique",
"Molinfo.tech barcode without BARCODE keyword"));
2025 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2026 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"genomic\" is not appropriate for sequences that use the TSA technique."));
2027 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAseqGapProblem",
"TSA submission includes wrong gap type. Gaps for TSA should be Assembly Gaps with linkage evidence."));
2029 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
2047 eval = validator.Validate(seh, options);
2049 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ProteinTechniqueOnNucleotide",
"Nucleic acid with protein sequence method"));
2055 eval = validator.Validate(seh, options);
2068 start_gap_seg->SetLiteral().SetLength(10);
2069 start_gap_seg->SetLiteral().SetSeq_data().SetGap();
2070 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().insert(entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().begin(), start_gap_seg);
2071 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2072 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2075 end_gap_seg->SetLiteral().SetLength(10);
2076 end_gap_seg->SetLiteral().SetSeq_data().SetGap();
2077 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(end_gap_seg);
2078 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2085 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadDeltaSeq",
"There is 1 adjacent gap in delta seq"));
2096 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
2098 eval = validator.Validate(seh, options);
2112 if (it->IsMolinfo()) {
2118 scope.RemoveTopLevelSeqEntry(seh);
2119 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
2120 seh = scope.AddTopLevelSeqEntry(*entry);
2121 eval = validator.Validate(seh, options);
2126 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NT_123456");
2127 scope.RemoveTopLevelSeqEntry(seh);
2128 seh = scope.AddTopLevelSeqEntry(*entry);
2129 eval = validator.Validate(seh, options);
2136 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
2137 scope.RemoveTopLevelSeqEntry(seh);
2138 seh = scope.AddTopLevelSeqEntry(*entry);
2141 vector<CMolInfo::TTech> allowed_list;
2155 bool allowed =
false;
2189 for (
auto it : linkage_evidence) {
2201 vector<CLinkage_evidence::EType> evidence;
2203 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2204 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data() &&
2205 it->GetLiteral().GetSeq_data().IsGap()) {
2206 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2215 "SeqGapBadLinkage",
"Seq-gap of type 3 should not have linkage evidence"));
2217 eval = validator.Validate(seh, options);
2222 scope.RemoveTopLevelSeqEntry(seh);
2223 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2224 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data() &&
2225 it->GetLiteral().GetSeq_data().IsGap()) {
2226 CSeq_gap& gap = it->SetLiteral().SetSeq_data().SetGap();
2231 seh = scope.AddTopLevelSeqEntry(*entry);
2235 "SeqGapBadLinkage",
"Seq-gap with linkage evidence must have linkage field set to linked"));
2237 eval = validator.Validate(seh, options);
2242 scope.RemoveTopLevelSeqEntry(seh);
2244 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2245 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data() &&
2246 it->GetLiteral().GetSeq_data().IsGap()) {
2247 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2251 seh = scope.AddTopLevelSeqEntry(*entry);
2255 "SeqGapBadLinkage",
"Linkage evidence 'align genus' appears 2 times"));
2257 eval = validator.Validate(seh, options);
2262 evidence.pop_back();
2264 scope.RemoveTopLevelSeqEntry(seh);
2265 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2266 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data() &&
2267 it->GetLiteral().GetSeq_data().IsGap()) {
2268 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2272 seh = scope.AddTopLevelSeqEntry(*entry);
2276 "SeqGapBadLinkage",
"Seq-gap type has unspecified and additional linkage evidence"));
2278 eval = validator.Validate(seh, options);
2283 scope.RemoveTopLevelSeqEntry(seh);
2286 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2287 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data() &&
2288 it->GetLiteral().GetSeq_data().IsGap()) {
2289 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2293 seh = scope.AddTopLevelSeqEntry(*entry);
2297 "SeqGapBadLinkage",
"Single Seq-gap has unknown type and unspecified linkage"));
2299 eval = validator.Validate(seh, options);
2304 scope.RemoveTopLevelSeqEntry(seh);
2306 gap_seg->SetLiteral().SetLength(10);
2307 AdjustGap(gap_seg->SetLiteral().SetSeq_data().SetGap(),
2311 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCCATGATGATGTACCGTACGTTTTCCCATGATGATGTACCGTACGTTTT");
2312 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetLength(50);
2313 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
2317 seh = scope.AddTopLevelSeqEntry(*entry);
2321 "SeqGapBadLinkage",
"All 2 Seq-gaps have unknown type and unspecified linkage"));
2323 eval = validator.Validate(seh, options);
2332 for (
auto it : expected_errors) {
2334 it->SetAccession(acc);
2346 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (lcl|good - lcl|bad)"));
2350 scope.RemoveTopLevelSeqEntry(seh);
2354 seh = scope.AddTopLevelSeqEntry(*entry);
2355 eval = validator.Validate(seh, options);
2359 scope.RemoveTopLevelSeqEntry(seh);
2363 seh = scope.AddTopLevelSeqEntry(*entry);
2365 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (bbs|1 - bbs|2)");
2366 eval = validator.Validate(seh, options);
2370 scope.RemoveTopLevelSeqEntry(seh);
2373 seh = scope.AddTopLevelSeqEntry(*entry);
2375 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (bbm|1 - bbm|2)");
2376 eval = validator.Validate(seh, options);
2380 scope.RemoveTopLevelSeqEntry(seh);
2385 seh = scope.AddTopLevelSeqEntry(*entry);
2387 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (gi|1 - gi|2)");
2388 eval = validator.Validate(seh, options);
2393 scope.RemoveTopLevelSeqEntry(seh);
2398 seh = scope.AddTopLevelSeqEntry(*entry);
2401 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"IdOnMultipleBioseqs",
"BioseqFind (gim|1) unable to find itself - possible internal error"));
2402 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gim|1 - gim|2)"));
2403 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"IdOnMultipleBioseqs",
"BioseqFind (gim|2) unable to find itself - possible internal error"));
2405 eval = validator.Validate(seh, options);
2410 scope.RemoveTopLevelSeqEntry(seh);
2417 seh = scope.AddTopLevelSeqEntry(*entry);
2418 expected_errors.push_back(
new CExpectedError(
"pat|USA|1|1",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (pat|USA|1|1 - pat|USA|2|2)"));
2420 eval = validator.Validate(seh, options);
2424 scope.RemoveTopLevelSeqEntry(seh);
2427 seh = scope.AddTopLevelSeqEntry(*entry);
2429 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (pdb|good| - pdb|badd| )");
2430 eval = validator.Validate(seh, options);
2434 scope.RemoveTopLevelSeqEntry(seh);
2439 seh = scope.AddTopLevelSeqEntry(*entry);
2441 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (gnl|a|good - gnl|a|bad)");
2442 eval = validator.Validate(seh, options);
2447 scope.RemoveTopLevelSeqEntry(seh);
2449 seh = scope.AddTopLevelSeqEntry(*entry);
2451 eval = validator.Validate(seh, options);
2455 scope.RemoveTopLevelSeqEntry(seh);
2456 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gb|AY222222|)"));
2459 seh = scope.AddTopLevelSeqEntry(*entry);
2460 eval = validator.Validate(seh, options);
2464 scope.RemoveTopLevelSeqEntry(seh);
2467 seh = scope.AddTopLevelSeqEntry(*entry);
2470 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.2|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gb|AY123456.2|)"));
2471 eval = validator.Validate(seh, options);
2475 scope.RemoveTopLevelSeqEntry(seh);
2477 seh = scope.AddTopLevelSeqEntry(*entry);
2479 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gpp|AY123456|)"));
2481 eval = validator.Validate(seh, options);
2485 scope.RemoveTopLevelSeqEntry(seh);
2488 seh = scope.AddTopLevelSeqEntry(*entry);
2490 expected_errors[0]->SetErrMsg(
"LRG sequence needs NG_ accession");
2492 eval = validator.Validate(seh, options);
2495 scope.RemoveTopLevelSeqEntry(seh);
2497 seh = scope.AddTopLevelSeqEntry(*entry);
2500 eval = validator.Validate(seh, options);
2514 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MolNuclAcid",
"Bioseq.mol is type nucleic acid"));
2517 eval = validator.Validate(seh, options);
2531 vector<CMolInfo::TTech> genomic_list;
2542 bool genomic =
false;
2552 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InconsistentMolType",
"Molecule type (DNA) does not match biomol (RNA)"));
2560 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 raw seq has no gaps and no graphs"));
2563 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"HTGS_STS_GSS_WGSshouldBeGenomic",
"HTGS/STS/GSS/WGS sequence should be genomic"));
2564 eval = validator.Validate(seh, options);
2568 delete expected_errors[0];
2569 expected_errors[0] =
nullptr;
2570 expected_errors.back()->SetErrCode(
"HTGS_STS_GSS_WGSshouldNotBeRNA");
2571 expected_errors.back()->SetErrMsg(
"HTGS/STS/GSS/WGS sequence should not be RNA");
2572 eval = validator.Validate(seh, options);
2576 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ProteinTechniqueOnNucleotide",
"Nucleic acid with protein sequence method"));
2579 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"NoKeywordHasTechnique",
"Molinfo.tech barcode without BARCODE keyword"));
2581 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2582 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"cRNA\" is not appropriate for sequences that use the TSA technique."));
2584 eval = validator.Validate(seh, options);
2593 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InconsistentMolType",
"Molecule type (DNA) does not match biomol (RNA)"));
2594 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2595 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"cRNA\" is not appropriate for sequences that use the TSA technique."));
2596 eval = validator.Validate(seh, options);
2601 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2602 eval = validator.GetTSAConflictingBiomolTechErrors(seh);
2604 eval = validator.GetTSAConflictingBiomolTechErrors(*(seh.GetSeq().GetCompleteBioseq()));
2613 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
2614 entry->
SetSeq().
SetId().front()->SetOther().SetName(
"good one");
2618 expected_errors.push_back(
new CExpectedError(
"ref|NC_123456|good one",
eDiag_Critical,
"SeqIdNameHasSpace",
"Seq-id.name 'good one' should be a single word without any spaces"));
2621 eval = validator.Validate(seh, options);
2636 seg1->
SetWhole().SetGenbank().SetAccession(
"AY123456");
2637 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(seg1);
2639 seg2->
SetWhole().SetGenbank().SetAccession(
"AY123456");
2640 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(seg2);
2662 vector< CExpectedError *> expected_errors;
2663 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLocOrder",
"Segmented BioseqIntervals out of order in SeqLoc [[gb|AY123456|, gb|AY123456|]]"));
2664 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"DuplicateSegmentReferences",
"Segmented sequence has multiple references to gb|AY123456"));
2667 eval = validator.
Validate(seh, options);
2670 seg2->
SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
2671 seg2->
SetInt().SetFrom(0);
2672 seg2->
SetInt().SetTo(484);
2673 expected_errors[0]->SetErrMsg(
"Segmented BioseqIntervals out of order in SeqLoc [[gb|AY123456|, 1-485]]");
2675 expected_errors[1]->SetErrMsg(
"Segmented sequence has multiple references to gb|AY123456 that are not SEQLOC_WHOLE");
2676 eval = validator.
Validate(seh, options);
2689 CRef<CSeq_feat> prot_feat =
prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
2691 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATANNNNNN");
2692 nuc->SetSeq().SetInst().SetLength(27);
2693 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MPRKTEIXX");
2694 prot->SetSeq().SetInst().SetLength(9);
2707 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
2710 eval = validator.Validate(seh, options);
2734 expected_errors.push_back(
new CExpectedError(
"gb|" + id_str +
"|",
eDiag_Error,
"BadSeqIdFormat",
"Bad accession " + id_str));
2737 eval = validator.Validate(seh, options);
2758 eval = validator.Validate(seh, options);
2775 bool is_wgs =
false;
2776 if (id_str.length() == 12 || id_str.length() == 13 || id_str.length() == 14 || id_str.length() == 15) {
2786 eval = validator.Validate(seh, options);
2804 vector<string> bad_ids;
2805 bad_ids.push_back(
"AY123456ABC");
2806 bad_ids.push_back(
"A1234");
2807 bad_ids.push_back(
"A123456");
2808 bad_ids.push_back(
"AY12345");
2809 bad_ids.push_back(
"AY1234567");
2810 bad_ids.push_back(
"ABC1234");
2811 bad_ids.push_back(
"ABC123456");
2812 bad_ids.push_back(
"ABCD1234567");
2813 bad_ids.push_back(
"ABCDE123456");
2814 bad_ids.push_back(
"ABCDE12345678");
2816 vector<string> bad_nuc_ids;
2817 bad_nuc_ids.push_back(
"ABC12345");
2819 vector<string> bad_prot_ids;
2820 bad_prot_ids.push_back(
"AY123456");
2821 bad_prot_ids.push_back(
"A12345");
2823 vector<string> good_ids;
2825 vector<string> good_nuc_ids;
2826 good_nuc_ids.push_back(
"AY123456");
2827 good_nuc_ids.push_back(
"A12345");
2828 good_nuc_ids.push_back(
"ABCD123456789");
2829 good_nuc_ids.push_back(
"ABCD1234567890");
2831 vector<string> good_prot_ids;
2832 good_prot_ids.push_back(
"ABC12345");
2843 for (
const string& id_str : bad_ids) {
2844 const string acc_str =
"gb|" + id_str +
"|";
2846 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2849 scope.RemoveTopLevelSeqEntry(seh);
2850 scope.ResetDataAndHistory();
2854 seh = scope.AddTopLevelSeqEntry(*entry);
2855 eval = validator.Validate(seh, options);
2857 scope.RemoveTopLevelSeqEntry(seh);
2858 scope.ResetDataAndHistory();
2861 seh = scope.AddTopLevelSeqEntry(*entry);
2862 eval = validator.Validate(seh, options);
2866 for (
const string& id_it : bad_ids) {
2867 const string id_str =
"B" + id_it.substr(1);
2868 expected_errors[0]->SetAccession(
"embl|" + id_str +
"|");
2869 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2872 scope.RemoveTopLevelSeqEntry(seh);
2873 scope.ResetDataAndHistory();
2877 seh = scope.AddTopLevelSeqEntry(*entry);
2878 eval = validator.Validate(seh, options);
2879 expected_errors[0]->SetAccession(
"emb|" + id_str +
"|");
2881 scope.RemoveTopLevelSeqEntry(seh);
2882 scope.ResetDataAndHistory();
2885 seh = scope.AddTopLevelSeqEntry(*entry);
2886 eval = validator.Validate(seh, options);
2890 for (
const string& id_it : bad_ids) {
2891 const string id_str =
"C" + id_it.substr(1);
2892 expected_errors[0]->SetAccession(
"dbj|" + id_str +
"|");
2893 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2896 scope.RemoveTopLevelSeqEntry(seh);
2897 scope.ResetDataAndHistory();
2901 seh = scope.AddTopLevelSeqEntry(*entry);
2902 eval = validator.Validate(seh, options);
2903 expected_errors[0]->SetAccession(
"dbj|" + id_str +
"|");
2905 scope.RemoveTopLevelSeqEntry(seh);
2906 scope.ResetDataAndHistory();
2909 seh = scope.AddTopLevelSeqEntry(*entry);
2910 eval = validator.Validate(seh, options);
2915 for (
const string& id_str : bad_nuc_ids) {
2917 scope.RemoveTopLevelSeqEntry(seh);
2920 expected_errors[0]->SetAccession(
"gb|" + id_str +
"|");
2921 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2922 seh = scope.AddTopLevelSeqEntry(*entry);
2923 eval = validator.Validate(seh, options);
2928 for (
auto id_it : bad_prot_ids) {
2935 for (
const string& id_str : good_ids) {
2937 scope.RemoveTopLevelSeqEntry(seh);
2940 seh = scope.AddTopLevelSeqEntry(*entry);
2941 eval = validator.Validate(seh, options);
2944 scope.RemoveTopLevelSeqEntry(seh);
2947 seh = scope.AddTopLevelSeqEntry(*entry);
2948 eval = validator.Validate(seh, options);
2954 for (
const string& id_it : good_nuc_ids) {
2959 for (
const string& id_it : good_prot_ids) {
2964 scope.RemoveTopLevelSeqEntry(seh);
2971 seh = scope.AddTopLevelSeqEntry(*entry);
2972 eval = validator.Validate(seh, options);
2974 "Accession AY123456 has 0 version"));
2975 expected_errors.push_back (
new CExpectedError (
"gb|AY123456|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123456|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
2984 scope.RemoveTopLevelSeqEntry(seh);
2985 bad_id->
SetLocal().
SetStr(
"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345678901234");
2987 seh = scope.AddTopLevelSeqEntry(*entry);
2988 eval = validator.Validate(seh, options);
2995 scope.RemoveTopLevelSeqEntry(seh);
2999 seh = scope.AddTopLevelSeqEntry(*entry);
3000 eval = validator.Validate(seh, options);
3007 scope.RemoveTopLevelSeqEntry(seh);
3014 seh = scope.AddTopLevelSeqEntry(*entry);
3016 "General database longer than 20 characters"));
3019 eval = validator.Validate(seh, options);
3025 scope.RemoveTopLevelSeqEntry(seh);
3027 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"a/b");
3028 seh = scope.AddTopLevelSeqEntry(*entry);
3029 eval = validator.Validate(seh, options);
3041 id->SetGeneral().SetDb(db);
3042 id->SetGeneral().SetTag().SetStr(
tag);
3047 string acc_str =
"lcl|good";
3048 if (!errmsg.empty()) {
3053 eval = validator.Validate(seh, options);
3062 TestOneGeneralSeqId(
"PRJNA318798",
" CpPA02_0001",
"Bad character ' ' in sequence ID 'gnl|PRJNA318798| CpPA02_0001'");
3063 TestOneGeneralSeqId(
"PRJNA3 18798",
"CpPA02_0001",
"Bad character ' ' in sequence ID 'gnl|PRJNA3 18798|CpPA02_0001'");
3071 id->SetGeneral().SetDb(
"lgsi");
3072 id->SetGeneral().SetTag().SetStr(
"thisidentifierismorethanfiftycharactersinlengthsoitshouldberejected");
3085 string acc_str =
"lcl|good";
3087 "General identifier longer than 50 characters"));
3090 eval = validator.Validate(seh, options);
3106 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3111 gbdesc->SetGenbank().SetExtra_accessions().push_back(
"AY123456");
3114 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"BadSecondaryAccn",
"AY123456 used for both primary and secondary accession"));
3116 eval = validator.Validate(seh, options);
3119 gbdesc->SetEmbl().SetExtra_acc().push_back(
"AY123456");
3120 eval = validator.Validate(seh, options);
3135 expected_errors.push_back(
new CExpectedError(
"gi|0",
eDiag_Error,
"GiWithoutAccession",
"No accession on sequence with gi number"));
3137 eval = validator.Validate(seh, options);
3147 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3148 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3156 hist_id->SetGi(
GI_CONST(21914627));
3157 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetIds().push_back(hist_id);
3158 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetDate().SetStd().SetYear(2008);
3160 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.1|",
eDiag_Error,
"HistoryGiCollision",
"Replaced by gi (21914627) is same as current Bioseq"));
3162 eval = validator.Validate(seh, options);
3166 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetIds().push_back(hist_id);
3167 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetDate().SetStd().SetYear(2008);
3168 expected_errors[0]->SetErrMsg(
"Replaces gi (21914627) is same as current Bioseq");
3169 eval = validator.Validate(seh, options);
3176 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetIds().push_back(hist_id);
3177 eval = validator.Validate(seh, options);
3182 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetIds().push_back(hist_id);
3183 eval = validator.Validate(seh, options);
3197 expected_errors.push_back(
new CExpectedError(
"gi|123456",
eDiag_Error,
"GiWithoutAccession",
"No accession on sequence with gi number"));
3199 eval = validator.Validate(seh, options);
3209 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3210 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3215 string acc_str =
"gb|AY123456.1|";
3221 "Conflicting ids on a Bioseq: (gb|AY123456.1| - " + other_acc->
AsFastaString() +
")"));
3223 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"MultipleAccessions",
"Multiple accessions on sequence with gi number"));
3225 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123457.1|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
3229 "TPA record gb|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3232 eval = validator.Validate(seh, options);
3291 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3292 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3302 string acc_str =
"gb|AY123456.1|";
3303 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"INSDRefSeqPackaging",
"INSD and RefSeq records should not be present in the same set"));
3304 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"MultipleAccessions",
"Multiple accessions on sequence with gi number"));
3306 eval = validator.Validate(seh, options);
3316 tpg_entry->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3317 tpg_entry->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3320 tpe_entry->
SetSeq().
SetId().front()->SetTpe().SetAccession(
"AY123456");
3321 tpe_entry->
SetSeq().
SetId().front()->SetTpe().SetVersion(1);
3324 tpd_entry->
SetSeq().
SetId().front()->SetTpd().SetAccession(
"AY123456");
3325 tpd_entry->
SetSeq().
SetId().front()->SetTpd().SetVersion(1);
3330 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Info,
"HistAssemblyMissing",
"TPA record tpg|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3332 eval = validator.Validate(seh, options);
3336 scope.RemoveTopLevelSeqEntry(seh);
3337 seh = scope.AddTopLevelSeqEntry(*tpe_entry);
3339 expected_errors[0]->SetErrMsg(
"TPA record tpe|AY123456.1| should have Seq-hist.assembly for PRIMARY block");
3340 eval = validator.Validate(seh, options);
3345 scope.RemoveTopLevelSeqEntry(seh);
3346 seh = scope.AddTopLevelSeqEntry(*tpd_entry);
3348 expected_errors[0]->SetErrMsg(
"TPA record tpd|AY123456.1| should have Seq-hist.assembly for PRIMARY block");
3349 eval = validator.Validate(seh, options);
3356 block->SetGenbank().SetKeywords().push_back(
"TPA:reassembly");
3358 scope.RemoveTopLevelSeqEntry(seh);
3359 seh = scope.AddTopLevelSeqEntry(*tpg_entry);
3360 eval = validator.Validate(seh, options);
3364 block->SetEmbl().SetKeywords().push_back(
"TPA:reassembly");
3365 eval = validator.Validate(seh, options);
3373 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNNAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCCAANNNNNNNNNN");
3381 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
3383 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
3385 eval = validator.Validate(seh, options);
3389 scope.RemoveTopLevelSeqEntry(seh);
3390 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3391 seh = scope.AddTopLevelSeqEntry(*entry);
3395 eval = validator.Validate(seh, options);
3401 scope.RemoveTopLevelSeqEntry(seh);
3403 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNCCC");
3404 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCCNNNNNNNNN");
3405 seh = scope.AddTopLevelSeqEntry(*entry);
3407 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 3 bases"));
3410 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 52 percent Ns"));
3411 eval = validator.Validate(seh, options);
3415 scope.RemoveTopLevelSeqEntry(seh);
3417 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNNCC");
3418 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCNNNNNNNNNN");
3419 seh = scope.AddTopLevelSeqEntry(*entry);
3420 expected_errors[0]->SetErrMsg(
"Maximum contig length is 2 bases");
3421 expected_errors.back()->SetErrMsg (
"Sequence contains 58 percent Ns");
3422 eval = validator.Validate(seh, options);
3426 scope.RemoveTopLevelSeqEntry(seh);
3427 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3428 seh = scope.AddTopLevelSeqEntry(*entry);
3432 eval = validator.Validate(seh, options);
3436 scope.RemoveTopLevelSeqEntry(seh);
3437 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
3438 seh = scope.AddTopLevelSeqEntry(*entry);
3442 eval = validator.Validate(seh, options);
3445 scope.RemoveTopLevelSeqEntry(seh);
3446 entry->
SetSeq().
SetId().front()->SetPatent().SetSeqid(1);
3447 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetCountry(
"USA");
3448 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetId().SetNumber(
"1");
3449 seh = scope.AddTopLevelSeqEntry(*entry);
3451 delete expected_errors.back();
3452 expected_errors.pop_back();
3453 eval = validator.Validate(seh, options);
3461 "Maximum contig length is 2 bases"));
3463 "Suspicious use of complete"));
3466 eval = validator.Validate(seh, options);
3476 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123457");
3477 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3484 expected_errors.push_back(
new CExpectedError(
"gb|AY123457.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123457.1|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
3486 eval = validator.Validate(seh, options);
3490 scope.RemoveTopLevelSeqEntry(seh);
3491 entry->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3492 entry->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3493 seh = scope.AddTopLevelSeqEntry(*entry);
3495 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Info,
"HistAssemblyMissing",
"TPA record tpg|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3496 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"Loss of accession (gb|AY123456.1|) on gi (21914627) compared to the NCBI sequence repository"));
3497 eval = validator.Validate(seh, options);
3514 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqLit",
"Run of 20 Ns in delta component 5 that starts at base 45"));
3515 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
3524 eval = validator.Validate(seh, options);
3532 "Run of 81 Ns in delta component 7 that starts at base 79"));
3541 eval = validator.Validate(seh, options);
3545 eval = validator.Validate(seh, options);
3549 eval = validator.Validate(seh, options);
3552 unit_test_util::AddToDeltaSeq(entry,
"AANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGG");
3554 expected_errors[0]->SetErrMsg(
"Run of 101 Ns in delta component 9 that starts at base 174");
3555 eval = validator.Validate(seh, options);
3566 delta_seq->SetLiteral().SetLength(0);
3567 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(delta_seq);
3571 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLitGapLength0",
"Gap of length 0 in delta chain"));
3574 eval = validator.Validate(seh, options);
3579 eval = validator.Validate(seh, options);
3582 delta_seq->SetLiteral().SetFuzz().
Reset();
3583 delta_seq->SetLiteral().SetFuzz().SetP_m(10);
3584 eval = validator.Validate(seh, options);
3588 delta_seq->SetLiteral().SetFuzz().
Reset();
3590 expected_errors[0]->SetErrMsg(
"Gap of length 0 with unknown fuzz in delta chain");
3591 eval = validator.Validate(seh, options);
3595 scope.RemoveTopLevelSeqEntry(seh);
3596 entry->
SetSeq().
SetId().front()->SetSwissprot().SetAccession(
"AY123456");
3597 seh = scope.AddTopLevelSeqEntry(*entry);
3600 eval = validator.Validate(seh, options);
3603 delta_seq->SetLiteral().SetFuzz().SetP_m(10);
3604 expected_errors[0]->SetErrMsg(
"Gap of length 0 in delta chain");
3605 eval = validator.Validate(seh, options);
3608 delta_seq->SetLiteral().SetFuzz().
Reset();
3610 eval = validator.Validate(seh, options);
3613 delta_seq->SetLiteral().ResetFuzz();
3614 eval = validator.Validate(seh, options);
3629 field->
SetData().SetStr(
"Data");
3639 member1->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
3643 member2->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good2");
3652 eval = validator.Validate(seh, options);
3657 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TpaAssemblyProblem",
"There are 1 TPAs with history and 1 without history in this record."));
3658 eval = validator.Validate(seh, options);
3662 scope.RemoveTopLevelSeqEntry(seh);
3663 member1->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3664 member1->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3668 seh = scope.AddTopLevelSeqEntry(*entry);
3672 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"Loss of accession (gb|AY123456.1|) on gi (21914627) compared to the NCBI sequence repository"));
3673 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Error,
"TpaAssemblyProblem",
"There are 1 TPAs with history and 1 without history in this record."));
3674 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"TpaAssemblyProblem",
"There are 1 TPAs without history in this record, but the record has a gi number assignment."));
3677 eval = validator.Validate(seh, options);
3688 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
3689 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
3690 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(9);
3697 eval = validator.Validate(seh, options);
3700 scope.RemoveTopLevelSeqEntry(seh);
3703 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
3704 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
3705 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(10);
3707 seh = scope.AddTopLevelSeqEntry(*entry);
3708 eval = validator.Validate(seh, options);
3726 eval = validator.Validate(seh, options);
3730 eval = validator.Validate(seh, options);
3734 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MissingGaps",
"HTGS delta seq should have gaps between all sequence runs"));
3735 eval = validator.Validate(seh, options);
3739 eval = validator.Validate(seh, options);
3743 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 delta seq has no gaps and no graphs"));
3744 eval = validator.Validate(seh, options);
3748 scope.RemoveTopLevelSeqEntry(seh);
3749 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
3751 seh = scope.AddTopLevelSeqEntry(*entry);
3754 eval = validator.Validate(seh, options);
3756 delete expected_errors[1];
3757 expected_errors.pop_back();
3760 eval = validator.Validate(seh, options);
3764 eval = validator.Validate(seh, options);
3775 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3777 SetTitle(entry,
"Foo complete genome");
3781 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Warning,
"CompleteTitleProblem",
"Complete genome in title without complete flag set"));
3784 eval = validator.Validate(seh, options);
3792 eval = validator.Validate(seh, options);
3797 scope.RemoveTopLevelSeqEntry(seh);
3799 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3801 SetTitle(entry,
"Foo complete genome");
3803 seh = scope.AddTopLevelSeqEntry(*entry);
3806 "CompleteGenomeHasGaps",
"Title contains 'complete genome' but sequence has gaps"));
3808 eval = validator.Validate(seh, options);
3825 "CompleteCircleProblem",
3826 "Circular topology without complete flag set"));
3829 eval = validator.Validate(seh, options);
3834 scope.RemoveTopLevelSeqEntry(seh);
3835 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3836 SetTitle(entry,
"This is just a title");
3838 seh = scope.AddTopLevelSeqEntry(*entry);
3840 "CompleteCircleProblem",
3841 "Circular topology has complete flag set, but title should say complete sequence or complete genome"));
3843 "UnwantedCompleteFlag",
3844 "Suspicious use of complete"));
3847 eval = validator.Validate(seh, options);
3865 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MissingGaps",
"HTGS delta seq should have gaps between all sequence runs"));
3866 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 delta seq has no gaps and no graphs"));
3867 eval = validator.Validate(seh, options);
3870 delete expected_errors[1];
3871 expected_errors.pop_back();
3875 eval = validator.Validate(seh, options);
3880 scope.RemoveTopLevelSeqEntry(seh);
3883 seh = scope.AddTopLevelSeqEntry(*raw_entry);
3884 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 raw seq has no gaps and no graphs"));
3886 eval = validator.Validate(seh, options);
3894 eval = validator.Validate(seh, options);
3903 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_DRAFT keyword"));
3904 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_PREFIN keyword"));
3905 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_ACTIVEFIN keyword"));
3906 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_FULLTOP keyword"));
3907 eval = validator.Validate(seh, options);
3910 scope.RemoveTopLevelSeqEntry(seh);
3911 seh = scope.AddTopLevelSeqEntry(*delta_entry);
3916 eval = validator.Validate(seh, options);
3927 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"PRK-EIN");
3931 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GapInProtein",
"[1] internal gap symbols in protein sequence (gene? - fake protein name)"));
3933 eval = validator.Validate(seh, options);
3938 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"-RKTEIN");
3939 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadProteinStart",
"gap symbol at start of protein sequence (gene? - fake protein name)"));
3941 eval = validator.Validate(seh, options);
3944 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"-RK-EIN");
3945 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GapInProtein",
"[1] internal gap symbols in protein sequence (gene? - fake protein name)"));
3946 eval = validator.Validate(seh, options);
3958 first_seg->SetLiteral().SetLength(9);
3959 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_front(first_seg);
3961 last_seg->SetLiteral().SetLength(9);
3962 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(last_seg);
3979 eval = validator.Validate(seh, options);
3983 scope.RemoveTopLevelSeqEntry(seh);
3984 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetLength(10);
3985 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetLength(10);
3987 seh = scope.AddTopLevelSeqEntry(*entry);
3988 eval = validator.Validate(seh, options);
3992 scope.RemoveTopLevelSeqEntry(seh);
3993 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
3994 seh = scope.AddTopLevelSeqEntry(*entry);
4000 eval = validator.Validate(seh, options);
4003 scope.RemoveTopLevelSeqEntry(seh);
4004 entry->
SetSeq().
SetId().front()->SetPatent().SetSeqid(1);
4005 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetCountry(
"USA");
4006 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetId().SetNumber(
"1");
4007 seh = scope.AddTopLevelSeqEntry(*entry);
4009 eval = validator.Validate(seh, options);
4018 "Suspicious use of complete"));
4021 eval = validator.Validate(seh, options);
4034 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 0, 10);
4035 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 5, 15);
4036 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 20, 30);
4037 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 25, 35);
4042 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"OverlappingDeltaRange",
"Overlapping delta range 6-16 and 1-11 on a Bioseq gb|AY123456|"));
4043 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"OverlappingDeltaRange",
"Overlapping delta range 26-36 and 21-31 on a Bioseq gb|AY123456|"));
4045 eval = validator.Validate(seh, options);
4056 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"XROTEIN");
4062 eval = validator.Validate(seh, options);
4072 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTTT");
4077 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqRaw",
"Run of 100 Ns in raw sequence starting at base 6"));
4078 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4079 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 90 percent Ns"));
4081 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4083 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4085 eval = validator.Validate(seh, options);
4091 scope.RemoveTopLevelSeqEntry(seh);
4092 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAANNNNNNNNNNNNNNNNNNNNTTTTT");
4094 seh = scope.AddTopLevelSeqEntry(*entry);
4095 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4096 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 66 percent Ns"));
4098 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4100 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4102 eval = validator.Validate(seh, options);
4109 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqRaw",
"Run of 20 Ns in raw sequence starting at base 6"));
4110 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4111 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 66 percent Ns"));
4113 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4115 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4117 eval = validator.Validate(seh, options);
4128 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"ATGATGATGNNN");
4129 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNATGATGATG");
4133 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 9 bases"));
4134 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InternalNsAdjacentToGap",
"Ambiguous residue N is adjacent to a gap around position 13"));
4135 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InternalNsAdjacentToGap",
"Ambiguous residue N is adjacent to a gap around position 23"));
4142 eval = validator.Validate(seh, options);
4152 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
4153 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(11);
4154 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGi(
ZERO_GI);
4159 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"DeltaSeqError",
"Unable to find far delta sequence component"));
4162 eval = validator.Validate(seh, options);
4173 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGGCCAAAATTGGCCAAAATTGG-CAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
4178 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalGapsInSeqRaw",
"Raw nucleotide should not contain gap characters"));
4181 eval = validator.Validate(seh, options);
4192 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
4193 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(11);
4194 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetLocal().SetStr(
"good");
4198 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"SelfReferentialSequence",
"Self-referential delta sequence"));
4199 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InstantiatedGapMismatch",
"Exception 4 in GapByGapInst"));
4202 eval = validator.Validate(seh, options);
4213 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetWhole().SetGenbank().SetAccession(
"AY123456");
4218 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WholeComponent",
"Delta seq component should not be of type whole"));
4221 eval = validator.Validate(seh, options);
4233 seq.
SetId().front()->Assign(*gnl);
4236 seq.
SetId().push_back(lcl);
4237 seq.
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().Assign(*gnl);
4251 eval = validator.Validate(seh, options);
4255 scope.RemoveTopLevelSeqEntry(seh);
4261 cds->
SetProduct().SetWhole().SetGeneral().SetDb(
"a");
4262 cds->
SetProduct().SetWhole().SetGeneral().SetTag().SetStr(
"b");
4263 seh = scope.AddTopLevelSeqEntry(*entry);
4265 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Info,
"ProteinsHaveGeneralID",
"INDEXER_ONLY - Protein bioseqs have general seq-id."));
4268 eval = validator.Validate(seh, options);
4279 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCNNNNNNNNNNNAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTT");
4287 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 11 percent Ns"));
4289 eval = validator.Validate(seh, options);
4292 scope.RemoveTopLevelSeqEntry(seh);
4293 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCNNNNNNNNNNNNNNNNTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTT");
4294 seh = scope.AddTopLevelSeqEntry(*entry);
4295 expected_errors[0]->SetErrMsg(
"Sequence contains 16 percent Ns");
4296 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentStretch",
"Sequence has a stretch of 16 Ns"));
4297 eval = validator.Validate(seh, options);
4302 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentStretch",
"Sequence has a stretch of 16 Ns"));
4303 eval = validator.GetTSANStretchErrors(seh);
4305 eval = validator.GetTSANStretchErrors(entry->
GetSeq());
4310 scope.RemoveTopLevelSeqEntry(seh);
4311 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AANNNNNNNNNNGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGTTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCNNNNNNNNNNAAA");
4312 seh = scope.AddTopLevelSeqEntry(*entry);
4314 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4316 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4318 "Sequence contains 20 percent Ns"));
4320 "Sequence has a stretch of at least 10 Ns within the first 20 bases"));
4322 "Sequence has a stretch of at least 10 Ns within the last 20 bases"));
4324 eval = validator.Validate(seh, options);
4329 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNcontent5Prime",
"Sequence has a stretch of at least 10 Ns within the first 20 bases"));
4330 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNcontent3Prime",
"Sequence has a stretch of at least 10 Ns within the last 20 bases"));
4331 eval = validator.GetTSANStretchErrors(seh);
4333 eval = validator.GetTSANStretchErrors(entry->
GetSeq());
4338 scope.RemoveTopLevelSeqEntry(seh);
4341 gap_seg->SetLiteral().SetSeq_data().SetGap();
4342 gap_seg->SetLiteral().SetLength(10);
4343 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
4346 seh = scope.AddTopLevelSeqEntry(*entry);
4356 eval = validator.Validate(seh, options);
4368 CDelta_ext::Tdata::iterator seg_it = entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().begin();
4370 (*seg_it)->SetLiteral().SetSeq_data().SetIupacna().Set();
4371 (*seg_it)->SetLiteral().SetLength(0);
4377 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLitDataLength0",
"Seq-lit of length 0 in delta chain"));
4379 eval = validator.Validate(seh, options);
4394 gap_seg->SetLiteral().SetLength(101);
4396 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
4410 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"UnknownLengthGapNot100",
"Gap of unknown length should have length 100"));
4418 eval = validator.Validate(seh, options);
4436 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"mRNAshouldBeSingleStranded",
"mRNA should be single stranded not double stranded"));
4438 eval = validator.Validate(seh, options);
4443 eval = validator.Validate(seh, options);
4448 eval = validator.Validate(seh, options);
4457 eval = validator.Validate(seh, options);
4463 eval = validator.Validate(seh, options);
4468 eval = validator.Validate(seh, options);
4484 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"BioSourceMissing",
"Nuc-prot set does not contain expected BioSource descriptor"));
4485 expected_errors.push_back(
new CExpectedError(
"lcl|prot",
eDiag_Fatal,
"NoOrgFound",
"No organism name included in the source. Other qualifiers may exist."));
4488 eval = validator.Validate(seh, options);
4502 entry->
SetDescr().Set().push_back(desc);
4505 entry->
SetDescr().Set().push_back(desc);
4508 entry->
SetDescr().Set().push_back(desc);
4511 entry->
SetDescr().Set().push_back(desc);
4517 "Nucleic acid with protein sequence method"));
4519 "MolType descriptor is obsolete"));
4521 "Modif descriptor is obsolete"));
4523 "Method descriptor is obsolete"));
4525 "OrgRef descriptor is obsolete"));
4529 eval = validator.Validate(seh, options);
4534 scope.RemoveTopLevelSeqEntry(seh);
4535 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
4540 seh = scope.AddTopLevelSeqEntry(*entry);
4542 "Non-TPA record gb|AY123456| should not have TpaAssembly object"));
4545 eval = validator.Validate(seh, options);
4548 scope.RemoveTopLevelSeqEntry(seh);
4549 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
4550 seh = scope.AddTopLevelSeqEntry(*entry);
4552 expected_errors[0]->SetErrMsg(
"Non-TPA record ref|NC_123456| should not have TpaAssembly object");
4553 eval = validator.Validate(seh, options);
4558 entry->
SetDescr().Set().push_back(desc);
4560 "Nucleic acid with GIBB-mol = peptide"));
4562 "MolType descriptor is obsolete"));
4563 eval = validator.Validate(seh, options);
4567 expected_errors[1]->SetErrMsg(
"GIBB-mol unknown or other used");
4568 eval = validator.Validate(seh, options);
4572 eval = validator.Validate(seh, options);
4577 scope.RemoveTopLevelSeqEntry(seh);
4581 entry->
SetDescr().Set().push_back(desc);
4582 seh = scope.AddTopLevelSeqEntry(*entry);
4584 "GIBB-mol [1] used on protein"));
4586 "MolType descriptor is obsolete"));
4588 eval = validator.Validate(seh, options);
4592 expected_errors[0]->SetErrMsg(
"GIBB-mol [2] used on protein");
4593 eval = validator.Validate(seh, options);
4597 expected_errors[0]->SetErrMsg(
"GIBB-mol [3] used on protein");
4598 eval = validator.Validate(seh, options);
4602 expected_errors[0]->SetErrMsg(
"GIBB-mol [4] used on protein");
4603 eval = validator.Validate(seh, options);
4607 expected_errors[0]->SetErrMsg(
"GIBB-mol [5] used on protein");
4608 eval = validator.Validate(seh, options);
4612 expected_errors[0]->SetErrMsg(
"GIBB-mol [6] used on protein");
4613 eval = validator.Validate(seh, options);
4617 expected_errors[0]->SetErrMsg(
"GIBB-mol [7] used on protein");
4618 eval = validator.Validate(seh, options);
4622 expected_errors[0]->SetErrMsg(
"GIBB-mol [9] used on protein");
4623 eval = validator.Validate(seh, options);
4627 expected_errors[0]->SetErrMsg(
"GIBB-mol [10] used on protein");
4628 eval = validator.Validate(seh, options);
4637 "Nucleic acid GIBB-mod [0] on protein"));
4639 "Nucleic acid GIBB-mod [1] on protein"));
4641 "Modif descriptor is obsolete"));
4643 eval = validator.Validate(seh, options);
4648 scope.RemoveTopLevelSeqEntry(seh);
4651 if (it->IsSource()) {
4655 seh = scope.AddTopLevelSeqEntry(*entry);
4658 "Molinfo-biomol other should be used if Biosource-location is synthetic"));
4660 eval = validator.Validate(seh, options);
4666 if (it->IsSource()) {
4667 it->SetSource().ResetOrigin();
4673 "Nucleic acid with Molinfo = peptide"));
4675 eval = validator.Validate(seh, options);
4681 "MoltypeOtherGenetic",
"Molinfo-biomol = other genetic"));
4683 eval = validator.Validate(seh, options);
4689 "MoltypeUnknown",
"Molinfo-biomol unknown used"));
4691 eval = validator.Validate(seh, options);
4697 "MoltypeOther",
"Molinfo-biomol other used"));
4699 eval = validator.Validate(seh, options);
4703 scope.RemoveTopLevelSeqEntry(seh);
4705 seh = scope.AddTopLevelSeqEntry(*entry);
4708 "InvalidForType",
"Molinfo-biomol [1] used on protein"));
4711 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [1] used on protein");
4712 eval = validator.Validate(seh, options);
4716 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [2] used on protein");
4717 eval = validator.Validate(seh, options);
4721 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [3] used on protein");
4722 eval = validator.Validate(seh, options);
4726 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [4] used on protein");
4727 eval = validator.Validate(seh, options);
4731 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [5] used on protein");
4732 eval = validator.Validate(seh, options);
4736 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [6] used on protein");
4737 eval = validator.Validate(seh, options);
4741 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [7] used on protein");
4742 eval = validator.Validate(seh, options);
4746 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [10] used on protein");
4747 eval = validator.Validate(seh, options);
4751 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [11] used on protein");
4752 eval = validator.Validate(seh, options);
4756 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [12] used on protein");
4757 eval = validator.Validate(seh, options);
4761 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [13] used on protein");
4762 eval = validator.Validate(seh, options);
4766 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [14] used on protein");
4767 eval = validator.Validate(seh, options);
4771 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [15] used on protein");
4772 eval = validator.Validate(seh, options);
4777 scope.RemoveTopLevelSeqEntry(seh);
4779 seh = scope.AddTopLevelSeqEntry(*entry);
4782 "synthetic construct should have other-genetic"));
4784 "synthetic construct should have artificial origin"));
4786 eval = validator.Validate(seh, options);