132 using namespace validator;
133 using namespace unit_test_util;
137 : m_Accession(accession), m_Severity(severity), m_ErrCode(err_code), m_ErrMsg(err_msg)
156 size_t pos =
NStr::Find(
msg,
" EXCEPTION: NCBI C++ Exception:");
157 if (pos != string::npos) {
179 size_t pos =
NStr::Find(
msg,
" EXCEPTION: NCBI C++ Exception:");
180 if (pos != string::npos) {
189 string description = err_item.
GetAccnver() +
":"
193 printf(
"%s\n", description.c_str());
203 printf(
"%s\n", description.c_str());
225 vector<CExpectedError*>& expected_errors)
233 bool problem_found =
false;
240 vector<bool> expected_found;
241 for (
size_t i = 0;
i < expected_errors.size();
i++) {
242 if (expected_errors[
i]) {
243 expected_found.push_back(
false);
245 expected_found.push_back(
true);
251 for (
size_t i = 0;
i < expected_errors.size();
i++) {
252 if (!expected_found[
i] && expected_errors[
i]->Match(*vit)) {
253 expected_found[
i] =
true;
259 for (
size_t i = 0;
i < expected_errors.size();
i++) {
260 if (!expected_found[
i] && expected_errors[
i]->Match(*vit,
true)) {
261 printf(
"Problem with ");
263 expected_errors[
i]->Test(*vit);
264 expected_found[
i] =
true;
266 problem_found =
true;
272 BOOST_CHECK_EQUAL(
"Unexpected error",
"Error not found");
274 problem_found =
true;
278 for (
size_t i = 0;
i < expected_errors.size();
i++) {
279 if (!expected_found[
i]) {
280 BOOST_CHECK_EQUAL(expected_errors[
i]->GetErrMsg(),
"Expected error not found");
281 problem_found =
true;
288 printf(
"Expected:\n");
289 for (
auto it : expected_errors) {
300 auto it1 = seen.begin();
303 while (it1 != seen.end() && it2 !=
expected.end()) {
304 BOOST_CHECK_EQUAL(*it1, *it2);
311 while (it1 != seen.end()) {
312 BOOST_CHECK_EQUAL(*it1,
"Unexpected string");
317 BOOST_CHECK_EQUAL(
"Missing string", *it2);
324 auto it1 = seen.begin();
325 while (it1 != seen.end()) {
326 printf(
"%s\n", (*it1).c_str());
329 printf(
"Expected:\n");
332 printf(
"%s\n", (*it2).c_str());
343 static void SetCountryOnSrc(
CBioSource& src,
string country)
384 size_t i,
len = expected_errors.size();
385 for (
i = 0;
i <
len;
i++) {
386 expected_errors[
i]->SetAccession(accession);
396 "debug_mode",
"Debugging mode writes errors seen for each test");
404 if (args[
"debug_mode"]) {
413 "ChromosomeWithoutLocation",
414 "INDEXER_ONLY - source contains chromosome value '1' but the BioSource location is not set to chromosome"));
419 if (entry->
IsSeq()) {
422 }
else if (entry->
IsSet()) {
470 "Structured Comment is non-compliant, keyword should be removed"));
472 "Required field finishing_strategy is missing when investigation_type has value 'eukaryote'"));
474 "Structured Comment invalid; the field value and/or name are incorrect"));
476 eval = validator.Validate(seh, options);
481 delete expected_errors[0];
482 expected_errors[0] =
nullptr;
483 eval = validator.Validate(seh, options);
491 eval = validator.Validate(seh, options);
498 eval = validator.Validate(seh, options);
523 "Longitude should be set to W (western hemisphere)"));
524 eval = validator.Validate(seh, options);
531 expected_errors[0]->SetErrMsg(
"Latitude should be set to S (southern hemisphere)");
532 eval = validator.Validate(seh, options);
550 "Latitude and longitude values appear to be exchanged"));
551 eval = validator.Validate(seh, options);
558 void TestOneLatLonCountry(
const string& country,
const string& lat_lon,
const string&
error,
bool use_state =
false,
const string& err_code =
"LatLonCountry")
571 string err_cd = err_code;
573 if (use_geo_loc_name && err_code ==
"LatLonCountry") {
574 err_cd =
"LatLonGeoLocName";
577 if (!
error.empty()) {
580 eval = validator.Validate(seh, options);
583 if (!
error.empty()) {
586 if (use_geo_loc_name) {
587 expected.push_back(
"LatLonGeoLocName Errors");
589 expected.push_back(
"LatLonCountry Errors");
595 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
596 for (
const string& it : cat_list) {
597 vector<string> sublist;
599 for (
const string& sit : sublist) {
620 "Lat_lon '46.5 N 20 E' maps to 'Hungary' instead of 'Romania' - claimed region 'Romania' is at distance 45 km");
621 TestOneLatLonCountry(
"Romania",
"34 N 65 E",
"Lat_lon '34 N 65 E' maps to 'Afghanistan' instead of 'Romania'");
622 TestOneLatLonCountry(
"Romania",
"48 N 15 E",
"Lat_lon '48 N 15 E' maps to 'Austria' instead of 'Romania'");
623 TestOneLatLonCountry(
"Romania",
"48 N 15 W",
"Lat_lon '48 N 15 W' is in water 'Atlantic Ocean'",
false,
"LatLonWater");
644 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[44] =
'A';
645 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set()[45] =
'G';
650 other_intron->
SetData().SetImp().SetKey(
"intron");
652 gene->
SetData().SetGene().SetLocus_tag(
"fake_locustag");
657 prot->SetData().SetProt().SetEc().push_back(
"1.2.3.10");
658 prot->SetData().SetProt().SetEc().push_back(
"1.1.3.22");
659 prot->SetData().SetProt().SetEc().push_back(
"1.1.99.n");
660 prot->SetData().SetProt().SetEc().push_back(
"1.1.1.17");
661 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.44");
662 prot->SetData().SetProt().SetEc().push_back(
"11.22.n33.44");
663 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.n44");
677 eval = validator.Validate(seh, options);
682 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
683 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
684 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
685 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
686 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
687 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
688 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
689 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
690 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
691 expected.push_back(
"lcl|nuc:Lat_lon '30 N 30 E' maps to 'Egypt' instead of 'Panama'");
692 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
693 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
694 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
698 string val =
format.FormatForSubmitterReport(*vit, scope);
706 seen.push_back(vit->GetErrCode());
708 expected.push_back(
"NotSpliceConsensusDonor");
709 expected.push_back(
"NotSpliceConsensusDonorTerminalIntron");
710 expected.push_back(
"NotSpliceConsensusAcceptor");
711 expected.push_back(
"DeletedEcNumber");
712 expected.push_back(
"ReplacedEcNumber");
713 expected.push_back(
"BadEcNumberValue");
714 expected.push_back(
"BadEcNumberFormat");
715 expected.push_back(
"BadEcNumberValue");
716 expected.push_back(
"NotSpliceConsensusDonor");
717 if (use_geo_loc_name) {
718 expected.push_back(
"LatLonGeoLocName");
720 expected.push_back(
"LatLonCountry");
722 expected.push_back(
"BadInstitutionCode");
723 expected.push_back(
"BadInstitutionCode");
724 expected.push_back(
"BadInstitutionCode");
729 vector<CValidErrItem::TErrIndex> codes =
format.GetListOfErrorCodes(*eval);
734 if (use_geo_loc_name) {
735 expected.push_back(
"BadInstitutionCode");
736 expected.push_back(
"LatLonGeoLocName");
738 expected.push_back(
"LatLonCountry");
739 expected.push_back(
"BadInstitutionCode");
741 expected.push_back(
"BadEcNumberFormat");
742 expected.push_back(
"BadEcNumberValue");
743 expected.push_back(
"NotSpliceConsensusDonor");
744 expected.push_back(
"NotSpliceConsensusAcceptor");
745 expected.push_back(
"DeletedEcNumber");
746 expected.push_back(
"ReplacedEcNumber");
747 expected.push_back(
"NotSpliceConsensusDonorTerminalIntron");
754 expected.push_back(
"Not Splice Consensus");
755 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
756 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
764 expected.push_back(
"Not Splice Consensus");
765 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
766 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
767 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
768 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
774 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
775 for (
const string& it : cat_list) {
776 vector<string> sublist;
778 for (
const string& sit : sublist) {
782 expected.push_back(
"Not Splice Consensus");
783 expected.push_back(
"intron\tlcl|nuc\tGT at 17");
784 expected.push_back(
"intron\tlcl|nuc\tGT at 1");
785 expected.push_back(
"intron\tlcl|nuc\tAG at 11");
786 expected.push_back(
"CDS\tlcl|nuc\tGT at 16");
788 expected.push_back(
"EC Number Format");
789 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
791 expected.push_back(
"EC Number Value");
792 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
793 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
794 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
795 expected.push_back(
"lcl|prot\t1.2.3.10;1.1.3.22;1.1.99.n;1.1.1.17;11.22.33.44;11.22.n33.44;11.22.33.n44\t\tfake protein name");
797 expected.push_back(
"Bad Institution Codes");
798 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
799 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
800 expected.push_back(
"lcl|nuc\tXXX;YYY;ZZZ");
802 if (use_geo_loc_name) {
803 expected.push_back(
"LatLonGeoLocName Errors");
805 expected.push_back(
"LatLonCountry Errors");
807 expected.push_back(
"lcl|nuc:Lat_lon '30 N 30 E' maps to 'Egypt' instead of 'Panama'");
821 eval = validator.Validate(seh, options);
827 vector<string> cat_list =
format.FormatCompleteSubmitterReport(*eval, scope);
828 for (
const string& it : cat_list) {
829 vector<string> sublist;
831 for (
const string& sit : sublist) {
836 expected.push_back(
"lcl|good:Sebaea microphylla");
853 "Lat_lon '36 N 80 W' maps to 'USA: North Carolina' instead of 'USA: South Carolina' - claimed region 'USA: South Carolina' is at distance 130 km"));
856 eval = validator.Validate(seh, options);
867 prot->SetData().SetProt().SetEc().push_back(
"1.2.3.10");
868 prot->SetData().SetProt().SetEc().push_back(
"1.1.3.22");
869 prot->SetData().SetProt().SetEc().push_back(
"1.1.99.n");
870 prot->SetData().SetProt().SetEc().push_back(
"1.1.1.17");
871 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.44");
872 prot->SetData().SetProt().SetEc().push_back(
"11.22.n33.44");
873 prot->SetData().SetProt().SetEc().push_back(
"11.22.33.n44");
886 "EC_number 1.2.3.10 was deleted"));
888 "EC_number 1.1.3.22 was transferred and is no longer valid"));
890 "11.22.33.44 is not a legal value for qualifier EC_number"));
892 "11.22.n33.44 is not in proper EC_number format"));
894 "11.22.33.n44 is not a legal preliminary value for qualifier EC_number"));
896 eval = validator.Validate(seh, options);
899 scope.RemoveTopLevelSeqEntry(seh);
900 prot->SetData().SetProt().ResetEc();
902 misc->
SetData().SetImp().SetKey(
"exon");
911 expected_errors[1]->SetErrMsg(
"EC_number 1.1.3.22 was replaced");
912 seh = scope.AddTopLevelSeqEntry(*entry);
913 eval = validator.Validate(seh, options);
924 misc->
SetData().SetImp().SetKey(
"repeat_region");
930 "repeat_region /rpt_unit and underlying sequence do not match"));
932 eval = validator.Validate(seh, options);
935 scope.RemoveTopLevelSeqEntry(seh);
938 misc->
SetData().SetImp().SetKey(
"repeat_region");
940 seh = scope.AddTopLevelSeqEntry(*entry);
941 expected_errors[0]->SetErrCode(
"InvalidRepeatUnitLength");
942 expected_errors[0]->SetErrMsg(
"Length of rpt_unit_seq is greater than feature length");
944 eval = validator.Validate(seh, options);
964 eval = validator.Validate(seh, options);
969 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
970 expected_errors[0]->SetErrMsg(
"Bioseq-ext not allowed on raw Bioseq");
971 eval = validator.Validate(seh, options);
976 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
977 expected_errors[0]->SetErrMsg(
"Missing Seq-data on raw Bioseq");
979 eval = validator.Validate(seh, options);
983 eval = validator.Validate(seh, options);
988 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
990 expected_errors[0]->SetErrCode(
"ExtNotAllowed");
991 expected_errors[0]->SetErrMsg(
"Bioseq-ext not allowed on constructed Bioseq");
992 eval = validator.Validate(seh, options);
997 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
998 expected_errors[0]->SetErrMsg(
"Missing Seq-data on constructed Bioseq");
1000 eval = validator.Validate(seh, options);
1004 eval = validator.Validate(seh, options);
1010 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
1011 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on map Bioseq");
1013 eval = validator.Validate(seh, options);
1017 eval = validator.Validate(seh, options);
1021 eval = validator.Validate(seh, options);
1025 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
1026 expected_errors[0]->SetErrCode(
"SeqDataNotAllowed");
1027 expected_errors[0]->SetErrMsg(
"Seq-data not allowed on map Bioseq");
1028 eval = validator.Validate(seh, options);
1036 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
1037 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on reference Bioseq");
1038 eval = validator.Validate(seh, options);
1052 expected_errors[0]->SetErrCode(
"ReprInvalid");
1053 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 6");
1054 eval = validator.Validate(seh, options);
1059 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 0");
1060 eval = validator.Validate(seh, options);
1065 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 255");
1066 eval = validator.Validate(seh, options);
1072 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGG");
1074 expected_errors[0]->SetErrCode(
"SeqDataNotAllowed");
1075 expected_errors[0]->SetErrMsg(
"Seq-data not allowed on delta Bioseq");
1076 eval = validator.Validate(seh, options);
1082 expected_errors[0]->SetErrCode(
"ExtBadOrMissing");
1083 expected_errors[0]->SetErrMsg(
"Missing or incorrect Bioseq-ext on delta Bioseq");
1084 eval = validator.Validate(seh, options);
1100 eval = validator.Validate(seh, options);
1103 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 255");
1105 eval = validator.Validate(seh, options);
1108 expected_errors[0]->SetErrMsg(
"Invalid Bioseq->repr = 6");
1110 eval = validator.Validate(seh, options);
1138 vector<CExpectedError*> expected_errors;
1139 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Warning,
"TerminalNs",
"N at end of sequence"));
1140 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Warning,
"GeneLocusCollidesWithLocusTag",
"locus collides with locus_tag in another gene"));
1141 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"CollidingLocusTags",
"Colliding locus_tags in gene features"));
1142 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"CollidingLocusTags",
"Colliding locus_tags in gene features"));
1143 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoMolInfoFound",
"No Mol-info applies to this Bioseq"));
1144 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"LocusTagGeneLocusMatch",
"Gene locus and locus_tag 'foo' match"));
1145 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoPubFound",
"No publications anywhere on this entire record."));
1146 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Info,
"MissingPubRequirement",
"No submission citation anywhere on this entire record."));
1147 expected_errors.push_back(
new CExpectedError(
"lcl|LocusCollidesWithLocusTag",
eDiag_Error,
"NoSourceDescriptor",
"No source information included on this record."));
1158 local str \"LocusCollidesWithLocusTag\" } ,\
1164 iupacna \"AATTGGCCAANNAATTGGCCAANN\" } ,\
1173 locus-tag \"foo\" } ,\
1180 local str \"LocusCollidesWithLocusTag\" } } ,\
1185 locus-tag \"foo\" } ,\
1192 local str \"LocusCollidesWithLocusTag\" } } ,\
1197 locus-tag \"baz\" } ,\
1204 local str \"LocusCollidesWithLocusTag\" } } ,\
1209 locus-tag \"baz\" } ,\
1216 local str \"LocusCollidesWithLocusTag\" } } } } } }\
1226 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"CircularProtein",
"Non-linear topology set on protein"));
1232 eval = validator.Validate(seh, options);
1236 eval = validator.Validate(seh, options);
1240 eval = validator.Validate(seh, options);
1247 eval = validator.Validate(seh, options);
1252 eval = validator.Validate(seh, options);
1265 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadProteinMoltype",
"Protein not single stranded"));
1269 eval = validator.Validate(seh, options);
1273 eval = validator.Validate(seh, options);
1277 eval = validator.Validate(seh, options);
1286 eval = validator.Validate(seh, options);
1290 eval = validator.Validate(seh, options);
1307 eval = validator.Validate(seh, options);
1310 expected_errors[0]->SetErrCode(
"MolOther");
1311 expected_errors[0]->SetErrMsg(
"Bioseq.mol is type other");
1313 eval = validator.Validate(seh, options);
1316 expected_errors[0]->SetErrCode(
"MolNuclAcid");
1317 expected_errors[0]->SetErrMsg(
"Bioseq.mol is type nucleic acid");
1319 eval = validator.Validate(seh, options);
1336 eval = validator.Validate(seh, options);
1339 expected_errors[0]->SetErrMsg(
"Fuzzy length on const Bioseq");
1341 eval = validator.Validate(seh, options);
1345 expected_errors[0]->SetErrCode(
"SeqDataNotFound");
1346 expected_errors[0]->SetErrMsg(
"Missing Seq-data on constructed Bioseq");
1349 eval = validator.Validate(seh, options);
1374 vector<CExpectedError*> expected_errors;
1375 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidAlphabet",
"Using a nucleic acid alphabet on a protein sequence"));
1382 eval = validator.
Validate(prot_seh, options);
1386 eval = validator.
Validate(prot_seh, options);
1390 eval = validator.
Validate(prot_seh, options);
1394 eval = validator.
Validate(prot_seh, options);
1403 expected_errors[0]->SetErrMsg(
"Using a protein alphabet on a nucleic acid");
1405 eval = validator.
Validate(seh, options);
1409 eval = validator.
Validate(seh, options);
1413 eval = validator.
Validate(seh, options);
1417 eval = validator.
Validate(seh, options);
1421 eval = validator.
Validate(seh, options);
1434 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1435 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1436 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1437 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFB');
1438 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1439 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1440 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFC');
1441 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1442 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1443 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFD');
1444 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFE');
1445 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFE');
1446 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFF');
1447 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set().push_back(
'\xFF');
1449 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'E' at position [5]"));
1450 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'F' at position [6]"));
1451 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'I' at position [9]"));
1452 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'J' at position [10]"));
1453 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'L' at position [12]"));
1454 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'O' at position [15]"));
1455 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'P' at position [16]"));
1456 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Q' at position [17]"));
1457 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'U' at position [21]"));
1458 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'X' at position [24]"));
1459 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Z' at position [26]"));
1460 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'E' at position [31]"));
1461 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'F' at position [32]"));
1462 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'I' at position [35]"));
1463 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'J' at position [36]"));
1464 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'L' at position [38]"));
1465 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'O' at position [41]"));
1466 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'P' at position [42]"));
1467 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Q' at position [43]"));
1468 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'U' at position [47]"));
1469 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'X' at position [50]"));
1470 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Invalid nucleotide residue 'Z' at position [52]"));
1481 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"More than 10 invalid residues. Checking stopped"));
1482 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Fatal,
"NonAsciiAsn",
"Non-ASCII character '251' found in item"));
1485 eval = validator.Validate(seh, options);
1490 delete expected_errors[8];
1491 expected_errors[8] =
nullptr;
1492 delete expected_errors[19];
1493 expected_errors[19] =
nullptr;
1494 eval = validator.Validate(seh, options);
1500 if (it->IsMolinfo()) {
1504 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1505 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1506 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1507 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFB');
1508 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1509 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1510 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFC');
1511 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1512 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1513 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFD');
1514 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFE');
1515 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFE');
1516 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFF');
1517 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set().push_back(
'\xFF');
1520 feat->
SetData().SetProt().SetName().push_back(
"fake protein name");
1521 feat->
SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
1525 scope.RemoveEntry(*entry);
1526 seh = scope.AddTopLevelSeqEntry(*entry);
1528 for (
int j = 0; j < 22; j++) {
1529 if (expected_errors[j]) {
1530 delete expected_errors[j];
1531 expected_errors[j] =
nullptr;
1534 eval = validator.Validate(seh, options);
1540 scope.RemoveEntry(*entry);
1542 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"abcdefghijklmnopqrstuvwxyz");
1544 seh = scope.AddTopLevelSeqEntry(*entry);
1545 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"InvalidResidue",
"Sequence contains lower-case characters"));
1547 eval = validator.Validate(seh, options);
1550 scope.RemoveEntry(*entry);
1552 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"protein");
1553 seh = scope.AddTopLevelSeqEntry(*entry);
1554 eval = validator.Validate(seh, options);
1560 scope.RemoveEntry(*entry);
1565 seg->SetLiteral().SetSeq_data().SetIupacna().Set(
"ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ");
1566 seg->SetLiteral().SetLength(52);
1567 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(seg);
1569 seh = scope.AddTopLevelSeqEntry(*entry);
1594 eval = validator.Validate(seh, options);
1600 scope.RemoveEntry(*entry);
1605 seg2->SetLiteral().SetSeq_data().SetIupacaa().Set(
"1234567");
1606 seg2->SetLiteral().SetLength(7);
1607 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(seg2);
1609 seh = scope.AddTopLevelSeqEntry(*entry);
1620 eval = validator.Validate(seh, options);
1665 entry->
SetSet().
SetSeq_set().back()->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MP*K*E*N");
1666 entry->
SetSet().
SetSeq_set().front()->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"GTGCCCTAAAAATAAGAGTAAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1675 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
1676 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"ExceptionProblem",
"unclassified translation discrepancy is not a legal exception explanation"));
1677 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"InternalStop",
"3 internal stops (and illegal start codon). Genetic code [0]"));
1679 "CDS has unnecessary translated product replaced exception"));
1682 eval = validator.Validate(seh, options);
1693 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StopInProtein",
"[3] termination symbols in protein sequence (gene? - fake protein name)"));
1694 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"StartCodon",
"Illegal start codon (and 3 internal stops). Probably wrong genetic code [0]"));
1695 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Error,
"InternalStop",
"3 internal stops (and illegal start codon). Genetic code [0]"));
1698 eval = validator.Validate(seh, options);
1703 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCTAAAAATAAGAGTAAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG");
1708 delete expected_errors[1];
1709 expected_errors[1] =
nullptr;
1710 expected_errors[2]->SetErrMsg(
"3 internal stops. Genetic code [0]");
1711 eval = validator.Validate(seh, options);
1730 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(loc1);
1733 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(loc2);
1736 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"PartialInconsistent",
"Partial segmented sequence without MolInfo partial"));
1741 eval = validator.Validate(seh, options);
1745 eval = validator.Validate(seh, options);
1749 eval = validator.Validate(seh, options);
1757 eval = validator.Validate(seh, options);
1761 eval = validator.Validate(seh, options);
1765 eval = validator.Validate(seh, options);
1773 eval = validator.Validate(seh, options);
1777 eval = validator.Validate(seh, options);
1781 eval = validator.Validate(seh, options);
1789 expected_errors[0]->SetErrMsg(
"Complete segmented sequence with MolInfo partial");
1790 eval = validator.Validate(seh, options);
1798 expected_errors[0]->SetErrMsg(
"No-left inconsistent with segmented SeqLoc");
1799 eval = validator.Validate(seh, options);
1803 eval = validator.Validate(seh, options);
1807 eval = validator.Validate(seh, options);
1815 expected_errors[0]->SetErrMsg(
"No-right inconsistent with segmented SeqLoc");
1816 eval = validator.Validate(seh, options);
1820 eval = validator.Validate(seh, options);
1824 eval = validator.Validate(seh, options);
1830 expected_errors[0]->SetErrMsg(
"No-ends inconsistent with segmented SeqLoc");
1833 eval = validator.Validate(seh, options);
1837 eval = validator.Validate(seh, options);
1841 eval = validator.Validate(seh, options);
1855 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"MPR");
1857 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetTo(2);
1861 pdb_id->SetMol().Set(
"foo");
1863 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetPdb(*pdb_id);
1864 scope.RemoveTopLevelSeqEntry(seh);
1865 seh = scope.AddTopLevelSeqEntry(*entry);
1866 eval = validator.Validate(seh, options);
1871 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"PartialsInconsistent",
"Molinfo completeness and protein feature partials conflict"));
1872 expected_errors[0]->SetAccession(
"lcl|good");
1873 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
1874 entry->
SetSeq().
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().SetLocal().SetStr(
"good");
1875 scope.RemoveTopLevelSeqEntry(seh);
1876 seh = scope.AddTopLevelSeqEntry(*entry);
1879 eval = validator.Validate(seh, options);
1882 eval = validator.Validate(seh, options);
1885 eval = validator.Validate(seh, options);
1888 eval = validator.Validate(seh, options);
1897 if (it->IsMolinfo()) {
1898 it->SetMolinfo().ResetCompleteness();
1901 eval = validator.Validate(seh, options);
1904 eval = validator.Validate(seh, options);
1907 eval = validator.Validate(seh, options);
1910 eval = validator.Validate(seh, options);
1914 scope.RemoveTopLevelSeqEntry(seh);
1916 seh = scope.AddTopLevelSeqEntry(*entry);
1917 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCTTT");
1919 expected_errors[0]->SetErrMsg(
"Sequence only 9 residues");
1920 eval = validator.Validate(seh, options);
1927 scope.RemoveTopLevelSeqEntry(seh);
1928 seh = scope.AddTopLevelSeqEntry(*entry);
1929 eval = validator.Validate(seh, options);
1962 if (entry->
IsSeq()) {
1964 }
else if (entry->
IsSet()) {
1972 if (entry->
IsSeq()) {
1974 if (it->IsUser() && it->GetUser().IsRefGeneTracking()) {
1975 it->SetUser().SetData().front()->SetData().SetStr(status);
1978 }
else if (entry->
IsSet()) {
1980 if (it->IsUser() && it->GetUser().IsRefGeneTracking()) {
1981 it->SetUser().SetData().front()->SetData().SetStr(status);
1993 auto& cont = entry->
SetDescr().Set();
1994 for (
auto it = cont.begin(); it != cont.end();) {
1995 if ((*it)->IsTitle()) {
1998 it = cont.erase(it);
2001 (*it)->SetTitle(title);
2021 if (it->IsGenbank()) {
2022 it->SetGenbank().SetKeywords().push_back(keyword);
2040 eval = validator.Validate(seh, options);
2042 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"NoKeywordHasTechnique",
"Molinfo.tech barcode without BARCODE keyword"));
2045 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2046 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"genomic\" is not appropriate for sequences that use the TSA technique."));
2047 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAseqGapProblem",
"TSA submission includes wrong gap type. Gaps for TSA should be Assembly Gaps with linkage evidence."));
2049 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
2067 eval = validator.Validate(seh, options);
2069 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ProteinTechniqueOnNucleotide",
"Nucleic acid with protein sequence method"));
2075 eval = validator.Validate(seh, options);
2088 start_gap_seg->SetLiteral().SetLength(10);
2089 start_gap_seg->SetLiteral().SetSeq_data().SetGap();
2090 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().insert(entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().begin(), start_gap_seg);
2091 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2092 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2095 end_gap_seg->SetLiteral().SetLength(10);
2096 end_gap_seg->SetLiteral().SetSeq_data().SetGap();
2097 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(end_gap_seg);
2098 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddLiteral(10);
2105 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadDeltaSeq",
"There is 1 adjacent gap in delta seq"));
2116 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
2118 eval = validator.Validate(seh, options);
2132 if (it->IsMolinfo()) {
2138 scope.RemoveTopLevelSeqEntry(seh);
2139 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
2140 seh = scope.AddTopLevelSeqEntry(*entry);
2141 eval = validator.Validate(seh, options);
2146 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NT_123456");
2147 scope.RemoveTopLevelSeqEntry(seh);
2148 seh = scope.AddTopLevelSeqEntry(*entry);
2149 eval = validator.Validate(seh, options);
2156 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
2157 scope.RemoveTopLevelSeqEntry(seh);
2158 seh = scope.AddTopLevelSeqEntry(*entry);
2161 vector<CMolInfo::TTech> allowed_list;
2175 bool allowed =
false;
2209 for (
auto it : linkage_evidence) {
2221 vector<CLinkage_evidence::EType> evidence;
2223 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2224 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2225 && it->GetLiteral().GetSeq_data().IsGap()) {
2226 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2235 "SeqGapBadLinkage",
"Seq-gap of type 3 should not have linkage evidence"));
2237 eval = validator.Validate(seh, options);
2242 scope.RemoveTopLevelSeqEntry(seh);
2243 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2244 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2245 && it->GetLiteral().GetSeq_data().IsGap()) {
2246 CSeq_gap& gap = it->SetLiteral().SetSeq_data().SetGap();
2251 seh = scope.AddTopLevelSeqEntry(*entry);
2255 "SeqGapBadLinkage",
"Seq-gap with linkage evidence must have linkage field set to linked"));
2257 eval = validator.Validate(seh, options);
2262 scope.RemoveTopLevelSeqEntry(seh);
2264 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2265 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2266 && it->GetLiteral().GetSeq_data().IsGap()) {
2267 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2271 seh = scope.AddTopLevelSeqEntry(*entry);
2275 "SeqGapBadLinkage",
"Linkage evidence 'align genus' appears 2 times"));
2277 eval = validator.Validate(seh, options);
2282 evidence.pop_back();
2284 scope.RemoveTopLevelSeqEntry(seh);
2285 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2286 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2287 && it->GetLiteral().GetSeq_data().IsGap()) {
2288 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2292 seh = scope.AddTopLevelSeqEntry(*entry);
2296 "SeqGapBadLinkage",
"Seq-gap type has unspecified and additional linkage evidence"));
2298 eval = validator.Validate(seh, options);
2303 scope.RemoveTopLevelSeqEntry(seh);
2306 for (
auto it : entry->
SetSeq().
SetInst().SetExt().SetDelta().Set()) {
2307 if (it->IsLiteral() && it->GetLiteral().IsSetSeq_data()
2308 && it->GetLiteral().GetSeq_data().IsGap()) {
2309 AdjustGap(it->SetLiteral().SetSeq_data().SetGap(),
2313 seh = scope.AddTopLevelSeqEntry(*entry);
2317 "SeqGapBadLinkage",
"Single Seq-gap has unknown type and unspecified linkage"));
2319 eval = validator.Validate(seh, options);
2324 scope.RemoveTopLevelSeqEntry(seh);
2326 gap_seg->SetLiteral().SetLength(10);
2327 AdjustGap(gap_seg->SetLiteral().SetSeq_data().SetGap(),
2331 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCCATGATGATGTACCGTACGTTTTCCCATGATGATGTACCGTACGTTTT");
2332 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetLength(50);
2333 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
2337 seh = scope.AddTopLevelSeqEntry(*entry);
2341 "SeqGapBadLinkage",
"All 2 Seq-gaps have unknown type and unspecified linkage"));
2343 eval = validator.Validate(seh, options);
2352 for (
auto it : expected_errors) {
2354 it->SetAccession(acc);
2366 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (lcl|good - lcl|bad)"));
2370 scope.RemoveTopLevelSeqEntry(seh);
2374 seh = scope.AddTopLevelSeqEntry(*entry);
2375 eval = validator.Validate(seh, options);
2379 scope.RemoveTopLevelSeqEntry(seh);
2383 seh = scope.AddTopLevelSeqEntry(*entry);
2385 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (bbs|1 - bbs|2)");
2386 eval = validator.Validate(seh, options);
2390 scope.RemoveTopLevelSeqEntry(seh);
2393 seh = scope.AddTopLevelSeqEntry(*entry);
2395 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (bbm|1 - bbm|2)");
2396 eval = validator.Validate(seh, options);
2400 scope.RemoveTopLevelSeqEntry(seh);
2405 seh = scope.AddTopLevelSeqEntry(*entry);
2407 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (gi|1 - gi|2)");
2408 eval = validator.Validate(seh, options);
2413 scope.RemoveTopLevelSeqEntry(seh);
2418 seh = scope.AddTopLevelSeqEntry(*entry);
2421 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"IdOnMultipleBioseqs",
"BioseqFind (gim|1) unable to find itself - possible internal error"));
2422 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gim|1 - gim|2)"));
2423 expected_errors.push_back(
new CExpectedError(
"gim|1",
eDiag_Error,
"IdOnMultipleBioseqs",
"BioseqFind (gim|2) unable to find itself - possible internal error"));
2425 eval = validator.Validate(seh, options);
2430 scope.RemoveTopLevelSeqEntry(seh);
2437 seh = scope.AddTopLevelSeqEntry(*entry);
2438 expected_errors.push_back(
new CExpectedError(
"pat|USA|1|1",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (pat|USA|1|1 - pat|USA|2|2)"));
2440 eval = validator.Validate(seh, options);
2444 scope.RemoveTopLevelSeqEntry(seh);
2447 seh = scope.AddTopLevelSeqEntry(*entry);
2449 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (pdb|good| - pdb|badd| )");
2450 eval = validator.Validate(seh, options);
2454 scope.RemoveTopLevelSeqEntry(seh);
2459 seh = scope.AddTopLevelSeqEntry(*entry);
2461 expected_errors[0]->SetErrMsg(
"Conflicting ids on a Bioseq: (gnl|a|good - gnl|a|bad)");
2462 eval = validator.Validate(seh, options);
2467 scope.RemoveTopLevelSeqEntry(seh);
2469 seh = scope.AddTopLevelSeqEntry(*entry);
2471 eval = validator.Validate(seh, options);
2475 scope.RemoveTopLevelSeqEntry(seh);
2476 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gb|AY222222|)"));
2479 seh = scope.AddTopLevelSeqEntry(*entry);
2480 eval = validator.Validate(seh, options);
2484 scope.RemoveTopLevelSeqEntry(seh);
2487 seh = scope.AddTopLevelSeqEntry(*entry);
2490 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.2|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gb|AY123456.2|)"));
2491 eval = validator.Validate(seh, options);
2495 scope.RemoveTopLevelSeqEntry(seh);
2497 seh = scope.AddTopLevelSeqEntry(*entry);
2499 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"ConflictingIdsOnBioseq",
"Conflicting ids on a Bioseq: (gb|AY123456| - gpp|AY123456|)"));
2501 eval = validator.Validate(seh, options);
2505 scope.RemoveTopLevelSeqEntry(seh);
2508 seh = scope.AddTopLevelSeqEntry(*entry);
2510 expected_errors[0]->SetErrMsg(
"LRG sequence needs NG_ accession");
2512 eval = validator.Validate(seh, options);
2515 scope.RemoveTopLevelSeqEntry(seh);
2517 seh = scope.AddTopLevelSeqEntry(*entry);
2520 eval = validator.Validate(seh, options);
2534 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MolNuclAcid",
"Bioseq.mol is type nucleic acid"));
2537 eval = validator.Validate(seh, options);
2551 vector<CMolInfo::TTech> genomic_list;
2562 bool genomic =
false;
2572 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InconsistentMolType",
"Molecule type (DNA) does not match biomol (RNA)"));
2580 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 raw seq has no gaps and no graphs"));
2583 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"HTGS_STS_GSS_WGSshouldBeGenomic",
"HTGS/STS/GSS/WGS sequence should be genomic"));
2584 eval = validator.Validate(seh, options);
2588 delete expected_errors[0];
2589 expected_errors[0] =
nullptr;
2590 expected_errors.back()->SetErrCode(
"HTGS_STS_GSS_WGSshouldNotBeRNA");
2591 expected_errors.back()->SetErrMsg(
"HTGS/STS/GSS/WGS sequence should not be RNA");
2592 eval = validator.Validate(seh, options);
2596 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ProteinTechniqueOnNucleotide",
"Nucleic acid with protein sequence method"));
2599 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Info,
"NoKeywordHasTechnique",
"Molinfo.tech barcode without BARCODE keyword"));
2601 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2602 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"cRNA\" is not appropriate for sequences that use the TSA technique."));
2604 eval = validator.Validate(seh, options);
2613 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InconsistentMolType",
"Molecule type (DNA) does not match biomol (RNA)"));
2614 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2615 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WrongBiomolForTSA",
"Biomol \"cRNA\" is not appropriate for sequences that use the TSA technique."));
2616 eval = validator.Validate(seh, options);
2621 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TSAshouldBNotBeDNA",
"TSA sequence should not be DNA"));
2622 eval = validator.GetTSAConflictingBiomolTechErrors(seh);
2624 eval = validator.GetTSAConflictingBiomolTechErrors(*(seh.GetSeq().GetCompleteBioseq()));
2633 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
2634 entry->
SetSeq().
SetId().front()->SetOther().SetName(
"good one");
2638 expected_errors.push_back(
new CExpectedError(
"ref|NC_123456|good one",
eDiag_Critical,
"SeqIdNameHasSpace",
"Seq-id.name 'good one' should be a single word without any spaces"));
2641 eval = validator.Validate(seh, options);
2656 seg1->
SetWhole().SetGenbank().SetAccession(
"AY123456");
2657 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(seg1);
2659 seg2->
SetWhole().SetGenbank().SetAccession(
"AY123456");
2660 entry->
SetSeq().
SetInst().SetExt().SetSeg().Set().push_back(seg2);
2682 vector<CExpectedError*> expected_errors;
2683 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLocOrder",
"Segmented BioseqIntervals out of order in SeqLoc [[gb|AY123456|, gb|AY123456|]]"));
2684 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"DuplicateSegmentReferences",
"Segmented sequence has multiple references to gb|AY123456"));
2687 eval = validator.
Validate(seh, options);
2690 seg2->
SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
2691 seg2->
SetInt().SetFrom(0);
2692 seg2->
SetInt().SetTo(484);
2693 expected_errors[0]->SetErrMsg(
"Segmented BioseqIntervals out of order in SeqLoc [[gb|AY123456|, 1-485]]");
2695 expected_errors[1]->SetErrMsg(
"Segmented sequence has multiple references to gb|AY123456 that are not SEQLOC_WHOLE");
2696 eval = validator.
Validate(seh, options);
2709 CRef<CSeq_feat> prot_feat =
prot->SetSeq().SetAnnot().front()->SetData().SetFtable().front();
2711 nuc->SetSeq().SetInst().SetSeq_data().SetIupacna().Set(
"ATGCCCAGAAAAACAGAGATANNNNNN");
2712 nuc->SetSeq().SetInst().SetLength(27);
2713 prot->SetSeq().SetInst().SetSeq_data().SetIupacaa().Set(
"MPRKTEIXX");
2714 prot->SetSeq().SetInst().SetLength(9);
2727 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
2730 eval = validator.Validate(seh, options);
2753 expected_errors.push_back(
new CExpectedError(
"gb|" + id_str +
"|",
eDiag_Error,
"BadSeqIdFormat",
"Bad accession " + id_str));
2756 eval = validator.Validate(seh, options);
2777 eval = validator.Validate(seh, options);
2794 bool is_wgs =
false;
2795 if (id_str.length() == 12 || id_str.length() == 13 || id_str.length() == 14 || id_str.length() == 15) {
2805 eval = validator.Validate(seh, options);
2823 vector<string> bad_ids;
2824 bad_ids.push_back(
"AY123456ABC");
2825 bad_ids.push_back(
"A1234");
2826 bad_ids.push_back(
"A123456");
2827 bad_ids.push_back(
"AY12345");
2828 bad_ids.push_back(
"AY1234567");
2829 bad_ids.push_back(
"ABC1234");
2830 bad_ids.push_back(
"ABC123456");
2831 bad_ids.push_back(
"ABCD1234567");
2832 bad_ids.push_back(
"ABCDE123456");
2833 bad_ids.push_back(
"ABCDE12345678");
2835 vector<string> bad_nuc_ids;
2836 bad_nuc_ids.push_back(
"ABC12345");
2838 vector<string> bad_prot_ids;
2839 bad_prot_ids.push_back(
"AY123456");
2840 bad_prot_ids.push_back(
"A12345");
2842 vector<string> good_ids;
2844 vector<string> good_nuc_ids;
2845 good_nuc_ids.push_back(
"AY123456");
2846 good_nuc_ids.push_back(
"A12345");
2847 good_nuc_ids.push_back(
"ABCD123456789");
2848 good_nuc_ids.push_back(
"ABCD1234567890");
2850 vector<string> good_prot_ids;
2851 good_prot_ids.push_back(
"ABC12345");
2862 for (
const string& id_str : bad_ids) {
2863 const string acc_str =
"gb|" + id_str +
"|";
2865 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2868 scope.RemoveTopLevelSeqEntry(seh);
2869 scope.ResetDataAndHistory();
2873 seh = scope.AddTopLevelSeqEntry(*entry);
2874 eval = validator.Validate(seh, options);
2876 scope.RemoveTopLevelSeqEntry(seh);
2877 scope.ResetDataAndHistory();
2880 seh = scope.AddTopLevelSeqEntry(*entry);
2881 eval = validator.Validate(seh, options);
2885 for (
const string& id_it : bad_ids) {
2886 const string id_str =
"B" + id_it.substr(1);
2887 expected_errors[0]->SetAccession(
"embl|" + id_str +
"|");
2888 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2891 scope.RemoveTopLevelSeqEntry(seh);
2892 scope.ResetDataAndHistory();
2896 seh = scope.AddTopLevelSeqEntry(*entry);
2897 eval = validator.Validate(seh, options);
2898 expected_errors[0]->SetAccession(
"emb|" + id_str +
"|");
2900 scope.RemoveTopLevelSeqEntry(seh);
2901 scope.ResetDataAndHistory();
2904 seh = scope.AddTopLevelSeqEntry(*entry);
2905 eval = validator.Validate(seh, options);
2909 for (
const string& id_it : bad_ids) {
2910 const string id_str =
"C" + id_it.substr(1);
2911 expected_errors[0]->SetAccession(
"dbj|" + id_str +
"|");
2912 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2915 scope.RemoveTopLevelSeqEntry(seh);
2916 scope.ResetDataAndHistory();
2920 seh = scope.AddTopLevelSeqEntry(*entry);
2921 eval = validator.Validate(seh, options);
2922 expected_errors[0]->SetAccession(
"dbj|" + id_str +
"|");
2924 scope.RemoveTopLevelSeqEntry(seh);
2925 scope.ResetDataAndHistory();
2928 seh = scope.AddTopLevelSeqEntry(*entry);
2929 eval = validator.Validate(seh, options);
2934 for (
const string& id_str : bad_nuc_ids) {
2936 scope.RemoveTopLevelSeqEntry(seh);
2939 expected_errors[0]->SetAccession(
"gb|" + id_str +
"|");
2940 expected_errors[0]->SetErrMsg(
"Bad accession " + id_str);
2941 seh = scope.AddTopLevelSeqEntry(*entry);
2942 eval = validator.Validate(seh, options);
2947 for (
auto id_it : bad_prot_ids) {
2954 for (
const string& id_str : good_ids) {
2956 scope.RemoveTopLevelSeqEntry(seh);
2959 seh = scope.AddTopLevelSeqEntry(*entry);
2960 eval = validator.Validate(seh, options);
2963 scope.RemoveTopLevelSeqEntry(seh);
2966 seh = scope.AddTopLevelSeqEntry(*entry);
2967 eval = validator.Validate(seh, options);
2973 for (
const string& id_it : good_nuc_ids) {
2978 for (
const string& id_it : good_prot_ids) {
2983 scope.RemoveTopLevelSeqEntry(seh);
2990 seh = scope.AddTopLevelSeqEntry(*entry);
2991 eval = validator.Validate(seh, options);
2993 "Accession AY123456 has 0 version"));
2994 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123456|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
3003 scope.RemoveTopLevelSeqEntry(seh);
3004 bad_id->
SetLocal().
SetStr(
"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345678901234");
3006 seh = scope.AddTopLevelSeqEntry(*entry);
3007 eval = validator.Validate(seh, options);
3014 scope.RemoveTopLevelSeqEntry(seh);
3018 seh = scope.AddTopLevelSeqEntry(*entry);
3019 eval = validator.Validate(seh, options);
3026 scope.RemoveTopLevelSeqEntry(seh);
3033 seh = scope.AddTopLevelSeqEntry(*entry);
3035 "General database longer than 20 characters"));
3038 eval = validator.Validate(seh, options);
3044 scope.RemoveTopLevelSeqEntry(seh);
3046 entry->
SetSeq().
SetId().front()->SetLocal().SetStr(
"a/b");
3047 seh = scope.AddTopLevelSeqEntry(*entry);
3048 eval = validator.Validate(seh, options);
3060 id->SetGeneral().SetDb(db);
3061 id->SetGeneral().SetTag().SetStr(
tag);
3066 string acc_str =
"lcl|good";
3067 if (!errmsg.empty()) {
3072 eval = validator.Validate(seh, options);
3081 TestOneGeneralSeqId(
"PRJNA318798",
" CpPA02_0001",
"Bad character ' ' in sequence ID 'gnl|PRJNA318798| CpPA02_0001'");
3082 TestOneGeneralSeqId(
"PRJNA3 18798",
"CpPA02_0001",
"Bad character ' ' in sequence ID 'gnl|PRJNA3 18798|CpPA02_0001'");
3090 id->SetGeneral().SetDb(
"lgsi");
3091 id->SetGeneral().SetTag().SetStr(
"thisidentifierismorethanfiftycharactersinlengthsoitshouldberejected");
3104 string acc_str =
"lcl|good";
3106 "General identifier longer than 50 characters"));
3109 eval = validator.Validate(seh, options);
3125 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3130 gbdesc->SetGenbank().SetExtra_accessions().push_back(
"AY123456");
3133 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Error,
"BadSecondaryAccn",
"AY123456 used for both primary and secondary accession"));
3135 eval = validator.Validate(seh, options);
3138 gbdesc->SetEmbl().SetExtra_acc().push_back(
"AY123456");
3139 eval = validator.Validate(seh, options);
3154 expected_errors.push_back(
new CExpectedError(
"gi|0",
eDiag_Error,
"GiWithoutAccession",
"No accession on sequence with gi number"));
3156 eval = validator.Validate(seh, options);
3166 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3167 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3175 hist_id->SetGi(
GI_CONST(21914627));
3176 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetIds().push_back(hist_id);
3177 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetDate().SetStd().SetYear(2008);
3179 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.1|",
eDiag_Error,
"HistoryGiCollision",
"Replaced by gi (21914627) is same as current Bioseq"));
3181 eval = validator.Validate(seh, options);
3185 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetIds().push_back(hist_id);
3186 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetDate().SetStd().SetYear(2008);
3187 expected_errors[0]->SetErrMsg(
"Replaces gi (21914627) is same as current Bioseq");
3188 eval = validator.Validate(seh, options);
3195 entry->
SetSeq().
SetInst().SetHist().SetReplaced_by().SetIds().push_back(hist_id);
3196 eval = validator.Validate(seh, options);
3201 entry->
SetSeq().
SetInst().SetHist().SetReplaces().SetIds().push_back(hist_id);
3202 eval = validator.Validate(seh, options);
3216 expected_errors.push_back(
new CExpectedError(
"gi|123456",
eDiag_Error,
"GiWithoutAccession",
"No accession on sequence with gi number"));
3218 eval = validator.Validate(seh, options);
3228 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3229 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3234 string acc_str =
"gb|AY123456.1|";
3240 "Conflicting ids on a Bioseq: (gb|AY123456.1| - " + other_acc->
AsFastaString() +
")"));
3242 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"MultipleAccessions",
"Multiple accessions on sequence with gi number"));
3244 expected_errors.push_back(
new CExpectedError(
"gb|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123457.1|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
3248 "TPA record gb|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3251 eval = validator.Validate(seh, options);
3310 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3311 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3321 string acc_str =
"gb|AY123456.1|";
3322 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"INSDRefSeqPackaging",
"INSD and RefSeq records should not be present in the same set"));
3323 expected_errors.push_back(
new CExpectedError(acc_str,
eDiag_Error,
"MultipleAccessions",
"Multiple accessions on sequence with gi number"));
3325 eval = validator.Validate(seh, options);
3335 tpg_entry->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3336 tpg_entry->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3339 tpe_entry->
SetSeq().
SetId().front()->SetTpe().SetAccession(
"AY123456");
3340 tpe_entry->
SetSeq().
SetId().front()->SetTpe().SetVersion(1);
3343 tpd_entry->
SetSeq().
SetId().front()->SetTpd().SetAccession(
"AY123456");
3344 tpd_entry->
SetSeq().
SetId().front()->SetTpd().SetVersion(1);
3349 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Info,
"HistAssemblyMissing",
"TPA record tpg|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3351 eval = validator.Validate(seh, options);
3355 scope.RemoveTopLevelSeqEntry(seh);
3356 seh = scope.AddTopLevelSeqEntry(*tpe_entry);
3358 expected_errors[0]->SetErrMsg(
"TPA record tpe|AY123456.1| should have Seq-hist.assembly for PRIMARY block");
3359 eval = validator.Validate(seh, options);
3364 scope.RemoveTopLevelSeqEntry(seh);
3365 seh = scope.AddTopLevelSeqEntry(*tpd_entry);
3367 expected_errors[0]->SetErrMsg(
"TPA record tpd|AY123456.1| should have Seq-hist.assembly for PRIMARY block");
3368 eval = validator.Validate(seh, options);
3375 block->SetGenbank().SetKeywords().push_back(
"TPA:reassembly");
3377 scope.RemoveTopLevelSeqEntry(seh);
3378 seh = scope.AddTopLevelSeqEntry(*tpg_entry);
3379 eval = validator.Validate(seh, options);
3383 block->SetEmbl().SetKeywords().push_back(
"TPA:reassembly");
3384 eval = validator.Validate(seh, options);
3392 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNNAAATTGGCCAAAATTGGCCAAAATTGGCCAAAATTGGCCCAANNNNNNNNNN");
3400 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
3402 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
3404 eval = validator.Validate(seh, options);
3408 scope.RemoveTopLevelSeqEntry(seh);
3409 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3410 seh = scope.AddTopLevelSeqEntry(*entry);
3414 eval = validator.Validate(seh, options);
3420 scope.RemoveTopLevelSeqEntry(seh);
3422 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNCCC");
3423 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCCNNNNNNNNN");
3424 seh = scope.AddTopLevelSeqEntry(*entry);
3426 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 3 bases"));
3429 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 52 percent Ns"));
3430 eval = validator.Validate(seh, options);
3434 scope.RemoveTopLevelSeqEntry(seh);
3436 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNNNNNNNNCC");
3437 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"CCNNNNNNNNNN");
3438 seh = scope.AddTopLevelSeqEntry(*entry);
3439 expected_errors[0]->SetErrMsg(
"Maximum contig length is 2 bases");
3440 expected_errors.back()->SetErrMsg(
"Sequence contains 58 percent Ns");
3441 eval = validator.Validate(seh, options);
3445 scope.RemoveTopLevelSeqEntry(seh);
3446 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3447 seh = scope.AddTopLevelSeqEntry(*entry);
3451 eval = validator.Validate(seh, options);
3455 scope.RemoveTopLevelSeqEntry(seh);
3456 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
3457 seh = scope.AddTopLevelSeqEntry(*entry);
3461 eval = validator.Validate(seh, options);
3464 scope.RemoveTopLevelSeqEntry(seh);
3465 entry->
SetSeq().
SetId().front()->SetPatent().SetSeqid(1);
3466 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetCountry(
"USA");
3467 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetId().SetNumber(
"1");
3468 seh = scope.AddTopLevelSeqEntry(*entry);
3470 delete expected_errors.back();
3471 expected_errors.pop_back();
3472 eval = validator.Validate(seh, options);
3480 "Maximum contig length is 2 bases"));
3482 "Suspicious use of complete"));
3485 eval = validator.Validate(seh, options);
3495 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123457");
3496 entry->
SetSeq().
SetId().front()->SetGenbank().SetVersion(1);
3503 expected_errors.push_back(
new CExpectedError(
"gb|AY123457.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"New accession (gb|AY123457.1|) does not match one in NCBI sequence repository (gb|AY123456.1|) on gi (21914627)"));
3505 eval = validator.Validate(seh, options);
3509 scope.RemoveTopLevelSeqEntry(seh);
3510 entry->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3511 entry->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3512 seh = scope.AddTopLevelSeqEntry(*entry);
3514 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Info,
"HistAssemblyMissing",
"TPA record tpg|AY123456.1| should have Seq-hist.assembly for PRIMARY block"));
3515 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"Loss of accession (gb|AY123456.1|) on gi (21914627) compared to the NCBI sequence repository"));
3516 eval = validator.Validate(seh, options);
3533 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqLit",
"Run of 20 Ns in delta component 5 that starts at base 45"));
3534 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WGSseqGapProblem",
"WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."));
3543 eval = validator.Validate(seh, options);
3551 "Run of 81 Ns in delta component 7 that starts at base 79"));
3560 eval = validator.Validate(seh, options);
3564 eval = validator.Validate(seh, options);
3568 eval = validator.Validate(seh, options);
3571 unit_test_util::AddToDeltaSeq(entry,
"AANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGG");
3573 expected_errors[0]->SetErrMsg(
"Run of 101 Ns in delta component 9 that starts at base 174");
3574 eval = validator.Validate(seh, options);
3585 delta_seq->SetLiteral().SetLength(0);
3586 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(delta_seq);
3590 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLitGapLength0",
"Gap of length 0 in delta chain"));
3593 eval = validator.Validate(seh, options);
3598 eval = validator.Validate(seh, options);
3601 delta_seq->SetLiteral().SetFuzz().
Reset();
3602 delta_seq->SetLiteral().SetFuzz().SetP_m(10);
3603 eval = validator.Validate(seh, options);
3607 delta_seq->SetLiteral().SetFuzz().
Reset();
3609 expected_errors[0]->SetErrMsg(
"Gap of length 0 with unknown fuzz in delta chain");
3610 eval = validator.Validate(seh, options);
3614 scope.RemoveTopLevelSeqEntry(seh);
3615 entry->
SetSeq().
SetId().front()->SetSwissprot().SetAccession(
"AY123456");
3616 seh = scope.AddTopLevelSeqEntry(*entry);
3619 eval = validator.Validate(seh, options);
3622 delta_seq->SetLiteral().SetFuzz().SetP_m(10);
3623 expected_errors[0]->SetErrMsg(
"Gap of length 0 in delta chain");
3624 eval = validator.Validate(seh, options);
3627 delta_seq->SetLiteral().SetFuzz().
Reset();
3629 eval = validator.Validate(seh, options);
3632 delta_seq->SetLiteral().ResetFuzz();
3633 eval = validator.Validate(seh, options);
3648 field->
SetData().SetStr(
"Data");
3658 member1->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good");
3662 member2->
SetSeq().
SetId().front()->SetLocal().SetStr(
"good2");
3671 eval = validator.Validate(seh, options);
3676 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"TpaAssemblyProblem",
"There are 1 TPAs with history and 1 without history in this record."));
3677 eval = validator.Validate(seh, options);
3681 scope.RemoveTopLevelSeqEntry(seh);
3682 member1->
SetSeq().
SetId().front()->SetTpg().SetAccession(
"AY123456");
3683 member1->
SetSeq().
SetId().front()->SetTpg().SetVersion(1);
3687 seh = scope.AddTopLevelSeqEntry(*entry);
3691 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"UnexpectedIdentifierChange",
"Loss of accession (gb|AY123456.1|) on gi (21914627) compared to the NCBI sequence repository"));
3692 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Error,
"TpaAssemblyProblem",
"There are 1 TPAs with history and 1 without history in this record."));
3693 expected_errors.push_back(
new CExpectedError(
"tpg|AY123456.1|",
eDiag_Warning,
"TpaAssemblyProblem",
"There are 1 TPAs without history in this record, but the record has a gi number assignment."));
3696 eval = validator.Validate(seh, options);
3707 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
3708 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
3709 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(9);
3716 eval = validator.Validate(seh, options);
3719 scope.RemoveTopLevelSeqEntry(seh);
3722 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGenbank().SetAccession(
"AY123456");
3723 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
3724 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(10);
3726 seh = scope.AddTopLevelSeqEntry(*entry);
3727 eval = validator.Validate(seh, options);
3745 eval = validator.Validate(seh, options);
3749 eval = validator.Validate(seh, options);
3753 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MissingGaps",
"HTGS delta seq should have gaps between all sequence runs"));
3754 eval = validator.Validate(seh, options);
3758 eval = validator.Validate(seh, options);
3762 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 delta seq has no gaps and no graphs"));
3763 eval = validator.Validate(seh, options);
3767 scope.RemoveTopLevelSeqEntry(seh);
3768 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
3770 seh = scope.AddTopLevelSeqEntry(*entry);
3773 eval = validator.Validate(seh, options);
3775 delete expected_errors[1];
3776 expected_errors.pop_back();
3779 eval = validator.Validate(seh, options);
3783 eval = validator.Validate(seh, options);
3794 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3796 SetTitle(entry,
"Foo complete genome");
3800 expected_errors.push_back(
new CExpectedError(
"gb|AY123456|",
eDiag_Warning,
"CompleteTitleProblem",
"Complete genome in title without complete flag set"));
3803 eval = validator.Validate(seh, options);
3811 eval = validator.Validate(seh, options);
3816 scope.RemoveTopLevelSeqEntry(seh);
3818 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3820 SetTitle(entry,
"Foo complete genome");
3822 seh = scope.AddTopLevelSeqEntry(*entry);
3825 "CompleteGenomeHasGaps",
"Title contains 'complete genome' but sequence has gaps"));
3827 eval = validator.Validate(seh, options);
3843 "CompleteCircleProblem",
3844 "Circular topology without complete flag set"));
3847 eval = validator.Validate(seh, options);
3852 scope.RemoveTopLevelSeqEntry(seh);
3853 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
3854 SetTitle(entry,
"This is just a title");
3856 seh = scope.AddTopLevelSeqEntry(*entry);
3858 "CompleteCircleProblem",
3859 "Circular topology has complete flag set, but title should say complete sequence or complete genome"));
3861 "UnwantedCompleteFlag",
3862 "Suspicious use of complete"));
3865 eval = validator.Validate(seh, options);
3883 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"MissingGaps",
"HTGS delta seq should have gaps between all sequence runs"));
3884 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 delta seq has no gaps and no graphs"));
3885 eval = validator.Validate(seh, options);
3888 delete expected_errors[1];
3889 expected_errors.pop_back();
3893 eval = validator.Validate(seh, options);
3898 scope.RemoveTopLevelSeqEntry(seh);
3901 seh = scope.AddTopLevelSeqEntry(*raw_entry);
3902 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"BadHTGSeq",
"HTGS 2 raw seq has no gaps and no graphs"));
3904 eval = validator.Validate(seh, options);
3912 eval = validator.Validate(seh, options);
3921 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_DRAFT keyword"));
3922 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_PREFIN keyword"));
3923 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_ACTIVEFIN keyword"));
3924 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadHTGSeq",
"HTGS 3 sequence should not have HTGS_FULLTOP keyword"));
3925 eval = validator.Validate(seh, options);
3928 scope.RemoveTopLevelSeqEntry(seh);
3929 seh = scope.AddTopLevelSeqEntry(*delta_entry);
3934 eval = validator.Validate(seh, options);
3945 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"PRK-EIN");
3949 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GapInProtein",
"[1] internal gap symbols in protein sequence (gene? - fake protein name)"));
3951 eval = validator.Validate(seh, options);
3956 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"-RKTEIN");
3957 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"BadProteinStart",
"gap symbol at start of protein sequence (gene? - fake protein name)"));
3959 eval = validator.Validate(seh, options);
3962 entry->
SetSeq().
SetInst().SetSeq_data().SetNcbieaa().Set(
"-RK-EIN");
3963 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"GapInProtein",
"[1] internal gap symbols in protein sequence (gene? - fake protein name)"));
3964 eval = validator.Validate(seh, options);
3976 first_seg->SetLiteral().SetLength(9);
3977 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_front(first_seg);
3979 last_seg->SetLiteral().SetLength(9);
3980 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(last_seg);
3997 eval = validator.Validate(seh, options);
4001 scope.RemoveTopLevelSeqEntry(seh);
4002 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetLength(10);
4003 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetLength(10);
4005 seh = scope.AddTopLevelSeqEntry(*entry);
4006 eval = validator.Validate(seh, options);
4010 scope.RemoveTopLevelSeqEntry(seh);
4011 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
4012 seh = scope.AddTopLevelSeqEntry(*entry);
4018 eval = validator.Validate(seh, options);
4021 scope.RemoveTopLevelSeqEntry(seh);
4022 entry->
SetSeq().
SetId().front()->SetPatent().SetSeqid(1);
4023 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetCountry(
"USA");
4024 entry->
SetSeq().
SetId().front()->SetPatent().SetCit().SetId().SetNumber(
"1");
4025 seh = scope.AddTopLevelSeqEntry(*entry);
4027 eval = validator.Validate(seh, options);
4036 "Suspicious use of complete"));
4039 eval = validator.Validate(seh, options);
4052 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 0, 10);
4053 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 5, 15);
4054 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 20, 30);
4055 entry->
SetSeq().
SetInst().SetExt().SetDelta().AddSeqRange(*seqid, 25, 35);
4060 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"OverlappingDeltaRange",
"Overlapping delta range 6-16 and 1-11 on a Bioseq gb|AY123456|"));
4061 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"OverlappingDeltaRange",
"Overlapping delta range 26-36 and 21-31 on a Bioseq gb|AY123456|"));
4063 eval = validator.Validate(seh, options);
4074 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacaa().Set(
"XROTEIN");
4080 eval = validator.Validate(seh, options);
4090 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTTTTT");
4095 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqRaw",
"Run of 100 Ns in raw sequence starting at base 6"));
4096 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4097 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 90 percent Ns"));
4099 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4101 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4103 eval = validator.Validate(seh, options);
4109 scope.RemoveTopLevelSeqEntry(seh);
4110 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAANNNNNNNNNNNNNNNNNNNNTTTTT");
4112 seh = scope.AddTopLevelSeqEntry(*entry);
4113 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4114 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 66 percent Ns"));
4116 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4118 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4120 eval = validator.Validate(seh, options);
4127 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalNsInSeqRaw",
"Run of 20 Ns in raw sequence starting at base 6"));
4128 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 5 bases"));
4129 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 66 percent Ns"));
4131 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4133 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4135 eval = validator.Validate(seh, options);
4146 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLiteral().SetSeq_data().SetIupacna().Set(
"ATGATGATGNNN");
4147 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().back()->SetLiteral().SetSeq_data().SetIupacna().Set(
"NNNATGATGATG");
4151 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"ContigsTooShort",
"Maximum contig length is 9 bases"));
4152 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InternalNsAdjacentToGap",
"Ambiguous residue N is adjacent to a gap around position 13"));
4153 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"InternalNsAdjacentToGap",
"Ambiguous residue N is adjacent to a gap around position 23"));
4160 eval = validator.Validate(seh, options);
4170 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
4171 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(11);
4172 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetGi(
ZERO_GI);
4177 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"DeltaSeqError",
"Unable to find far delta sequence component"));
4180 eval = validator.Validate(seh, options);
4191 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AATTGGCCAAAATTGGCCAAAATTGG-CAAAATTGGCCAAAATTGGCCAAAATTGGCCAA");
4196 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InternalGapsInSeqRaw",
"Raw nucleotide should not contain gap characters"));
4199 eval = validator.Validate(seh, options);
4210 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetFrom(0);
4211 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetTo(11);
4212 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetInt().SetId().SetLocal().SetStr(
"good");
4216 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Critical,
"SelfReferentialSequence",
"Self-referential delta sequence"));
4217 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"InstantiatedGapMismatch",
"Exception 4 in GapByGapInst"));
4220 eval = validator.Validate(seh, options);
4231 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().front()->SetLoc().SetWhole().SetGenbank().SetAccession(
"AY123456");
4236 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"WholeComponent",
"Delta seq component should not be of type whole"));
4239 eval = validator.Validate(seh, options);
4251 seq.
SetId().front()->Assign(*gnl);
4254 seq.
SetId().push_back(lcl);
4255 seq.
SetAnnot().front()->SetData().SetFtable().front()->SetLocation().SetInt().SetId().Assign(*gnl);
4269 eval = validator.Validate(seh, options);
4273 scope.RemoveTopLevelSeqEntry(seh);
4279 cds->
SetProduct().SetWhole().SetGeneral().SetDb(
"a");
4280 cds->
SetProduct().SetWhole().SetGeneral().SetTag().SetStr(
"b");
4281 seh = scope.AddTopLevelSeqEntry(*entry);
4283 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Info,
"ProteinsHaveGeneralID",
"INDEXER_ONLY - Protein bioseqs have general seq-id."));
4286 eval = validator.Validate(seh, options);
4297 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCNNNNNNNNNNNAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTT");
4305 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentPercent",
"Sequence contains 11 percent Ns"));
4307 eval = validator.Validate(seh, options);
4310 scope.RemoveTopLevelSeqEntry(seh);
4311 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCNNNNNNNNNNNNNNNNTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTT");
4312 seh = scope.AddTopLevelSeqEntry(*entry);
4313 expected_errors[0]->SetErrMsg(
"Sequence contains 16 percent Ns");
4314 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentStretch",
"Sequence has a stretch of 16 Ns"));
4315 eval = validator.Validate(seh, options);
4320 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNContentStretch",
"Sequence has a stretch of 16 Ns"));
4321 eval = validator.GetTSANStretchErrors(seh);
4323 eval = validator.GetTSANStretchErrors(entry->
GetSeq());
4328 scope.RemoveTopLevelSeqEntry(seh);
4329 entry->
SetSeq().
SetInst().SetSeq_data().SetIupacna().Set(
"AANNNNNNNNNNGGGCCCCCAAAAATTTTTGGGGGCCCCCAAAAATTTTTGGGGGTTTTTGGGGGCCCCCAAAAATTTTTGGGGGCCNNNNNNNNNNAAA");
4330 seh = scope.AddTopLevelSeqEntry(*entry);
4332 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"));
4334 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"));
4336 "Sequence contains 20 percent Ns"));
4338 "Sequence has a stretch of at least 10 Ns within the first 20 bases"));
4340 "Sequence has a stretch of at least 10 Ns within the last 20 bases"));
4342 eval = validator.Validate(seh, options);
4347 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNcontent5Prime",
"Sequence has a stretch of at least 10 Ns within the first 20 bases"));
4348 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"HighNcontent3Prime",
"Sequence has a stretch of at least 10 Ns within the last 20 bases"));
4349 eval = validator.GetTSANStretchErrors(seh);
4351 eval = validator.GetTSANStretchErrors(entry->
GetSeq());
4356 scope.RemoveTopLevelSeqEntry(seh);
4359 gap_seg->SetLiteral().SetSeq_data().SetGap();
4360 gap_seg->SetLiteral().SetLength(10);
4361 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
4364 seh = scope.AddTopLevelSeqEntry(*entry);
4374 eval = validator.Validate(seh, options);
4386 CDelta_ext::Tdata::iterator seg_it = entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().begin();
4388 (*seg_it)->SetLiteral().SetSeq_data().SetIupacna().Set();
4389 (*seg_it)->SetLiteral().SetLength(0);
4395 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"SeqLitDataLength0",
"Seq-lit of length 0 in delta chain"));
4397 eval = validator.Validate(seh, options);
4412 gap_seg->SetLiteral().SetLength(101);
4414 entry->
SetSeq().
SetInst().SetExt().SetDelta().Set().push_back(gap_seg);
4428 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Warning,
"UnknownLengthGapNot100",
"Gap of unknown length should have length 100"));
4436 eval = validator.Validate(seh, options);
4454 expected_errors.push_back(
new CExpectedError(
"lcl|good",
eDiag_Error,
"mRNAshouldBeSingleStranded",
"mRNA should be single stranded not double stranded"));
4456 eval = validator.Validate(seh, options);
4461 eval = validator.Validate(seh, options);
4466 eval = validator.Validate(seh, options);
4475 eval = validator.Validate(seh, options);
4481 eval = validator.Validate(seh, options);
4486 eval = validator.Validate(seh, options);
4502 expected_errors.push_back(
new CExpectedError(
"lcl|nuc",
eDiag_Warning,
"BioSourceMissing",
"Nuc-prot set does not contain expected BioSource descriptor"));
4503 expected_errors.push_back(
new CExpectedError(
"lcl|prot",
eDiag_Fatal,
"NoOrgFound",
"No organism name included in the source. Other qualifiers may exist."));
4506 eval = validator.Validate(seh, options);
4520 entry->
SetDescr().Set().push_back(desc);
4523 entry->
SetDescr().Set().push_back(desc);
4526 entry->
SetDescr().Set().push_back(desc);
4529 entry->
SetDescr().Set().push_back(desc);
4535 "Nucleic acid with protein sequence method"));
4537 "MolType descriptor is obsolete"));
4539 "Modif descriptor is obsolete"));
4541 "Method descriptor is obsolete"));
4543 "OrgRef descriptor is obsolete"));
4547 eval = validator.Validate(seh, options);
4552 scope.RemoveTopLevelSeqEntry(seh);
4553 entry->
SetSeq().
SetId().front()->SetGenbank().SetAccession(
"AY123456");
4558 seh = scope.AddTopLevelSeqEntry(*entry);
4560 "Non-TPA record gb|AY123456| should not have TpaAssembly object"));
4563 eval = validator.Validate(seh, options);
4566 scope.RemoveTopLevelSeqEntry(seh);
4567 entry->
SetSeq().
SetId().front()->SetOther().SetAccession(
"NC_123456");
4568 seh = scope.AddTopLevelSeqEntry(*entry);
4570 expected_errors[0]->SetErrMsg(
"Non-TPA record ref|NC_123456| should not have TpaAssembly object");
4571 eval = validator.Validate(seh, options);
4576 entry->
SetDescr().Set().push_back(desc);
4578 "Nucleic acid with GIBB-mol = peptide"));
4580 "MolType descriptor is obsolete"));
4581 eval = validator.Validate(seh, options);
4585 expected_errors[1]->SetErrMsg(
"GIBB-mol unknown or other used");
4586 eval = validator.Validate(seh, options);
4590 eval = validator.Validate(seh, options);
4595 scope.RemoveTopLevelSeqEntry(seh);
4599 entry->
SetDescr().Set().push_back(desc);
4600 seh = scope.AddTopLevelSeqEntry(*entry);
4602 "GIBB-mol [1] used on protein"));
4604 "MolType descriptor is obsolete"));
4606 eval = validator.Validate(seh, options);
4610 expected_errors[0]->SetErrMsg(
"GIBB-mol [2] used on protein");
4611 eval = validator.Validate(seh, options);
4615 expected_errors[0]->SetErrMsg(
"GIBB-mol [3] used on protein");
4616 eval = validator.Validate(seh, options);
4620 expected_errors[0]->SetErrMsg(
"GIBB-mol [4] used on protein");
4621 eval = validator.Validate(seh, options);
4625 expected_errors[0]->SetErrMsg(
"GIBB-mol [5] used on protein");
4626 eval = validator.Validate(seh, options);
4630 expected_errors[0]->SetErrMsg(
"GIBB-mol [6] used on protein");
4631 eval = validator.Validate(seh, options);
4635 expected_errors[0]->SetErrMsg(
"GIBB-mol [7] used on protein");
4636 eval = validator.Validate(seh, options);
4640 expected_errors[0]->SetErrMsg(
"GIBB-mol [9] used on protein");
4641 eval = validator.Validate(seh, options);
4645 expected_errors[0]->SetErrMsg(
"GIBB-mol [10] used on protein");
4646 eval = validator.Validate(seh, options);
4655 "Nucleic acid GIBB-mod [0] on protein"));
4657 "Nucleic acid GIBB-mod [1] on protein"));
4659 "Modif descriptor is obsolete"));
4661 eval = validator.Validate(seh, options);
4666 scope.RemoveTopLevelSeqEntry(seh);
4669 if (it->IsSource()) {
4673 seh = scope.AddTopLevelSeqEntry(*entry);
4676 "Molinfo-biomol other should be used if Biosource-location is synthetic"));
4678 eval = validator.Validate(seh, options);
4684 if (it->IsSource()) {
4685 it->SetSource().ResetOrigin();
4691 "Nucleic acid with Molinfo = peptide"));
4693 eval = validator.Validate(seh, options);
4699 "MoltypeOtherGenetic",
"Molinfo-biomol = other genetic"));
4701 eval = validator.Validate(seh, options);
4707 "MoltypeUnknown",
"Molinfo-biomol unknown used"));
4709 eval = validator.Validate(seh, options);
4715 "MoltypeOther",
"Molinfo-biomol other used"));
4717 eval = validator.Validate(seh, options);
4721 scope.RemoveTopLevelSeqEntry(seh);
4723 seh = scope.AddTopLevelSeqEntry(*entry);
4726 "InvalidForType",
"Molinfo-biomol [1] used on protein"));
4729 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [1] used on protein");
4730 eval = validator.Validate(seh, options);
4734 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [2] used on protein");
4735 eval = validator.Validate(seh, options);
4739 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [3] used on protein");
4740 eval = validator.Validate(seh, options);
4744 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [4] used on protein");
4745 eval = validator.Validate(seh, options);
4749 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [5] used on protein");
4750 eval = validator.Validate(seh, options);
4754 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [6] used on protein");
4755 eval = validator.Validate(seh, options);
4759 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [7] used on protein");
4760 eval = validator.Validate(seh, options);
4764 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [10] used on protein");
4765 eval = validator.Validate(seh, options);
4769 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [11] used on protein");
4770 eval = validator.Validate(seh, options);
4774 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [12] used on protein");
4775 eval = validator.Validate(seh, options);
4779 expected_errors[0]->SetErrMsg(
"Molinfo-biomol [13] used on protein");