71 vector<size_t> match_lens = word.
GetMatchLens(
"fruit, canned",
"fruit", 0);
72 BOOST_CHECK_EQUAL(match_lens.size(), 1);
73 BOOST_CHECK_EQUAL(match_lens[0], 13);
84 BOOST_CHECK_EQUAL(s.
Match(
"cat"),
true);
85 BOOST_CHECK_EQUAL(s.
Match(
"catalog"),
true);
86 BOOST_CHECK_EQUAL(s.
Match(
"the catalog"),
true);
87 BOOST_CHECK_EQUAL(s.
Match(
"ducat"),
true);
88 BOOST_CHECK_EQUAL(s.
Match(
"dog"),
false);
89 BOOST_CHECK_EQUAL(s.
Match(
"dog, cat, cow"),
true);
92 BOOST_CHECK_EQUAL(s.
Match(
"cat"),
true);
93 BOOST_CHECK_EQUAL(s.
Match(
"catalog"),
false);
94 BOOST_CHECK_EQUAL(s.
Match(
"the catalog"),
false);
95 BOOST_CHECK_EQUAL(s.
Match(
"ducat"),
false);
96 BOOST_CHECK_EQUAL(s.
Match(
"dog"),
false);
97 BOOST_CHECK_EQUAL(s.
Match(
"dog, cat, cow"),
false);
100 BOOST_CHECK_EQUAL(s.
Match(
"cat"),
true);
101 BOOST_CHECK_EQUAL(s.
Match(
"catalog"),
true);
102 BOOST_CHECK_EQUAL(s.
Match(
"the catalog"),
false);
103 BOOST_CHECK_EQUAL(s.
Match(
"ducat"),
false);
104 BOOST_CHECK_EQUAL(s.
Match(
"dog"),
false);
105 BOOST_CHECK_EQUAL(s.
Match(
"dog, cat, cow"),
false);
108 BOOST_CHECK_EQUAL(s.
Match(
"cat"),
true);
109 BOOST_CHECK_EQUAL(s.
Match(
"catalog"),
false);
110 BOOST_CHECK_EQUAL(s.
Match(
"the catalog"),
false);
111 BOOST_CHECK_EQUAL(s.
Match(
"ducat"),
true);
112 BOOST_CHECK_EQUAL(s.
Match(
"dog"),
false);
113 BOOST_CHECK_EQUAL(s.
Match(
"dog, cat, cow"),
false);
134 BOOST_CHECK_EQUAL(s.
Match(
"dog cat cow"),
true);
135 BOOST_CHECK_EQUAL(s.
Match(
"dog cat cow"),
false);
136 BOOST_CHECK_EQUAL(s.
Match(
"dogcatcow"),
false);
137 BOOST_CHECK_EQUAL(s.
Match(
"dog.cat.cow"),
false);
138 BOOST_CHECK_EQUAL(s.
Match(
"dog,cat,cow"),
false);
141 BOOST_CHECK_EQUAL(s.
Match(
"dog cat cow"),
true);
142 BOOST_CHECK_EQUAL(s.
Match(
"dog cat cow"),
true);
143 BOOST_CHECK_EQUAL(s.
Match(
"dogcatcow"),
true);
144 BOOST_CHECK_EQUAL(s.
Match(
"dog.cat.cow"),
true);
145 BOOST_CHECK_EQUAL(s.
Match(
"dog,cat,cow"),
true);
148 BOOST_CHECK_EQUAL(s.
Match(
"dog cat cow"),
false);
149 BOOST_CHECK_EQUAL(s.
Match(
"dog cat cow"),
false);
150 BOOST_CHECK_EQUAL(s.
Match(
"dogcatcow"),
false);
151 BOOST_CHECK_EQUAL(s.
Match(
"dog.cat.cow"),
false);
152 BOOST_CHECK_EQUAL(s.
Match(
"dog,cat,cow"),
true);
158 BOOST_CHECK_EQUAL(s.
Match(
"cat"),
true);
159 BOOST_CHECK_EQUAL(s.
Match(
"catalog"),
false);
160 BOOST_CHECK_EQUAL(s.
Match(
"the catalog"),
false);
161 BOOST_CHECK_EQUAL(s.
Match(
"ducat"),
false);
162 BOOST_CHECK_EQUAL(s.
Match(
"dog"),
false);
163 BOOST_CHECK_EQUAL(s.
Match(
"dog,cat,cow"),
true);
174 BOOST_CHECK_EQUAL(
out,
"dog");
178 BOOST_CHECK_EQUAL(
out,
"dogalog");
182 BOOST_CHECK_EQUAL(
out,
"the dogalog");
186 BOOST_CHECK_EQUAL(
out,
"dudog");
188 in =
"dog, cat, cow";
190 BOOST_CHECK_EQUAL(
out,
"dog, dog, cow");
194 BOOST_CHECK_EQUAL(
out,
"feline");
201 string text =
"The quick brown fox jumped over the lazy dog.";
215 s.
SetMatch_text(
"A fast beige wolf leaped across a sleepy beagle.");
236 hop->SetWhole_word(
true);
240 hop->SetWhole_word(
false);
241 article->SetCase_sensitive(
true);
256 word_sub->SetWord(
"Homo sapiens");
258 syns.push_back(
"human");
259 syns.push_back(
"Homo sapien");
260 syns.push_back(
"Homosapiens");
261 syns.push_back(
"Homo-sapiens");
262 syns.push_back(
"Homo spiens");
263 syns.push_back(
"Homo Sapience");
264 syns.push_back(
"homosapein");
265 syns.push_back(
"homosapiens");
266 syns.push_back(
"homosapien");
267 syns.push_back(
"homo_sapien");
268 syns.push_back(
"homo_sapiens");
269 syns.push_back(
"Homosipian");
270 word_sub->SetSynonyms() = syns;
274 word_sub2->SetWord(
"sapiens");
276 syns.push_back(
"sapien");
277 syns.push_back(
"sapeins");
278 syns.push_back(
"sapein");
279 syns.push_back(
"sapins");
280 syns.push_back(
"sapens");
281 syns.push_back(
"sapin");
282 syns.push_back(
"sapen");
283 syns.push_back(
"sapians");
284 syns.push_back(
"sapian");
285 syns.push_back(
"sapies");
286 syns.push_back(
"sapie");
287 word_sub2->SetSynonyms() = syns;
289 string test =
"human";
293 test =
"Homo sapien";
295 test =
"Human sapien";
312 subst1->SetWord(
"cytochrome b gene");
313 subst1->SetSynonyms().push_back(
"cytochrome b cytb");
314 subst1->SetSynonyms().push_back(
"cytochrome b cyt b");
315 subst1->SetSynonyms().push_back(
"cytochrome b (cytb)");
316 subst1->SetSynonyms().push_back(
"cytochrome b (cyt b)");
317 subst1->SetCase_sensitive(
false);
318 subst1->SetWhole_word(
false);
323 subst2->SetWord(
"gene");
324 subst2->SetSynonyms().push_back(
"sequence");
325 subst2->SetSynonyms().push_back(
"partial");
326 subst2->SetSynonyms().push_back(
"complete");
327 subst2->SetSynonyms().push_back(
"region");
328 subst2->SetSynonyms().push_back(
"partial sequence");
329 subst2->SetSynonyms().push_back(
"complete sequence");
330 subst2->SetCase_sensitive(
false);
331 subst2->SetWhole_word(
false);
343 BOOST_CHECK_EQUAL(s.
Match(
"cytochrome b gene"),
true);
344 BOOST_CHECK_EQUAL(s.
Match(
"cytochrome b partial"),
true);
345 BOOST_CHECK_EQUAL(s.
Match(
"cytb"),
false);
353 rule.
SetFind().SetString_constraint().SetMatch_text(
"localisation");
355 rule.
SetReplace().SetReplace_func().SetSimple_replace().SetReplace(
"localization");
356 rule.
SetReplace().SetReplace_func().SetSimple_replace().SetWhole_string(
false);
357 rule.
SetReplace().SetReplace_func().SetSimple_replace().SetWeasel_to_putative(
false);
360 string original =
"Localisation of periplasmic protein complexes";
361 BOOST_CHECK_EQUAL(rule.
GetFind().
Match(original),
true);
363 BOOST_CHECK_EQUAL(original,
"localization of periplasmic protein complexes");
378 subst1->SetWord(
"cytochrome oxidase subunit I gene");
379 subst1->SetSynonyms().push_back(
"cytochrome oxidase I gene");
380 subst1->SetSynonyms().push_back(
"cytochrome oxidase I");
381 subst1->SetSynonyms().push_back(
"cytochrome subunit I");
382 subst1->SetCase_sensitive(
false);
383 subst1->SetWhole_word(
false);
388 subst2->SetWord(
"gene");
389 subst2->SetCase_sensitive(
false);
390 subst2->SetWhole_word(
false);
394 subst3->SetWord(
"gene");
398 subst3->SetSynonyms().push_back(
"sequence");
399 subst3->SetSynonyms().push_back(
"partial");
400 subst3->SetSynonyms().push_back(
"complete");
401 subst3->SetSynonyms().push_back(
"region");
402 subst3->SetSynonyms().push_back(
"partial sequence");
403 subst3->SetSynonyms().push_back(
"complete sequence");
404 subst3->SetCase_sensitive(
false);
405 subst3->SetWhole_word(
false);
409 subst4->SetWord(
"oxidase");
410 subst4->SetSynonyms().push_back(
"oxydase");
411 subst4->SetCase_sensitive(
false);
412 subst4->SetWhole_word(
false);
422 BOOST_CHECK_EQUAL(s.
Match(
"cytochrome oxidase subunit I"),
true);
423 BOOST_CHECK_EQUAL(s.
Match(
"cytochrome oxydase subunit I"),
true);
424 BOOST_CHECK_EQUAL(s.
Match(
"cytochrome oxydase subunit I gene"),
true);
437 subst2->SetWord(
"gene");
438 subst2->SetSynonyms().push_back(
"sequence");
439 subst2->SetSynonyms().push_back(
"partial");
440 subst2->SetSynonyms().push_back(
"complete");
441 subst2->SetSynonyms().push_back(
"region");
442 subst2->SetSynonyms().push_back(
"partial sequence");
443 subst2->SetSynonyms().push_back(
"complete sequence");
444 subst2->SetCase_sensitive(
false);
445 subst2->SetWhole_word(
false);
456 BOOST_CHECK_EQUAL(s.
Match(
"MHC CLASS II ANTIGEN gene"),
true);
457 BOOST_CHECK_EQUAL(s.
Match(
"MHC class II antigen gene"),
true);
465 BOOST_CHECK_EQUAL(s.
Match(
"MHC CLASS ii ANTIGEN gene"),
false);
466 BOOST_CHECK_EQUAL(s.
Match(
"ANTIGEN"),
true);
467 BOOST_CHECK_EQUAL(s.
Match(
"ANTIGEN GENE"),
true);
468 BOOST_CHECK_EQUAL(s.
Match(
"CLASS: ANTIGEN"),
true);
473 BOOST_CHECK_EQUAL(s.
Match(
"MHC CLASS ii ANTIGEN gene"),
false);
474 BOOST_CHECK_EQUAL(s.
Match(
"antigen"),
true);
475 BOOST_CHECK_EQUAL(s.
Match(
"antigen gene"),
true);
476 BOOST_CHECK_EQUAL(s.
Match(
"class: antigen!"),
true);
490 subst1->SetWord(
"NADH dehydrogenase subunit 1 gene");
491 subst1->SetSynonyms().push_back(
"NADH dehydrogenase subunit 1");
492 subst1->SetSynonyms().push_back(
"NADH dehydrogenase 1 gene");
493 subst1->SetSynonyms().push_back(
"NADH dehydrogenase 1");
494 subst1->SetSynonyms().push_back(
"NADH dehydrogenase subunit 1 protein");
495 subst1->SetSynonyms().push_back(
"NADH dehydrogenase 1 protein");
496 subst1->SetCase_sensitive(
false);
497 subst1->SetWhole_word(
false);
501 subst2->SetWord(
"1");
502 subst2->SetSynonyms().push_back(
"one");
503 subst2->SetCase_sensitive(
false);
504 subst2->SetWhole_word(
false);
508 subst3->SetWord(
"gene");
509 subst3->SetSynonyms().push_back(
"sequence");
510 subst3->SetSynonyms().push_back(
"partial");
511 subst3->SetSynonyms().push_back(
"complete");
512 subst3->SetSynonyms().push_back(
"region");
513 subst3->SetSynonyms().push_back(
"partial sequence");
514 subst3->SetSynonyms().push_back(
"complete sequence");
515 subst3->SetCase_sensitive(
false);
516 subst3->SetWhole_word(
false);
526 BOOST_CHECK_EQUAL(s.
Match(
"NADH dehydrogenase subunit one sequence"),
true);
527 BOOST_CHECK_EQUAL(s.
Match(
"NADH dehydrogenase subunit 1 gene"),
true);
528 BOOST_CHECK_EQUAL(s.
Match(
"NADH dehydrogenase subunit one"),
false);
529 BOOST_CHECK_EQUAL(s.
Match(
"NADH dehydrogenase subunit 2 gene"),
false);
530 BOOST_CHECK_EQUAL(s.
Match(
"NADH dehydrogenase subunit sequence"),
false);
543 subst1->SetWord(
"beta-actin gene");
544 subst1->SetSynonyms().push_back(
"beta-actin");
545 subst1->SetSynonyms().push_back(
"beta actin");
546 subst1->SetSynonyms().push_back(
"beta actin gene");
547 subst1->SetSynonyms().push_back(
"beta_actin");
548 subst1->SetSynonyms().push_back(
"beta_actin gene");
549 subst1->SetCase_sensitive(
false);
550 subst1->SetWhole_word(
false);
554 subst2->SetWord(
"gene");
555 subst2->SetSynonyms().push_back(
"sequence");
556 subst2->SetSynonyms().push_back(
"partial");
557 subst2->SetSynonyms().push_back(
"complete");
558 subst2->SetSynonyms().push_back(
"region");
559 subst2->SetSynonyms().push_back(
"partial sequence");
560 subst2->SetSynonyms().push_back(
"complete sequence");
561 subst2->SetCase_sensitive(
false);
562 subst2->SetWhole_word(
false);
572 BOOST_CHECK_EQUAL(s.
Match(
"beta actin"),
true);
573 BOOST_CHECK_EQUAL(s.
Match(
"beta-actin gene"),
true);
574 BOOST_CHECK_EQUAL(s.
Match(
"beta_actin sequence"),
true);
582 BOOST_CHECK_EQUAL(s.
Match(
""),
false);
583 BOOST_CHECK_EQUAL(s.
Match(
"beta actin"),
false);
584 BOOST_CHECK_EQUAL(s.
Match(
"beta Actin"),
false);
585 BOOST_CHECK_EQUAL(s.
Match(
"bEta actin"),
false);
586 BOOST_CHECK_EQUAL(s.
Match(
"BEta actin"),
true);
587 BOOST_CHECK_EQUAL(s.
Match(
"Beta-actin Gene"),
true);
588 BOOST_CHECK_EQUAL(s.
Match(
"?Beta_Actin Gene"),
true);
589 BOOST_CHECK_EQUAL(s.
Match(
" Beta actin"),
true);
590 BOOST_CHECK_EQUAL(s.
Match(
"4"),
false);
591 BOOST_CHECK_EQUAL(s.
Match(
"-12Beta"),
false);
596 BOOST_CHECK_EQUAL(s.
Match(
""),
false);
597 BOOST_CHECK_EQUAL(s.
Match(
"beta actin"),
false);
598 BOOST_CHECK_EQUAL(s.
Match(
"Beta Actin"),
true);
599 BOOST_CHECK_EQUAL(s.
Match(
"bEta Actin"),
false);
600 BOOST_CHECK_EQUAL(s.
Match(
" BEta.Actin"),
true);
601 BOOST_CHECK_EQUAL(s.
Match(
"Beta-actin Gene"),
true);
602 BOOST_CHECK_EQUAL(s.
Match(
"Beta-Actin Gene"),
true);
603 BOOST_CHECK_EQUAL(s.
Match(
"Beta_actin Gene"),
false);
604 BOOST_CHECK_EQUAL(s.
Match(
"-Beta-actin Gene"),
true);
605 BOOST_CHECK_EQUAL(s.
Match(
"?Beta_Actin Gene"),
true);
606 BOOST_CHECK_EQUAL(s.
Match(
" BETA ACTIN"),
true);
607 BOOST_CHECK_EQUAL(s.
Match(
"12 Ribosomal RNA"),
true);
608 BOOST_CHECK_EQUAL(s.
Match(
"12R Ribosomal RNA"),
false);
609 BOOST_CHECK_EQUAL(s.
Match(
"12r Ribosomal RNA"),
false);
623 subst1->SetSynonyms().push_back(
"partial sequence");
624 subst1->SetSynonyms().push_back(
"complete sequence");
625 subst1->SetSynonyms().push_back(
"partial");
626 subst1->SetSynonyms().push_back(
"complete");
627 subst1->SetSynonyms().push_back(
"gene");
628 subst1->SetSynonyms().push_back(
"region");
630 subst1->SetCase_sensitive(
false);
631 subst1->SetWhole_word(
false);
635 subst2->SetWord(
"16S");
636 subst2->SetSynonyms().push_back(
"5.8S");
637 subst2->SetSynonyms().push_back(
"12S");
638 subst2->SetSynonyms().push_back(
"18S");
639 subst2->SetSynonyms().push_back(
"23S");
640 subst2->SetSynonyms().push_back(
"28S");
642 subst2->SetCase_sensitive(
false);
643 subst2->SetWhole_word(
false);
647 subst3->SetWord(
"gene");
648 subst3->SetCase_sensitive(
false);
649 subst3->SetWhole_word(
false);
659 BOOST_CHECK_EQUAL(s.
Match(
"18S ribosomal RNA gene"),
true);
660 BOOST_CHECK_EQUAL(s.
Match(
"18S ribosomal RNA gene, partial sequence"),
true);
672 BOOST_CHECK_EQUAL(s.
Match(
"cytochrome oxidase subunit I (COI)"),
true);
675 subst1->SetWord(
"cytochrome oxidase subunit I (COI)");
676 subst1->SetSynonyms().push_back(
"cytochrome oxidase subunit I");
678 subst1->SetCase_sensitive(
false);
679 subst1->SetWhole_word(
false);
682 BOOST_CHECK_EQUAL(s.
Match(
"cytochrome oxidase subunit I (COI)"),
true);
694 BOOST_CHECK_EQUAL(s.
Match(
"Bacillus"),
false);
705 subst1->SetWord(
"aceae");
706 subst1->SetSynonyms().push_back(
"ales");
708 BOOST_CHECK_EQUAL(s.
Match(
"Methylophilaceae bacterium"),
false);
710 BOOST_CHECK_EQUAL(s.
Match(
"Methylophilaceae"),
true);
711 BOOST_CHECK_EQUAL(s.
Match(
"bacterium"),
false);
723 BOOST_CHECK_EQUAL(s.
Match(
"NADH dehydrogenase subunit"),
false);
735 BOOST_CHECK_EQUAL(s.
Match(
"reverse transcriptase"),
false);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool Match(const CMatchString &str) const
bool ReplaceStringConstraintPortionInString(string &result, const CMatchString &str, const string &replace) const
bool Match(const CMatchString &str) const
void SetMatch_text(const TMatch_text &value)
bool ApplyToString(string &result, const CMatchString &str) const
vector< size_t > GetMatchLens(const string &text, const string &pattern, char prev_char) const
std::ofstream out("events_result.xml")
main entry point for tests
#define test(a, b, c, d, e)
void SetIs_first_cap(TIs_first_cap value)
Assign a value to Is_first_cap data member.
void SetIs_all_lower(TIs_all_lower value)
Assign a value to Is_all_lower data member.
void SetIs_first_each_cap(TIs_first_each_cap value)
Assign a value to Is_first_each_cap data member.
void SetFind(TFind &value)
Assign a value to Find data member.
void SetIs_all_punct(TIs_all_punct value)
Assign a value to Is_all_punct data member.
void SetIgnore_punct(TIgnore_punct value)
Assign a value to Ignore_punct data member.
void SetIgnore_words(TIgnore_words &value)
Assign a value to Ignore_words data member.
TSynonyms & SetSynonyms(void)
Assign a value to Synonyms data member.
const TFind & GetFind(void) const
Get the Find member data.
void SetMatch_location(TMatch_location value)
Assign a value to Match_location data member.
void SetIs_all_caps(TIs_all_caps value)
Assign a value to Is_all_caps data member.
void SetWord(const TWord &value)
Assign a value to Word data member.
void SetCase_sensitive(TCase_sensitive value)
Assign a value to Case_sensitive data member.
virtual void Reset(void)
Reset the whole object.
void SetIgnore_space(TIgnore_space value)
Assign a value to Ignore_space data member.
void SetWhole_word(TWhole_word value)
Assign a value to Whole_word data member.
void SetIgnore_weasel(TIgnore_weasel value)
Assign a value to Ignore_weasel data member.
void SetNot_present(TNot_present value)
Assign a value to Not_present data member.
void SetReplace(TReplace &value)
Assign a value to Replace data member.
void ResetIgnore_punct(void)
Reset Ignore_punct data member.
@ eString_location_equals
@ eString_location_contains
@ eString_location_starts
static void text(MDB_val *v)
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
std::istream & in(std::istream &in_, double &x_)
Utility stuff for more convenient using of Boost.Test library.
BOOST_AUTO_TEST_CASE(Test_WordSubstitution)