NCBI C++ ToolKit
unit_test_string_constraint.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_string_constraint.cpp 83880 2018-09-26 16:07:20Z kachalos $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin
27 *
28 * File Description:
29 * Simple unit test for CString_constraint.
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 
45 
46 #include <corelib/ncbiapp.hpp>
47 #include <corelib/test_boost.hpp>
48 
49 #include <util/util_misc.hpp>
50 
51 #include <common/test_assert.h> /* This header must go last */
52 
55 
57 {
58 }
59 
// Verifies CWord_substitution::GetMatchLens(): with "fruit, canned" registered
// as one of the synonyms of "fruit", matching the text "fruit, canned" against
// the pattern "fruit" must report exactly one match length, and that length
// must be 13 (the full length of "fruit, canned").
60 BOOST_AUTO_TEST_CASE(Test_WordSubstitution)
61 {
62  CWord_substitution word;
63
64  word.SetWord("fruit");
65  word.SetSynonyms().push_back("apple");
66  word.SetSynonyms().push_back("orange");
67  word.SetSynonyms().push_back("pear");
68  word.SetSynonyms().push_back("grapefruit");
69  word.SetSynonyms().push_back("fruit, canned");
70
71  vector<size_t> match_lens = word.GetMatchLens("fruit, canned", "fruit", 0);
// The size check has to be fatal: a non-fatal BOOST_CHECK_EQUAL would let the
// test continue after a failure and index into a possibly empty vector below.
72  BOOST_REQUIRE_EQUAL(match_lens.size(), 1);
73  BOOST_CHECK_EQUAL(match_lens[0], 13);
74 }
75 
76 
// Exercises basic matching of a string constraint `s` with match text "cat"
// under several configurations, then the ignore-punctuation / ignore-space
// flags, the whole-word flag, and finally
// ReplaceStringConstraintPortionInString().
// NOTE(review): the declaration of `s` and the calls that select the match
// location are not visible in this listing (the numbered lines 79, 82, 91, 99,
// 107, 132, 157 and 170 are absent) - confirm against the repository source.
77 BOOST_AUTO_TEST_CASE(Test_SimpleConstraints)
78 {
80
81  s.SetMatch_text("cat");
83
// Expectations here are consistent with a "contains" match location
// (presumably eString_location_contains - TODO confirm).
84  BOOST_CHECK_EQUAL(s.Match("cat"), true);
85  BOOST_CHECK_EQUAL(s.Match("catalog"), true);
86  BOOST_CHECK_EQUAL(s.Match("the catalog"), true);
87  BOOST_CHECK_EQUAL(s.Match("ducat"), true);
88  BOOST_CHECK_EQUAL(s.Match("dog"), false);
89  BOOST_CHECK_EQUAL(s.Match("dog, cat, cow"), true);
90
// Only the exact text is accepted (presumably eString_location_equals - TODO confirm).
92  BOOST_CHECK_EQUAL(s.Match("cat"), true);
93  BOOST_CHECK_EQUAL(s.Match("catalog"), false);
94  BOOST_CHECK_EQUAL(s.Match("the catalog"), false);
95  BOOST_CHECK_EQUAL(s.Match("ducat"), false);
96  BOOST_CHECK_EQUAL(s.Match("dog"), false);
97  BOOST_CHECK_EQUAL(s.Match("dog, cat, cow"), false);
98
// Only strings beginning with "cat" are accepted (presumably eString_location_starts - TODO confirm).
100  BOOST_CHECK_EQUAL(s.Match("cat"), true);
101  BOOST_CHECK_EQUAL(s.Match("catalog"), true);
102  BOOST_CHECK_EQUAL(s.Match("the catalog"), false);
103  BOOST_CHECK_EQUAL(s.Match("ducat"), false);
104  BOOST_CHECK_EQUAL(s.Match("dog"), false);
105  BOOST_CHECK_EQUAL(s.Match("dog, cat, cow"), false);
106
// Only strings finishing with "cat" are accepted (presumably eString_location_ends - TODO confirm).
108  BOOST_CHECK_EQUAL(s.Match("cat"), true);
109  BOOST_CHECK_EQUAL(s.Match("catalog"), false);
110  BOOST_CHECK_EQUAL(s.Match("the catalog"), false);
111  BOOST_CHECK_EQUAL(s.Match("ducat"), true);
112  BOOST_CHECK_EQUAL(s.Match("dog"), false);
113  BOOST_CHECK_EQUAL(s.Match("dog, cat, cow"), false);
114
115  // eString_location_inlist - no longer supported
116  //s.SetMatch_location(eString_location_inlist);
117  //BOOST_CHECK_EQUAL(s.Match("cat"), true);
118  //BOOST_CHECK_EQUAL(s.Match("catalog"), false);
119  //BOOST_CHECK_EQUAL(s.Match("the catalog"), false);
120  //BOOST_CHECK_EQUAL(s.Match("ducat"), false);
121  //BOOST_CHECK_EQUAL(s.Match("dog"), false);
122  //BOOST_CHECK_EQUAL(s.Match("dog,cat,cow"), false); // because list is in constraint
123
124  //s.SetMatch_text("dog, cat, cow");
125  //BOOST_CHECK_EQUAL(s.Match("cat"), true);
126  //BOOST_CHECK_EQUAL(s.Match("catalog"), false);
127  //BOOST_CHECK_EQUAL(s.Match("the catalog"), false);
128  //BOOST_CHECK_EQUAL(s.Match("ducat"), false);
129  //BOOST_CHECK_EQUAL(s.Match("dog"), true);
130
// Punctuation is ignored but spacing still matters.
// NOTE(review): in each group below the first two checks show the same literal
// (here even with opposite expectations); the second literal presumably had
// different spacing that this listing collapsed - confirm against the
// repository source.
131  s.SetMatch_text("dog, cat, cow");
133  s.SetIgnore_punct(true);
134  BOOST_CHECK_EQUAL(s.Match("dog cat cow"), true);
135  BOOST_CHECK_EQUAL(s.Match("dog cat cow"), false);
136  BOOST_CHECK_EQUAL(s.Match("dogcatcow"), false);
137  BOOST_CHECK_EQUAL(s.Match("dog.cat.cow"), false);
138  BOOST_CHECK_EQUAL(s.Match("dog,cat,cow"), false);
139
// With spaces ignored as well, every spacing/punctuation variant matches.
140  s.SetIgnore_space(true);
141  BOOST_CHECK_EQUAL(s.Match("dog cat cow"), true);
142  BOOST_CHECK_EQUAL(s.Match("dog cat cow"), true);
143  BOOST_CHECK_EQUAL(s.Match("dogcatcow"), true);
144  BOOST_CHECK_EQUAL(s.Match("dog.cat.cow"), true);
145  BOOST_CHECK_EQUAL(s.Match("dog,cat,cow"), true);
146
// Spaces still ignored, punctuation significant again: commas must be present.
147  s.ResetIgnore_punct();
148  BOOST_CHECK_EQUAL(s.Match("dog cat cow"), false);
149  BOOST_CHECK_EQUAL(s.Match("dog cat cow"), false);
150  BOOST_CHECK_EQUAL(s.Match("dogcatcow"), false);
151  BOOST_CHECK_EQUAL(s.Match("dog.cat.cow"), false);
152  BOOST_CHECK_EQUAL(s.Match("dog,cat,cow"), true);
153
// Whole-word matching: "cat" must not be accepted as part of a longer word.
154  s.Reset();
155  s.SetMatch_text("cat");
156  s.SetWhole_word(true);
158  BOOST_CHECK_EQUAL(s.Match("cat"), true);
159  BOOST_CHECK_EQUAL(s.Match("catalog"), false);
160  BOOST_CHECK_EQUAL(s.Match("the catalog"), false);
161  BOOST_CHECK_EQUAL(s.Match("ducat"), false);
162  BOOST_CHECK_EQUAL(s.Match("dog"), false);
163  BOOST_CHECK_EQUAL(s.Match("dog,cat,cow"), true);
164
165
166
// Replacement: the portion of `in` matched by the constraint is replaced by
// "dog" and written to `out`; the call returns false and `out` equals `in`
// when nothing matches.
167  string in, out;
168  s.Reset();
169  s.SetMatch_text("cat");
171
172  in = "cat";
173  BOOST_CHECK(s.ReplaceStringConstraintPortionInString(out, in, "dog"));
174  BOOST_CHECK_EQUAL(out, "dog");
175
176  in = "catalog";
177  BOOST_CHECK(s.ReplaceStringConstraintPortionInString(out, in, "dog"));
178  BOOST_CHECK_EQUAL(out, "dogalog");
179
180  in = "the catalog";
181  BOOST_CHECK(s.ReplaceStringConstraintPortionInString(out, in, "dog"));
182  BOOST_CHECK_EQUAL(out, "the dogalog");
183
184  in = "ducat";
185  BOOST_CHECK(s.ReplaceStringConstraintPortionInString(out, in, "dog"));
186  BOOST_CHECK_EQUAL(out, "dudog");
187
188  in = "dog, cat, cow";
189  BOOST_CHECK(s.ReplaceStringConstraintPortionInString(out, in, "dog"));
190  BOOST_CHECK_EQUAL(out, "dog, dog, cow");
191
192  in = "feline";
193  BOOST_CHECK(!s.ReplaceStringConstraintPortionInString(out, in, "dog"));
194  BOOST_CHECK_EQUAL(out, "feline");
195
196 }
197 
198 
// Checks that Match() accepts a text whose words differ from the constraint's
// match text only by pairs registered as CWord_substitution entries, and that
// tightening a single substitution (whole-word on `hop`, case-sensitive on
// `article`) makes the same text stop matching.
// NOTE(review): the declaration of `s` and whatever else stood on the numbered
// lines 203, 204 and 214 is not visible in this listing - confirm against the
// repository source.
199 BOOST_AUTO_TEST_CASE(Test_StringConstraintWithSynonyms)
200 {
201  string text = "The quick brown fox jumped over the lazy dog.";
202
205  s.SetMatch_text("dog leaped");
206  CRef<CWord_substitution> subst1(new CWord_substitution("leap", "jump"));
207  s.SetIgnore_words().Set().push_back(subst1);
208  CRef<CWord_substitution> subst2(new CWord_substitution("dog", "fox"));
209  s.SetIgnore_words().Set().push_back(subst2);
210
211  BOOST_CHECK_EQUAL(s.Match(text), true);
212
// Second scenario: every word of the match text has a substitution that maps
// it onto the corresponding word of `text`.
213  s.Reset();
215  s.SetMatch_text("A fast beige wolf leaped across a sleepy beagle.");
216  CRef<CWord_substitution> article(new CWord_substitution("a", "the"));
217  s.SetIgnore_words().Set().push_back(article);
218  CRef<CWord_substitution> speedy(new CWord_substitution("fast", "quick"));
219  s.SetIgnore_words().Set().push_back(speedy);
220  CRef<CWord_substitution> color(new CWord_substitution("beige", "brown"));
221  s.SetIgnore_words().Set().push_back(color);
222  CRef<CWord_substitution> wild(new CWord_substitution("wolf", "fox"));
223  s.SetIgnore_words().Set().push_back(wild);
224  CRef<CWord_substitution> hop(new CWord_substitution("leap", "jump"));
225  s.SetIgnore_words().Set().push_back(hop);
226  CRef<CWord_substitution> direction(new CWord_substitution("across", "over"));
227  s.SetIgnore_words().Set().push_back(direction);
228  CRef<CWord_substitution> tired(new CWord_substitution("sleepy", "lazy"));
229  s.SetIgnore_words().Set().push_back(tired);
230  CRef<CWord_substitution> tame(new CWord_substitution("beagle", "dog"));
231  s.SetIgnore_words().Set().push_back(tame);
232
233  BOOST_CHECK_EQUAL(s.Match(text), true);
234
235  // won't work if leap is whole word
236  hop->SetWhole_word(true);
237  BOOST_CHECK_EQUAL(s.Match(text), false);
238
239  // won't work if articles are case sensitive
240  hop->SetWhole_word(false);
241  article->SetCase_sensitive(true);
242  BOOST_CHECK_EQUAL(s.Match(text), false);
243
244 }
245 
// Matches several spellings against the match text "Homo sapiens" with spaces
// and punctuation ignored and two synonym lists registered: one for the whole
// phrase "Homo sapiens" and one for the word "sapiens". Listed synonyms
// ("human", "Homo sapien") must match; near misses ("humano", "Human sapien",
// a bare "sapien") must not.
// NOTE(review): the declarations of `s`, `word_sub` and `word_sub2` (numbered
// lines 249, 255, 273) and line 251 are not visible in this listing - confirm
// against the repository source.
246 BOOST_AUTO_TEST_CASE(Test_synonyms)
247 {
248  // string_constraint with ignore-words
250  s.SetMatch_text("Homo sapiens");
252  s.SetIgnore_space(true);
253  s.SetIgnore_punct(true);
254
256  word_sub->SetWord("Homo sapiens");
257  list <string> syns;
258  syns.push_back("human");
259  syns.push_back("Homo sapien");
260  syns.push_back("Homosapiens");
261  syns.push_back("Homo-sapiens");
262  syns.push_back("Homo spiens");
263  syns.push_back("Homo Sapience");
264  syns.push_back("homosapein");
265  syns.push_back("homosapiens");
266  syns.push_back("homosapien");
267  syns.push_back("homo_sapien");
268  syns.push_back("homo_sapiens");
269  syns.push_back("Homosipian");
270  word_sub->SetSynonyms() = syns;
271  s.SetIgnore_words().Set().push_back(word_sub);
272
// `syns` is reused as scratch storage for the second synonym list.
274  word_sub2->SetWord("sapiens");
275  syns.clear();
276  syns.push_back("sapien");
277  syns.push_back("sapeins");
278  syns.push_back("sapein");
279  syns.push_back("sapins");
280  syns.push_back("sapens");
281  syns.push_back("sapin");
282  syns.push_back("sapen");
283  syns.push_back("sapians");
284  syns.push_back("sapian");
285  syns.push_back("sapies");
286  syns.push_back("sapie");
287  word_sub2->SetSynonyms() = syns;
288  s.SetIgnore_words().Set().push_back(word_sub2);
289  string test = "human";
290  BOOST_CHECK_EQUAL(s.Match(test), true);
291  test = "humano";
292  BOOST_CHECK_EQUAL(s.Match(test), false);
293  test = "Homo sapien";
294  BOOST_CHECK_EQUAL(s.Match(test), true);
295  test = "Human sapien";
296  BOOST_CHECK_EQUAL(s.Match(test), false);
297  test = "sapien";
298  BOOST_CHECK_EQUAL(s.Match(test), false);
299 }
300 
301 
// Regression test named after ticket SQD-2048: match text "cytochrome b gene"
// with case-insensitive matching, spaces and punctuation ignored, and synonym
// lists for the whole phrase and for the word "gene". The phrase itself and
// "cytochrome b partial" must match; the bare abbreviation "cytb" must not.
// NOTE(review): the declarations of `s`, `subst1` and `subst2` and the numbered
// line 306 are not visible in this listing - confirm against the repository
// source.
302 BOOST_AUTO_TEST_CASE(Test_SQD_2048)
303 {
305  s.SetMatch_text("cytochrome b gene");
307  s.SetCase_sensitive(false);
308  s.SetIgnore_space(true);
309  s.SetIgnore_punct(true);
310
312  subst1->SetWord("cytochrome b gene");
313  subst1->SetSynonyms().push_back("cytochrome b cytb");
314  subst1->SetSynonyms().push_back("cytochrome b cyt b");
315  subst1->SetSynonyms().push_back("cytochrome b (cytb)");
316  subst1->SetSynonyms().push_back("cytochrome b (cyt b)");
317  subst1->SetCase_sensitive(false);
318  subst1->SetWhole_word(false);
319
320  s.SetIgnore_words().Set().push_back(subst1);
321
323  subst2->SetWord("gene");
324  subst2->SetSynonyms().push_back("sequence");
325  subst2->SetSynonyms().push_back("partial");
326  subst2->SetSynonyms().push_back("complete");
327  subst2->SetSynonyms().push_back("region");
328  subst2->SetSynonyms().push_back("partial sequence");
329  subst2->SetSynonyms().push_back("complete sequence");
330  subst2->SetCase_sensitive(false);
331  subst2->SetWhole_word(false);
332
333  s.SetIgnore_words().Set().push_back(subst2);
// All remaining flags are explicitly assigned false before matching.
334  s.SetWhole_word(false);
335  s.SetNot_present(false);
336  s.SetIs_all_caps(false);
337  s.SetIs_all_lower(false);
338  s.SetIs_all_punct(false);
339  s.SetIgnore_weasel(false);
340
341  //NcbiCout << MSerial_AsnText << s;
342
343  BOOST_CHECK_EQUAL(s.Match("cytochrome b gene"), true);
344  BOOST_CHECK_EQUAL(s.Match("cytochrome b partial"), true);
345  BOOST_CHECK_EQUAL(s.Match("cytb"), false);
346 }
347 
348 
// Regression test named after ticket SQD-2093: a CSuspect_rule whose find part
// is a "contains" string constraint for "localisation" and whose replace part
// is a simple, non-whole-string replacement with "localization". The rule must
// match a sentence starting with a capitalised "Localisation", ApplyToString()
// must report success, and the matched portion must be replaced by the
// lower-case replacement text exactly as given.
349 BOOST_AUTO_TEST_CASE(Test_SQD_2093)
350 {
351  CSuspect_rule rule;
352
353  rule.SetFind().SetString_constraint().SetMatch_text("localisation");
354  rule.SetFind().SetString_constraint().SetMatch_location(eString_location_contains);
355  rule.SetReplace().SetReplace_func().SetSimple_replace().SetReplace("localization");
356  rule.SetReplace().SetReplace_func().SetSimple_replace().SetWhole_string(false);
357  rule.SetReplace().SetReplace_func().SetSimple_replace().SetWeasel_to_putative(false);
358  rule.SetReplace().SetMove_to_note(false);
359
// `original` is modified in place by ApplyToString().
360  string original = "Localisation of periplasmic protein complexes";
361  BOOST_CHECK_EQUAL(rule.GetFind().Match(original), true);
362  BOOST_CHECK_EQUAL(rule.ApplyToString(original), true);
363  BOOST_CHECK_EQUAL(original, "localization of periplasmic protein complexes");
364
365 }
366 
367 
// Match text "cytochrome oxidase subunit I gene" with four substitutions: the
// whole phrase with shorter variants, "gene" with no synonyms (subst2), "gene"
// with sequence-style synonyms (subst3), and "oxidase" -> "oxydase". The
// phrase must match without the trailing "gene", with the "oxydase" spelling,
// and with both combined.
// NOTE(review): the declarations of `s` and `subst1`..`subst4` and the numbered
// line 372 are not visible in this listing - confirm against the repository
// source.
368 BOOST_AUTO_TEST_CASE(Test_CytochromeOxidase)
369 {
371  s.SetMatch_text("cytochrome oxidase subunit I gene");
373  s.SetCase_sensitive(false);
374  s.SetIgnore_space(true);
375  s.SetIgnore_punct(true);
376
378  subst1->SetWord("cytochrome oxidase subunit I gene");
379  subst1->SetSynonyms().push_back("cytochrome oxidase I gene");
380  subst1->SetSynonyms().push_back("cytochrome oxidase I");
381  subst1->SetSynonyms().push_back("cytochrome subunit I");
382  subst1->SetCase_sensitive(false);
383  subst1->SetWhole_word(false);
384
385  s.SetIgnore_words().Set().push_back(subst1);
386
// subst2 registers "gene" with an empty synonym list; per the comment further
// down, this has the same effect as adding an empty-string synonym to subst3.
388  subst2->SetWord("gene");
389  subst2->SetCase_sensitive(false);
390  subst2->SetWhole_word(false);
391  s.SetIgnore_words().Set().push_back(subst2);
392
394  subst3->SetWord("gene");
395  /* Instead of having subst2, we can add the line below to subst3, the effect is the same
396  * subst3->SetSynonyms().push_back(kEmptyStr);
397  */
398  subst3->SetSynonyms().push_back("sequence");
399  subst3->SetSynonyms().push_back("partial");
400  subst3->SetSynonyms().push_back("complete");
401  subst3->SetSynonyms().push_back("region");
402  subst3->SetSynonyms().push_back("partial sequence");
403  subst3->SetSynonyms().push_back("complete sequence");
404  subst3->SetCase_sensitive(false);
405  subst3->SetWhole_word(false);
406  s.SetIgnore_words().Set().push_back(subst3);
407
409  subst4->SetWord("oxidase");
410  subst4->SetSynonyms().push_back("oxydase");
411  subst4->SetCase_sensitive(false);
412  subst4->SetWhole_word(false);
413  s.SetIgnore_words().Set().push_back(subst4);
414
415  s.SetWhole_word(false);
416  s.SetNot_present(false);
417  s.SetIs_all_caps(false);
418  s.SetIs_all_lower(false);
419  s.SetIs_all_punct(false);
420  s.SetIgnore_weasel(false);
421
422  BOOST_CHECK_EQUAL(s.Match("cytochrome oxidase subunit I"), true);
423  BOOST_CHECK_EQUAL(s.Match("cytochrome oxydase subunit I"), true);
424  BOOST_CHECK_EQUAL(s.Match("cytochrome oxydase subunit I gene"), true);
425 }
426 
// Case-insensitive match of "MHC CLASS II ANTIGEN gene" (spaces and punctuation
// ignored, "gene" substitutable by sequence-style synonyms): both the
// upper-case and the mixed-case spelling of the phrase must match.
// NOTE(review): the declarations of `s` and `subst2` and the numbered line 431
// are not visible in this listing - confirm against the repository source.
427 BOOST_AUTO_TEST_CASE(Test_AntigenGene)
428 {
430  s.SetMatch_text("MHC CLASS II ANTIGEN gene");
432  s.SetCase_sensitive(false);
433  s.SetIgnore_space(true);
434  s.SetIgnore_punct(true);
435
437  subst2->SetWord("gene");
438  subst2->SetSynonyms().push_back("sequence");
439  subst2->SetSynonyms().push_back("partial");
440  subst2->SetSynonyms().push_back("complete");
441  subst2->SetSynonyms().push_back("region");
442  subst2->SetSynonyms().push_back("partial sequence");
443  subst2->SetSynonyms().push_back("complete sequence");
444  subst2->SetCase_sensitive(false);
445  subst2->SetWhole_word(false);
446  s.SetIgnore_words().Set().push_back(subst2);
447
448
449  s.SetWhole_word(false);
450  s.SetNot_present(false);
451  s.SetIs_all_caps(false);
452  s.SetIs_all_lower(false);
453  s.SetIs_all_punct(false);
454  s.SetIgnore_weasel(false);
455
456  BOOST_CHECK_EQUAL(s.Match("MHC CLASS II ANTIGEN gene"), true);
457  BOOST_CHECK_EQUAL(s.Match("MHC class II antigen gene"), true);
458 }
459 
// Exercises the Is_all_caps and Is_all_lower flags: mixed-case input is
// rejected by both, while purely upper-case (resp. purely lower-case) input is
// accepted, including strings that contain spaces and punctuation.
// NOTE(review): the declaration of `s` (numbered line 462) is not visible in
// this listing - confirm against the repository source.
460 BOOST_AUTO_TEST_CASE(Test_Upper_LowerCases)
461 {
463  s.SetIs_all_caps(true);
464
465  BOOST_CHECK_EQUAL(s.Match("MHC CLASS ii ANTIGEN gene"), false);
466  BOOST_CHECK_EQUAL(s.Match("ANTIGEN"), true);
467  BOOST_CHECK_EQUAL(s.Match("ANTIGEN GENE"), true);
468  BOOST_CHECK_EQUAL(s.Match("CLASS: ANTIGEN"), true);
469
// Switch from the all-caps requirement to the all-lower requirement.
470  s.SetIs_all_caps(false);
471  s.SetIs_all_lower(true);
472
473  BOOST_CHECK_EQUAL(s.Match("MHC CLASS ii ANTIGEN gene"), false);
474  BOOST_CHECK_EQUAL(s.Match("antigen"), true);
475  BOOST_CHECK_EQUAL(s.Match("antigen gene"), true);
476  BOOST_CHECK_EQUAL(s.Match("class: antigen!"), true);
477 }
478 
479 
// Match text "NADH dehydrogenase subunit 1 gene" with substitutions for the
// whole phrase, for "1" -> "one" and for "gene" -> sequence-style synonyms.
// Variants that substitute both "1" and "gene" must match; dropping the
// trailing word, using a different subunit number, or dropping the number
// must not.
// NOTE(review): the declarations of `s` and `subst1`..`subst3` and the numbered
// line 484 are not visible in this listing - confirm against the repository
// source.
480 BOOST_AUTO_TEST_CASE(Test_NADH_dehydrogenase)
481 {
483  s.SetMatch_text("NADH dehydrogenase subunit 1 gene");
485  s.SetCase_sensitive(false);
486  s.SetIgnore_space(true);
487  s.SetIgnore_punct(true);
488
490  subst1->SetWord("NADH dehydrogenase subunit 1 gene");
491  subst1->SetSynonyms().push_back("NADH dehydrogenase subunit 1");
492  subst1->SetSynonyms().push_back("NADH dehydrogenase 1 gene");
493  subst1->SetSynonyms().push_back("NADH dehydrogenase 1");
494  subst1->SetSynonyms().push_back("NADH dehydrogenase subunit 1 protein");
495  subst1->SetSynonyms().push_back("NADH dehydrogenase 1 protein");
496  subst1->SetCase_sensitive(false);
497  subst1->SetWhole_word(false);
498  s.SetIgnore_words().Set().push_back(subst1);
499
501  subst2->SetWord("1");
502  subst2->SetSynonyms().push_back("one");
503  subst2->SetCase_sensitive(false);
504  subst2->SetWhole_word(false);
505  s.SetIgnore_words().Set().push_back(subst2);
506
508  subst3->SetWord("gene");
509  subst3->SetSynonyms().push_back("sequence");
510  subst3->SetSynonyms().push_back("partial");
511  subst3->SetSynonyms().push_back("complete");
512  subst3->SetSynonyms().push_back("region");
513  subst3->SetSynonyms().push_back("partial sequence");
514  subst3->SetSynonyms().push_back("complete sequence");
515  subst3->SetCase_sensitive(false);
516  subst3->SetWhole_word(false);
517  s.SetIgnore_words().Set().push_back(subst3);
518
519  s.SetWhole_word(false);
520  s.SetNot_present(false);
521  s.SetIs_all_caps(false);
522  s.SetIs_all_lower(false);
523  s.SetIs_all_punct(false);
524  s.SetIgnore_weasel(false);
525
526  BOOST_CHECK_EQUAL(s.Match("NADH dehydrogenase subunit one sequence"), true);
527  BOOST_CHECK_EQUAL(s.Match("NADH dehydrogenase subunit 1 gene"), true);
528  BOOST_CHECK_EQUAL(s.Match("NADH dehydrogenase subunit one"), false);
529  BOOST_CHECK_EQUAL(s.Match("NADH dehydrogenase subunit 2 gene"), false);
530  BOOST_CHECK_EQUAL(s.Match("NADH dehydrogenase subunit sequence"), false);
531 }
532 
// Match text "beta-actin gene" with substitutions for the whole phrase
// (hyphen/space/underscore variants, with and without "gene") and for "gene".
// The short form "beta actin", the exact phrase and "beta_actin sequence" must
// all match.
// NOTE(review): the declarations of `s`, `subst1` and `subst2` and the numbered
// line 537 are not visible in this listing - confirm against the repository
// source.
533 BOOST_AUTO_TEST_CASE(Test_Beta_actinGene)
534 {
536  s.SetMatch_text("beta-actin gene");
538  s.SetCase_sensitive(false);
539  s.SetIgnore_space(true);
540  s.SetIgnore_punct(true);
541
543  subst1->SetWord("beta-actin gene");
544  subst1->SetSynonyms().push_back("beta-actin");
545  subst1->SetSynonyms().push_back("beta actin");
546  subst1->SetSynonyms().push_back("beta actin gene");
547  subst1->SetSynonyms().push_back("beta_actin");
548  subst1->SetSynonyms().push_back("beta_actin gene");
549  subst1->SetCase_sensitive(false);
550  subst1->SetWhole_word(false);
551  s.SetIgnore_words().Set().push_back(subst1);
552
554  subst2->SetWord("gene");
555  subst2->SetSynonyms().push_back("sequence");
556  subst2->SetSynonyms().push_back("partial");
557  subst2->SetSynonyms().push_back("complete");
558  subst2->SetSynonyms().push_back("region");
559  subst2->SetSynonyms().push_back("partial sequence");
560  subst2->SetSynonyms().push_back("complete sequence");
561  subst2->SetCase_sensitive(false);
562  subst2->SetWhole_word(false);
563  s.SetIgnore_words().Set().push_back(subst2);
564
565  s.SetWhole_word(false);
566  s.SetNot_present(false);
567  s.SetIs_all_caps(false);
568  s.SetIs_all_lower(false);
569  s.SetIs_all_punct(false);
570  s.SetIgnore_weasel(false);
571
572  BOOST_CHECK_EQUAL(s.Match("beta actin"), true);
573  BOOST_CHECK_EQUAL(s.Match("beta-actin gene"), true);
574  BOOST_CHECK_EQUAL(s.Match("beta_actin sequence"), true);
575 }
576 
// Exercises the Is_first_cap flag and then the Is_first_each_cap flag against
// inputs with leading punctuation, whitespace, digits and mixed
// capitalisation. The checks marked "//!!" were flagged by the original author.
// NOTE(review): the declaration of `s` (numbered line 579) is not visible in
// this listing - confirm against the repository source.
577 BOOST_AUTO_TEST_CASE(Test_FirstCaps)
578 {
580  s.SetIs_first_cap(true);
581
// Per the expectations below, leading punctuation or whitespace does not
// prevent a match, while a leading digit or the empty string does.
582  BOOST_CHECK_EQUAL(s.Match(""), false);
583  BOOST_CHECK_EQUAL(s.Match("beta actin"), false);
584  BOOST_CHECK_EQUAL(s.Match("beta Actin"), false);
585  BOOST_CHECK_EQUAL(s.Match("bEta actin"), false);
586  BOOST_CHECK_EQUAL(s.Match("BEta actin"), true);
587  BOOST_CHECK_EQUAL(s.Match("Beta-actin Gene"), true);
588  BOOST_CHECK_EQUAL(s.Match("?Beta_Actin Gene"), true);
589  BOOST_CHECK_EQUAL(s.Match(" Beta actin"), true);
590  BOOST_CHECK_EQUAL(s.Match("4"), false);
591  BOOST_CHECK_EQUAL(s.Match("-12Beta"), false);
592
// Switch from "first letter capitalised" to "first letter of each word capitalised".
593  s.SetIs_first_cap(false);
594  s.SetIs_first_each_cap(true);
595
596  BOOST_CHECK_EQUAL(s.Match(""), false);
597  BOOST_CHECK_EQUAL(s.Match("beta actin"), false);
598  BOOST_CHECK_EQUAL(s.Match("Beta Actin"), true);
599  BOOST_CHECK_EQUAL(s.Match("bEta Actin"), false);
600  BOOST_CHECK_EQUAL(s.Match(" BEta.Actin"), true);
601  BOOST_CHECK_EQUAL(s.Match("Beta-actin Gene"), true); //!!
602  BOOST_CHECK_EQUAL(s.Match("Beta-Actin Gene"), true);
603  BOOST_CHECK_EQUAL(s.Match("Beta_actin Gene"), false);
604  BOOST_CHECK_EQUAL(s.Match("-Beta-actin Gene"), true);
605  BOOST_CHECK_EQUAL(s.Match("?Beta_Actin Gene"), true);
606  BOOST_CHECK_EQUAL(s.Match(" BETA ACTIN"), true);
607  BOOST_CHECK_EQUAL(s.Match("12 Ribosomal RNA"), true);
608  BOOST_CHECK_EQUAL(s.Match("12R Ribosomal RNA"), false); //!!
609  BOOST_CHECK_EQUAL(s.Match("12r Ribosomal RNA"), false); //!!
610 }
611 
// Match text "16S ribosomal RNA gene" with three substitutions: an empty word
// whose synonyms are trailing qualifiers ("partial sequence", "gene", ...),
// "16S" with other rRNA sizes, and "gene" with no synonyms. A different rRNA
// size must match, with or without an extra ", partial sequence" suffix.
// NOTE(review): the declarations of `s` and `subst1`..`subst3` and the numbered
// line 616 are not visible in this listing - confirm against the repository
// source.
612 BOOST_AUTO_TEST_CASE(Test_Matching_OptionalString)
613 {
615  s.SetMatch_text("16S ribosomal RNA gene");
617  s.SetCase_sensitive(false);
618  s.SetIgnore_space(true);
619  s.SetIgnore_punct(true);
620
// The substituted word is the empty string; going by the test name, this is
// presumably what makes the listed synonyms optional additions - TODO confirm.
622  subst1->SetWord("");
623  subst1->SetSynonyms().push_back("partial sequence");
624  subst1->SetSynonyms().push_back("complete sequence");
625  subst1->SetSynonyms().push_back("partial");
626  subst1->SetSynonyms().push_back("complete");
627  subst1->SetSynonyms().push_back("gene");
628  subst1->SetSynonyms().push_back("region");
629
630  subst1->SetCase_sensitive(false);
631  subst1->SetWhole_word(false);
632  s.SetIgnore_words().Set().push_back(subst1);
633
635  subst2->SetWord("16S");
636  subst2->SetSynonyms().push_back("5.8S");
637  subst2->SetSynonyms().push_back("12S");
638  subst2->SetSynonyms().push_back("18S");
639  subst2->SetSynonyms().push_back("23S");
640  subst2->SetSynonyms().push_back("28S");
641
642  subst2->SetCase_sensitive(false);
643  subst2->SetWhole_word(false);
644  s.SetIgnore_words().Set().push_back(subst2);
645
647  subst3->SetWord("gene");
648  subst3->SetCase_sensitive(false);
649  subst3->SetWhole_word(false);
650  s.SetIgnore_words().Set().push_back(subst3);
651
652  s.SetWhole_word(false);
653  s.SetNot_present(false);
654  s.SetIs_all_caps(false);
655  s.SetIs_all_lower(false);
656  s.SetIs_all_punct(false);
657  s.SetIgnore_weasel(false);
658
659  BOOST_CHECK_EQUAL(s.Match("18S ribosomal RNA gene"), true);
660  BOOST_CHECK_EQUAL(s.Match("18S ribosomal RNA gene, partial sequence"), true);
661 }
662 
// Checks that the match text "cytochrome oxidase subunit I (COI)" matches
// itself both before and after a substitution is registered that maps the
// whole phrase to its shorter form without "(COI)".
// NOTE(review): the declarations of `s` and `subst1` and the numbered line 667
// are not visible in this listing - confirm against the repository source.
663 BOOST_AUTO_TEST_CASE(Test_Matching_COI)
664 {
666  s.SetMatch_text("cytochrome oxidase subunit I (COI)");
668  s.SetCase_sensitive(false);
669  s.SetIgnore_space(true);
670  s.SetIgnore_punct(true);
671
672  BOOST_CHECK_EQUAL(s.Match("cytochrome oxidase subunit I (COI)"), true);
673
675  subst1->SetWord("cytochrome oxidase subunit I (COI)");
676  subst1->SetSynonyms().push_back("cytochrome oxidase subunit I");
677
678  subst1->SetCase_sensitive(false);
679  subst1->SetWhole_word(false);
680  s.SetIgnore_words().Set().push_back(subst1);
681
// Same input as above: adding the substitution must not break the exact match.
682  BOOST_CHECK_EQUAL(s.Match("cytochrome oxidase subunit I (COI)"), true);
683 }
684 
// A match text consisting of a single space, with spaces and punctuation both
// significant, must not match a word that contains no space.
// NOTE(review): the declaration of `s` and the numbered line 689 are not
// visible in this listing - confirm against the repository source.
685 BOOST_AUTO_TEST_CASE(Test_Matching_Space)
686 {
688  s.SetMatch_text(" ");
690  s.SetCase_sensitive(false);
691  s.SetIgnore_space(false);
692  s.SetIgnore_punct(false);
693
694  BOOST_CHECK_EQUAL(s.Match("Bacillus"), false);
695 }
696 
// Case-sensitive match of the suffix "aceae" with "ales" registered as a
// synonym. The suffix must be found when it finishes the string but not when
// further text follows it.
// NOTE(review): the expectations are consistent with an "ends" match location,
// but the call selecting it (numbered line 701) and the declarations of `s`
// and `subst1` are not visible in this listing - confirm against the
// repository source.
697 BOOST_AUTO_TEST_CASE(Test_MatchEnd)
698 {
700  s.SetMatch_text("aceae");
702  s.SetCase_sensitive(true);
703
705  subst1->SetWord("aceae");
706  subst1->SetSynonyms().push_back("ales");
707  s.SetIgnore_words().Set().push_back(subst1);
708  BOOST_CHECK_EQUAL(s.Match("Methylophilaceae bacterium"), false);
709
710  BOOST_CHECK_EQUAL(s.Match("Methylophilaceae"), true);
711  BOOST_CHECK_EQUAL(s.Match("bacterium"), false);
712 }
713 
// Guards against a false positive: with case-insensitive matching and spaces
// and punctuation ignored, "RNA-Dependent RNA polymerase" must not match the
// unrelated text "NADH dehydrogenase subunit".
// NOTE(review): the declaration of `s` and the numbered line 718 are not
// visible in this listing - confirm against the repository source.
714 BOOST_AUTO_TEST_CASE(Test_UnwantedMatch1)
715 {
717  s.SetMatch_text("RNA-Dependent RNA polymerase");
719  s.SetCase_sensitive(false);
720  s.SetIgnore_space(true);
721  s.SetIgnore_punct(true);
722
723  BOOST_CHECK_EQUAL(s.Match("NADH dehydrogenase subunit"), false);
724 }
725 
// Guards against a false positive: with case-insensitive matching and spaces
// and punctuation ignored, "Nonstructural protein" must not match the
// unrelated text "reverse transcriptase".
// NOTE(review): the declaration of `s` and the numbered line 730 are not
// visible in this listing - confirm against the repository source.
726 BOOST_AUTO_TEST_CASE(Test_UnwantedMatch2)
727 {
729  s.SetMatch_text("Nonstructural protein");
731  s.SetCase_sensitive(false);
732  s.SetIgnore_space(true);
733  s.SetIgnore_punct(true);
734
735  BOOST_CHECK_EQUAL(s.Match("reverse transcriptase"), false);
736 }
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef – NCBI reference-counting smart pointer template (holds a reference to a CObject-derived object).
Definition: ncbiobj.hpp:618
bool Match(const CMatchString &str) const
bool ReplaceStringConstraintPortionInString(string &result, const CMatchString &str, const string &replace) const
bool Match(const CMatchString &str) const
void SetMatch_text(const TMatch_text &value)
bool ApplyToString(string &result, const CMatchString &str) const
vector< size_t > GetMatchLens(const string &text, const string &pattern, char prev_char) const
std::ofstream out("events_result.xml")
main entry point for tests
void SetIs_first_cap(TIs_first_cap value)
Assign a value to Is_first_cap data member.
void SetIs_all_lower(TIs_all_lower value)
Assign a value to Is_all_lower data member.
void SetIs_first_each_cap(TIs_first_each_cap value)
Assign a value to Is_first_each_cap data member.
void SetFind(TFind &value)
Assign a value to Find data member.
void SetIs_all_punct(TIs_all_punct value)
Assign a value to Is_all_punct data member.
void SetIgnore_punct(TIgnore_punct value)
Assign a value to Ignore_punct data member.
void SetIgnore_words(TIgnore_words &value)
Assign a value to Ignore_words data member.
TSynonyms & SetSynonyms(void)
Assign a value to Synonyms data member.
const TFind & GetFind(void) const
Get the Find member data.
void SetMatch_location(TMatch_location value)
Assign a value to Match_location data member.
void SetIs_all_caps(TIs_all_caps value)
Assign a value to Is_all_caps data member.
void SetWord(const TWord &value)
Assign a value to Word data member.
void SetCase_sensitive(TCase_sensitive value)
Assign a value to Case_sensitive data member.
virtual void Reset(void)
Reset the whole object.
void SetIgnore_space(TIgnore_space value)
Assign a value to Ignore_space data member.
void SetWhole_word(TWhole_word value)
Assign a value to Whole_word data member.
void SetIgnore_weasel(TIgnore_weasel value)
Assign a value to Ignore_weasel data member.
void SetNot_present(TNot_present value)
Assign a value to Not_present data member.
void SetReplace(TReplace &value)
Assign a value to Replace data member.
void ResetIgnore_punct(void)
Reset Ignore_punct data member.
@ eString_location_equals
@ eString_location_contains
@ eString_location_starts
@ eString_location_ends
n background color
static void text(MDB_val *v)
Definition: mdb_dump.c:62
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
std::istream & in(std::istream &in_, double &x_)
int test(int srctype, const void *srcdata, int srclen, int dsttype, int dstlen)
Definition: t0019.c:43
Utility stuff for more convenient using of Boost.Test library.
USING_SCOPE(objects)
BOOST_AUTO_TEST_CASE(Test_WordSubstitution)
Modified on Thu Mar 28 17:12:05 2024 by modify_doxy.py rev. 669887