NCBI C++ ToolKit
pack_string.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: pack_string.cpp 99876 2023-05-18 17:31:03Z vasilche $
2 * ===========================================================================
3 * PUBLIC DOMAIN NOTICE
4 * National Center for Biotechnology Information
5 *
6 * This software/database is a "United States Government Work" under the
7 * terms of the United States Copyright Act. It was written as part of
8 * the author's official duties as a United States Government employee and
9 * thus cannot be copyrighted. This software/database is freely available
10 * to the public for use. The National Library of Medicine and the U.S.
11 * Government have not placed any restriction on its use or reproduction.
12 *
13 * Although all reasonable efforts have been taken to ensure the accuracy
14 * and reliability of the software and data, the NLM and the U.S.
15 * Government do not and cannot warrant the performance or results that
16 * may be obtained by using this software or data. The NLM and the U.S.
17 * Government disclaim all warranties, express or implied, including
18 * warranties of performance, merchantability or fitness for any particular
19 * purpose.
20 *
21 * Please cite the author in any work or product based on this material.
22 * ===========================================================================
23 *
24 * Author: Eugene Vasilchenko
25 *
26 * File Description: Serialization hooks to make strings with equal value
27 * to share representation object.
28 *
29 */
30 
31 #include <ncbi_pch.hpp>
32 #include <serial/pack_string.hpp>
33 #include <serial/objistr.hpp>
34 #include <serial/objectiter.hpp>
35 
37 
// Name of the environment variable that TryStringPack() passes to
// s_GetEnvFlag() (with a default of true) to decide whether string packing
// is enabled.
38 static const char* const STRING_PACK_ENV = "NCBI_SERIAL_PACK_STRINGS";
// Value that s_GetEnvFlag() accepts, via NStr::CompareNocase, as meaning
// "flag is on"; the literal "1" is accepted there as well.
39 static const char* const ENV_YES = "YES";
40 
// Initial value of m_LengthLimit in the member-initializer list that uses
// the default limits.  Pack() only considers strings whose size is
// <= GetLengthLimit().
// NOTE(review): presumably GetLengthLimit() returns m_LengthLimit - confirm
// in serial/pack_string.hpp.
41 static const size_t kDefaultLengthLimit = 32;
// Initial value of m_CountLimit in the same initializer list.  Pack() and
// AddNew() insert a new entry only while GetCount() < GetCountLimit().
// NOTE(review): presumably GetCountLimit() returns m_CountLimit - confirm
// in serial/pack_string.hpp.
42 static const size_t kDefaultCountLimit = 32;
43 
45  : m_LengthLimit(kDefaultLengthLimit), m_CountLimit(kDefaultCountLimit),
46  m_Skipped(0), m_CompressedIn(0),
47  m_CompressedOut(0)
48 {
49 }
50 
51 
// Construct a CPackString with caller-supplied limits.
//   length_limit - stored in m_LengthLimit
//   count_limit  - stored in m_CountLimit
// The counters m_Skipped, m_CompressedIn and m_CompressedOut (the values
// printed by DumpStatistics()) all start at zero.
52 CPackString::CPackString(size_t length_limit, size_t count_limit)
53  : m_LengthLimit(length_limit), m_CountLimit(count_limit),
54  m_Skipped(0), m_CompressedIn(0),
55  m_CompressedOut(0)
56 {
57 }
58 
59 
61 {
62 }
63 
64 
66 {
67  size_t total = 0;
68  typedef multiset< pair<size_t, string> > TStat;
69  TStat stat;
70  ITERATE ( TStrings, i, m_Strings ) {
71  stat.insert(TStat::value_type(i->GetCount(), i->GetString()));
72  total += i->GetCount();
73  }
74  ITERATE ( TStat, i, stat ) {
75  out << setw(10) << i->first << " : \"" << i->second << "\"\n";
76  }
77  out << setw(10) << total << " = " << m_CompressedIn << " -> " << m_CompressedOut << " strings\n";
78  out << setw(10) << m_Skipped << " skipped\n";
79  return out;
80 }
81 
82 
// Read a boolean flag from the process environment.
//   env     - name of the environment variable to look up with ::getenv()
//   def_val - result when the variable is not set at all
// Returns def_val when ::getenv() yields a null pointer.  Otherwise returns
// true if the value is exactly "1" or if NStr::CompareNocase() reports it
// equal to ENV_YES, and false for any other value.
83 bool CPackString::s_GetEnvFlag(const char* env, bool def_val)
84 {
85  const char* val = ::getenv(env);
86  if ( !val ) {
    // variable is not present in the environment: use the caller's default
87  return def_val;
88  }
    // copy into a string so it can be compared with == and CompareNocase
89  string s(val);
90  return s == "1" || NStr::CompareNocase(s, ENV_YES) == 0;
91 }
92 
93 
95 {
96  static atomic<Int1> saved_use_string_pack{-1};
97  auto use_string_pack = saved_use_string_pack.load(memory_order_acquire);
98  if ( use_string_pack < 0 ) {
99  // check if string packing is enabled by environment
100  use_string_pack = s_GetEnvFlag(STRING_PACK_ENV, true);
101  if ( use_string_pack ) {
102  // check if it's available in the STL implementation
103 
104  string s1("test"), s2;
105  s2 = s1;
106  if ( s1.data() != s2.data() ) {
107  // strings don't use reference counters
108  use_string_pack = false;
109  }
110  }
111  // save the result
112  saved_use_string_pack.store(use_string_pack, memory_order_release);
113  }
114  return use_string_pack != 0;
115 }
116 
117 
119 {
120  THROW1_TRACE(runtime_error,
121  "CPackString: bad ref counting");
122 }
123 
124 
// Assign between `s` and `src` in both directions when string packing is
// enabled, then compare the two data() pointers.
//   s   - string being assigned; also the value written into `src`
//   src - declared const, but written to through const_cast (see below)
// Returns true when TryStringPack() is true (after performing the
// assignments), false otherwise; in the false case neither string is touched.
125 bool CPackString::x_Assign(string& s, const string& src)
126 {
127  if ( TryStringPack() ) {
    // NOTE(review): this writes through a const reference.  That is undefined
    // behaviour if the object `src` refers to was itself defined const -
    // confirm that every caller passes a string that is really mutable.
128  const_cast<string&>(src) = s;
129  s = src;
    // After both assignments the two strings are expected to expose the same
    // buffer; differing data() pointers is the condition tested here.
130  if ( s.data() != src.data() ) {
    // NOTE(review): listing line 131 is missing from this extract, so the
    // body of this branch is not visible.  Presumably it reports the mismatch
    // (an x_RefCounterError() helper is listed in the file's index) - verify
    // against the repository copy.
132  }
133  return true;
134  }
135  else {
136  return false;
137  }
138 }
139 
140 
// Try to register the string `s` in the m_Strings set.
//   s - string to look up; passed on to AddOld() when an entry is matched or
//       newly inserted
// Returns true only when a new entry was inserted for `s`.  Returns false
// when an equal entry already existed, or when `s` was skipped because its
// size exceeds GetLengthLimit() or GetCount() has reached GetCountLimit().
// NOTE(review): AddOld(), Skipped() and SNode are declared outside the
// visible part of this file; their exact effects should be confirmed in
// serial/pack_string.hpp.
141 bool CPackString::Pack(string& s)
142 {
143  if ( s.size() <= GetLengthLimit() ) {
144  SNode key(s);
    // NOTE(review): listing line 145 is missing from this extract, so the
    // declaration of `iter` is not visible.  AddNew() asserts
    // iter == m_Strings.lower_bound(key), so it is presumably initialised the
    // same way here - verify against the repository copy.
146  bool found = iter != m_Strings.end() && *iter == key;
147  if ( found ) {
    // an equal entry already exists: hand it to AddOld(), nothing inserted
148  AddOld(s, iter);
149  return false;
150  }
151  else if ( GetCount() < GetCountLimit() ) {
    // room for a new entry: insert using `iter` as the position hint
152  iter = m_Strings.insert(iter, key);
153  ++m_CompressedOut;
154  iter->SetString(s);
155  AddOld(s, iter);
156  return true;
157  }
158  }
    // reached when the string is too long or the count limit is hit
159  Skipped();
160  return false;
161 }
162 
163 
// Variant of Pack() that takes the candidate value as a raw character range.
//   s    - output string; passed to AddOld() on a match or insert, or
//          assigned directly from data/size when the value is skipped
//   data - pointer to the characters of the candidate value
//   size - number of characters at `data`
// Returns true only when a new entry was inserted.  Returns false when an
// equal entry already existed, or when the value was skipped because `size`
// exceeds GetLengthLimit() or GetCount() has reached GetCountLimit().
// NOTE(review): AddOld(), Skipped() and SNode are declared outside the
// visible part of this file; their exact effects should be confirmed in
// serial/pack_string.hpp.
164 bool CPackString::Pack(string& s, const char* data, size_t size)
165 {
166  if ( size <= GetLengthLimit() ) {
167  SNode key(data, size);
    // NOTE(review): listing line 168 is missing from this extract, so the
    // declaration of `iter` is not visible.  AddNew() asserts
    // iter == m_Strings.lower_bound(key), so it is presumably initialised the
    // same way here - verify against the repository copy.
169  bool found = iter != m_Strings.end() && *iter == key;
170  if ( found ) {
    // an equal entry already exists: hand it to AddOld(), nothing inserted
171  AddOld(s, iter);
172  return false;
173  }
174  else if ( GetCount() < GetCountLimit() ) {
    // room for a new entry: insert using `iter` as the position hint
175  iter = m_Strings.insert(iter, key);
176  ++m_CompressedOut;
    // unlike Pack(string&), SetString() is called here without an argument
177  iter->SetString();
178  AddOld(s, iter);
179  return true;
180  }
181  }
    // skipped: count it and give the caller a plain copy of the characters
182  Skipped();
183  s.assign(data, size);
184  return false;
185 }
186 
187 
// Insert a value that the caller has already determined is not in m_Strings.
//   s    - output string; passed to AddOld() after insertion, or assigned
//          directly from data/size when the count limit prevents insertion
//   data - pointer to the characters of the new value
//   size - number of characters at `data`
//   iter - must equal m_Strings.lower_bound() for this value, and must not
//          refer to an equal entry (both conditions are checked by _ASSERT)
// Returns true when the value was inserted, false when GetCount() has
// reached GetCountLimit() and the value was skipped instead.
188 bool CPackString::AddNew(string& s, const char* data, size_t size,
189  iterator iter)
190 {
191  SNode key(data, size);
    // NOTE(review): listing line 192 is missing from this extract; its content
    // cannot be determined from here - check the repository copy.
193  _ASSERT(iter == m_Strings.lower_bound(key));
194  _ASSERT(!(iter != m_Strings.end() && *iter == key));
195  if ( GetCount() < GetCountLimit() ) {
    // insert using the caller-provided position as the hint
196  iter = m_Strings.insert(iter, key);
197  ++m_CompressedOut;
198  iter->SetString();
199  AddOld(s, iter);
200  return true;
201  }
    // count limit reached: count the skip and return a plain copy
202  Skipped();
203  s.assign(data, size);
204  return false;
205 }
206 
207 
209 {
210 }
211 
212 
214  size_t count_limit)
215  : m_PackString(length_limit, count_limit)
216 {
217 }
218 
219 
221 {
222 #if 0
223  NcbiCout << "CPackStringClassHook statistics:\n" <<
225 #endif
226 }
227 
228 
230 {
231 }
232 
233 
235  size_t count_limit)
236  : m_PackString(length_limit, count_limit)
237 {
238 }
239 
240 
242 {
243 #if 0
244  NcbiCout << "CPackStringChoiceHook statistics:\n" <<
246 #endif
247 }
248 
249 
CPackString m_PackString
CPackString m_PackString
static bool s_GetEnvFlag(const char *env, bool def_val)
Definition: pack_string.cpp:83
static void x_RefCounterError(void)
bool Pack(string &s)
static bool TryStringPack(void)
Definition: pack_string.cpp:94
size_t m_Skipped
size_t GetLengthLimit(void) const
set< SNode > m_Strings
~CPackString(void)
Definition: pack_string.cpp:60
static bool x_Assign(string &s, const string &src)
size_t GetCountLimit(void) const
void AddOld(string &s, const iterator &iter)
void Skipped(void)
size_t m_CompressedOut
size_t GetCount(void) const
TStrings::iterator iterator
CPackString(void)
Definition: pack_string.cpp:44
bool AddNew(string &s, const char *data, size_t size, iterator iter)
CNcbiOstream & DumpStatistics(CNcbiOstream &out) const
Definition: pack_string.cpp:65
size_t m_CompressedIn
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator end() const
Definition: set.hpp:136
const_iterator lower_bound(const key_type &key) const
Definition: set.hpp:138
std::ofstream out("events_result.xml")
main entry point for tests
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define THROW1_TRACE(exception_class, exception_arg)
Throw trace.
Definition: ncbiexpt.hpp:417
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NcbiEndl
Definition: ncbistre.hpp:548
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCout
Definition: ncbistre.hpp:543
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
int i
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
static const char *const STRING_PACK_ENV
Definition: pack_string.cpp:38
static const size_t kDefaultCountLimit
Definition: pack_string.cpp:42
static const size_t kDefaultLengthLimit
Definition: pack_string.cpp:41
static const char *const ENV_YES
Definition: pack_string.cpp:39
#define _ASSERT
static HENV env
Definition: transaction2.c:38
Modified on Tue Dec 05 02:19:52 2023 by modify_doxy.py rev. 669887