Polaris: String.h Source File

String.h

Go to the documentation of this file.
00001 ///
00002 #ifndef _POLARIS_STRING_H
00003 #define _POLARIS_STRING_H
00004 ///
00005 /// \class String 
00006 /// \brief a string class
00007 /// \defgroup C
00008 /// \ingroup C++ CVDL
00009 ///  Polaris
00010 /// \see base/String.h
00011 /// \see String.h
00012 /// \see String.cc
00013 ///
00014 /// \endcode
00015 /// \section Description Description
00016 /// The String class implements variable-length strings.  Most of the
00017 /// standard and expected methods are defined, including operator== and
00018 /// operator!=, operator=, operator+, and operator+=.
00019 ///
00020 /// This implementation allows strings to be undefined or defined.  The
00021 /// string is undefined if it is created by a parameterless constructor,
00022 /// and it remains undefined until an assignment.  This condition may be
00023 /// checked through the method defined().  E.g., the following code
00024 ///
00025 /// String str;
00026 /// if ( ! str.defined()) { cout << "Undefined at beginning.\n"; }
00027 /// str = "";
00028 /// if ( ! str.defined()) { cout << "Undefined at end.\n"; }
00029 ///
00030 /// will print ONLY the message "Undefined at beginning."
00031 /// (NOTE from the above that there is a difference between a String
00032 /// being equal to the empty string [""] and a String being undefined.)
00033 ///
00034 /// To make a String undefined, you can assign it the NULL character
00035 /// string:
00036 ///
00037 /// String str = "Defined";
00038 /// ...
00039 /// str = 0; // make str undefined again
00040 ///
00041 /// \endcode
00042 /// \section Listable Listable Strings
00043 /// Note:  This version of String is not listable.  If you need a
00044 /// listable string, look at the class "StringElem."
00045 ///
00046 /// \endcode
00047 /// \section Other Other Utilities
00048 /// A few other String-related or char*-related utilities may be
00049 /// found in the file utilities/string_util.h.  The utility functions
00050 /// to be found there may require more components of the base than
00051 /// just String (e.g. split() requires StringElem).  Those found here
00052 /// are fairly self-dependent.
00053 ///
00054 /// \endcode
00055 /// \section Comparing Comparing strings
00056 /// Either the left or right side of an '==' operator must be a String
00057 /// for the '==' operator to do string-based (and not pointer-based)
00058 /// comparisons.  For example, the following examples:
00059 ///
00060 /// String s1, s2;
00061 /// char *buf1, *buf2;
00062 /// ...
00063 /// if (s1 == s2) ...
00064 /// if (s1 == buf1) ...
00065 /// if (buf1 == s1) ...
00066 /// if ((String)buf1 == buf2) ...
00067 /// if (buf1 == (String)buf2) ...
00068 ///
00069 /// are all ways of doing string-based comparisons, but NOT:
00070 ///
00071 /// char *buf1, *buf2;
00072 /// if (buf1 == buf2)  ...    NO!  Pointer-based comparison.
00073 ///
00074 /// This previous example will give a pointer-based comparison
00075 /// of the addresses pointed to by buf1 and buf2, and not a
00076 /// string-based comparison of the strings pointed to by buf1
00077 /// and buf2.
00078 ///
00079 /// Since every construction of a String requires the copying of
00080 /// a character string, it is recommended that if a single character
00081 /// string is to be compared against several other strings that it
00082 /// should first be assigned to a single String before all the
00083 /// comparisons rather than having an explicit (String) typecast
00084 /// at every comparison.
00085 ///
00086 /// \endcode
00087 /// \section Examples Examples
00088 /// The following are examples of String usage (where what follows each
00089 /// "=>" is the output to standard output):
00090 ///
00091 /// String s;
00092 /// cout << "Undefined string: '" << s << "'\n";
00093 ///
00094 /// =>
00095 ///
00096 /// Undefined string: '(Undefined Str)'
00097 ///
00098 ///
00099 ///
00100 /// String sblank("");
00101 /// cout << "Zero-length string: '" << sblank << "'\n";
00102 ///
00103 /// =>
00104 ///
00105 /// Zero-length string: ''
00106 ///
00107 ///
00108 ///
00109 /// cout << ( (String)"First is" + " a String" ) << "\n";
00110 /// cout << ( "Second is " + (String)"a String" ) << "\n";
00111 /// cout << ( (String)"Both are " + (String)"Strings" ) << "\n";
00112 ///
00113 /// =>
00114 ///
00115 /// First is a String
00116 /// Second is a String
00117 /// Both are Strings
00118 ///
00119 ///
00120 ///
00121 /// String s;
00122 /// s += "Hello";
00123 /// cout << s << "\n";
00124 ///
00125 /// =>
00126 ///
00127 /// Hello
00128 ///
00129 ///
00130 ///
00131 /// String s = "Hello";
00132 /// s += " There";
00133 /// cout << s << "\n";
00134 ///
00135 /// =>
00136 ///
00137 /// Hello There
00138 ///
00139 ///
00140 ///
00141 /// String s = "Hi.";
00142 /// s = "Hello again.";
00143 /// cout << s << "\n";
00144 ///
00145 /// =>
00146 ///
00147 /// Hello again.
00148 ///
00149 ///
00150 ///
00151 /// String s("1234567890", 5);
00152 /// cout << s << "\n";
00153 ///
00154 /// =>
00155 ///
00156 /// 12345
00157 ///
00158 ///
00159 ///
00160 ///
00161 /// \endcode
00162 /// \section Bugs Bugs
00163 /// There is one pitfall in using strings, which has to do with
00164 /// functions which return Strings.  For instance, the following
00165 /// code will cause a program to bomb on many systems:
00166 ///
00167 /// String string();
00168 /// void func(const char *s);
00169 /// void main() {
00170 /// func(string());
00171 /// }
00172 ///
00173 /// The reason for the bomb is that the compiler creates a temporary
00174 /// String class object for the return value of string(), and this
00175 /// string object contains a local character buffer.  Before
00176 /// calling func(), the compiler causes an automatic coercion from
00177 /// that temporary String into a const char *, and the String's
00178 /// pointer value is passed in to the function.  Then the temporary
00179 /// String, which is no longer needed (or at least as far as the
00180 /// compiler can see) is deleted, which causes the string buffer to
00181 /// be deleted.  Finally, the function is called, but by then the
00182 /// const char * which was passed in to it points to a deleted
00183 /// memory buffer.
00184 ///
00185 #ifdef POLARIS_GNU_PRAGMAS
00186 #pragma interface
00187 #endif
00188 ///
00189 #include "ClassNames.h"
00190 #include <stream.h>
00191 #include <string.h>
00192 ///
00193 #include "Boolean.h"
00194 ///
00195 /// \note  A few other String-related or char*-related utilities may be
00196 /// found in the file utilities/string_util.h.  The utility functions
00197 /// to be found there may require more components of the base than
00198 /// just String (e.g. split() requires StringElem).  Those found here
00199 /// are fairly self-dependent.
00200 ///
00201 char           *upcase_ch(const char *string);
00202 ///<
00203 ///< Returns a copy of 'string' with all characters converted to upper case.
00204 ///< NOTE(review): who frees the returned buffer is not visible in this header -- confirm in the implementation.
00205 int             fortran_string_length(const char *string);
00206 ///<
00207 ///< Returns the length that the char * 'string' would have in a Fortran program
00208 ///< (this strips the enclosing quotes and any doubled internal quotes)
00209 ///<
00210 class String {
00211     friend String   operator + (const char *str, const String & string);
00212     friend Boolean  operator == (const char *str, const String & string);
00213     friend Boolean  operator != (const char *str, const String & string);
00214     friend Boolean  operator<(const char *str, const String & string);
00215     friend inline   ostream & operator << (ostream & o, const String & string);
00216 
00217     ///< Without this, a String may be destroyed before printing if
00218     ///< created on the fly
00219 
00220  private:
00221     int             _len;          ///< Value reported by len()
00222     char           *_ptr;          ///< Pointer to the character data
00223     unsigned        _defined:1;    ///< Flag reported by defined(): nonzero when the String is defined
00224 
00225     void            fillin_string(const int size, const char *chars);
00226     ///< Fill in the fields of a String structure
00227 
00228  public:
00229     inline          String();
00230     ///< Construct an undefined String
00231 
00232     String(const char *str);
00233     ///< Construct a string with a copy of a character string.  If str == 0,
00234     ///< the String will be undefined.
00235 
00236     String(const char *str, int max_len);
00237     ///< Construct a string with a copy of the first max_len characters
00238     ///< of a character string (up to the length of 'str')
00239 
00240     String(const String & other);
00241     ///< Construct a string with a copy of another String's current
00242     ///< value.
00243 
00244     String(const String & other, int first, int last);
00245     ///< Substring constructor
00246 
00247     String(const String & other, char *delim1, char *delim2);
00248     ///< Substring constructor using sets of delimiters
00249 
00250     virtual         ~String();
00251     ///< Virtual destructor
00252 
00253     unsigned        hash_value() const;
00254     ///< Returns a hash value based on the contents of the String.
00255 
00256     String & operator = (const char *str);
00257     ///< Assignment from a character string
00258 
00259     String & operator = (const String * other);
00260     ///< Assignment from a String *
00261 
00262     String & operator = (const String & other);
00263     ///< Assignment from a String
00264 
00265     String          operator + (const char *str) const;
00266     ///< Concatenation with a character string
00267 
00268     String          operator + (const String & other) const;
00269     ///< Concatenation with a String
00270 
00271     String & operator += (const char *str);
00272     ///< Append a character string
00273 
00274     String & operator += (const String & other);
00275     ///< Append a String
00276 
00277     inline const char &operator[] (int index) const;
00278     inline char       &operator[] (int index) ;
00279     ///< Return a reference (modifiable for the non-const overload) to the
00280     ///< character at zero-based position 'index' in the string.
00281     ///< THERE IS NO CHECK FOR LEGALITY OF THE INDEX
00282 
00283     inline int      operator == (const char *str) const;
00284     ///< Case-sensitive test of equality; false if this String is undefined
00285 
00286     inline int      operator != (const char *str) const;
00287     ///< Case-sensitive test of inequality (negation of operator==)
00288 
00289     inline int      operator<(const char *str) const;
00290     ///< Case-sensitive string comparison
00291 
00292     inline int      operator == (const String & s) const;
00293     ///< Case-sensitive test of equality; false if either String is undefined
00294 
00295     inline int      operator != (const String & s) const;
00296     ///< Case-sensitive test of inequality (negation of operator==)
00297 
00298     inline int      operator<(const String & s) const;
00299     ///< Case-sensitive string comparison; false if either String is undefined
00300 
00301     inline operator const char *() const;   ///< Character data if defined, else a fixed placeholder literal
00302     inline int      len() const;            ///< Returns the stored length
00303     inline int      defined() const;        ///< Nonzero if the String is defined
00304 
00305     int             index(const char *substring) const;
00306     ///< Return the index into a string where the given substring begins.
00307     ///< Returns -1 if 'substring' is not found, or if *this is undefined.
00308 
00309     int             index_case(const char *substring) const;
00310     ///< Return the index into a string where the given substring begins,
00311     ///< ignoring the case of both strings.
00312     ///< Returns -1 if 'substring' is not found, or if *this is undefined.
00313 
00314     String          upcase() const;
00315     ///< Returns a new String whose alphabetic chars have been converted
00316     ///< to upper case.  If this object is undefined, the String returned
00317     ///< will also be undefined.
00318 
00319     int             structures_OK() const;
00320     ///< Check the structure of the data for errors or inconsistency
00321     ///< Return 0 and print error message if problems found, otherwise
00322     ///< return 1 without message.
00323 
00324     void            absorb(char *allocated_str);
00325     ///< Set the string to the value of allocated_str, without
00326     ///< allocating new memory.
00327     ///< Note:  allocated_str must have been allocated using
00328     ///< the new operator (not malloc).
00329     ///< If allocated_str == 0, this String becomes undefined.
00330     ///< THIS MEMORY WILL BE DELETED UPON DESTROYING THE STRING!!
00331 };
00332 
00333 /* String dec(int i); */
00334 /* String hex(int i); */
00335 
00336 
00337 
00338 ///< inline functions
00339 
00340 inline ostream & 
00341 operator << (ostream & o, const String & string) {
00342     return o << (string.defined() ? (const char *) string : "(Undefined Str)");
00343 }
00344 
00345 inline 
00346 String::String()
00347 {
00348     #ifdef CLASS_INSTANCE_REGISTRY
00349     register_instance(STRING_CLASS, sizeof(String), this);
00350     #endif
00351 
00352     _len = 0;
00353     _ptr = "";
00354     _defined = 0;
00355 }
00356 
00357 /// Read-only access to the character at zero-based offset 'index'; the index is not range-checked.
00358 inline const char & String::operator[] (int index) const
00359 {
00360     return *(_ptr + index);
00361 }
00362 
00363 /// Writable access to the character at zero-based offset 'index'; the index is not range-checked.
00364 inline char & String::operator[] (int index)
00365 {
00366     return *(_ptr + index);
00367 }
00368 
00369 inline int 
00370 String::operator == (const char *str) const 
00371 {
00372     return _defined && 0 == strcmp(_ptr, str);
00373 }
00374 
00375 /// Case-sensitive inequality with a C string: the exact negation of operator==(const char *).
00376 inline int String::operator != (const char *str) const
00377 {
00378     return (*this == str) ? 0 : 1;
00379 }
00380 
00381 inline int      
00382 String::operator < (const char *str) const 
00383 {
00384     return strcmp(_ptr, str) < 0;
00385 }
00386 
00387 /// Case-sensitive equality of two Strings; yields 0 unless both operands are defined.
00388 inline int String::operator == (const String & s) const
00389 {
00390     return (defined() && s.defined()) ? (strcmp(_ptr, s._ptr) == 0) : 0;
00391 }
00392 
00393 /// Case-sensitive inequality of two Strings: the exact negation of operator==(const String &).
00394 inline int String::operator != (const String & s) const
00395 {
00396     return (*this == s) ? 0 : 1;
00397 }
00398 
00399 /// Case-sensitive "less than" between two Strings; yields 0 unless both operands are defined.
00400 inline int String::operator < (const String & s) const
00401 {
00402     return (defined() && s.defined()) ? (strcmp(_ptr, s._ptr) < 0) : 0;
00403 }
00404 
00405 inline 
00406 String::operator const char *() const 
00407 {
00408     return _defined ? _ptr : "**-*-Undefined String**-*-";
00409 }
00410 
00411 /// Returns the stored length field of this String.
00412 inline int String::len() const
00413 {
00414     return this->_len;
00415 }
00416 
00417 inline int 
00418 String::defined() const
00419 {
00420     return _defined;
00421 }
00422 
00423 #endif
 © 1995-2005 University of Illinois, Urbana-Champaign. All rights reserved.  Fri Mar 25 23:06:13 2005