/* str.cc */

#include "str.h"
// #include "conv.h"

#if 0
   #include "inf.h" // only for test_normalize_path
#endif

#include <stdlib.h> // function malloc
#include <string.h> // function memcpy

// NOTE(review): this file had passed through a filter that deleted every
// "<...>" span (header names, template arguments, several "i<len" loop
// headers). The places that had to be reconstructed are marked with
// NOTE(review) below -- confirm them against version control.

OPEN_NAMESPACE

/* ---------------------------------------------------------------------- */

/*
string CharsToStr (const char * c)
{
   if (c == NULL)
      return "";
   else
      return string (strdup (c));
}
*/

// Copy s into a newly malloc'ed, zero terminated character array.
// The caller owns the array and releases it with free ().
// Returns NULL when the allocation fails.
char * StrToChars (const string s)
{
   int len = s.length ();
   char * p = static_cast <char *> (malloc (len + 1));
   if (p != NULL) // malloc may fail, do not write through a null pointer
   {
      memcpy (p, s.c_str (), len);
      p [len] = 0;
   }
   return p;
}

/* ---------------------------------------------------------------------- */

// Convert characters 'a'..'z' to upper case, other characters are kept.
// NOTE(review): loop header reconstructed (damaged in the source).
string UpperCase (string s)
{
   int len = s.length ();
   for (int i = 0; i < len; i++)
   {
      char c = s[i];
      if (c >= 'a' && c <= 'z')
      {
         c = c - 'a' + 'A';
         s[i] = c;
      }
   }
   return s;
}

// Convert characters 'A'..'Z' to lower case, other characters are kept.
// NOTE(review): loop header reconstructed (damaged in the source).
string LowerCase (string s)
{
   int len = s.length ();
   for (int i = 0; i < len; i++)
   {
      char c = s[i];
      if (c >= 'A' && c <= 'Z')
      {
         c = c - 'A' + 'a';
         s[i] = c;
      }
   }
   return s;
}

// Apply UpCase to the first character, empty string is returned unchanged.
string FirstUpperCase (string s)
{
   if (s.length () != 0)
      s[0] = UpCase (s[0]);
   return s;
}

// Apply LoCase to the first character, empty string is returned unchanged.
string FirstLowerCase (string s)
{
   if (s.length () != 0)
      s[0] = LoCase (s[0]);
   return s;
}

/* ---------------------------------------------------------------------- */

// true => value is not empty, starts with a letter (IsLetter)
// and continues with letters or digits.
// NOTE(review): everything after "for (int i = 1; i" was lost in the source.
// The loop below is reconstructed; IsLetterOrDigit is presumably the test
// used for the remaining characters (ModifyIdentifier uses the same rule)
// -- confirm against version control.
bool IsValidIdent (const string value)
{
   bool result = false;
   int len = value.length ();
   if (len != 0)
   {
      result = IsLetter (value [0]);
      for (int i = 1; i < len && result; i++)
         if (! IsLetterOrDigit (value [i]))
            result = false;
   }
   return result;
}

// NOTE(review): another function followed here. Its header (including its
// name) was lost together with the end of IsValidIdent; only the tail below
// survived. It presumably tests that a non-empty string consists of digits.
// Restore the header from str.h / version control and enable the code again.
#if 0
   /* ... lost text ... */ 0);
   for (int i = 0; i < len && result; i++)
      if (! IsDigit (value [i]))
         result = false;
   return result;
}
#endif

/* ---------------------------------------------------------------------- */

// Characters removed by the Trim functions: space, line feed,
// carriage return and tabulator.
inline bool is_WhiteSpace (int n)
{
   return n == ' ' || n == '\n' || n == '\r' || n == '\x09';
}

// Remove white spaces from both ends of the string.
string Trim (string s)
{
   int len = s.length ();
   while (len > 0 && is_WhiteSpace (s[len-1])) len --;

   int ofs = 1; // one-based position of the first kept character
   while (ofs <= len && is_WhiteSpace (s[ofs-1])) ofs ++;

   return s.substr (ofs-1, 1+len-ofs);
}

// Remove white spaces from the beginning of the string.
string TrimLeft (string s)
{
   int len = s.length ();

   int ofs = 1; // one-based position of the first kept character
   while (ofs <= len && is_WhiteSpace (s[ofs-1])) ofs ++;

   return s.substr (ofs-1, 1+len-ofs);
}

// Remove white spaces from the end of the string.
string TrimRight (string s)
{
   int len = s.length ();
   while (len > 0 && is_WhiteSpace (s[len-1])) len --;
   return s.substr (0, len);
}

/* ---------------------------------------------------------------------- */

// true => value starts with the prefix character and is longer than
// the prefix (a string equal to the prefix alone is not accepted).
bool HasPrefix (const string value, char prefix)
{
   int len = value.length ();
   return len > 1 && value [0] == prefix;
}

// Remove the prefix character when HasPrefix would accept it.
string RemovePrefix (const string value, char prefix)
{
   int len = value.length ();
   if (len > 1 && value [0] == prefix)
      return value.substr (1);
   else
      return value;
}

// true => value starts with the prefix and is longer than the prefix.
bool HasPrefix (const string value, const string prefix)
{
   int len = value.length ();
   int pref_len = prefix.length ();
   return len > pref_len && value.substr (0, pref_len) == prefix;
}

// Remove the prefix when value starts with it and is longer than it.
string RemovePrefix (const string value, const string prefix)
{
   int len = value.length ();
   int pref_len = prefix.length ();
   if (len > pref_len && value.substr (0, pref_len) == prefix)
      return value.substr (pref_len);
   else
      return value;
}

/* ---------------------------------------------------------------------- */

// true => value starts with the prefix character and the next character
// satisfies IsUpperCaseLetter.
bool HasWordPrefix (const string value, char prefix)
{
   int len = value.length ();
   return len > 1 && value [0] == prefix && IsUpperCaseLetter (value [1]);
}

// Remove the prefix character when HasWordPrefix would accept it.
string RemoveWordPrefix (const string value, char prefix)
{
   int len = value.length ();
   if (len > 1 && value [0] == prefix && IsUpperCaseLetter (value [1]))
      return value.substr (1);
   else
      return value;
}

// true => value starts with the prefix and the character following
// the prefix satisfies IsUpperCaseLetter.
bool HasWordPrefix (const string value, const string prefix)
{
   int len = value.length ();
   int pref_len = prefix.length ();
   return len > pref_len &&
          value.substr (0, pref_len) == prefix &&
          IsUpperCaseLetter (value [pref_len]);
}

// Remove the prefix when it is followed by an upper case letter.
string RemoveWordPrefix (const string value, const string prefix)
{
   int len = value.length ();
   int pref_len = prefix.length ();
   if (len > pref_len &&
       value.substr (0, pref_len) == prefix &&
       IsUpperCaseLetter (value [pref_len]))
      return value.substr (pref_len);
   else
      return value;
}

/* ---------------------------------------------------------------------- */

// true => value ends with the suffix character and is longer than
// one character.
bool HasSuffix (const string value, char suffix)
{
   int len = value.length ();
   return len > 1 && value [len-1] == suffix;
}

// Remove the last character when HasSuffix would accept it.
string RemoveSuffix (const string value, char suffix)
{
   int len = value.length ();
   if (len > 1 && value [len-1] == suffix)
      return value.substr (0, len-1);
   else
      return value;
}

// true => value ends with the suffix and is longer than the suffix.
bool HasSuffix (const string value, const string suffix)
{
   int len = value.length ();
   int suffix_len = suffix.length ();
   return len > suffix_len && value.substr (len-suffix_len) == suffix;
}

// Remove the suffix when value ends with it and is longer than it.
string RemoveSuffix (const string value, const string suffix)
{
   if (HasSuffix (value, suffix))
      return value.substr (0, value.length () - suffix.length ());
   else
      return value;
}

/* ---------------------------------------------------------------------- */

// Replace every occurrence of character c1 with character c2.
string ReplaceChars (string s, char c1, char c2)
{
   int len = s.length ();
   for (int i = 0; i < len; i++)
      if (s[i] == c1)
         s[i] = c2;
   return s;
}

// Return s without the characters equal to c.
// NOTE(review): everything after "for (int i = 0; i" was lost in the source,
// the loop below is reconstructed from the function name and its prologue.
string RemoveChars (const string s, char c)
{
   string result = "";
   int len = s.length ();
   for (int i = 0; i < len; i++)
      if (s[i] != c)
         result += s[i];
   return result;
}

// NOTE(review): another function followed here. Its header (including its
// name) was lost together with the end of RemoveChars; only the tail below
// survived. It inserts '_' before an upper case letter which follows a lower
// case letter and converts upper case letters to lower case (presumably the
// counterpart of RemoveUnderlines). Restore the header from str.h / version
// control and enable the code again.
#if 0
      /* ... lost text ... */ = 'A' && c <= 'Z')
      {
         if (underline) result += '_';
         c = c - 'A' + 'a'; // to lower case
         underline = false;
      }
      else if (c >= 'a' && c <= 'z')
         underline = true;
      else
         underline = false;
      result += c;
   }
   return result;
}
#endif

string RemoveUnderlines (const string s)
{
   // remove '_' and convert next character to upper case
   string result = "";
   int len = s.length ();
   bool upc = false; // true => previous character was '_'
   for (int i = 0; i < len; i ++)
   {
      char c = s[i];
      if (c == '_')
         upc = true;
      else
      {
         if (upc && c >= 'a' && c <= 'z')
            c = c - 'a' + 'A'; // to upper case
         result += c; // append in place, no temporary copy of result
         upc = false;
      }
   }
   return result;
}

/* ---------------------------------------------------------------------- */

// Return the text after the last '/', whole string when there is no '/'.
string RemovePath (string s)
{
   string::size_type inx = s.rfind ('/');
   if (inx != string::npos) // if found
      s = s.substr (inx+1);
   return s;
}

// Return the text before the last '/', whole string when there is no '/'.
string ExtractPath (string s)
{
   string::size_type inx = s.rfind ('/');
   if (inx != string::npos) // if found
      s = s.substr (0, inx);
   return s;
}

// Append '/' to a non-empty string which does not end with '/'.
string AddSlash (string s)
{
   int len = s.length ();
   if (len > 0 && s[len-1] != '/')
      return s + '/';
   else
      return s;
}

// Remove the trailing '/', a string of length one (e.g. "/") is kept.
string RemoveLastSlash (string s)
{
   int len = s.length ();
   if (len > 1 && s[len-1] == '/')
      return s.substr (0, len-1);
   else
      return s;
}

// Join directory name and file name with exactly one '/' between them.
// When one of the parameters is empty, the other one is returned.
string AddSubdir (const string dir_name, const string file_name)
{
   if (dir_name == "")
      return file_name;
   else if (file_name == "")
      return dir_name;
   else if (HasPrefix (file_name, '/'))
      return RemoveLastSlash (dir_name) + file_name;
   else
      return AddSlash (dir_name) + file_name;
}

/* ---------------------------------------------------------------------- */

// Join two names with "::" without doubling the separator.
string AddNamespace (string a, string b)
{
   if (a == "")
      return b;
   else if (a == "::")
      return "::" + b;
   else
      return RemoveSuffix (a, "::") + "::" + b;
      // RemoveSuffix recognizes the suffix only when the first parameter
      // is longer than the second one
}

/* ---------------------------------------------------------------------- */

string JoinIdentifiers (string a, string b)
{
   // join identifiers with '_'
   // or convert first character of second parameter to upper case

   if (a != "" && b != "") // one empty => nothing to do
   {
      char aa = a [a.length () - 1]; // last character from a
      char bb = b[0]; // first character from b

      // any underline => join with underline
      if (a.find ('_') != string::npos || b.find ('_') != string::npos)
      {
         if (aa != '_' && bb != '_') // do not repeat underlines
            a = a + '_';
      }
      else
      {
         // convert to uppercase
         if (bb >= 'a' && bb <='z')
            b[0] = UpCase (bb);
      }
   }

   return a + b;
}

/* 
---------------------------------------------------------------------- */ string ReplaceTwoColons (const string s, char replacement) { string result = ""; int len = s.length (); if (len > 0) { char last_char = s[0]; for (int i = 1; i 0 && value [len-1] == ' ') len --; // skip spaces if (len > 0 && value [len-1] == '*') { len --; // skip asterisk while (len > 0 && value [len-1] == ' ') len --; // skip spaces } return Head (value, len); } /* ---------------------------------------------------------------------- */ string ModifyIdentifier (string s) { int len = s.length (); if (len == 0) s = "_"; else { if (! IsLetter (s[0])) s[0] = '_'; for (int i = 1; i < len; i++) if (! IsLetterOrDigit (s[i])) s[i] = '_'; } return s; } string ModifyFileName (string s) { int len = s.length (); if (len == 0) s = "_"; // empty string -> underline else { if (! IsLetterOrDigit (s[0])) s[0] = '_'; // first character -> underline for (int i = 1; i < len; i++) if (! IsLetterOrDigit (s[i]) || s[i] == '_') s[i] = '-'; // next characters -> dash } return s; } string HeaderIdentifier (const string prefix, const string file_name) { return "__" + UpperCase (ModifyIdentifier (prefix + "_" + RemovePath (file_name))) + "__"; } /* ---------------------------------------------------------------------- */ const char hex [16+1] = "0123456789abcdef"; string QuoteChrContent (char value) { string result = ""; unsigned char ch = value; if (ch < ' ' || ch == 128 || ch >= 255) { result = '\\'; switch (ch) { case '\a': result = result + 'a'; break; case '\b': result = result + 'b'; break; case '\f': result = result + 'f'; break; case '\n': result = result + 'n'; break; case '\r': result = result + 'r'; break; case '\t': result = result + 't'; break; case '\v': result = result + 'v'; break; default: result = result + 'x' + hex [ch >> 4] + hex [ch & 15]; break; } } else { if (ch == '\\' || ch == '"') result = CharToStr ('\\') + value; else result = value; } return result; } string QuoteStrContent (const string 
value) { int len = value.length (); bool simple = true; for (int i = 0; i < len && simple; i++) { unsigned char ch = value [i]; if (ch < ' ' || ch == '\\' || ch == '"' || ch == 128 || ch >= 255) simple = false; } if (simple) { return value; } else { string result = ""; for (int i = 0; i < len; i++) result += QuoteChrContent (value [i]); return result; } } string QuoteStr (const string value, char quote) { return quote + QuoteStrContent (value) + quote; } string QuoteChr (char value, char quote) { return quote + QuoteStrContent (CharToStr (value)) + quote; } string EscapeStr (const string value) { int len = value.length (); bool simple = true; for (int i = 0; i < len && simple; i++) { unsigned char ch = value [i]; if (ch < ' ' || ch == 128 || ch >= 255) simple = false; } if (simple) { return value; } else { string result = ""; for (int i = 0; i < len; i++) { unsigned char ch = value [i]; if (ch < ' ' || ch == 128 || ch >= 255) result += QuoteChrContent (ch); else result += ch; } return result; } } /* ---------------------------------------------------------------------- */ string RemoveQuotes (const string value, char quote) { string result = value; int len = value.length (); if (len >= 2 && value [0] == quote && value [len-1] == quote) { result = ""; int inx = 0; if (value [0] == quote) inx = 1; /* skip first quote */ int max = len-1; if (value [len-1] == quote) max = len-2; /* skip last quote */ while (inx <= max) { char ch = value [inx]; if (ch != backslash || inx == max) /* simple character */ { result += ch; inx ++; } else /* escape sequence */ { inx ++; /* skip backslash */ ch = value [inx]; if (IsOctal (ch)) /* octal */ { int n = 0; int cnt = 1; while (IsOctal (ch) && cnt <= 3 && inx <= max) { n = n * 8 + ch - '0'; cnt ++; inx ++; ch = value [inx]; } result += char (n); } else if (ch =='x' || ch =='X') /* hex */ { inx ++; int n = 0; while (IsHexDigit (ch) && inx <= max) { int d; if (ch >= 'A' && ch <= 'F') d = ch - 'A' + 10; else if (ch >= 'a' && ch <= 'f') d 
= ch - 'a' + 10; else d = ch - '0'; n = n * 16 + d; inx ++; ch = value [inx]; } result += char (n); } else { char n; switch (ch) /* other */ { case 'a': n = '\a'; break; case 'b': n = '\b'; break; case 'f': n = '\f'; break; case 'n': n = '\n'; break; case 'r': n = '\r'; break; case 't': n = '\t'; break; case 'v': n = '\v'; break; case quote1: case quote2: case backslash: case '?': default: n = ch; break; } result += n; inx ++; ch = value [inx]; } } } /* end of loop */ } return result; } /* ---------------------------------------------------------------------- */ string TrimLines (string s) // remove leading and trailing white spaces and empty lines { int len = s.length (); int inx = 0; bool finish = false; while (inx < len && ! finish) { // skip white spaces while (inx < len && is_WhiteSpace (s[inx])) inx ++; // skip end of line if (inx < len && s[inx] == cr) { inx ++; if (inx < len && s[inx] == lf) inx ++; } else if (inx < len && s[inx] == lf) inx ++; else finish = true; } int start = inx; inx = len-1; // last character finish = false; while (start <= inx && ! 
finish) { // skip white spaces while (start <= inx && is_WhiteSpace (s[inx])) inx --; // skip end of line if (start <= inx && s[inx] == lf) { inx --; if (start <= inx && s[inx] == cr) inx --; } else if (start <= inx && s[inx] == cr) inx --; else finish = true; } int stop = inx; if (start <= stop) return s.substr (start, stop-start+1); else return ""; } /* ---------------------------------------------------------------------- */ string NormalizeLines (const string value) // change end of line to line feed characters { string s = value; int len = s.length (); int inx = 0; int max = 0; while (inx < len) { if (s[inx] == cr) { s[max] = lf; // store line feed max ++; inx ++; if (inx < len && s[inx] == lf) inx ++; } else { if (max != inx) s[max] = s[inx]; max ++; inx ++; } } return s.substr (0, max); } /* ---------------------------------------------------------------------- */ string NormalizePath (const string s) // remove ".", ".." and "//" { string result = s; // reserve space int n = 0; // number of characters in output variable enum kind_t { none, // result variable is empty abs, // result starting with '/' rel, // result starting with '.' up, // result starting with '..' name // result starting with name }; kind_t kind = none; int level = 0; // number of directory names in output variable int len = s.length (); int i = 0; while (i < len) { // get characters, up to next slash int start = i; while (i < len && s[i] != '/') i++; int step = i-start; i ++; bool dupl = false; // true => add directory name to result variable if (step == 0) // "/" { if (kind == none) { assert (n <= len); result[n] = '/'; n ++; kind = abs; // starting absolute path } } else if (step == 1 && s[start] == '.') // "." { if (kind == none) { assert (n+1 <= len); result[n] = '.'; n ++; kind = rel; // starting relative path } } else if (step == 2 && s[start] == '.' && s[start+1] == '.') // ".." 
{ if (kind == none) { kind = up; // starting path dupl = true; } else if (level == 0) { // level is zero => name could be removed from result variable assert (kind != name); if (kind == abs) { // strange result "/.." dupl = true; // write two dots } else if (kind == rel) { // "." --> ".." n --; // remove "." assert (n == 0); kind = up; dupl = true; // write two dots } else if (kind == up) { dupl = true; // write two dots } } else if (level > 0) { level --; // remove one directory while (n > 0 && result[n-1] != '/') n --; if (level > 0) { assert (n > 0); n --; // skip slash } else { if (kind == name) { kind = rel; // result staring with "." // write one dot assert (n == 0); assert (n+1 <= len); result[n] = '.'; n ++; } else if (kind == abs) { // nothing to do } else if (kind == rel) { assert (n > 0); n --; // skip slash } else if (kind == up) { assert (n > 0); n --; // skip slash } } } } else { dupl = true; // add name to result variable level ++; // some name which cold be removed if (kind == none) kind = name; } if (dupl) { if (n == 0 || (n == 1 && result[0] == '/')) { // nothing } else { assert (n+1 <= len); result[n] = '/'; n ++; } assert (n+step <= len); for (int k = start; k < start+step; k++) { result[n] = s[k]; n ++; } } #if 0 string txt = ""; if (kind == abs) txt = "ABS"; else if (kind == rel) txt = "REL"; else if (kind == up) txt = "UP"; else if (kind == name) txt = "NAME"; info (Head (s, i) + " ~~> " + Head (result, n) + " " + txt + " (level=" + IntToStr (level) + ")"); #endif } result = Head (result, n); return result; } #if 0 void tst (string dir, string answer) { // info ("TEST " + dir); string dir2 = NormalizePath (dir); #if 0 info ("(1) " + dir); info ("(2) " + dir2); if (dir2 != answer) info ("BAD"); info (""); #else if (dir2 != answer) { info ("(1) " + dir); info ("(2) " + dir2); info ("BAD"); info ("EXPECTED " + answer); info (""); } #endif } void test_normalize_path () { tst ("/", "/"); tst ("//", "/"); tst ("///", "/"); tst ("./", "."); tst 
("~", "~"); tst ("", ""); tst ("/a/b/c/", "/a/b/c"); tst ("/a/b/c/.", "/a/b/c"); tst ("/a/b/c/..", "/a/b"); tst ("/a//b", "/a/b"); tst ("/a/./b", "/a/b"); tst ("/a/b/../c", "/a/c"); tst ("/a/../b", "/b"); tst ("/a/b/c/", "/a/b/c"); tst ("/a//b/c/", "/a/b/c"); tst ("/a/b//c/./d/n/../e", "/a/b/c/d/e"); tst ("/a/b//c/./d/n/m/../e/..", "/a/b/c/d/n"); tst ("/a/../bee/../cee", "/cee"); tst ("/a/../../", "/.."); tst ("./../../", "../.."); tst ("/.", "/"); tst ("/..", "/.."); tst ("/./a", "/a"); tst ("/../a", "/../a"); tst ("/.././a", "/../a"); tst ("/../../a", "/../../a"); tst (".", "."); tst ("..", ".."); tst ("./a", "./a"); tst ("../a", "../a"); tst (".././a", "../a"); tst ("../../a", "../../a"); tst ("/./a/b/c/", "/a/b/c"); tst ("/a/b/c/../../../f", "/f"); tst ("/a/b/c/../../../../f", "/../f"); tst ("/a/b/c/../d/../../f", "/a/f"); tst ("/a/b/c/../d/../../../f", "/f"); tst ("/a/b/c/../d/../../../../f", "/../f"); tst ("./a/b/", "./a/b"); tst ("./a/b/..", "./a"); tst ("./../", ".."); tst ("./../..", "../.."); tst ("./a/../", "."); tst ("./a/../../", ".."); tst ("./a/../b", "./b"); tst ("./a/b/../..", "."); tst ("./a/b/../../..", ".."); tst ("./a/b/../../c", "./c"); tst ("./a/../../b", "../b"); tst ("./a/../../b/..", ".."); tst ("./a/../../b/../..", "../.."); tst ("./a/b/c/../d/../../f", "./a/f"); tst ("./a/b/c/../d/../../../f", "./f"); tst ("./a/b/c/../d/../../../../f", "../f"); tst ("a/b/", "a/b"); tst ("a/b/..", "a"); tst ("a/../b", "./b"); tst ("a/../../b", "../b"); } #endif CLOSE_NAMESPACE