strutl.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. // -*- mode: cpp; mode: fold -*-
  2. // Description /*{{{*/
  3. // $Id: strutl.h,v 1.22 2003/02/02 22:20:27 jgg Exp $
  4. /* ######################################################################
  5. String Util - These are some useful string functions
  6. _strstrip is a function to remove whitespace from the front and end
  7. of a string.
  8. This source is placed in the Public Domain, do with it what you will
  9. It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  10. ##################################################################### */
  11. /*}}}*/
  12. #ifndef STRUTL_H
  13. #define STRUTL_H
  14. #include <limits>
  15. #include <string>
  16. #include <cstring>
  17. #include <vector>
  18. #include <iostream>
  19. #ifdef APT_PKG_EXPOSE_STRING_VIEW
  20. #include <apt-pkg/string_view.h>
  21. #endif
  22. #include <time.h>
  23. #include <stddef.h>
  24. #include "macros.h"
  25. #ifndef APT_10_CLEANER_HEADERS
  26. #include <stdlib.h>
  27. #endif
  28. #ifndef APT_8_CLEANER_HEADERS
  29. using std::string;
  30. using std::vector;
  31. using std::ostream;
  32. #endif
  33. namespace APT {
  34. namespace String {
  35. std::string Strip(const std::string &s);
  36. bool Endswith(const std::string &s, const std::string &ending);
  37. bool Startswith(const std::string &s, const std::string &starting);
  38. }
  39. }
  40. bool UTF8ToCodeset(const char *codeset, const std::string &orig, std::string *dest);
  41. char *_strstrip(char *String);
  42. char *_strrstrip(char *String); // right strip only
  43. char *_strtabexpand(char *String,size_t Len);
  44. bool ParseQuoteWord(const char *&String,std::string &Res);
  45. bool ParseCWord(const char *&String,std::string &Res);
  46. std::string QuoteString(const std::string &Str,const char *Bad);
  47. std::string DeQuoteString(const std::string &Str);
  48. std::string DeQuoteString(std::string::const_iterator const &begin, std::string::const_iterator const &end);
  49. // unescape (\0XX and \xXX) from a string
  50. std::string DeEscapeString(const std::string &input);
  51. std::string SizeToStr(double Bytes);
  52. std::string TimeToStr(unsigned long Sec);
  53. std::string Base64Encode(const std::string &Str);
  54. std::string OutputInDepth(const unsigned long Depth, const char* Separator=" ");
  55. std::string URItoFileName(const std::string &URI);
  56. APT_DEPRECATED_MSG("Specify if GMT is required or a numeric timezone can be used") std::string TimeRFC1123(time_t Date);
  57. /** returns a datetime string as needed by HTTP/1.1 and Debian files.
  58. *
  59. * Note: The date will always be represented in a UTC timezone
  60. *
  61. * @param Date to be represented as a string
  62. * @param NumericTimezone is preferred in general, but HTTP/1.1 requires the use
  63. * of GMT as timezone instead. \b true means that the timezone should be denoted
  64. * as "+0000" while \b false uses "GMT".
  65. */
  66. std::string TimeRFC1123(time_t Date, bool const NumericTimezone);
  67. /** parses time as needed by HTTP/1.1 and Debian files.
  68. *
  69. * HTTP/1.1 prefers dates in RFC1123 format (but the other two obsolete date formats
  * are supported too) and e.g. Release files use the same format in Date & Valid-Until
  71. * fields.
  72. *
  73. * Note: datetime strings need to be in UTC timezones (GMT, UTC, Z, +/-0000) to be
  74. * parsed. Other timezones will be rejected as invalid. Previous implementations
  75. * accepted other timezones, but treated them as UTC.
  76. *
  77. * @param str is the datetime string to parse
  78. * @param[out] time will be the seconds since epoch of the given datetime if
  79. * parsing is successful, undefined otherwise.
  80. * @return \b true if parsing was successful, otherwise \b false.
  81. */
  82. bool RFC1123StrToTime(const char* const str,time_t &time) APT_MUSTCHECK;
  83. bool FTPMDTMStrToTime(const char* const str,time_t &time) APT_MUSTCHECK;
  84. APT_DEPRECATED_MSG("Use RFC1123StrToTime or FTPMDTMStrToTime as needed instead") bool StrToTime(const std::string &Val,time_t &Result);
  85. std::string LookupTag(const std::string &Message,const char *Tag,const char *Default = 0);
  86. int StringToBool(const std::string &Text,int Default = -1);
  87. bool ReadMessages(int Fd, std::vector<std::string> &List);
  88. bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base = 0);
  89. bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base = 0);
  90. bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len);
  91. bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len);
  92. bool Hex2Num(const std::string &Str,unsigned char *Num,unsigned int Length);
  93. #ifdef APT_PKG_EXPOSE_STRING_VIEW
  94. APT_HIDDEN bool Hex2Num(const APT::StringView Str,unsigned char *Num,unsigned int Length);
  95. #endif
  96. // input changing string split
  97. bool TokSplitString(char Tok,char *Input,char **List,
  98. unsigned long ListMax);
  99. // split a given string by a char
  100. std::vector<std::string> VectorizeString(std::string const &haystack, char const &split) APT_PURE;
  /** \brief Return a vector of strings from string "input" where "sep"
   * is used as the delimiter string.
   *
   * \param input The input string.
   *
   * \param sep The separator to use.
   *
   * \param maxsplit (optional) The maximum amount of splitting that
   * should be done.
   *
   * The optional "maxsplit" argument can be used to limit the splitting;
   * if used, the string is only split at maxsplit places and the last
   * item in the vector contains the remainder string.
   */
  115. std::vector<std::string> StringSplit(std::string const &input,
  116. std::string const &sep,
  117. unsigned int maxsplit=std::numeric_limits<unsigned int>::max()) APT_CONST;
  118. void ioprintf(std::ostream &out,const char *format,...) APT_PRINTF(2);
  119. void strprintf(std::string &out,const char *format,...) APT_PRINTF(2);
  120. char *safe_snprintf(char *Buffer,char *End,const char *Format,...) APT_PRINTF(3);
  121. bool CheckDomainList(const std::string &Host, const std::string &List);
  /* Compatibility aliases: the names tolower_ascii and isspace_ascii are
     mapped onto the inline implementations defined in this header */
  123. #define tolower_ascii tolower_ascii_inline
  124. #define isspace_ascii isspace_ascii_inline
  125. APT_CONST APT_HOT
  126. static inline int tolower_ascii_unsafe(int const c)
  127. {
  128. return c | 0x20;
  129. }
  130. APT_CONST APT_HOT
  131. static inline int tolower_ascii_inline(int const c)
  132. {
  133. return (c >= 'A' && c <= 'Z') ? c + 32 : c;
  134. }
  135. APT_CONST APT_HOT
  136. static inline int isspace_ascii_inline(int const c)
  137. {
  138. // 9='\t',10='\n',11='\v',12='\f',13='\r',32=' '
  139. return (c >= 9 && c <= 13) || c == ' ';
  140. }
  // StringViewCompareFast - awkward attempt to optimize cache generation /*{{{*/
  #ifdef APT_PKG_EXPOSE_STRING_VIEW
  /**
   * \brief Faster comparison for string views (compare size before data)
   *
   * Still stable, but faster than the normal ordering.
   * As this is used for package comparison this *MUST* be case insensitive,
   * as the alternative is to lower case all dependency fields which is slow.
   *
   * Views of different size are ordered by size alone (shorter first);
   * views of equal size are ordered by their first differing byte after
   * ASCII lower-casing.
   *
   * \param a left-hand view
   * \param b right-hand view
   * \return -1 if a orders before b, 1 if a orders after b, 0 if both are
   * equal ignoring ASCII case
   */
  static inline int StringViewCompareFast(APT::StringView a, APT::StringView b) {
     // Compare the sizes instead of returning their difference: the
     // difference is computed in an unsigned type and then narrowed to int,
     // which can produce the wrong sign (or even 0) for large differences.
     if (a.size() != b.size())
        return a.size() < b.size() ? -1 : 1;

     auto l(a.data()), r(b.data());
     for (auto e(a.size()), i(decltype(e)(0)); i != e; ++i)
     {
        // lower-case each side once and reuse the result for the ordering
        int const lc = tolower_ascii_inline(l[i]);
        int const rc = tolower_ascii_inline(r[i]);
        if (lc != rc)
           return lc < rc ? -1 : 1;
     }
     return 0;
  }
  #endif
  /*}}}*/
  160. std::string StripEpoch(const std::string &VerStr);
  /* APT_MKSTRCMP(name,func) - generate convenience overloads of "name"
     Emits five inline overloads of name taking C strings and/or std::string
     operands. Each one derives the missing end pointers (strlen() for a C
     string, c_str()+length() for a std::string) and forwards to
     func(ABegin,AEnd,BBegin,BEnd) using const char * ranges.
     C string operands without an explicit end are passed to strlen(), so
     they have to be non-null and NUL-terminated.
     NOTE(review): no invocation of this macro is visible in this header,
     only APT_MKSTRCMP2 is instantiated here - confirm whether it is still
     needed. */
  #define APT_MKSTRCMP(name,func) \
  inline APT_PURE int name(const char *A,const char *B) {return func(A,A+strlen(A),B,B+strlen(B));} \
  inline APT_PURE int name(const char *A,const char *AEnd,const char *B) {return func(A,AEnd,B,B+strlen(B));} \
  inline APT_PURE int name(const std::string& A,const char *B) {return func(A.c_str(),A.c_str()+A.length(),B,B+strlen(B));} \
  inline APT_PURE int name(const std::string& A,const std::string& B) {return func(A.c_str(),A.c_str()+A.length(),B.c_str(),B.c_str()+B.length());} \
  inline APT_PURE int name(const std::string& A,const char *B,const char *BEnd) {return func(A.c_str(),A.c_str()+A.length(),B,BEnd);}
  /* APT_MKSTRCMP2(name,func) - iterator based variant of APT_MKSTRCMP
     Emits four inline overloads of name. std::string operands are handed to
     func as begin()/end() iterators rather than as const char * ranges, so
     func needs overloads accepting std::string::const_iterator. C string
     operands without an explicit end are measured with strlen() and
     therefore have to be non-null and NUL-terminated.
     Unlike APT_MKSTRCMP there is no (const char *,const char *) overload. */
  #define APT_MKSTRCMP2(name,func) \
  inline APT_PURE int name(const char *A,const char *AEnd,const char *B) {return func(A,AEnd,B,B+strlen(B));} \
  inline APT_PURE int name(const std::string& A,const char *B) {return func(A.begin(),A.end(),B,B+strlen(B));} \
  inline APT_PURE int name(const std::string& A,const std::string& B) {return func(A.begin(),A.end(),B.begin(),B.end());} \
  inline APT_PURE int name(const std::string& A,const char *B,const char *BEnd) {return func(A.begin(),A.end(),B,BEnd);}
  172. int APT_PURE stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd);
  173. int APT_PURE stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd);
  174. /* We assume that GCC 3 indicates that libstdc++3 is in use too. In that
  175. case the definition of string::const_iterator is not the same as
  176. const char * and we need these extra functions */
  177. #if __GNUC__ >= 3
  178. int APT_PURE stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
  179. const char *B,const char *BEnd);
  180. int APT_PURE stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
  181. std::string::const_iterator B,std::string::const_iterator BEnd);
  182. int APT_PURE stringcasecmp(std::string::const_iterator A,std::string::const_iterator AEnd,
  183. const char *B,const char *BEnd);
  184. int APT_PURE stringcasecmp(std::string::const_iterator A,std::string::const_iterator AEnd,
  185. std::string::const_iterator B,std::string::const_iterator BEnd);
  186. inline APT_PURE int stringcmp(std::string::const_iterator A,std::string::const_iterator Aend,const char *B) {return stringcmp(A,Aend,B,B+strlen(B));}
  187. inline APT_PURE int stringcasecmp(std::string::const_iterator A,std::string::const_iterator Aend,const char *B) {return stringcasecmp(A,Aend,B,B+strlen(B));}
  188. #endif
  189. APT_MKSTRCMP2(stringcmp,stringcmp)
  190. APT_MKSTRCMP2(stringcasecmp,stringcasecmp)
  191. // Return the length of a NULL-terminated string array
  192. size_t APT_PURE strv_length(const char **str_array);
  /** \brief Substitute a placeholder for a null C string.
   *
   *  \param s C string pointer, may be null
   *  \return s itself if it is non-null, otherwise the literal "(null)";
   *  the result is never a null pointer
   */
  inline const char *DeNull(const char *s)
  {
     if (s != 0)
        return s;
     return "(null)";
  }
  /** \brief A URI split into separately accessible components.
   *
   *  The components are plain public members. Filling them from a string is
   *  delegated to the private CopyFrom() and turning them back into a string
   *  to the std::string conversion operator; both are only declared here and
   *  defined elsewhere.
   */
  class URI
  {
     /** Fills the members from the string From (defined outside this header). */
     void CopyFrom(const std::string &From);

     public:

     std::string Access;
     std::string User;
     std::string Password;
     std::string Host;
     std::string Path;
     unsigned int Port;

     /** Converts the object back into a single string (defined outside this header). */
     operator std::string();
     /** Replaces the contents of this object with the result of CopyFrom(From). */
     inline void operator =(const std::string &From) {CopyFrom(From);}
     /** \return \b true if Access is empty. Const, so it can be called on const objects. */
     inline bool empty() const {return Access.empty();}
     static std::string SiteOnly(const std::string &URI);
     static std::string ArchiveOnly(const std::string &URI);
     static std::string NoUserPassword(const std::string &URI);

     /** Constructs the object from a string via CopyFrom().
      *
      *  Port is set to 0 before CopyFrom() runs so that it never holds an
      *  indeterminate value, independent of what CopyFrom() assigns.
      */
     URI(std::string Path) : Port(0) {CopyFrom(Path);}
     /** Constructs an empty URI: all strings empty and Port 0. */
     URI() : Port(0) {}
  };
  /* One entry of a substitution table passed to SubstVar().
     NOTE(review): presumably Subst is the text to look for and Contents the
     replacement - confirm against the implementation. */
  struct SubstVar
  {
     const char *Subst;             // C string, not owned by this struct
     const std::string *Contents;   // pointer to a string held elsewhere
  };
  // The struct and the functions share the name SubstVar, which is why the
  // parameter type has to be spelled "const struct SubstVar *".
  // NOTE(review): how the end of the Vars array is marked is not visible here.
  std::string SubstVar(std::string Str,const struct SubstVar *Vars);
  // Variant taking a single Subst/Contents pair directly.
  std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents);
  /* One entry of the list handed to RegexChoice().
     NOTE(review): the field meanings below are inferred from their names
     only - confirm against the RegexChoice() implementation. */
  struct RxChoiceList
  {
     void *UserData;    // opaque pointer, presumably for the caller's own use
     const char *Str;   // C string, presumably the choice's text
     bool Hit;          // presumably set when this entry was matched
  };
  226. unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
  227. const char **ListEnd);
  228. #endif