Browse Source

Do not use MD5SumValue for Description_md5()

Our profile says we spend about 5% of the time transforming the
hex digits into the binary format used by HashSumValue, only to
compare them against other strings. That makes no sense at all.

According to callgrind, this reduces the overall instruction
count from 5.3 billion to 5 billion in my example, which
roughly matches the 5%.
Julian Andres Klode 7 years ago
parent
commit
49521f8785

+ 7 - 11
apt-pkg/deb/deblistparser.cc

@@ -276,31 +276,27 @@ std::vector<std::string> debListParser::AvailableDescriptionLanguages()
    description. If no Description-md5 is found in the section it will be
    calculated.
  */
-MD5SumValue debListParser::Description_md5()
+APT::StringView debListParser::Description_md5()
 {
    StringView const value = Section.Find(pkgTagSection::Key::Description_md5);
-   if (value.empty() == true)
+   if (unlikely(value.empty() == true))
    {
       StringView const desc = Section.Find(pkgTagSection::Key::Description);
       if (desc == "\n")
-	 return MD5SumValue();
+	 return StringView();
 
       MD5Summation md5;
       md5.Add(desc.data(), desc.size());
       md5.Add("\n");
-      return md5.Result();
+      MD5Buffer = md5.Result();
+      return StringView(MD5Buffer);
    }
    else if (likely(value.size() == 32))
    {
-      MD5SumValue sumvalue;
-      if (sumvalue.Set(value))
-	 return sumvalue;
-
-      _error->Error("Malformed Description-md5 line; includes invalid character '%.*s'", (int)value.length(), value.data());
-      return MD5SumValue();
+      return value;
    }
    _error->Error("Malformed Description-md5 line; doesn't have the required length (32 != %d) '%.*s'", (int)value.size(), (int)value.length(), value.data());
-   return MD5SumValue();
+   return StringView();
 }
                                                                         /*}}}*/
 // ListParser::UsePackage - Update a package structure			/*{{{*/

+ 4 - 1
apt-pkg/deb/deblistparser.h

@@ -45,6 +45,7 @@ class APT_HIDDEN debListParser : public pkgCacheListParser
    private:
    std::vector<std::string> forceEssential;
    std::vector<std::string> forceImportant;
+   std::string MD5Buffer;
 
    protected:
    pkgTagFile Tags;
@@ -74,7 +75,9 @@ class APT_HIDDEN debListParser : public pkgCacheListParser
 #endif
    virtual bool NewVersion(pkgCache::VerIterator &Ver) APT_OVERRIDE;
    virtual std::vector<std::string> AvailableDescriptionLanguages() APT_OVERRIDE;
-   virtual MD5SumValue Description_md5() APT_OVERRIDE;
+#ifdef APT_PKG_EXPOSE_STRING_VIEW
+   virtual APT::StringView Description_md5() APT_OVERRIDE;
+#endif
    virtual unsigned short VersionHash() APT_OVERRIDE;
    virtual bool SameVersion(unsigned short const Hash, pkgCache::VerIterator const &Ver) APT_OVERRIDE;
    virtual bool UsePackage(pkgCache::PkgIterator &Pkg,

+ 3 - 2
apt-pkg/edsp/edsplistparser.cc

@@ -20,6 +20,7 @@
 #include <apt-pkg/tagfile.h>
 #include <apt-pkg/fileutl.h>
 #include <apt-pkg/pkgsystem.h>
+#include <apt-pkg/string_view.h>
 
 #include <array>
 
@@ -53,9 +54,9 @@ std::vector<std::string> edspLikeListParser::AvailableDescriptionLanguages()
 {
    return {};
 }
-MD5SumValue edspLikeListParser::Description_md5()
+APT::StringView edspLikeListParser::Description_md5()
 {
-   return MD5SumValue("");
+   return APT::StringView();
 }
 									/*}}}*/
 // ListParser::VersionHash - Compute a unique hash for this version	/*{{{*/

+ 4 - 1
apt-pkg/edsp/edsplistparser.h

@@ -24,12 +24,15 @@
 #include <apt-pkg/tagfile.h>
 #endif
 
+namespace APT {
+   class StringView;
+}
 class APT_HIDDEN edspLikeListParser : public debListParser
 {
    public:
    virtual bool NewVersion(pkgCache::VerIterator &Ver) APT_OVERRIDE;
    virtual std::vector<std::string> AvailableDescriptionLanguages() APT_OVERRIDE;
-   virtual MD5SumValue Description_md5() APT_OVERRIDE;
+   virtual APT::StringView Description_md5() APT_OVERRIDE;
    virtual unsigned short VersionHash() APT_OVERRIDE;
 
    bool LoadReleaseInfo(pkgCache::RlsFileIterator &FileI,FileFd &File,

+ 13 - 13
apt-pkg/pkgcachegen.cc

@@ -45,8 +45,8 @@ template<class T> using Dynamic = pkgCacheGenerator::Dynamic<T>;
 typedef std::vector<pkgIndexFile *>::iterator FileIterator;
 template <typename Iter> std::vector<Iter*> pkgCacheGenerator::Dynamic<Iter>::toReMap;
 
-static bool IsDuplicateDescription(pkgCache::DescIterator Desc,
-			    MD5SumValue const &CurMd5, std::string const &CurLang);
+static bool IsDuplicateDescription(pkgCache &Cache, pkgCache::DescIterator Desc,
+			    APT::StringView CurMd5, std::string const &CurLang);
 
 using std::string;
 using APT::StringView;
@@ -340,14 +340,14 @@ bool pkgCacheGenerator::MergeListPackage(ListParser &List, pkgCache::PkgIterator
 			   Pkg.Name(), "UsePackage", 1);
 
    // Find the right version to write the description
-   MD5SumValue CurMd5 = List.Description_md5();
+   StringView CurMd5 = List.Description_md5();
    std::vector<std::string> availDesc = List.AvailableDescriptionLanguages();
    for (Ver = Pkg.VersionList(); Ver.end() == false; ++Ver)
    {
       pkgCache::DescIterator VerDesc = Ver.DescriptionList();
 
       // a version can only have one md5 describing it
-      if (VerDesc.end() == true || MD5SumValue(VerDesc.md5()) != CurMd5)
+      if (VerDesc.end() == true || Cache.ViewString(VerDesc->md5sum) != CurMd5)
 	 continue;
 
       map_stringitem_t md5idx = VerDesc->md5sum;
@@ -355,7 +355,7 @@ bool pkgCacheGenerator::MergeListPackage(ListParser &List, pkgCache::PkgIterator
       {
 	 // don't add a new description if we have one for the given
 	 // md5 && language
-	 if (IsDuplicateDescription(VerDesc, CurMd5, *CurLang) == true)
+	 if (IsDuplicateDescription(Cache, VerDesc, CurMd5, *CurLang) == true)
 	    continue;
 
 	 AddNewDescription(List, Ver, *CurLang, CurMd5, md5idx);
@@ -489,7 +489,7 @@ bool pkgCacheGenerator::MergeListVersion(ListParser &List, pkgCache::PkgIterator
    }
 
    /* Record the Description(s) based on their master md5sum */
-   MD5SumValue CurMd5 = List.Description_md5();
+   StringView CurMd5 = List.Description_md5();
 
    /* Before we add a new description we first search in the group for
       a version with a description of the same MD5 - if so we reuse this
@@ -500,7 +500,7 @@ bool pkgCacheGenerator::MergeListVersion(ListParser &List, pkgCache::PkgIterator
       for (pkgCache::VerIterator V = P.VersionList();
 	   V.end() == false; ++V)
       {
-	 if (V->DescriptionList == 0 || MD5SumValue(V.DescriptionList().md5()) != CurMd5)
+	 if (V->DescriptionList == 0 || Cache.ViewString(V.DescriptionList()->md5sum) != CurMd5)
 	    continue;
 	 Ver->DescriptionList = V->DescriptionList;
       }
@@ -515,7 +515,7 @@ bool pkgCacheGenerator::MergeListVersion(ListParser &List, pkgCache::PkgIterator
    return true;
 }
 									/*}}}*/
-bool pkgCacheGenerator::AddNewDescription(ListParser &List, pkgCache::VerIterator &Ver, std::string const &lang, MD5SumValue const &CurMd5, map_stringitem_t &md5idx) /*{{{*/
+bool pkgCacheGenerator::AddNewDescription(ListParser &List, pkgCache::VerIterator &Ver, std::string const &lang, APT::StringView CurMd5, map_stringitem_t &md5idx) /*{{{*/
 {
    pkgCache::DescIterator Desc;
    Dynamic<pkgCache::DescIterator> DynDesc(Desc);
@@ -935,7 +935,7 @@ bool pkgCacheGenerator::NewFileDesc(pkgCache::DescIterator &Desc,
 /* This puts a description structure in the linked list */
 map_pointer_t pkgCacheGenerator::NewDescription(pkgCache::DescIterator &Desc,
 					    const string &Lang,
-					    const MD5SumValue &md5sum,
+					    APT::StringView md5sum,
 					    map_stringitem_t const idxmd5str)
 {
    // Get a structure
@@ -955,7 +955,7 @@ map_pointer_t pkgCacheGenerator::NewDescription(pkgCache::DescIterator &Desc,
       Desc->md5sum = idxmd5str;
    else
    {
-      map_stringitem_t const idxmd5sum = WriteStringInMap(md5sum.Value());
+      map_stringitem_t const idxmd5sum = WriteStringInMap(md5sum);
       if (unlikely(idxmd5sum == 0))
 	 return 0;
       Desc->md5sum = idxmd5sum;
@@ -1839,11 +1839,11 @@ bool pkgCacheGenerator::MakeOnlyStatusCache(OpProgress *Progress,DynamicMMap **O
 }
 									/*}}}*/
 // IsDuplicateDescription						/*{{{*/
-static bool IsDuplicateDescription(pkgCache::DescIterator Desc,
-			    MD5SumValue const &CurMd5, std::string const &CurLang)
+static bool IsDuplicateDescription(pkgCache &Cache, pkgCache::DescIterator Desc,
+			    APT::StringView CurMd5, std::string const &CurLang)
 {
    // Descriptions in the same link-list have all the same md5
-   if (Desc.end() == true || MD5SumValue(Desc.md5()) != CurMd5)
+   if (Desc.end() == true || Cache.ViewString(Desc->md5sum) != CurMd5)
       return false;
    for (; Desc.end() == false; ++Desc)
       if (Desc.LanguageCode() == CurLang)

+ 11 - 5
apt-pkg/pkgcachegen.h

@@ -41,7 +41,9 @@ class pkgCacheListParser;
 
 class APT_HIDDEN pkgCacheGenerator					/*{{{*/
 {
-   APT_HIDDEN map_stringitem_t WriteStringInMap(std::string const &String) { return WriteStringInMap(String.c_str()); };
+#ifdef APT_PKG_EXPOSE_STRING_VIEW
+   APT_HIDDEN map_stringitem_t WriteStringInMap(APT::StringView String) { return WriteStringInMap(String.data(), String.size()); };
+#endif
    APT_HIDDEN map_stringitem_t WriteStringInMap(const char *String);
    APT_HIDDEN map_stringitem_t WriteStringInMap(const char *String, const unsigned long &Len);
    APT_HIDDEN map_pointer_t AllocateInMap(const unsigned long &size);
@@ -117,13 +119,13 @@ class APT_HIDDEN pkgCacheGenerator					/*{{{*/
    map_pointer_t NewVersion(pkgCache::VerIterator &Ver, APT::StringView const &VerStr,
 			    map_pointer_t const ParentPkg, unsigned short const Hash,
 			    map_pointer_t const Next);
+   map_pointer_t NewDescription(pkgCache::DescIterator &Desc,const std::string &Lang, APT::StringView md5sum,map_stringitem_t const idxmd5str);
 #endif
    bool NewFileVer(pkgCache::VerIterator &Ver,ListParser &List);
    bool NewFileDesc(pkgCache::DescIterator &Desc,ListParser &List);
    bool NewDepends(pkgCache::PkgIterator &Pkg, pkgCache::VerIterator &Ver,
 		   map_pointer_t const Version, uint8_t const Op,
 		   uint8_t const Type, map_pointer_t* &OldDepLast);
-   map_pointer_t NewDescription(pkgCache::DescIterator &Desc,const std::string &Lang,const MD5SumValue &md5sum,map_stringitem_t const idxmd5str);
    bool NewProvides(pkgCache::VerIterator &Ver, pkgCache::PkgIterator &Pkg,
 		    map_stringitem_t const ProvidesVersion, uint8_t const Flags);
 
@@ -171,8 +173,10 @@ class APT_HIDDEN pkgCacheGenerator					/*{{{*/
 			   pkgCache::VerIterator &V);
    APT_HIDDEN bool AddImplicitDepends(pkgCache::VerIterator &V, pkgCache::PkgIterator &D);
 
+#ifdef APT_PKG_EXPOSE_STRING_VIEW
    APT_HIDDEN bool AddNewDescription(ListParser &List, pkgCache::VerIterator &Ver,
-	 std::string const &lang, MD5SumValue const &CurMd5, map_stringitem_t &md5idx);
+	 std::string const &lang, APT::StringView CurMd5, map_stringitem_t &md5idx);
+#endif
 };
 									/*}}}*/
 // This is the abstract package list parser class.			/*{{{*/
@@ -192,9 +196,9 @@ class APT_HIDDEN pkgCacheListParser
    inline map_stringitem_t StoreString(pkgCacheGenerator::StringType const type, const char *S,unsigned int Size) {return Owner->StoreString(type, S, Size);};
 #ifdef APT_PKG_EXPOSE_STRING_VIEW
    inline map_stringitem_t StoreString(pkgCacheGenerator::StringType const type, APT::StringView S) {return Owner->StoreString(type, S);};
+   inline map_stringitem_t WriteString(APT::StringView S) {return Owner->WriteStringInMap(S.data(), S.size());};
 #endif
 
-   inline map_stringitem_t WriteString(const std::string &S) {return Owner->WriteStringInMap(S);};
    inline map_stringitem_t WriteString(const char *S,unsigned int Size) {return Owner->WriteStringInMap(S,Size);};
 #ifdef APT_PKG_EXPOSE_STRING_VIEW
    bool NewDepends(pkgCache::VerIterator &Ver,APT::StringView Package, APT::StringView Arch,
@@ -217,7 +221,9 @@ class APT_HIDDEN pkgCacheListParser
 #endif
    virtual bool NewVersion(pkgCache::VerIterator &Ver) = 0;
    virtual std::vector<std::string> AvailableDescriptionLanguages() = 0;
-   virtual MD5SumValue Description_md5() = 0;
+#ifdef APT_PKG_EXPOSE_STRING_VIEW
+   virtual APT::StringView Description_md5() = 0;
+#endif
    virtual unsigned short VersionHash() = 0;
    /** compare currently parsed version with given version
     *