Browse Source

perl: Split overlong regexes into multiline extended regexes

This should make the regular expressions easier to read and understand,
and allows to add comments describing specific parts.

Addresses RegularExpressions::RequireExtendedFormatting.

Warned-by: perlcritic
Guillem Jover 8 years ago
parent
commit
23020a4da8

+ 1 - 0
debian/changelog

@@ -15,6 +15,7 @@ dpkg (1.18.3) UNRELEASED; urgency=low
   * Fix «dpkg --verify» with --root.
   * Fix an off-by-one write access in dpkg-deb when parsing the .deb magic.
     Reported by Jacek Wielemborek <d33tah@gmail.com>. Closes: #798324
+  * Split overlong perl regexes into multiline extended regexes.
   * Perl modules:
     - Only warn on invalid week days instead of aborting in
       Dpkg::Changelog::Entry::Debian. Regression introduced in dpkg 1.18.2.

+ 9 - 2
dselect/methods/Dselect/Ftp.pm

@@ -314,6 +314,14 @@ my %months = ('Jan', 0,
 	      'Nov', 10,
 	      'Dec', 11);
 
+my $ls_l_re = qr<
+    ([^ ]+\ *){5}                       # Perms, Links, User, Group, Size
+    [^ ]+                               # Blanks
+    \ ([A-Z][a-z]{2})                   # Month name (abbreviated)
+    \ ([0-9 ][0-9])                     # Day of month
+    \ ([0-9 ][0-9][:0-9][0-9]{2})       # Filename
+>x;
+
 sub do_mdtm {
     my ($ftp, $file) = @_;
     my ($time);
@@ -347,8 +355,7 @@ sub do_mdtm {
 #	print "[$#files]";
 
 	# get the date components from the output of 'ls -l'
-	if ($files[0] =~
-	    /([^ ]+ *){5}[^ ]+ ([A-Z][a-z]{2}) ([ 0-9][0-9]) ([0-9 ][0-9][:0-9][0-9]{2})/) {
+	if ($files[0] =~ $ls_l_re) {
 
             my($month_name, $day, $year_or_time, $month, $hours, $minutes,
 	       $year);

+ 58 - 8
scripts/Dpkg/Changelog/Debian.pm

@@ -58,6 +58,63 @@ use constant {
     CHANGES_OR_TRAILER => g_('more change data or trailer'),
 };
 
+my $ancient_delimiter_re = qr{
+    ^
+    (?: # Ancient GNU style changelog entry with expanded date
+      (?:
+        \w+\s+                          # Day of week (abbreviated)
+        \w+\s+                          # Month name (abbreviated)
+        \d{1,2}                         # Day of month
+        \Q \E
+        \d{1,2}:\d{1,2}:\d{1,2}\s+      # Time
+        [\w\s]*                         # Timezone
+        \d{4}                           # Year
+      )
+      \s+
+      (?:.*)                            # Maintainer name
+      \s+
+      [<\(]
+        (?:.*)                          # Maintainer email
+      [\)>]
+    | # Old GNU style changelog entry with expanded date
+      (?:
+        \w+\s+                          # Day of week (abbreviated)
+        \w+\s+                          # Month name (abbreviated)
+        \d{1,2},?\s*                    # Day of month
+        \d{4}                           # Year
+      )
+      \s+
+      (?:.*)                            # Maintainer name
+      \s+
+      [<\(]
+        (?:.*)                          # Maintainer email
+      [\)>]
+    | # Ancient changelog header w/o key=value options
+      (?:\w[-+0-9a-z.]*)                # Package name
+      \Q \E
+      \(
+        (?:[^\(\) \t]+)                 # Package version
+      \)
+      \;?
+    | # Ancient changelog header
+      (?:[\w.+-]+)                      # Package name
+      [- ]
+      (?:\S+)                           # Package version
+      \ Debian
+      \ (?:\S+)                         # Package revision
+    |
+      Changes\ from\ version\ (?:.*)\ to\ (?:.*):
+    |
+      Changes\ for\ [\w.+-]+-[\w.+-]+:?\s*$
+    |
+      Old\ Changelog:\s*$
+    |
+      (?:\d+:)?
+      \w[\w.+~-]*:?
+      \s*$
+    )
+}xi;
+
 =head1 METHODS
 
 =over 4
@@ -113,14 +170,7 @@ sub parse {
 	    next; # skip comments, even that's not supported
 	} elsif (m{^/\*.*\*/}o) {
 	    next; # more comments
-	} elsif (m/^(?:\w+\s+\w+\s+\d{1,2} \d{1,2}:\d{1,2}:\d{1,2}\s+[\w\s]*\d{4})\s+(?:.*)\s+[<\(](?:.*)[\)>]/o
-		 || m/^(?:\w+\s+\w+\s+\d{1,2},?\s*\d{4})\s+(?:.*)\s+[<\(](?:.*)[\)>]/o
-		 || m/^(?:\w[-+0-9a-z.]*) \((?:[^\(\) \t]+)\)\;?/io
-		 || m/^(?:[\w.+-]+)[- ](?:\S+) Debian (?:\S+)/io
-		 || m/^Changes from version (?:.*) to (?:.*):/io
-		 || m/^Changes for [\w.+-]+-[\w.+-]+:?\s*$/io
-		 || m/^Old Changelog:\s*$/io
-		 || m/^(?:\d+:)?\w[\w.+~-]*:?\s*$/o) {
+	} elsif (m/$ancient_delimiter_re/) {
 	    # save entries on old changelog format verbatim
 	    # we assume the rest of the file will be in old format once we
 	    # hit it for the first time

+ 31 - 4
scripts/Dpkg/Changelog/Entry/Debian.pm

@@ -60,12 +60,36 @@ my $name_chars = qr/[-+0-9a-z.]/i;
 
 # The matched content is the source package name ($1), the version ($2),
 # the target distributions ($3) and the options on the rest of the line ($4).
-our $regex_header = qr/^(\w$name_chars*) \(([^\(\) \t]+)\)((?:\s+$name_chars+)+)\;(.*?)\s*$/i;
+our $regex_header = qr{
+    ^
+    (\w$name_chars*)                    # Package name
+    \ \(([^\(\) \t]+)\)                 # Package version
+    ((?:\s+$name_chars+)+)              # Target distribution
+    \;                                  # Separator
+    (.*?)                               # Key=Value options
+    \s*$                                # Trailing space
+}xi;
 
 # The matched content is the maintainer name ($1), its email ($2),
 # some blanks ($3) and the timestamp ($4), which is decomposed into
 # day of week ($6), date-time ($7) and this into month name ($8).
-our $regex_trailer = qr/^ \-\- (.*) <(.*)>(  ?)(((\w+)\,\s*)?(\d{1,2}\s+(\w+)\s+\d{4}\s+\d{1,2}:\d\d:\d\d\s+[-+]\d{4}))\s*$/o;
+our $regex_trailer = qr<
+    ^
+    \ \-\-                              # Trailer marker
+    \ (.*)                              # Maintainer name
+    \ \<(.*)\>                          # Maintainer email
+    (\ \ ?)                             # Blanks
+    (
+      ((\w+)\,\s*)?                     # Day of week (abbreviated)
+      (
+        \d{1,2}\s+                      # Day of month
+        (\w+)\s+                        # Month name (abbreviated)
+        \d{4}\s+                        # Year
+        \d{1,2}:\d\d:\d\d\s+[-+]\d{4}   # ISO 8601 date
+      )
+    )
+    \s*$                                # Trailing space
+>xo;
 
 my %week_day = map { $_ => 1 } qw(Mon Tue Wed Thu Fri Sat Sun);
 my %month_abbrev = map { $_ => 1 } qw(
@@ -329,8 +353,11 @@ sub find_closes {
     my $changes = shift;
     my %closes;
 
-    while ($changes &&
-           ($changes =~ /closes:\s*(?:bug)?\#?\s?\d+(?:,\s*(?:bug)?\#?\s?\d+)*/pig)) {
+    while ($changes && ($changes =~ m{
+               closes:\s*
+               (?:bug)?\#?\s?\d+
+               (?:,\s*(?:bug)?\#?\s?\d+)*
+           }pigx)) {
         $closes{$_} = 1 foreach (${^MATCH} =~ /\#?\s?(\d+)/g);
     }
 

+ 13 - 2
scripts/Dpkg/Shlibs/Objdump.pm

@@ -296,10 +296,21 @@ sub parse_objdump_output {
 # (GLIBC_2.2) is the same but the symbol is hidden, a newer version of the
 # symbol exist
 
+my $vis_re = qr/(\.protected|\.hidden|\.internal|0x\S+)/;
+my $dynsym_re = qr<
+    ^
+    [0-9a-f]+                   # Symbol size
+    \ (.{7})                    # Flags
+    \s+(\S+)                    # Section name
+    \s+[0-9a-f]+                # Alignment
+    (?:\s+(\S+))?               # Version string
+    (?:\s+$vis_re)?             # Visibility
+    \s+(.+)                     # Symbol name
+>x;
+
 sub parse_dynamic_symbol {
     my ($self, $line) = @_;
-    my $vis_re = '(\.protected|\.hidden|\.internal|0x\S+)';
-    if ($line =~ /^[0-9a-f]+ (.{7})\s+(\S+)\s+[0-9a-f]+(?:\s+(\S+))?(?:\s+$vis_re)?\s+(.+)/) {
+    if ($line =~ $dynsym_re) {
 
 	my ($flags, $sect, $ver, $vis, $name) = ($1, $2, $3, $4, $5);
 

+ 13 - 1
scripts/dpkg-shlibdeps.pl

@@ -670,6 +670,18 @@ sub split_soname {
     }
 }
 
+my $shlibs_re = qr{
+    ^\s*
+    (?:(\S+):\s+)?              # Optional type
+    (\S+)\s+                    # Library
+    (\S+)                       # Version
+    (?:
+      \s+
+      (\S.*\S)                  # Dependencies
+    )?
+    \s*$
+}x;
+
 sub extract_from_shlibs {
     my ($soname, $shlibfile) = @_;
     # Split soname in name/version
@@ -686,7 +698,7 @@ sub extract_from_shlibs {
     while (<$shlibs_fh>) {
 	s/\s*\n$//;
 	next if m/^\#/;
-	if (!m/^\s*(?:(\S+):\s+)?(\S+)\s+(\S+)(?:\s+(\S.*\S))?\s*$/) {
+	if (!m/$shlibs_re/) {
 	    warning(g_("shared libs info file '%s' line %d: bad line '%s'"),
 	            $shlibfile, $., $_);
 	    next;

+ 10 - 2
src/t/dpkg_divert.t

@@ -283,7 +283,11 @@ install_diversions('');
 
 system("touch $testdir/foo");
 call_divert(['--rename', '--add', "$testdir/foo"],
-            expect_stdout_like => qr{Adding.*local.*diversion.*\Q$testdir\E/foo.*\Q$testdir\E/foo.distrib},
+            expect_stdout_like => qr{
+                Adding.*local.*diversion.*
+                \Q$testdir\E/foo.*
+                \Q$testdir\E/foo.distrib
+            }x,
             expect_stderr => '');
 ok(-e "$testdir/foo.distrib", 'foo diverted');
 ok(!-e "$testdir/foo", 'foo diverted');
@@ -297,7 +301,11 @@ install_diversions('');
 
 system("touch $testdir/foo");
 call_divert(['--add', "$testdir/foo"],
-            expect_stdout_like => qr{Adding.*local.*diversion.*\Q$testdir\E/foo.*\Q$testdir\E/foo.distrib},
+            expect_stdout_like => qr{
+                Adding.*local.*diversion.*
+                \Q$testdir\E/foo.*
+                \Q$testdir\E/foo.distrib
+            }x,
             expect_stderr => '');
 ok(!-e "$testdir/foo.distrib", 'foo diverted');
 ok(-e "$testdir/foo", 'foo diverted');

+ 1 - 0
t/critic.t

@@ -84,6 +84,7 @@ my @policies = qw(
     RegularExpressions::ProhibitUnusualDelimiters
     RegularExpressions::ProhibitUselessTopic
     RegularExpressions::RequireBracesForMultiline
+    RegularExpressions::RequireExtendedFormatting
     Subroutines::ProhibitExplicitReturnUndef
     Subroutines::ProhibitNestedSubs
     Subroutines::ProhibitReturnSort

+ 5 - 2
t/critic/perlcriticrc

@@ -9,8 +9,11 @@ add_list_funcs = Dpkg::Util::any Dpkg::Util::none
 [RegularExpressions::ProhibitUnusualDelimiters]
 allow_all_brackets = 1
 
-#[RegularExpressions::RequireExtendedFormatting]
-#minimum_regex_length_to_complain_about = 60
+[RegularExpressions::RequireBracesForMultiline]
+allow_all_brackets = 1
+
+[RegularExpressions::RequireExtendedFormatting]
+minimum_regex_length_to_complain_about = 60
 
 [ValuesAndExpressions::ProhibitInterpolationOfLiterals]
 # TODO: switch these to q{} ?