HashCore.pm 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. # Copyright © 2007-2009 Raphaël Hertzog <hertzog@debian.org>
  2. # Copyright © 2009, 2012-2015 Guillem Jover <guillem@debian.org>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package Dpkg::Control::HashCore;
  17. use strict;
  18. use warnings;
  19. our $VERSION = '1.01';
  20. use Dpkg::Gettext;
  21. use Dpkg::ErrorHandling;
  22. use Dpkg::Control::FieldsCore;
  23. # This module cannot use Dpkg::Control::Fields, because that one makes use
  24. # of Dpkg::Vendor which at the same time uses this module, which would turn
  25. # into a compilation error. We can use Dpkg::Control::FieldsCore instead.
  26. use parent qw(Dpkg::Interface::Storable);
  27. use overload
  28. '%{}' => sub { ${$_[0]}->{fields} },
  29. 'eq' => sub { "$_[0]" eq "$_[1]" };
  30. =encoding utf8
  31. =head1 NAME
  32. Dpkg::Control::HashCore - parse and manipulate a block of RFC822-like fields
  33. =head1 DESCRIPTION
  34. The Dpkg::Control::Hash object is a hash-like representation of a set of
  35. RFC822-like fields. The fields names are case insensitive and are always
  36. capitalized the same when output (see field_capitalize function in
  37. Dpkg::Control::Fields).
  38. The order in which fields have been set is remembered and is used
  39. to be able to dump back the same content. The output order can also be
  40. overridden if needed.
  41. You can store arbitrary values in the hash, they will always be properly
  42. escaped in the output to conform to the syntax of control files. This is
  43. relevant mainly for multilines values: while the first line is always output
  44. unchanged directly after the field name, supplementary lines are
  45. modified. Empty lines and lines containing only dots are prefixed with
  46. " ." (space + dot) while other lines are prefixed with a single space.
  47. During parsing, trailing spaces are stripped on all lines while leading
  48. spaces are stripped only on the first line of each field.
  49. =head1 METHODS
  50. =over 4
  51. =item $c = Dpkg::Control::Hash->new(%opts)
  52. Creates a new object with the indicated options. Supported options
  53. are:
  54. =over 8
  55. =item allow_pgp
  56. Configures the parser to accept OpenPGP signatures around the control
  57. information. Value can be 0 (default) or 1.
  58. =item allow_duplicate
  59. Configures the parser to allow duplicate fields in the control
  60. information. Value can be 0 (default) or 1.
  61. =item drop_empty
  62. Defines if empty fields are dropped during the output. Value can be 0
  63. (default) or 1.
  64. =item name
  65. The user friendly name of the information stored in the object. It might
  66. be used in some error messages or warnings. A default name might be set
  67. depending on the type.
  68. =item is_pgp_signed
  69. Set by the parser (starting in dpkg 1.17.0) if it finds an OpenPGP
  70. signature around the control information. Value can be 0 (default)
  71. or 1, and undef when the option is not supported by the code (in
  72. versions older than dpkg 1.17.0).
  73. =back
  74. =cut
  75. sub new {
  76. my ($this, %opts) = @_;
  77. my $class = ref($this) || $this;
  78. # Object is a scalar reference and not a hash ref to avoid
  79. # infinite recursion due to overloading hash-derefencing
  80. my $self = \{
  81. in_order => [],
  82. out_order => [],
  83. is_pgp_signed => 0,
  84. allow_pgp => 0,
  85. allow_duplicate => 0,
  86. drop_empty => 0,
  87. };
  88. bless $self, $class;
  89. $$self->{fields} = Dpkg::Control::HashCore::Tie->new($self);
  90. # Options set by the user override default values
  91. $$self->{$_} = $opts{$_} foreach keys %opts;
  92. return $self;
  93. }
  94. # There is naturally a circular reference between the tied hash and its
  95. # containing object. Happily, the extra layer of scalar reference can
  96. # be used to detect the destruction of the object and break the loop so
  97. # that everything gets garbage-collected.
  98. sub DESTROY {
  99. my $self = shift;
  100. delete $$self->{fields};
  101. }
  102. =item $c->set_options($option, %opts)
  103. Changes the value of one or more options.
  104. =cut
  105. sub set_options {
  106. my ($self, %opts) = @_;
  107. $$self->{$_} = $opts{$_} foreach keys %opts;
  108. }
  109. =item $value = $c->get_option($option)
  110. Returns the value of the corresponding option.
  111. =cut
  112. sub get_option {
  113. my ($self, $k) = @_;
  114. return $$self->{$k};
  115. }
  116. =item $c->load($file)
  117. Parse the content of $file. Exits in case of errors. Returns true if some
  118. fields have been parsed.
  119. =item $c->parse_error($file, $fmt, ...)
  120. Prints an error message and dies on syntax parse errors.
  121. =cut
  122. sub parse_error {
  123. my ($self, $file, $msg) = (shift, shift, shift);
  124. $msg = sprintf($msg, @_) if (@_);
  125. error(g_('syntax error in %s at line %d: %s'), $file, $., $msg);
  126. }
  127. =item $c->parse($fh, $description)
  128. Parse a control file from the given filehandle. Exits in case of errors.
  129. $description is used to describe the filehandle, ideally it's a filename
  130. or a description of where the data comes from. It's used in error
  131. messages. When called multiple times, the parsed fields are accumulated.
  132. Returns true if some fields have been parsed.
  133. =cut
  134. sub parse {
  135. my ($self, $fh, $desc) = @_;
  136. my $paraborder = 1;
  137. my $parabody = 0;
  138. my $cf; # Current field
  139. my $expect_pgp_sig = 0;
  140. local $_;
  141. while (<$fh>) {
  142. chomp;
  143. next if m/^\s*$/ and $paraborder;
  144. next if (m/^#/);
  145. $paraborder = 0;
  146. if (m/^(\S+?)\s*:\s*(.*)$/) {
  147. $parabody = 1;
  148. my ($name, $value) = ($1, $2);
  149. if ($name =~ m/^-/) {
  150. $self->parse_error($desc, g_('field cannot start with a hyphen'));
  151. }
  152. if (exists $self->{$name}) {
  153. unless ($$self->{allow_duplicate}) {
  154. $self->parse_error($desc, g_('duplicate field %s found'), $name);
  155. }
  156. }
  157. $value =~ s/\s*$//;
  158. $self->{$name} = $value;
  159. $cf = $name;
  160. } elsif (m/^\s(\s*\S.*)$/) {
  161. my $line = $1;
  162. unless (defined($cf)) {
  163. $self->parse_error($desc, g_('continued value line not in field'));
  164. }
  165. $line =~ s/\s*$//;
  166. if ($line =~ /^\.+$/) {
  167. $line = substr $line, 1;
  168. }
  169. $self->{$cf} .= "\n$line";
  170. } elsif (m/^-----BEGIN PGP SIGNED MESSAGE-----[\r\t ]*$/) {
  171. $expect_pgp_sig = 1;
  172. if ($$self->{allow_pgp} and not $parabody) {
  173. # Skip OpenPGP headers
  174. while (<$fh>) {
  175. last if m/^\s*$/;
  176. }
  177. } else {
  178. $self->parse_error($desc, g_('OpenPGP signature not allowed here'));
  179. }
  180. } elsif (m/^\s*$/ ||
  181. ($expect_pgp_sig && m/^-----BEGIN PGP SIGNATURE-----[\r\t ]*$/)) {
  182. if ($expect_pgp_sig) {
  183. # Skip empty lines
  184. $_ = <$fh> while defined && m/^\s*$/;
  185. unless (length) {
  186. $self->parse_error($desc, g_('expected OpenPGP signature, ' .
  187. 'found end of file after blank line'));
  188. }
  189. chomp;
  190. unless (m/^-----BEGIN PGP SIGNATURE-----[\r\t ]*$/) {
  191. $self->parse_error($desc, g_('expected OpenPGP signature, ' .
  192. "found something else '%s'"), $_);
  193. }
  194. # Skip OpenPGP signature
  195. while (<$fh>) {
  196. chomp;
  197. last if m/^-----END PGP SIGNATURE-----[\r\t ]*$/;
  198. }
  199. unless (defined) {
  200. $self->parse_error($desc, g_('unfinished OpenPGP signature'));
  201. }
  202. # This does not mean the signature is correct, that needs to
  203. # be verified by gnupg.
  204. $$self->{is_pgp_signed} = 1;
  205. }
  206. last; # Finished parsing one block
  207. } else {
  208. $self->parse_error($desc,
  209. g_('line with unknown format (not field-colon-value)'));
  210. }
  211. }
  212. if ($expect_pgp_sig and not $$self->{is_pgp_signed}) {
  213. $self->parse_error($desc, g_('unfinished OpenPGP signature'));
  214. }
  215. return defined($cf);
  216. }
  217. =item $c->find_custom_field($name)
  218. Scan the fields and look for a user specific field whose name matches the
  219. following regex: /X[SBC]*-$name/i. Return the name of the field found or
  220. undef if nothing has been found.
  221. =cut
  222. sub find_custom_field {
  223. my ($self, $name) = @_;
  224. foreach my $key (keys %$self) {
  225. return $key if $key =~ /^X[SBC]*-\Q$name\E$/i;
  226. }
  227. return;
  228. }
  229. =item $c->get_custom_field($name)
  230. Identify a user field and retrieve its value.
  231. =cut
  232. sub get_custom_field {
  233. my ($self, $name) = @_;
  234. my $key = $self->find_custom_field($name);
  235. return $self->{$key} if defined $key;
  236. return;
  237. }
  238. =item $c->save($filename)
  239. Write the string representation of the control information to a
  240. file.
  241. =item $str = $c->output()
  242. =item "$c"
  243. Get a string representation of the control information. The fields
  244. are sorted in the order in which they have been read or set except
  245. if the order has been overridden with set_output_order().
  246. =item $c->output($fh)
  247. Print the string representation of the control information to a
  248. filehandle.
  249. =cut
  250. sub output {
  251. my ($self, $fh) = @_;
  252. my $str = '';
  253. my @keys;
  254. if (@{$$self->{out_order}}) {
  255. my $i = 1;
  256. my $imp = {};
  257. $imp->{$_} = $i++ foreach @{$$self->{out_order}};
  258. @keys = sort {
  259. if (defined $imp->{$a} && defined $imp->{$b}) {
  260. $imp->{$a} <=> $imp->{$b};
  261. } elsif (defined($imp->{$a})) {
  262. -1;
  263. } elsif (defined($imp->{$b})) {
  264. 1;
  265. } else {
  266. $a cmp $b;
  267. }
  268. } keys %$self;
  269. } else {
  270. @keys = @{$$self->{in_order}};
  271. }
  272. foreach my $key (@keys) {
  273. if (exists $self->{$key}) {
  274. my $value = $self->{$key};
  275. # Skip whitespace-only fields
  276. next if $$self->{drop_empty} and $value !~ m/\S/;
  277. # Escape data to follow control file syntax
  278. my ($first_line, @lines) = split /\n/, $value;
  279. my $kv = "$key:";
  280. $kv .= ' ' . $first_line if length $first_line;
  281. $kv .= "\n";
  282. foreach (@lines) {
  283. s/\s+$//;
  284. if (length == 0 or /^\.+$/) {
  285. $kv .= " .$_\n";
  286. } else {
  287. $kv .= " $_\n";
  288. }
  289. }
  290. # Print it out
  291. if ($fh) {
  292. print { $fh } $kv
  293. or syserr(g_('write error on control data'));
  294. }
  295. $str .= $kv if defined wantarray;
  296. }
  297. }
  298. return $str;
  299. }
  300. =item $c->set_output_order(@fields)
  301. Define the order in which fields will be displayed in the output() method.
  302. =cut
  303. sub set_output_order {
  304. my ($self, @fields) = @_;
  305. $$self->{out_order} = [@fields];
  306. }
  307. =item $c->apply_substvars($substvars)
  308. Update all fields by replacing the variables references with
  309. the corresponding value stored in the Dpkg::Substvars object.
  310. =cut
  311. sub apply_substvars {
  312. my ($self, $substvars, %opts) = @_;
  313. # Add substvars to refer to other fields
  314. $substvars->set_field_substvars($self, 'F');
  315. foreach my $f (keys %$self) {
  316. my $v = $substvars->substvars($self->{$f}, %opts);
  317. if ($v ne $self->{$f}) {
  318. my $sep;
  319. $sep = field_get_sep_type($f);
  320. # If we replaced stuff, ensure we're not breaking
  321. # a dependency field by introducing empty lines, or multiple
  322. # commas
  323. if ($sep & (FIELD_SEP_COMMA | FIELD_SEP_LINE)) {
  324. # Drop empty/whitespace-only lines
  325. $v =~ s/\n[ \t]*(\n|$)/$1/;
  326. }
  327. if ($sep & FIELD_SEP_COMMA) {
  328. $v =~ s/,[\s,]*,/,/g;
  329. $v =~ s/^\s*,\s*//;
  330. $v =~ s/\s*,\s*$//;
  331. }
  332. }
  333. $v =~ s/\$\{\}/\$/g; # XXX: what for?
  334. $self->{$f} = $v;
  335. }
  336. }
  337. package Dpkg::Control::HashCore::Tie;
  338. # This object is used to tie a hash. It implements hash-like functions by
  339. # normalizing the name of fields received in keys (using
  340. # Dpkg::Control::Fields::field_capitalize). It also stores the order in
  341. # which fields have been added in order to be able to dump them in the
  342. # same order. But the order information is stored in a parent object of
  343. # type Dpkg::Control.
  344. use strict;
  345. use warnings;
  346. use Dpkg::Control::FieldsCore;
  347. use Carp;
  348. use Tie::Hash;
  349. use parent -norequire, qw(Tie::ExtraHash);
  350. # $self->[0] is the real hash
  351. # $self->[1] is a reference to the hash contained by the parent object.
  352. # This reference bypasses the top-level scalar reference of a
  353. # Dpkg::Control::Hash, hence ensuring that reference gets DESTROYed
  354. # properly.
  355. # Dpkg::Control::Hash->new($parent)
  356. #
  357. # Return a reference to a tied hash implementing storage of simple
  358. # "field: value" mapping as used in many Debian-specific files.
  359. sub new {
  360. my $class = shift;
  361. my $hash = {};
  362. tie %{$hash}, $class, @_;
  363. return $hash;
  364. }
  365. sub TIEHASH {
  366. my ($class, $parent) = @_;
  367. croak 'parent object must be Dpkg::Control::Hash'
  368. if not $parent->isa('Dpkg::Control::HashCore') and
  369. not $parent->isa('Dpkg::Control::Hash');
  370. return bless [ {}, $$parent ], $class;
  371. }
  372. sub FETCH {
  373. my ($self, $key) = @_;
  374. $key = lc($key);
  375. return $self->[0]->{$key} if exists $self->[0]->{$key};
  376. return;
  377. }
  378. sub STORE {
  379. my ($self, $key, $value) = @_;
  380. my $parent = $self->[1];
  381. $key = lc($key);
  382. if (not exists $self->[0]->{$key}) {
  383. push @{$parent->{in_order}}, field_capitalize($key);
  384. }
  385. $self->[0]->{$key} = $value;
  386. }
  387. sub EXISTS {
  388. my ($self, $key) = @_;
  389. $key = lc($key);
  390. return exists $self->[0]->{$key};
  391. }
  392. sub DELETE {
  393. my ($self, $key) = @_;
  394. my $parent = $self->[1];
  395. my $in_order = $parent->{in_order};
  396. $key = lc($key);
  397. if (exists $self->[0]->{$key}) {
  398. delete $self->[0]->{$key};
  399. @{$in_order} = grep { lc ne $key } @{$in_order};
  400. return 1;
  401. } else {
  402. return 0;
  403. }
  404. }
  405. sub FIRSTKEY {
  406. my $self = shift;
  407. my $parent = $self->[1];
  408. foreach my $key (@{$parent->{in_order}}) {
  409. return $key if exists $self->[0]->{lc $key};
  410. }
  411. }
  412. sub NEXTKEY {
  413. my ($self, $last) = @_;
  414. my $parent = $self->[1];
  415. my $found = 0;
  416. foreach my $key (@{$parent->{in_order}}) {
  417. if ($found) {
  418. return $key if exists $self->[0]->{lc $key};
  419. } else {
  420. $found = 1 if $key eq $last;
  421. }
  422. }
  423. return;
  424. }
  425. 1;
  426. =back
  427. =head1 CHANGES
  428. =head2 Version 1.01 (dpkg 1.17.2)
  429. New method: $c->parse_error().
  430. =head2 Version 1.00 (dpkg 1.17.0)
  431. Mark the module as public.
  432. =cut
  433. 1;