HashCore.pm 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. # Copyright © 2007-2009 Raphaël Hertzog <hertzog@debian.org>
  2. # Copyright © 2009, 2012-2015 Guillem Jover <guillem@debian.org>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  16. package Dpkg::Control::HashCore;
  17. use strict;
  18. use warnings;
  19. our $VERSION = '1.01';
  20. use Dpkg::Gettext;
  21. use Dpkg::ErrorHandling;
  22. use Dpkg::Control::FieldsCore;
  23. # This module cannot use Dpkg::Control::Fields, because that one makes use
  24. # of Dpkg::Vendor which at the same time uses this module, which would turn
  25. # into a compilation error. We can use Dpkg::Control::FieldsCore instead.
  26. use parent qw(Dpkg::Interface::Storable);
  27. use overload
  28. '%{}' => sub { ${$_[0]}->{fields} },
  29. 'eq' => sub { "$_[0]" eq "$_[1]" };
  30. =encoding utf8
  31. =head1 NAME
  32. Dpkg::Control::HashCore - parse and manipulate a block of RFC822-like fields
  33. =head1 DESCRIPTION
  34. The Dpkg::Control::Hash object is a hash-like representation of a set of
  35. RFC822-like fields. The fields names are case insensitive and are always
  36. capitalized the same when output (see field_capitalize function in
  37. Dpkg::Control::Fields).
  38. The order in which fields have been set is remembered and is used
  39. to be able to dump back the same content. The output order can also be
  40. overridden if needed.
  41. You can store arbitrary values in the hash, they will always be properly
  42. escaped in the output to conform to the syntax of control files. This is
  43. relevant mainly for multilines values: while the first line is always output
  44. unchanged directly after the field name, supplementary lines are
  45. modified. Empty lines and lines containing only dots are prefixed with
  46. " ." (space + dot) while other lines are prefixed with a single space.
  47. During parsing, trailing spaces are stripped on all lines while leading
  48. spaces are stripped only on the first line of each field.
  49. =head1 FUNCTIONS
  50. =over 4
  51. =item my $c = Dpkg::Control::Hash->new(%opts)
  52. Creates a new object with the indicated options. Supported options
  53. are:
  54. =over 8
  55. =item allow_pgp
  56. Configures the parser to accept OpenPGP signatures around the control
  57. information. Value can be 0 (default) or 1.
  58. =item allow_duplicate
  59. Configures the parser to allow duplicate fields in the control
  60. information. Value can be 0 (default) or 1.
  61. =item drop_empty
  62. Defines if empty fields are dropped during the output. Value can be 0
  63. (default) or 1.
  64. =item name
  65. The user friendly name of the information stored in the object. It might
  66. be used in some error messages or warnings. A default name might be set
  67. depending on the type.
  68. =item is_pgp_signed
  69. Set by the parser (starting in dpkg 1.17.0) if it finds an OpenPGP
  70. signature around the control information. Value can be 0 (default)
  71. or 1, and undef when the option is not supported by the code (in
  72. versions older than dpkg 1.17.0).
  73. =back
  74. =cut
  75. sub new {
  76. my ($this, %opts) = @_;
  77. my $class = ref($this) || $this;
  78. # Object is a scalar reference and not a hash ref to avoid
  79. # infinite recursion due to overloading hash-derefencing
  80. my $self = \{
  81. in_order => [],
  82. out_order => [],
  83. is_pgp_signed => 0,
  84. allow_pgp => 0,
  85. allow_duplicate => 0,
  86. drop_empty => 0,
  87. };
  88. bless $self, $class;
  89. $$self->{fields} = Dpkg::Control::HashCore::Tie->new($self);
  90. # Options set by the user override default values
  91. $$self->{$_} = $opts{$_} foreach keys %opts;
  92. return $self;
  93. }
  94. # There is naturally a circular reference between the tied hash and its
  95. # containing object. Happily, the extra layer of scalar reference can
  96. # be used to detect the destruction of the object and break the loop so
  97. # that everything gets garbage-collected.
  98. sub DESTROY {
  99. my $self = shift;
  100. delete $$self->{fields};
  101. }
  102. =item $c->set_options($option, %opts)
  103. Changes the value of one or more options.
  104. =cut
  105. sub set_options {
  106. my ($self, %opts) = @_;
  107. $$self->{$_} = $opts{$_} foreach keys %opts;
  108. }
  109. =item my $value = $c->get_option($option)
  110. Returns the value of the corresponding option.
  111. =cut
  112. sub get_option {
  113. my ($self, $k) = @_;
  114. return $$self->{$k};
  115. }
  116. =item $c->load($file)
  117. Parse the content of $file. Exits in case of errors. Returns true if some
  118. fields have been parsed.
  119. =item $c->parse_error($file, $fmt, ...)
  120. Prints an error message and dies on syntax parse errors.
  121. =cut
  122. sub parse_error {
  123. my ($self, $file, $msg) = (shift, shift, shift);
  124. $msg = sprintf($msg, @_) if (@_);
  125. error(g_('syntax error in %s at line %d: %s'), $file, $., $msg);
  126. }
  127. =item $c->parse($fh, $description)
  128. Parse a control file from the given filehandle. Exits in case of errors.
  129. $description is used to describe the filehandle, ideally it's a filename
  130. or a description of where the data comes from. It's used in error
  131. messages. Returns true if some fields have been parsed.
  132. =cut
  133. sub parse {
  134. my ($self, $fh, $desc) = @_;
  135. my $paraborder = 1;
  136. my $parabody = 0;
  137. my $cf; # Current field
  138. my $expect_pgp_sig = 0;
  139. local $_;
  140. while (<$fh>) {
  141. s/\s*\n$//;
  142. next if length == 0 and $paraborder;
  143. next if (m/^#/);
  144. $paraborder = 0;
  145. if (m/^(\S+?)\s*:\s*(.*)$/) {
  146. $parabody = 1;
  147. my ($name, $value) = ($1, $2);
  148. if ($name =~ m/^-/) {
  149. $self->parse_error($desc, g_('field cannot start with a hyphen'));
  150. }
  151. if (exists $self->{$name}) {
  152. unless ($$self->{allow_duplicate}) {
  153. $self->parse_error($desc, g_('duplicate field %s found'), $name);
  154. }
  155. }
  156. $self->{$name} = $value;
  157. $cf = $name;
  158. } elsif (m/^\s(\s*\S.*)$/) {
  159. my $line = $1;
  160. unless (defined($cf)) {
  161. $self->parse_error($desc, g_('continued value line not in field'));
  162. }
  163. if ($line =~ /^\.+$/) {
  164. $line = substr $line, 1;
  165. }
  166. $self->{$cf} .= "\n$line";
  167. } elsif (m/^-----BEGIN PGP SIGNED MESSAGE-----$/) {
  168. $expect_pgp_sig = 1;
  169. if ($$self->{allow_pgp} and not $parabody) {
  170. # Skip OpenPGP headers
  171. while (<$fh>) {
  172. last if m/^\s*$/;
  173. }
  174. } else {
  175. $self->parse_error($desc, g_('OpenPGP signature not allowed here'));
  176. }
  177. } elsif (length == 0 || ($expect_pgp_sig && m/^-----BEGIN PGP SIGNATURE-----$/)) {
  178. if ($expect_pgp_sig) {
  179. # Skip empty lines
  180. $_ = <$fh> while defined && m/^\s*$/;
  181. unless (length) {
  182. $self->parse_error($desc, g_('expected OpenPGP signature, ' .
  183. 'found end of file after blank line'));
  184. }
  185. s/\s*\n$//;
  186. unless (m/^-----BEGIN PGP SIGNATURE-----$/) {
  187. $self->parse_error($desc, g_('expected OpenPGP signature, ' .
  188. "found something else '%s'"), $_);
  189. }
  190. # Skip OpenPGP signature
  191. while (<$fh>) {
  192. s/\s*\n$//;
  193. last if m/^-----END PGP SIGNATURE-----$/;
  194. }
  195. unless (defined) {
  196. $self->parse_error($desc, g_('unfinished OpenPGP signature'));
  197. }
  198. # This does not mean the signature is correct, that needs to
  199. # be verified by gnupg.
  200. $$self->{is_pgp_signed} = 1;
  201. }
  202. last; # Finished parsing one block
  203. } else {
  204. $self->parse_error($desc,
  205. g_('line with unknown format (not field-colon-value)'));
  206. }
  207. }
  208. if ($expect_pgp_sig and not $$self->{is_pgp_signed}) {
  209. $self->parse_error($desc, g_('unfinished OpenPGP signature'));
  210. }
  211. return defined($cf);
  212. }
  213. =item $c->find_custom_field($name)
  214. Scan the fields and look for a user specific field whose name matches the
  215. following regex: /X[SBC]*-$name/i. Return the name of the field found or
  216. undef if nothing has been found.
  217. =cut
  218. sub find_custom_field {
  219. my ($self, $name) = @_;
  220. foreach my $key (keys %$self) {
  221. return $key if $key =~ /^X[SBC]*-\Q$name\E$/i;
  222. }
  223. return;
  224. }
  225. =item $c->get_custom_field($name)
  226. Identify a user field and retrieve its value.
  227. =cut
  228. sub get_custom_field {
  229. my ($self, $name) = @_;
  230. my $key = $self->find_custom_field($name);
  231. return $self->{$key} if defined $key;
  232. return;
  233. }
  234. =item $c->save($filename)
  235. Write the string representation of the control information to a
  236. file.
  237. =item my $str = $c->output()
  238. =item "$c"
  239. Get a string representation of the control information. The fields
  240. are sorted in the order in which they have been read or set except
  241. if the order has been overridden with set_output_order().
  242. =item $c->output($fh)
  243. Print the string representation of the control information to a
  244. filehandle.
  245. =cut
  246. sub output {
  247. my ($self, $fh) = @_;
  248. my $str = '';
  249. my @keys;
  250. if (@{$$self->{out_order}}) {
  251. my $i = 1;
  252. my $imp = {};
  253. $imp->{$_} = $i++ foreach @{$$self->{out_order}};
  254. @keys = sort {
  255. if (defined $imp->{$a} && defined $imp->{$b}) {
  256. $imp->{$a} <=> $imp->{$b};
  257. } elsif (defined($imp->{$a})) {
  258. -1;
  259. } elsif (defined($imp->{$b})) {
  260. 1;
  261. } else {
  262. $a cmp $b;
  263. }
  264. } keys %$self;
  265. } else {
  266. @keys = @{$$self->{in_order}};
  267. }
  268. foreach my $key (@keys) {
  269. if (exists $self->{$key}) {
  270. my $value = $self->{$key};
  271. # Skip whitespace-only fields
  272. next if $$self->{drop_empty} and $value !~ m/\S/;
  273. # Escape data to follow control file syntax
  274. my ($first_line, @lines) = split /\n/, $value;
  275. my $kv = "$key:";
  276. $kv .= ' ' . $first_line if length $first_line;
  277. $kv .= "\n";
  278. foreach (@lines) {
  279. s/\s+$//;
  280. if (length == 0 or /^\.+$/) {
  281. $kv .= " .$_\n";
  282. } else {
  283. $kv .= " $_\n";
  284. }
  285. }
  286. # Print it out
  287. if ($fh) {
  288. print { $fh } $kv
  289. or syserr(g_('write error on control data'));
  290. }
  291. $str .= $kv if defined wantarray;
  292. }
  293. }
  294. return $str;
  295. }
  296. =item $c->set_output_order(@fields)
  297. Define the order in which fields will be displayed in the output() method.
  298. =cut
  299. sub set_output_order {
  300. my ($self, @fields) = @_;
  301. $$self->{out_order} = [@fields];
  302. }
  303. =item $c->apply_substvars($substvars)
  304. Update all fields by replacing the variables references with
  305. the corresponding value stored in the Dpkg::Substvars object.
  306. =cut
  307. sub apply_substvars {
  308. my ($self, $substvars, %opts) = @_;
  309. # Add substvars to refer to other fields
  310. foreach my $f (keys %$self) {
  311. $substvars->set_as_auto("F:$f", $self->{$f});
  312. }
  313. foreach my $f (keys %$self) {
  314. my $v = $substvars->substvars($self->{$f}, %opts);
  315. if ($v ne $self->{$f}) {
  316. my $sep;
  317. $sep = field_get_sep_type($f);
  318. # If we replaced stuff, ensure we're not breaking
  319. # a dependency field by introducing empty lines, or multiple
  320. # commas
  321. if ($sep & (FIELD_SEP_COMMA | FIELD_SEP_LINE)) {
  322. # Drop empty/whitespace-only lines
  323. $v =~ s/\n[ \t]*(\n|$)/$1/;
  324. }
  325. if ($sep & FIELD_SEP_COMMA) {
  326. $v =~ s/,[\s,]*,/,/g;
  327. $v =~ s/^\s*,\s*//;
  328. $v =~ s/\s*,\s*$//;
  329. }
  330. }
  331. $v =~ s/\$\{\}/\$/g; # XXX: what for?
  332. $self->{$f} = $v;
  333. }
  334. }
  335. package Dpkg::Control::HashCore::Tie;
  336. # This object is used to tie a hash. It implements hash-like functions by
  337. # normalizing the name of fields received in keys (using
  338. # Dpkg::Control::Fields::field_capitalize). It also stores the order in
  339. # which fields have been added in order to be able to dump them in the
  340. # same order. But the order information is stored in a parent object of
  341. # type Dpkg::Control.
  342. use Dpkg::Checksums;
  343. use Dpkg::Control::FieldsCore;
  344. use Carp;
  345. use Tie::Hash;
  346. use parent -norequire, qw(Tie::ExtraHash);
  347. # $self->[0] is the real hash
  348. # $self->[1] is a reference to the hash contained by the parent object.
  349. # This reference bypasses the top-level scalar reference of a
  350. # Dpkg::Control::Hash, hence ensuring that that reference gets DESTROYed
  351. # properly.
  352. # Dpkg::Control::Hash->new($parent)
  353. #
  354. # Return a reference to a tied hash implementing storage of simple
  355. # "field: value" mapping as used in many Debian-specific files.
  356. sub new {
  357. my $class = shift;
  358. my $hash = {};
  359. tie %{$hash}, $class, @_;
  360. return $hash;
  361. }
  362. sub TIEHASH {
  363. my ($class, $parent) = @_;
  364. croak 'parent object must be Dpkg::Control::Hash'
  365. if not $parent->isa('Dpkg::Control::HashCore') and
  366. not $parent->isa('Dpkg::Control::Hash');
  367. return bless [ {}, $$parent ], $class;
  368. }
  369. sub FETCH {
  370. my ($self, $key) = @_;
  371. $key = lc($key);
  372. return $self->[0]->{$key} if exists $self->[0]->{$key};
  373. return;
  374. }
  375. sub STORE {
  376. my ($self, $key, $value) = @_;
  377. my $parent = $self->[1];
  378. $key = lc($key);
  379. if (not exists $self->[0]->{$key}) {
  380. push @{$parent->{in_order}}, field_capitalize($key);
  381. }
  382. $self->[0]->{$key} = $value;
  383. }
  384. sub EXISTS {
  385. my ($self, $key) = @_;
  386. $key = lc($key);
  387. return exists $self->[0]->{$key};
  388. }
  389. sub DELETE {
  390. my ($self, $key) = @_;
  391. my $parent = $self->[1];
  392. my $in_order = $parent->{in_order};
  393. $key = lc($key);
  394. if (exists $self->[0]->{$key}) {
  395. delete $self->[0]->{$key};
  396. @{$in_order} = grep { lc ne $key } @{$in_order};
  397. return 1;
  398. } else {
  399. return 0;
  400. }
  401. }
  402. sub FIRSTKEY {
  403. my $self = shift;
  404. my $parent = $self->[1];
  405. foreach my $key (@{$parent->{in_order}}) {
  406. return $key if exists $self->[0]->{lc $key};
  407. }
  408. }
  409. sub NEXTKEY {
  410. my ($self, $last) = @_;
  411. my $parent = $self->[1];
  412. my $found = 0;
  413. foreach my $key (@{$parent->{in_order}}) {
  414. if ($found) {
  415. return $key if exists $self->[0]->{lc $key};
  416. } else {
  417. $found = 1 if $key eq $last;
  418. }
  419. }
  420. return;
  421. }
  422. 1;
  423. =back
  424. =head1 CHANGES
  425. =head2 Version 1.01
  426. New method: $c->parse_error().
  427. =head2 Version 1.00
  428. Mark the module as public.
  429. =head1 AUTHOR
  430. Raphaël Hertzog <hertzog@debian.org>.
  431. =cut
  432. 1;