Util.pm 20 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is the Bugzilla Bug Tracking System.
#
# The Initial Developer of the Original Code is Netscape Communications
# Corporation. Portions created by Netscape are
# Copyright (C) 1998 Netscape Communications Corporation. All
# Rights Reserved.
#
# Contributor(s): Terry Weissman <terry@mozilla.org>
#                 Dan Mosedale <dmose@mozilla.org>
22
#                 Jacob Steenhagen <jake@bugzilla.org>
23 24
#                 Bradley Baetz <bbaetz@student.usyd.edu.au>
#                 Christopher Aillon <christopher@aillon.com>
25
#                 Max Kanat-Alexander <mkanat@bugzilla.org>
26
#                 Frédéric Buclin <LpSolit@gmail.com>
27 28 29

package Bugzilla::Util;

30
use strict;
31

32 33
use base qw(Exporter);
@Bugzilla::Util::EXPORT = qw(is_tainted trick_taint detaint_natural
34
                             detaint_signed
35
                             html_quote url_quote value_quote xml_quote
36
                             css_class_quote
37
                             i_am_cgi
38
                             lsearch max min
39 40
                             diff_arrays diff_strings
                             trim wrap_comment find_wrap_point
41
                             perform_substs
42
                             format_time format_time_decimal validate_date
43
                             file_mod_time is_7bit_clean
44
                             bz_crypt generate_random_password
45
                             validate_email_syntax clean_text);
46

47
use Bugzilla::Config;
48
use Bugzilla::Constants;
49

50 51
use Date::Parse;
use Date::Format;
52
use Text::Wrap;
53 54 55 56 57 58 59 60 61 62 63 64

# This is from the perlsec page, slightly modified to remove a warning.
# From that page:
#      This function makes use of the fact that the presence of
#      tainted data anywhere within an expression renders the
#      entire expression tainted.
# The join() and the "kill 0" must stay in the same statement: the trick
# relies on kill refusing to run (and dying inside the eval) when that
# statement has touched tainted data.
#
# Params:  any list of values to inspect.
# Returns: true if the eval block died, i.e. some argument is tainted;
#          false otherwise.
sub is_tainted {
    # eval resets $@ even when it succeeds; localize it so that calling
    # this helper never wipes out an error the caller has yet to inspect.
    local $@;
    return not eval { my $foo = join('',@_), kill 0; 1; };
}

# Untaints the caller's variable in place by passing its whole value
# through a regex capture.
#
# Params:  $_[0] - the scalar to untaint (modified in place).
# Returns: true if the value is defined afterwards.
# Dies:    with a full stack trace (Carp::confess) when $_[0] is undef.
sub trick_taint {
    require Carp;
    if (!defined $_[0]) {
        Carp::confess("Undef to trick_taint");
    }
    # /s lets "." match newlines, so the capture covers the entire string.
    my ($untainted) = ($_[0] =~ /^(.*)$/s);
    $_[0] = $untainted;
    return defined($_[0]);
}

# Detaints a natural number held in the caller's variable.
#
# Params:  $_[0] - the scalar to check (modified in place: it becomes the
#                  captured digit string on success, undef on failure).
# Returns: true if the value consisted solely of ASCII digits.
sub detaint_natural {
    my $match;
    # [0-9] rather than \d: \d also matches non-ASCII Unicode digits, which
    # must not pass as a number here. The defined() guard keeps an undef
    # argument from being fed to the regex.
    if (defined $_[0] && $_[0] =~ /^([0-9]+)$/) {
        $match = $1;
    }
    $_[0] = $match;
    return (defined($_[0]));
}

78
# Detaints a signed integer held in the caller's variable.
#
# Params:  $_[0] - the scalar to check (modified in place: it becomes the
#                  normalized integer string on success, undef on failure).
# Returns: true if the value was an optional sign followed by ASCII digits.
sub detaint_signed {
    my $match;
    # [0-9] rather than \d: \d also matches non-ASCII Unicode digits, which
    # must not pass as a number here.
    if (defined $_[0] && $_[0] =~ /^([-+]?)([0-9]+)$/) {
        # Keep a leading minus sign, remove any leading plus sign.
        $match = ($1 eq '-' ? '-' : '') . $2;
    }
    $_[0] = $match;
    return (defined($_[0]));
}

88 89 90 91 92 93 94 95 96
# Returns a copy of the argument with &, <, > and " replaced by their HTML
# entities. The ampersand is handled first so that the entities produced by
# the later substitutions are not escaped a second time.
sub html_quote {
    my $text = shift;
    for ($text) {
        s/\&/\&amp;/g;
        s/</\&lt;/g;
        s/>/\&gt;/g;
        s/\"/\&quot;/g;
    }
    return $text;
}

97
# This originally came from CGI.pm, by Lincoln D. Stein
98 99 100 101 102 103
# Percent-encodes a string for inclusion in a URL.
#
# Every character other than ASCII letters, digits, "_", "-" and "." is
# replaced by %XX, where XX is the upper-case hex value of the byte.
#
# Params:  $toencode - the string to encode.
# Returns: the encoded copy (undef is passed through unchanged).
sub url_quote {
    my ($toencode) = (@_);
    return $toencode unless defined $toencode;
    # A character above U+00FF cannot be written as a single %XX escape
    # (ord() would yield e.g. "%263A"), so such strings are converted to
    # their UTF-8 byte sequence first. Strings without wide characters are
    # encoded byte for byte.
    utf8::encode($toencode) if $toencode =~ /[^\x00-\xFF]/;
    $toencode =~ s/([^a-zA-Z0-9_\-.])/sprintf("%%%02X", ord($1))/eg;
    return $toencode;
}

104 105 106 107 108 109 110
# Quotes a string so that it can be used as a CSS class name.
#
# Spaces become underscores; every other character outside ASCII letters,
# digits, "_", "-" and "." becomes a hexadecimal character reference.
#
# Params:  $toencode - the string to quote.
# Returns: the quoted copy.
sub css_class_quote {
    my ($toencode) = (@_);
    $toencode =~ s/ /_/g;
    # Only the hex digits are upper-cased. The "x" must stay lower case:
    # "&#X2F;" is not a valid character reference in XML/XHTML.
    $toencode =~ s/([^a-zA-Z0-9_\-.])/sprintf("&#x%X;", ord($1))/eg;
    return $toencode;
}

111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
# Returns a copy of the argument escaped for use inside an HTML attribute
# value: &, <, > and " become entities and every line break becomes &#013;.
sub value_quote {
    my $text = shift;
    for ($text) {
        s/\&/\&amp;/g;
        s/</\&lt;/g;
        s/>/\&gt;/g;
        s/\"/\&quot;/g;
        # See bug http://bugzilla.mozilla.org/show_bug.cgi?id=4928 for an
        # explanation of why bugzilla does this linebreak substitution.
        # This caused form submission problems in mozilla (bug 22983, 32000).
        # The two-character sequences are replaced before the single
        # characters so that each pair yields one entity, not two.
        s/\r\n/\&#013;/g;
        s/\n\r/\&#013;/g;
        s/\r/\&#013;/g;
        s/\n/\&#013;/g;
    }
    return $text;
}

127 128 129 130 131 132 133 134 135 136
# Returns a copy of the argument with the five XML special characters
# (&, <, >, " and ') replaced by their entities.
sub xml_quote {
    my ($text) = @_;
    # Applied in order; the ampersand rule must stay first so that the
    # entities produced by the later rules are not escaped again.
    my @escapes = (
        [ qr/\&/, '&amp;'  ],
        [ qr/</,  '&lt;'   ],
        [ qr/>/,  '&gt;'   ],
        [ qr/\"/, '&quot;' ],
        [ qr/\'/, '&apos;' ],
    );
    foreach my $rule (@escapes) {
        my ($pattern, $entity) = @$rule;
        $text =~ s/$pattern/$entity/g;
    }
    return $text;
}

137 138 139 140 141 142 143
# Reverses URL encoding: "+" becomes a space and every %XX escape becomes
# the byte with that hexadecimal value.
#
# Params:  $todecode - the encoded string.
# Returns: the decoded copy.
sub url_decode {
    my ($todecode) = (@_);
    $todecode =~ tr/+/ /;       # pluses become spaces
    # chr() rather than pack("c", ...): "c" is a *signed* char, so values
    # above 0x7F are out of range for it and only come out right by
    # wrapping around (with a warning when warnings are enabled).
    $todecode =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge;
    return $todecode;
}

144
# Returns 1 when the SERVER_SOFTWARE environment variable exists, else 0.
# SERVER_SOFTWARE is used as the marker because the CGI spec requires it to
# be defined for all requests.
sub i_am_cgi {
    return 1 if exists $ENV{'SERVER_SOFTWARE'};
    return 0;
}

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
# Returns the zero-based position of the first element of @$list that is
# string-equal to $item, or -1 if there is none.
sub lsearch {
    my ($list, $item) = @_;
    my $position = -1;
    foreach my $candidate (@$list) {
        $position++;
        return $position if $candidate eq $item;
    }
    return -1;
}

# Returns the numerically largest argument (undef for an empty list).
sub max {
    my ($largest, @others) = @_;
    for my $candidate (@others) {
        $largest = $candidate if $candidate > $largest;
    }
    return $largest;
}

# Returns the numerically smallest argument (undef for an empty list).
sub min {
    my ($smallest, @others) = @_;
    for my $candidate (@others) {
        $smallest = $candidate if $candidate < $smallest;
    }
    return $smallest;
}

178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
# Computes what it takes to get from one array to another.
#
# Params:  $old_ref - reference to the array being changed from.
#          $new_ref - reference to the array being changed to.
# Returns: a list of two array references: the items removed from the old
#          array and the items added in the new one, each in its original
#          order.
#
# Items are compared as strings and duplicates are counted, so an item that
# occurs twice in one array and once in the other is reported once. Empty
# and undefined items are ignored, never reported. Neither input array is
# modified.
sub diff_arrays {
    my ($old_ref, $new_ref) = @_;

    my @old = grep { defined($_) && $_ ne '' } @$old_ref;
    my @new = grep { defined($_) && $_ ne '' } @$new_ref;

    # Count the occurrences on each side once, instead of rescanning the
    # other array for every item.
    my (%unmatched_new, %unmatched_old);
    $unmatched_new{$_}++ foreach @new;
    $unmatched_old{$_}++ foreach @old;

    # An old item is "removed" unless an occurrence of it is still
    # available in @new; earlier occurrences are paired up first.
    my @removed;
    foreach my $item (@old) {
        if ($unmatched_new{$item}) {
            $unmatched_new{$item}--;
        }
        else {
            push(@removed, $item);
        }
    }

    # Symmetrically, a new item is "added" unless it pairs with an old one.
    my @added;
    foreach my $item (@new) {
        if ($unmatched_old{$item}) {
            $unmatched_old{$item}--;
        }
        else {
            push(@added, $item);
        }
    }

    return (\@removed, \@added);
}

201 202
# Returns a copy of the argument with leading and trailing whitespace
# removed. False values (undef, "" and "0") are returned unchanged.
sub trim {
    my ($text) = @_;
    return $text unless $text;
    $text =~ s/^\s+//g;
    $text =~ s/\s+$//g;
    return $text;
}

210 211 212 213 214 215 216 217 218
# Compares two strings holding comma- or whitespace-separated items.
#
# Returns a two-element list: the removed items and the added items, each
# joined with ", ", as computed by diff_arrays().
sub diff_strings {
    my ($oldstr, $newstr) = @_;

    # Turn each string into an array of its items: collapse every run of
    # separators into one space, then split on whitespace.
    my ($old_items, $new_items) = map {
        my $flat = $_;
        $flat =~ s/[\s,]+/ /g;
        [ split(" ", $flat) ];
    } ($oldstr, $newstr);

    my ($rem, $add) = diff_arrays($old_items, $new_items);

    return (join(", ", @$rem), join(", ", @$add));
}

227
# Wraps a comment to COMMENT_COLS columns, one input line at a time.
# Lines starting with ">" are kept as they are. Every output line ends in
# "\n", whatever line ending the input used.
sub wrap_comment {
    my ($comment) = @_;

    # Use 'local', as recommended by Text::Wrap's perldoc.
    local $Text::Wrap::columns = COMMENT_COLS;
    # Make words that are longer than COMMENT_COLS not wrap.
    local $Text::Wrap::huge    = 'overflow';
    # Don't mess with tabs.
    local $Text::Wrap::unexpand = 0;

    my @pieces;
    foreach my $line (split(/\r\n|\r|\n/, $comment)) {
        # If the line starts with ">", don't wrap it. Otherwise, wrap.
        my $is_quote = ($line =~ qr/^>/);
        push @pieces, ($is_quote ? $line : wrap('', '', $line)) . "\n";
    }

    return join('', @pieces);
}

251
# Finds the position at which $string should be split so that the first
# part is at most about $maxpos characters long.
#
# Returns 0 for a false string and the full length when the string is
# shorter than $maxpos. Otherwise it searches backwards from $maxpos for a
# comma, then a space, then a hyphen, and falls back to $maxpos itself.
sub find_wrap_point {
    my ($string, $maxpos) = @_;
    return 0 unless $string;

    my $length = length($string);
    return $length if $length < $maxpos;

    # A comma is preferred over a space; for both the split position is the
    # separator itself.
    foreach my $separator (",", " ") {
        my $found = rindex($string, $separator, $maxpos);
        return $found if $found >= 0;
    }

    # A hyphen stays on the left side, so split just after it. With no
    # separator at all, just truncate at $maxpos.
    my $hyphen = rindex($string, "-", $maxpos);
    return $hyphen < 0 ? $maxpos : $hyphen + 1;
}

270 271 272 273 274 275
# Replaces every %name% placeholder (name being lower-case letters only) in
# $str. The value comes from the $substs hash when that key is defined
# there, and from Param(name) otherwise.
sub perform_substs {
    my ($str, $substs) = (@_);
    my $value_for = sub {
        my ($name) = @_;
        return defined $substs->{$name} ? $substs->{$name} : Param($name);
    };
    $str =~ s/%([a-z]*)%/$value_for->($1)/eg;
    return $str;
}

276
# Formats a date/time string for display.
#
# Params:  $date   - a date string that str2time() can parse.
#          $format - optional time2str() format. A trailing %Z (or %z)
#                    means the timezone should be shown.
# Returns: the formatted date, followed by Param('timezone') when the
#          timezone is to be shown, or '' if $date could not be parsed.
sub format_time {
    my ($date, $format) = @_;

    my $show_timezone;
    if (defined($format)) {
        # Search for %Z or %z, meaning we want the timezone to be displayed.
        # Till bug 182238 gets fixed, we assume Param('timezone') is used.
        $show_timezone = ($format =~ s/\s?%Z$//i);
    }
    else {
        # $format is undefined, so try to guess the correct date format.
        # Default date format. See Date::Format for other formats available.
        $format = "%Y-%m-%d %R";
        # Seconds are only displayed when the input carried them.
        if ($date =~ m/^(\d{4})[-\.](\d{2})[-\.](\d{2}) (\d{2}):(\d{2})(:(\d{2}))?$/
            && defined $7)
        {
            $format = "%Y-%m-%d %T";
        }
        # By default, we want the timezone to be displayed.
        $show_timezone = 1;
    }

    # str2time($date) is undefined if $date has an invalid date format.
    my $time = str2time($date);

    if (defined $time) {
        $date = time2str($format, $time);
        # Param() is the accessor the rest of this package uses; going
        # through &::Param only works when package main happens to have it.
        $date .= " " . Param('timezone') if $show_timezone;
    }
    else {
        # Don't let invalid (time) strings to be passed to templates!
        $date = '';
    }
    return trim($date);
}

316 317 318 319 320 321 322 323 324 325 326 327
# Formats a number with two decimal places, or with one decimal place when
# the two-decimal form would end in a zero.
sub format_time_decimal {
    my ($time) = (@_);

    my $two_places = sprintf("%.2f", $time);
    return $two_places unless $two_places =~ /0\Z/;

    return sprintf("%.1f", $time);
}

328
# Returns the modification time of $filename as reported by stat(), or
# undef when stat() fails.
sub file_mod_time {
    my ($filename) = (@_);
    # Element 9 of the list returned by stat() is the mtime.
    my $mtime = (stat($filename))[9];
    return $mtime;
}

336
# Returns the crypt()ed form of $password, using a freshly generated
# random salt.
sub bz_crypt {
    my ($password) = @_;

    # The list of characters that can appear in a salt.  Salts and hashes
    # are both encoded as a sequence of characters from a set containing
    # 64 characters, each one of which represents 6 bits of the salt/hash.
    my @saltchars = (0..9, 'A'..'Z', 'a'..'z', '.', '/');

    # An 8 character (48 bit) salt, for maximum security on systems whose
    # crypt uses MD5.  Systems with older versions of crypt will just use
    # the first two characters of the salt.
    my $salt = join('', map { $saltchars[rand(64)] } 1 .. 8);

    return crypt($password, $salt);
}

361 362 363 364 365
# Returns a random string of digits and ASCII letters. The length is the
# first argument, or 10 when that is missing or false.
sub generate_random_password {
    my $size = shift || 10; # default to 10 chars if nothing specified
    my @alphabet = ('0'..'9', 'a'..'z', 'A'..'Z');
    my $password = "";
    $password .= $alphabet[rand 62] for (1..$size);
    return $password;
}

366 367
# Returns 1 if $addr matches the 'emailregexp' parameter and contains none
# of the characters rejected below, else 0.
sub validate_email_syntax {
    my ($addr) = @_;
    my $match = Param('emailregexp');
    return 0 if $addr !~ /$match/;
    # Rejected even when the configured pattern would accept them.
    return 0 if $addr =~ /[\\\(\)<>&,;:"\[\] \t\r\n]/;
    return 1;
}

373 374
# Returns 1 if $date parses and, once re-formatted as year-month-day,
# reads the same as the input; else returns 0.
sub validate_date {
    my ($date) = @_;

    # $ts is undefined if the parser fails.
    my $ts = str2time($date);
    return 0 unless $ts;

    my $date2 = time2str("%Y-%m-%d", $ts);

    # Drop leading zeros from the month and day on both sides so that
    # "2005-01-09" and "2005-1-9" compare equal.
    foreach my $candidate ($date, $date2) {
        $candidate =~ s/(\d+)-0*(\d+?)-0*(\d+?)/$1-$2-$3/;
    }

    return ($date eq $date2) ? 1 : 0;
}

389 390 391 392
# Returns true if the argument holds nothing but the characters 0x20
# through 0x7E, line feed (0x0A) and carriage return (0x0D).
sub is_7bit_clean {
    my ($text) = @_;
    return !($text =~ /[^\x20-\x7E\x0A\x0D]/);
}

393 394
# Returns a copy of the argument in which every run of control characters
# (0x00-0x1F and 0x7F) has become a single space; the result is passed
# through trim().
sub clean_text {
    (my $cleaned = shift) =~ s/[\x00-\x1F\x7F]+/ /g;   # change control characters into a space
    return trim($cleaned);
}

399 400 401 402
1;

__END__

403 404 405 406 407 408 409 410 411 412 413 414
=head1 NAME

Bugzilla::Util - Generic utility functions for bugzilla

=head1 SYNOPSIS

  use Bugzilla::Util;

  # Functions for dealing with variable tainting
  $rv = is_tainted($var);
  trick_taint($var);
  detaint_natural($var);
415
  detaint_signed($var);
416 417 418

  # Functions for quoting
  html_quote($var);
419
  url_quote($var);
420
  value_quote($var);
421
  xml_quote($var);
422

423 424
  # Functions for decoding
  $rv = url_decode($var);
425

426 427 428
  # Functions that tell you about your environment
  my $is_cgi = i_am_cgi();

429 430 431 432 433
  # Functions for searching
  $loc = lsearch(\@arr, $val);
  $val = max($a, $b, $c);
  $val = min($a, $b, $c);

434 435 436
  # Data manipulation
  ($removed, $added) = diff_arrays(\@old, \@new);

437
  # Functions for manipulating strings
438
  $val = trim(" abc ");
439
  ($removed, $added) = diff_strings($old, $new);
440
  $wrapped = wrap_comment($comment);
441
  $msg = perform_substs($str, $substs);
442

443 444 445
  # Functions for formatting time
  format_time($time);

446 447 448
  # Functions for dealing with files
  $time = file_mod_time($filename);

449 450
  # Cryptographic Functions
  $crypted_password = bz_crypt($password);
451
  $new_password = generate_random_password($password_length);
452

453
  # Validation Functions
454 455
  validate_email_syntax($email);
  validate_date($date);
456

457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489
=head1 DESCRIPTION

This package contains various utility functions which do not belong anywhere
else.

B<It is not intended as a general dumping ground for something which
people feel might be useful somewhere, someday>. Do not add methods to this
package unless it is intended to be used for a significant number of files,
and it does not belong anywhere else.

=head1 FUNCTIONS

This package provides several types of routines:

=head2 Tainting

Several functions are available to deal with tainted variables. B<Use these
with care> to avoid security holes.

=over 4

=item C<is_tainted>

Determines whether a particular variable is tainted

=item C<trick_taint($val)>

Tricks perl into untainting a particular variable.

Use trick_taint() when you know that there is no way that the data
in a scalar can be tainted, but taint mode still bails on it.

B<WARNING!! Using this routine on data that really could be tainted defeats
490 491
the purpose of taint mode.  It should only be used on variables that have been
sanity checked in some way and have been determined to be OK.>
492 493 494 495 496 497 498

=item C<detaint_natural($num)>

This routine detaints a natural number. It returns a true value if the
value passed in was a valid natural number, else it returns false. You
B<MUST> check the result of this routine to avoid security holes.

499 500 501 502 503 504
=item C<detaint_signed($num)>

This routine detaints a signed integer. It returns a true value if the
value passed in was a valid signed integer, else it returns false. You
B<MUST> check the result of this routine to avoid security holes.

505 506 507 508 509 510 511 512 513 514 515 516 517 518
=back

=head2 Quoting

Some values may need to be quoted from perl. However, this should in general
be done in the template where possible.

=over 4

=item C<html_quote($val)>

Returns a value quoted for use in HTML, with &, E<lt>, E<gt>, and E<34> being
replaced with their appropriate HTML entities.

519 520 521 522
=item C<url_quote($val)>

Quotes characters so that they may be included as part of a url.

523 524 525 526 527
=item C<css_class_quote($val)>

Quotes characters so that they may be used as CSS class names. Spaces
are replaced by underscores.

528 529 530 531 532
=item C<value_quote($val)>

As well as escaping html like C<html_quote>, this routine converts newlines
into &#013;, suitable for use in html attributes.

533 534 535 536 537 538
=item C<xml_quote($val)>

This is similar to C<html_quote>, except that ' is escaped to &apos;. This
is kept separate from html_quote partly for compatibility with previous code
(for &apos;) and partly for future handling of non-ASCII characters.

539 540 541 542
=item C<url_decode($val)>

Converts the %xx encoding from the given URL back to its original form.

543 544 545 546 547 548
=item C<i_am_cgi()>

Tells you whether or not you are being run as a CGI script in a web
server. For example, it would return false if the caller is running
in a command-line script.

549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
=back

=head2 Searching

Functions for searching within a set of values.

=over 4

=item C<lsearch($list, $item)>

Returns the position of C<$item> in C<$list>. C<$list> must be a list
reference.

If the item is not in the list, returns -1.

=item C<max($a, $b, ...)>

Returns the maximum from a set of values.

=item C<min($a, $b, ...)>

Returns the minimum from a set of values.

=back

574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591
=head2 Data Manipulation

=over 4

=item C<diff_arrays(\@old, \@new)>

 Description: Takes two arrayrefs, and will tell you what it takes to 
              get from @old to @new.
 Params:      @old = array that you are changing from
              @new = array that you are changing to
 Returns:     A list of two arrayrefs. The first is a reference to an 
              array containing items that were removed from @old. The
              second is a reference to an array containing items
              that were added to @old. If both returned arrays are 
              empty, @old and @new contain the same values.

=back

592
=head2 String Manipulation
593 594 595 596 597 598 599 600

=over 4

=item C<trim($str)>

Removes any leading or trailing whitespace from a string. This routine does not
modify the existing string.

601 602 603 604 605 606 607 608
=item C<diff_strings($oldstr, $newstr)>

Takes two strings containing a list of comma- or space-separated items
and returns what items were removed from or added to the new one, 
compared to the old one. Returns a list, where the first entry is a scalar
containing removed items, and the second entry is a scalar containing added
items.

609 610 611 612 613 614 615 616 617 618
=item C<wrap_comment($comment)>

Takes a bug comment, and wraps it to the appropriate length. The length is
currently specified in C<Bugzilla::Constants::COMMENT_COLS>. Lines beginning
with ">" are assumed to be quotes, and they will not be wrapped.

The intended use of this function is to wrap comments that are about to be
displayed or emailed. Generally, wrapped text should not be stored in the
database.

619 620 621 622 623 624
=item C<find_wrap_point($string, $maxpos)>

Search for a comma, a whitespace or a hyphen to split $string, within the first
$maxpos characters. If none of them is found, just split $string at $maxpos.
The search starts at $maxpos and goes back to the beginning of the string.

625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
=item C<perform_substs($str, $substs)>

Performs substitutions for sending out email with variables in it,
or for inserting a parameter into some other string.

Takes a string and a reference to a hash containing substitution 
variables and their values.

If the hash is not specified, or if we need to substitute something
that's not in the hash, then we will use parameters to do the 
substitution instead.

Substitutions are always enclosed with '%' symbols. So they look like:
%some_variable_name%. If "some_variable_name" is a key in the hash, then
its value will be placed into the string. If it's not a key in the hash,
then the value of the parameter called "some_variable_name" will be placed
into the string.

643 644 645 646 647
=item C<is_7bit_clean($str)>

Returns true if the string contains only 7-bit characters (ASCII 32 through 126,
ASCII 10 (LineFeed) and ASCII 13 (Carriage Return)).

648 649 650 651
=item C<clean_text($str)>

Returns the parameter "cleaned" by replacing non-printable characters with spaces.
Specifically, each run of control characters (ASCII 0 through 31, and ASCII 127)
becomes a single ASCII 32 (Space), and the result is then trimmed.

652 653
=back

654 655 656 657 658 659
=head2 Formatting Time

=over 4

=item C<format_time($time)>

660 661 662 663 664 665 666 667 668
Takes a time, converts it to the desired format and appends the timezone
as defined in editparams.cgi, if desired. This routine will be expanded
in the future to adjust for user preferences regarding what timezone to
display times in.

This routine is mainly called from templates to filter dates, see
"FILTER time" in Templates.pm. In this case, $format is undefined and
the routine has to "guess" the date format that was passed to $dbh->sql_date_format().

669

670 671 672 673 674
=item C<format_time_decimal($time)>

Returns a number with 2 digit precision, unless the last digit is a 0. Then it 
returns only 1 digit precision.

675 676 677
=back


678 679 680 681 682 683
=head2 Files

=over 4

=item C<file_mod_time($filename)>

684 685
Takes a filename and returns the modification time. It returns it in the format
of the "mtime" parameter of the perl "stat" function.
686

687 688
=back

689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709
=head2 Cryptography

=over 4

=item C<bz_crypt($password)>

Takes a string and returns a C<crypt>ed value for it, using a random salt.

Please always use this function instead of the built-in perl "crypt"
when initially encrypting a password.

=begin undocumented

Random salts are generated because the alternative is usually
to use the first two characters of the password itself, and since
the salt appears in plaintext at the beginning of the encrypted
password string this has the effect of revealing the first two
characters of the password to anyone who views the encrypted version.

=end undocumented

710 711 712 713 714 715
=item C<generate_random_password($password_length)>

Returns an alphanumeric string with the specified length
(10 characters by default). Use this function to generate passwords
and tokens.

716
=back
717 718 719 720 721

=head2 Validation

=over 4

722 723 724 725 726 727
=item C<validate_email_syntax($email)>

Checks whether the given string is a syntactically legal email address and
returns 1 if the check is successful, else returns 0.

=item C<validate_date($date)>
728

729 730
Makes sure the date has the correct format and returns 1 if
the check is successful, else returns 0.
731 732

=back