Commit 9f0310bf authored by mkanat%bugzilla.org

Bug 363153: Turn on the utf8 bit on all strings in Bugzilla that contain non-ASCII data, if the utf8 parameter is on. This means that string functions like substr() now work properly on multi-byte languages. Patch By Max Kanat-Alexander <mkanat@bugzilla.org> r=wurblzap, a=mkanat
parent 8ab75a83
......@@ -81,6 +81,7 @@ use constant SHUTDOWNHTML_EXIT_SILENTLY => [
# Note that this is a raw subroutine, not a method, so $class isn't available.
sub init_page {
(binmode STDOUT, ':utf8') if Bugzilla->params->{'utf8'};
# Some environment variables are not taint safe
delete @::ENV{'PATH', 'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
......
......@@ -233,6 +233,27 @@ sub header {
return $self->SUPER::header(@_) || "";
}
# CGI.pm is not utf8-aware: it hands parameter values back as raw bytes
# rather than as UTF-8 character strings. When the 'utf8' parameter is on,
# a call with exactly one argument has its result passed through
# _fix_utf8(). Calls with any other number of arguments, and all calls when
# 'utf8' is off, go to the parent class untouched.
sub param {
    my $self = shift;

    # Decoding applies only in utf8 mode and only to one-argument calls.
    my $wants_decoding = Bugzilla->params->{'utf8'} && @_ == 1;
    return $self->SUPER::param(@_) unless $wants_decoding;

    # List context: fix up every value the parent class returns.
    return map { _fix_utf8($_) } $self->SUPER::param(@_) if wantarray;

    # Scalar (or void) context: ask the parent for a single value.
    return _fix_utf8(scalar $self->SUPER::param(@_));
}
# Return a copy of the argument with utf8::decode() applied, i.e. with its
# UTF-8 bytes turned into characters. undef and strings that already carry
# the utf8 flag are returned as they are. The caller's variable is never
# modified, because the work is done on a local copy.
sub _fix_utf8 {
    my ($value) = @_;

    # Nothing to decode.
    return $value unless defined $value;

    # Already flagged as utf8 -- this guard is here in case CGI gets smart
    # about utf8 someday and starts returning decoded strings itself.
    return $value if utf8::is_utf8($value);

    utf8::decode($value);
    return $value;
}
# The various parts of Bugzilla which create cookies don't want to have to
# pass them around to all of the callers. Instead, store them locally here,
# and then output as required from |header|.
......
......@@ -382,10 +382,10 @@ use constant DB_MODULE => {
dbd => {
package => 'DBD-mysql',
module => 'DBD::mysql',
version => '2.9003',
# Certain versions are broken, development versions are
# always disallowed.
blacklist => ['^3\.000[3-6]', '_'],
# Disallow development versions
blacklist => ['_'],
# For UTF-8 support
version => '4.00',
},
name => 'MySQL'},
'pg' => {db => 'Bugzilla::DB::Pg', db_version => '8.00.0000',
......
......@@ -59,7 +59,9 @@ sub new {
$dsn .= ";port=$port" if $port;
$dsn .= ";mysql_socket=$sock" if $sock;
my $self = $class->db_new($dsn, $user, $pass);
my $attrs = { mysql_enable_utf8 => Bugzilla->params->{'utf8'} };
my $self = $class->db_new($dsn, $user, $pass, $attrs);
# This makes sure that if the tables are encoded as UTF-8, we
# return their data correctly.
......
......@@ -68,7 +68,9 @@ sub new {
# creating tables.
$dsn .= ";options='-c client_min_messages=warning'";
my $self = $class->db_new($dsn, $user, $pass);
my $attrs = { pg_enable_utf8 => Bugzilla->params->{'utf8'} };
my $self = $class->db_new($dsn, $user, $pass, $attrs);
# all class local variables stored in DBI derived class needs to have
# a prefix 'private_'. See DBI documentation.
......
......@@ -67,7 +67,9 @@ sub MessageToMTA {
# Encode the headers correctly in quoted-printable
foreach my $header qw(From To Cc Reply-To Sender Errors-To Subject) {
if (my $value = $email->header($header)) {
$value = Encode::decode("UTF-8", $value) if Bugzilla->params->{'utf8'};
if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($value)) {
$value = utf8::decode($value);
}
my $encoded = encode('MIME-Q', $value);
$email->header_set($header, $encoded);
}
......
......@@ -185,6 +185,8 @@ sub html_light_quote {
# This originally came from CGI.pm, by Lincoln D. Stein
#
# Percent-escape a string for use in a URL. Every character other than
# ASCII letters, digits, '_', '-' and '.' is replaced by '%' followed by
# its ordinal in upper-case hexadecimal. Returns the escaped copy; the
# argument itself is not modified.
sub url_quote {
    my $text = shift;

    # The substitution below works only on bytes, so in utf8 mode a string
    # carrying the utf8 flag is first converted to its UTF-8 byte sequence.
    if (Bugzilla->params->{'utf8'} && utf8::is_utf8($text)) {
        utf8::encode($text);
    }

    $text =~ s/([^a-zA-Z0-9_\-.])/uc sprintf("%%%02x",ord($1))/eg;
    return $text;
}
......@@ -206,6 +208,10 @@ sub xml_quote {
return $var;
}
# This function must not be relied upon to return a valid string to pass to
# the DB or the user in UTF-8 situations. The only thing you can rely upon
# it for is that if you url_decode a string, it will url_encode back to the
# exact same thing.
sub url_decode {
my ($todecode) = (@_);
$todecode =~ tr/+/ /; # pluses become spaces
......
......@@ -38,7 +38,7 @@ use Email::MIME;
use Email::MIME::Attachment::Stripper;
use Getopt::Long qw(:config bundling);
use Pod::Usage;
use Encode qw(encode decode);
use Encode;
use Bugzilla;
use Bugzilla::Bug qw(ValidateBugID);
......@@ -306,8 +306,8 @@ sub get_text_alternative {
debug_print("Part Character Encoding: $charset", 2);
if (!$ct || $ct =~ /^text\/plain/i) {
$body = $part->body;
if (Bugzilla->params->{'utf8'}) {
$body = encode('UTF-8', decode($charset, $body));
if (Bugzilla->params->{'utf8'} && !utf8::is_utf8($body)) {
$body = Encode::decode($charset, $body);
}
last;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment