Commit 35f1ce88 authored by lpsolit%gmail.com's avatar lpsolit%gmail.com

Bug 126266: Use UTF-8 (Unicode) charset encoding for pages and email for NEW…

Bug 126266: Use UTF-8 (Unicode) charset encoding for pages and email for NEW installations - Patch by byron jones (glob) <bugzilla@glob.com.au> r=wurblzap a=justdave
parent 1f3e3f23
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
# Bradley Baetz <bbaetz@student.usyd.edu.au> # Bradley Baetz <bbaetz@student.usyd.edu.au>
# J. Paul Reed <preed@sigkill.com> # J. Paul Reed <preed@sigkill.com>
# Gervase Markham <gerv@gerv.net> # Gervase Markham <gerv@gerv.net>
# Byron Jones <bugzilla@glob.com.au>
use strict; use strict;
...@@ -47,6 +48,10 @@ use Date::Parse; ...@@ -47,6 +48,10 @@ use Date::Parse;
use Date::Format; use Date::Format;
use Mail::Mailer; use Mail::Mailer;
use Mail::Header; use Mail::Header;
use MIME::Base64;
use MIME::QuotedPrint;
use MIME::Parser;
use Mail::Address;
# We need these strings for the X-Bugzilla-Reasons header # We need these strings for the X-Bugzilla-Reasons header
# Note: this hash uses "," rather than "=>" to avoid auto-quoting of the LHS. # Note: this hash uses "," rather than "=>" to avoid auto-quoting of the LHS.
...@@ -619,16 +624,102 @@ sub MessageToMTA ($) { ...@@ -619,16 +624,102 @@ sub MessageToMTA ($) {
$Mail::Mailer::testfile::config{outfile} = "$datadir/mailer.testfile"; $Mail::Mailer::testfile::config{outfile} = "$datadir/mailer.testfile";
} }
$msg =~ /(.*?)\n\n(.*)/ms; my ($header, $body) = $msg =~ /(.*?\n)\n(.*)/s ? ($1, $2) : ('', $msg);
my @header_lines = split(/\n/, $1); my $headers;
my $body = $2;
if (Param('utf8') and (!is_7bit_clean($header) or !is_7bit_clean($body))) {
($headers, $body) = encode_message($header, $body);
} else {
my @header_lines = split(/\n/, $header);
$headers = new Mail::Header \@header_lines, Modify => 0;
}
my $headers = new Mail::Header \@header_lines, Modify => 0;
$mailer->open($headers->header_hashref); $mailer->open($headers->header_hashref);
print $mailer $body; print $mailer $body;
$mailer->close; $mailer->close;
} }
# Encode a single header value (e.g. a Subject or an address phrase) so that
# it is safe to place in a mail header, using RFC 2047 "Q" encoded-words.
#
# The value is split on single spaces.  Words that are already 7-bit clean
# are passed through untouched; every other word is emitted as its own
# =?UTF-8?Q?...?= encoded-word.
#
# Params:  $line - the raw header value (no trailing newline expected).
# Returns: the header value with all non-7-bit words encoded.
sub encode_qp_words($) {
    my ($line) = (@_);
    my @encoded;

    # True while the last non-empty word we emitted was an encoded-word.
    my $after_encoded = 0;

    foreach my $word (split / /, $line) {
        if (is_7bit_clean($word)) {
            push @encoded, $word;
            # Empty words come from runs of spaces; they do not separate
            # two encoded-words as far as a decoder is concerned.
            $after_encoded = 0 if $word ne '';
            next;
        }

        # An empty $eol tells encode_qp not to insert soft line breaks.
        my $qp = encode_qp($word, '');

        # Quoted-printable leaves characters such as '?', '_', '"' and TAB
        # literal, but inside an encoded-word '?' ends the word and '_'
        # means space.  Escape everything outside the set RFC 2047 allows
        # in any header context; '=' is kept because it now only appears
        # as the lead-in of an escape produced by encode_qp.
        $qp =~ s/([^A-Za-z0-9!*+\-\/=])/sprintf('=%02X', ord($1))/ge;

        # Decoders drop the whitespace between two adjacent encoded-words,
        # so carry the separating space inside the word as '_'.  After
        # plain text (or at the start) the space is preserved by the
        # decoder and must not be duplicated.
        push @encoded,
             '=?UTF-8?Q?' . ($after_encoded ? '_' : '') . $qp . '?=';
        $after_encoded = 1;
    }
    return join(' ', @encoded);
}
# Prepare a message containing non-7-bit characters for delivery as UTF-8.
#
# Params:  $header - the raw header block of the message, as one string.
#          $body   - the raw message body.
# Returns: a two element list: the header object obtained from
#          MIME::Parser (with charset, subject, address and transfer
#          encoding fields adjusted) and the possibly re-encoded body.
sub encode_message($$) {
    my ($header, $body) = @_;

    # read header into MIME::Entity, keeping everything in memory
    my $parser = MIME::Parser->new;
    $parser->output_to_core(1);
    $parser->tmp_to_core(1);
    my $entity = $parser->parse_data($header);
    my $head = $entity->head;

    # set charset to UTF-8, supplying a content type if there is none yet
    $head->mime_attr('Content-Type' => 'text/plain')
        unless defined $head->mime_attr('content-type');
    $head->mime_attr('Content-Type.charset' => 'UTF-8');

    # encode the subject
    my $subject = $head->get('subject');
    if (defined $subject && !is_7bit_clean($subject)) {
        # drop the trailing line ending so it is not encoded into the value
        $subject =~ s/[\r\n]+$//;
        $head->replace('subject', encode_qp_words($subject));
    }

    # encode addresses
    foreach my $field (qw(from to cc reply-to sender errors-to)) {
        my $high = $head->count($field) - 1;
        foreach my $index (0..$high) {
            my $value = $head->get($field, $index);
            my @addresses;
            my $changed = 0;
            foreach my $addr (Mail::Address->parse($value)) {
                my $phrase = $addr->phrase;
                if (is_7bit_clean($phrase)) {
                    push @addresses, $addr->format;
                } else {
                    # Only the display phrase needs encoding; the address
                    # itself is appended unchanged.  encode_qp_words is the
                    # header-word encoder used for the subject as well.
                    push @addresses, encode_qp_words($phrase) .
                        ' <' . $addr->address . '>';
                    $changed = 1;
                }
            }
            # leave the field untouched unless an address was rewritten
            if ($changed) {
                $head->replace($field, join(', ', @addresses), $index);
            }
        }
    }

    # process the body
    if (!is_7bit_clean($body)) {
        # count number of 7-bit chars, and use quoted-printable if more
        # than half the message is 7-bit clean
        my $count = ($body =~ tr/\x20-\x7E\x0A\x0D//);
        if ($count > length($body) / 2) {
            $head->replace('Content-Transfer-Encoding', 'quoted-printable');
            $body = encode_qp($body);
        } else {
            $head->replace('Content-Transfer-Encoding', 'base64');
            $body = encode_base64($body);
        }
    }

    # done
    $head->fold(75);
    return ($head, $body);
}
# Performs substitutions for sending out email with variables in it, # Performs substitutions for sending out email with variables in it,
# or for inserting a parameter into some other string. # or for inserting a parameter into some other string.
# #
......
...@@ -60,8 +60,8 @@ sub new { ...@@ -60,8 +60,8 @@ sub new {
# Make sure our outgoing cookie list is empty on each invocation # Make sure our outgoing cookie list is empty on each invocation
$self->{Bugzilla_cookie_list} = []; $self->{Bugzilla_cookie_list} = [];
# Make sure that we don't send any charset headers # Send appropriate charset
$self->charset(''); $self->charset(Param('utf8') ? 'UTF-8' : '');
# Redirect to SSL if required # Redirect to SSL if required
if (Param('sslbase') ne '' and Param('ssl') eq 'always') { if (Param('sslbase') ne '' and Param('ssl') eq 'always') {
......
...@@ -38,7 +38,7 @@ use base qw(Exporter); ...@@ -38,7 +38,7 @@ use base qw(Exporter);
diff_arrays diff_strings diff_arrays diff_strings
trim wrap_comment find_wrap_point trim wrap_comment find_wrap_point
format_time format_time_decimal format_time format_time_decimal
file_mod_time file_mod_time is_7bit_clean
bz_crypt check_email_syntax); bz_crypt check_email_syntax);
use Bugzilla::Config; use Bugzilla::Config;
...@@ -374,6 +374,10 @@ sub ValidateDate { ...@@ -374,6 +374,10 @@ sub ValidateDate {
} }
} }
# Report whether a string consists solely of printable ASCII characters
# (0x20 through 0x7E), line feeds (0x0A) and carriage returns (0x0D).
# Returns a true value when no other character is present.
sub is_7bit_clean {
    my ($text) = @_;
    # Count the characters that fall outside the permitted set; tr with an
    # empty replacement list only counts and leaves $text unchanged.
    my $disallowed = ($text =~ tr/\x0A\x0D\x20-\x7E//c);
    return $disallowed == 0;
}
1; 1;
__END__ __END__
...@@ -597,6 +601,11 @@ Search for a comma, a whitespace or a hyphen to split $string, within the first ...@@ -597,6 +601,11 @@ Search for a comma, a whitespace or a hyphen to split $string, within the first
$maxpos characters. If none of them is found, just split $string at $maxpos. $maxpos characters. If none of them is found, just split $string at $maxpos.
The search starts at $maxpos and goes back to the beginning of the string. The search starts at $maxpos and goes back to the beginning of the string.
=item C<is_7bit_clean($str)>
Returns true if the string contains only 7-bit characters (ASCII 32 through 126,
ASCII 10 (Line Feed) and ASCII 13 (Carriage Return)).
=back =back
=head2 Formatting Time =head2 Formatting Time
......
...@@ -317,7 +317,15 @@ my $modules = [ ...@@ -317,7 +317,15 @@ my $modules = [
}, },
{ {
name => 'Mail::Mailer', name => 'Mail::Mailer',
version => '1.65' version => '1.67'
},
{
name => 'MIME::Base64',
version => $^O =~ /MSWin32/i ? '3.01' : '3.03'
},
{
name => 'MIME::Tools',
version => '5.417'
}, },
{ {
name => 'Storable', name => 'Storable',
...@@ -339,6 +347,7 @@ my %ppm_modules = ( ...@@ -339,6 +347,7 @@ my %ppm_modules = (
'GD::Graph' => 'GDGraph', 'GD::Graph' => 'GDGraph',
'GD::Text::Align' => 'GDTextUtil', 'GD::Text::Align' => 'GDTextUtil',
'Mail::Mailer' => 'MailTools', 'Mail::Mailer' => 'MailTools',
'MIME::Tools' => 'MIME-Tools',
); );
sub install_command { sub install_command {
...@@ -1142,6 +1151,10 @@ END ...@@ -1142,6 +1151,10 @@ END
# Just to be sure ... # Just to be sure ...
unlink "$datadir/versioncache"; unlink "$datadir/versioncache";
# Check for a new install
my $newinstall = !-e "$datadir/params";
# Remove parameters from the params file that no longer exist in Bugzilla, # Remove parameters from the params file that no longer exist in Bugzilla,
# and set the defaults for new ones # and set the defaults for new ones
...@@ -1185,6 +1198,11 @@ if ($^O =~ /MSWin32/i && Param('mail_delivery_method') eq 'sendmail') { ...@@ -1185,6 +1198,11 @@ if ($^O =~ /MSWin32/i && Param('mail_delivery_method') eq 'sendmail') {
SetParam('smtpserver', $smtp); SetParam('smtpserver', $smtp);
} }
# Enable UTF-8 on new installs
if ($newinstall) {
SetParam('utf8', 1);
}
# WriteParams will only write out still-valid entries # WriteParams will only write out still-valid entries
WriteParams(); WriteParams();
...@@ -4211,6 +4229,9 @@ if ($sth->rows == 0) { ...@@ -4211,6 +4229,9 @@ if ($sth->rows == 0) {
if ($admin_create) { if ($admin_create) {
require Bugzilla::Util;
import Bugzilla::Util 'is_7bit_clean';
while( $realname eq "" ) { while( $realname eq "" ) {
print "Enter the real name of the administrator: "; print "Enter the real name of the administrator: ";
$realname = $answer{'ADMIN_REALNAME'} $realname = $answer{'ADMIN_REALNAME'}
...@@ -4220,6 +4241,13 @@ if ($sth->rows == 0) { ...@@ -4220,6 +4241,13 @@ if ($sth->rows == 0) {
if(! $realname ) { if(! $realname ) {
print "\nReally. We need a full name.\n"; print "\nReally. We need a full name.\n";
} }
if(! is_7bit_clean($realname)) {
print "\nSorry, but at this stage the real name can only " .
"contain standard English\ncharacters. Once Bugzilla " .
"has been installed, you can use the 'Prefs' page\nto " .
"update the real name.\n";
$realname = '';
}
} }
# trap a few interrupts so we can fix the echo if we get aborted. # trap a few interrupts so we can fix the echo if we get aborted.
......
...@@ -367,6 +367,17 @@ sub find_languages { ...@@ -367,6 +367,17 @@ sub find_languages {
}, },
{ {
name => 'utf8',
desc => 'Use UTF-8 (Unicode) encoding for all text in Bugzilla. New ' .
'installations should set this to true to avoid character encoding ' .
'problems. Existing databases should set this to true only after ' .
'the data has been converted from existing legacy character ' .
'encodings to UTF-8.',
type => 'b',
default => '0',
},
{
name => 'cookiedomain', name => 'cookiedomain',
desc => 'The domain for Bugzilla cookies. Normally blank. ' . desc => 'The domain for Bugzilla cookies. Normally blank. ' .
'If your website is at "www.foo.com", setting this to ' . 'If your website is at "www.foo.com", setting this to ' .
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
# Contributor(s): Bradley Baetz <bbaetz@student.usyd.edu.au> # Contributor(s): Bradley Baetz <bbaetz@student.usyd.edu.au>
# #
#%] #%]
<?xml version="1.0" standalone="yes"?> <?xml version="1.0" [% IF Param('utf8') %]encoding="UTF-8" [% END %]standalone="yes" ?>
<!DOCTYPE bugzilla SYSTEM "[% Param('urlbase') %]bugzilla.dtd"> <!DOCTYPE bugzilla SYSTEM "[% Param('urlbase') %]bugzilla.dtd">
<bugzilla version="[% VERSION %]" <bugzilla version="[% VERSION %]"
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
# Contributor(s): Myk Melez <myk@mozilla.org> # Contributor(s): Myk Melez <myk@mozilla.org>
#%] #%]
<?xml version="1.0"?> <?xml version="1.0"[% IF Param('utf8') %] encoding="UTF-8"[% END %]?>
<!-- Note: this interface is experimental and under development. <!-- Note: this interface is experimental and under development.
- We may and probably will make breaking changes to it in the future. --> - We may and probably will make breaking changes to it in the future. -->
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
# Contributor(s): Myk Melez <myk@mozilla.org> # Contributor(s): Myk Melez <myk@mozilla.org>
#%] #%]
<?xml version="1.0"?> <?xml version="1.0"[% IF Param('utf8') %] encoding="UTF-8"[% END %]?>
<!-- [% template_version %] --> <!-- [% template_version %] -->
<RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#" <RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
[% DEFAULT title = "$terms.Bugzilla $terms.Bugs" %] [% DEFAULT title = "$terms.Bugzilla $terms.Bugs" %]
<?xml version="1.0"?> <?xml version="1.0"[% IF Param('utf8') %] encoding="UTF-8"[% END %]?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dc="http://purl.org/dc/elements/1.1/"
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
# Contributor(s): Myk Melez <myk@mozilla.org> # Contributor(s): Myk Melez <myk@mozilla.org>
#%] #%]
<?xml version="1.0"?> <?xml version="1.0"[% IF Param('utf8') %] encoding="UTF-8"[% END %]?>
<!-- [% template_version %] --> <!-- [% template_version %] -->
<RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#" <RDF xmlns="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment