Bug 151871 - rewrite quoteUrls to fix major performance problems, and a few other misc bugs too. r=myk x2
parent 8f025114
...@@ -792,8 +792,8 @@ Content-type: text/html ...@@ -792,8 +792,8 @@ Content-type: text/html
if (!defined $nexturl || $nexturl eq "") { if (!defined $nexturl || $nexturl eq "") {
# Sets nexturl to be argv0, stripping everything up to and # Sets nexturl to be argv0, stripping everything up to and
# including the last slash (or backslash on Windows). # including the last slash (or backslash on Windows).
$0 =~ m:[^/\\]*$:; $0 =~ m:([^/\\]*)$:;
$nexturl = $&; $nexturl = $1;
} }
my $method = "POST"; my $method = "POST";
# We always want to use POST here, because we're submitting a password and don't # We always want to use POST here, because we're submitting a password and don't
......
...@@ -974,104 +974,104 @@ sub detaint_natural { ...@@ -974,104 +974,104 @@ sub detaint_natural {
# module by Gareth Rees <garethr@cre.canon.co.uk>. It has been heavily hacked, # module by Gareth Rees <garethr@cre.canon.co.uk>. It has been heavily hacked,
# all that is really recognizable from the original is bits of the regular # all that is really recognizable from the original is bits of the regular
# expressions. # expressions.
# This has been rewritten to be faster, mainly by substituting 'as we go'.
# If you want to modify this routine, read the comments carefully
sub quoteUrls { sub quoteUrls {
my ($text) = (@_); my ($text) = (@_);
return $text unless $text; return $text unless $text;
my $base = Param('urlbase');
my $protocol = join '|',
qw(afs cid ftp gopher http https mid news nntp prospero telnet wais);
my $count = 0;
# Now, quote any "#" characters so they won't confuse stuff later
$text =~ s/#/%#/g;
# Next, find anything that looks like a URL or an email address and # We use /g for speed, but uris can have other things inside them
# pull them out of the text, replacing them with a "##<digits>## # (http://foo/bug#3 for example). Filtering that out filters valid
# marker, and writing them into an array. All this confusion is # bug refs out, so we have to do replacements.
# necessary so that we don't match on something we've already replaced, # mailto can't contain space or #, so we don't have to bother for that
# which can happen if you do multiple s///g operations. # Do this by escaping \0 to \1\0, and replacing matches with \0\0$count\0\0
# \0 is used because it's unlikely to occur in the text, so the cost of
# doing this should be very small
# Also, \0 won't appear in the value_quote'd bug title, so we don't have
# to worry about bogus substitutions from there
# escape the 2nd escape char we're using
my $chr1 = chr(1);
$text =~ s/\0/$chr1\0/g;
# However, note that adding the title (for buglinks) can affect things
# In particular, attachment matches go before bug titles, so that titles
# with 'attachment 1' don't double match.
# Dupe checks go afterwards, because that uses ^ and \Z, which won't occur
# if it was substituted as a bug title (since that always involves leading
# and trailing text)
# Because of entities, it's easier (and quicker) to do this before escaping
my @things; my @things;
while ($text =~ s%((mailto:)?([\w\.\-\+\=]+\@[\w\-]+(?:\.[\w\-]+)+)\b| my $count = 0;
(\b((?:$protocol):[^ \t\n<>"]+[\w/])))%"##$count##"%exo) { my $tmp;
my $item = $&;
# non-mailto protocols
$item = value_quote($item); my $protocol_re = qr/(afs|cid|ftp|gopher|http|https|mid|news|nntp|prospero|telnet|wais)/i;
if ($item !~ m/^$protocol:/o && $item !~ /^mailto:/) { $text =~ s~\b(${protocol_re}: # The protocol:
# We must have grabbed this one because it looks like an email [^\s<>\"]+ # Any non-whitespace
# address. [\w\/]) # so that we end in \w or /
$item = qq{<A HREF="mailto:$item">$item</A>}; ~($tmp = html_quote($1)) &&
} else { ($things[$count++] = "<a href=\"$tmp\">$tmp</a>") &&
$item = qq{<A HREF="$item">$item</A>}; ("\0\0" . ($count-1) . "\0\0")
} ~egox;
$things[$count++] = $item; # We have to quote now, otherwise our html is itself escaped
} # THIS MEANS THAT A LITERAL ", <, >, ' MUST BE ESCAPED FOR A MATCH
# Either a comment string or no comma and a compulsory #.
while ($text =~ s/\bbug(\s|%\#)*(\d+),?\s*comment\s*(\s|%\#)(\d+)/"##$count##"/ei) { $text = html_quote($text);
my $item = $&;
my $bugnum = $2; # mailto:
my $comnum = $4; # Use |<nothing> so that $1 is defined regardless
$item = GetBugLink($bugnum, $item); $text =~ s~\b(mailto:|)?([\w\.\-\+\=]+\@[\w\-]+(?:\.[\w\-]+)+)\b
$item =~ s/(id=\d+)/$1#c$comnum/; ~<a href=\"mailto:$2\">$1$2</a>~igx;
$things[$count++] = $item;
} # attachment links - handle both cases separately for simplicity
while ($text =~ s/\bcomment(\s|%\#)*(\d+)/"##$count##"/ei) { $text =~ s~((?:^Created\ an\ |\b)attachment\s*\(id=(\d+)\))
my $item = $&; ~<a href=\"attachment.cgi?id=$2&amp;action=view\">$1</a>~igx;
my $num = $2;
$item = value_quote($item); $text =~ s~\b(attachment\s*\#?\s*(\d+))
$item = qq{<A HREF="#c$num">$item</A>}; ~<a href=\"attachment.cgi?id=$2&amp;action=view\">$1</a>~igx;
$things[$count++] = $item;
} # This handles bug a, comment b type stuff. Because we're using /g
while ($text =~ s/\bbug(\s|%\#)*(\d+)/"##$count##"/ei) { # we have to do this in one pattern, and so this is semi-messy.
my $item = $&; # Also, we can't use $bug_re?$comment_re? because that will match the
my $num = $2; # empty string
$item = GetBugLink($num, $item); my $bug_re = qr/bug\s*\#?\s*(\d+)/i;
$things[$count++] = $item; my $comment_re = qr/comment\s*\#?\s*(\d+)/i;
} $text =~ s~\b($bug_re(?:\s*,?\s*$comment_re)?|$comment_re)
while ($text =~ s/\b(Created an )?attachment(\s|%\#)*(\(id=)?(\d+)\)?/"##$count##"/ei) { ~ # We have several choices. $1 here is the link, and $2-4 are set
my $item = $&; # depending on which part matched
my $num = $4; (defined($2) ? GetBugLink($2,$1,$3) :
$item = value_quote($item); # Not really necessary, since we know "<a href=\"#c$4\">$1</a>")
# there's no special chars in it. ~egox;
$item = qq{<a href="attachment.cgi?id=$num&amp;action=view">$item</a>};
$things[$count++] = $item; # Duplicate markers
} $text =~ s~(?<=^\*\*\*\ This\ bug\ has\ been\ marked\ as\ a\ duplicate\ of\ )
while ($text =~ s/\*\*\* This bug has been marked as a duplicate of (\d+) \*\*\*/"##$count##"/ei) { (\d+)
my $item = $&; (?=\ \*\*\*\Z)
my $num = $1; ~GetBugLink($1, $1)
my $bug_link; ~egmx;
$bug_link = GetBugLink($num, $num);
$item =~ s@\d+@$bug_link@; # Now remove the encoding hacks
$things[$count++] = $item; $text =~ s/\0\0(\d+)\0\0/$things[$1]/eg;
} $text =~ s/$chr1\0/\0/g;
$text = value_quote($text);
$text =~ s/\&#013;/\n/g;
# Stuff everything back from the array.
for (my $i=0 ; $i<$count ; $i++) {
$text =~ s/##$i##/$things[$i]/e;
}
# And undo the quoting of "#" characters.
$text =~ s/%#/#/g;
return $text; return $text;
} }
# This is a new subroutine written 12/20/00 for the purpose of processing a # GetBugLink creates a link to a bug, including its title.
# link to a bug. It can be called using "GetBugLink (<BugNumber>, <LinkText>);" # It takes either two or three parameters:
# Where <BugNumber> is the number of the bug and <LinkText> is what appears # - The bug number
# between '<a>' and '</a>'. # - The link text, to place between the <a>..</a>
# - An optional comment number, for linking to a particular
# comment in the bug
sub GetBugLink { sub GetBugLink {
my ($bug_num, $link_text) = (@_); my ($bug_num, $link_text, $comment_num) = @_;
detaint_natural($bug_num) || die "GetBugLink() called with non-integer bug number"; detaint_natural($bug_num) || die "GetBugLink() called with non-integer bug number";
# If we've run GetBugLink() for this bug number before, %::buglink # If we've run GetBugLink() for this bug number before, %::buglink
...@@ -1122,7 +1122,11 @@ sub GetBugLink { ...@@ -1122,7 +1122,11 @@ sub GetBugLink {
my ($pre, $title, $post) = @{$::buglink{$bug_num}}; my ($pre, $title, $post) = @{$::buglink{$bug_num}};
# $title will be undefined if the bug didn't exist in the database. # $title will be undefined if the bug didn't exist in the database.
if (defined $title) { if (defined $title) {
return qq{$pre<a href="show_bug.cgi?id=$bug_num" title="$title">$link_text</a>$post}; my $linkval = "show_bug.cgi?id=$bug_num";
if (defined $comment_num) {
$linkval .= "#c$comment_num";
}
return qq{$pre<a href="$linkval" title="$title">$link_text</a>$post};
} }
else { else {
return qq{$link_text}; return qq{$link_text};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment