Commit f21583b6 authored by Frédéric Buclin's avatar Frédéric Buclin

Bug 554819: Quicksearch should be using Text::ParseWords instead of custom code in splitString

Also fixes QS with accented characters (bug 730207) r=dkl a=LpSolit
parent b7fb96fa
...@@ -19,6 +19,7 @@ use Bugzilla::Util; ...@@ -19,6 +19,7 @@ use Bugzilla::Util;
use List::Util qw(min max); use List::Util qw(min max);
use List::MoreUtils qw(firstidx); use List::MoreUtils qw(firstidx);
use Text::ParseWords qw(parse_line);
use base qw(Exporter); use base qw(Exporter);
@Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch); @Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch);
...@@ -129,54 +130,95 @@ sub quicksearch { ...@@ -129,54 +130,95 @@ sub quicksearch {
$searchstring =~ s/(^[\s,]+|[\s,]+$)//g; $searchstring =~ s/(^[\s,]+|[\s,]+$)//g;
ThrowUserError('buglist_parameters_required') unless ($searchstring); ThrowUserError('buglist_parameters_required') unless ($searchstring);
$fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
if ($searchstring =~ m/^[0-9,\s]*$/) { if ($searchstring =~ m/^[0-9,\s]*$/) {
_bug_numbers_only($searchstring); _bug_numbers_only($searchstring);
} }
else { else {
_handle_alias($searchstring); _handle_alias($searchstring);
# Globally translate " AND ", " OR ", " NOT " to space, pipe, dash. # Retain backslashes and quotes, to know which strings are quoted,
$searchstring =~ s/\s+AND\s+/ /g; # and which ones are not.
$searchstring =~ s/\s+OR\s+/|/g; my @words = parse_line('\s+', 1, $searchstring);
$searchstring =~ s/\s+NOT\s+/ -/g; # If parse_line() returns no data, this means strings are badly quoted.
# Rather than trying to guess what the user wanted to do, we throw an error.
scalar(@words)
|| ThrowUserError('quicksearch_unbalanced_quotes', {string => $searchstring});
# A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
ThrowUserError('quicksearch_invalid_query')
if ($words[0] =~ /^(?:AND|OR)$/ || $words[$#words] =~ /^(?:AND|OR|NOT)$/);
my (@qswords, @or_group);
while (scalar @words) {
my $word = shift @words;
# AND is the default word separator, similar to a whitespace,
# but |a AND OR b| is not a valid combination.
if ($word eq 'AND') {
ThrowUserError('quicksearch_invalid_query', {operators => ['AND', 'OR']})
if $words[0] eq 'OR';
}
# |a OR AND b| is not a valid combination.
# |a OR OR b| is equivalent to |a OR b| and so is harmless.
elsif ($word eq 'OR') {
ThrowUserError('quicksearch_invalid_query', {operators => ['OR', 'AND']})
if $words[0] eq 'AND';
}
# NOT negates the following word.
# |NOT AND| and |NOT OR| are not valid combinations.
# |NOT NOT| is fine but has no effect as they cancel themselves.
elsif ($word eq 'NOT') {
$word = shift @words;
next if $word eq 'NOT';
if ($word eq 'AND' || $word eq 'OR') {
ThrowUserError('quicksearch_invalid_query', {operators => ['NOT', $word]});
}
unshift(@words, "-$word");
}
else {
# OR groups words together, as OR has higher precedence than AND.
push(@or_group, $word);
# If the next word is not OR, then we are not in a OR group,
# or we are leaving it.
if (!defined $words[0] || $words[0] ne 'OR') {
push(@qswords, join('|', @or_group));
@or_group = ();
}
}
}
my @words = splitString($searchstring); _handle_status_and_resolution(\@qswords);
_handle_status_and_resolution(\@words);
my (@unknownFields, %ambiguous_fields); my (@unknownFields, %ambiguous_fields);
$fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
# Loop over all main-level QuickSearch words. # Loop over all main-level QuickSearch words.
foreach my $qsword (@words) { foreach my $qsword (@qswords) {
my $negate = substr($qsword, 0, 1) eq '-'; my @or_operand = parse_line('\|', 1, $qsword);
foreach my $term (@or_operand) {
my $negate = substr($term, 0, 1) eq '-';
if ($negate) { if ($negate) {
$qsword = substr($qsword, 1); $term = substr($term, 1);
} }
# No special first char next if _handle_special_first_chars($term, $negate);
if (!_handle_special_first_chars($qsword, $negate)) { next if _handle_field_names($term, $negate, \@unknownFields,
# Split by '|' to get all operands for a boolean OR. \%ambiguous_fields);
foreach my $or_operand (split(/\|/, $qsword)) {
if (!_handle_field_names($or_operand, $negate,
\@unknownFields,
\%ambiguous_fields))
{
# Having ruled out the special cases, we may now split # Having ruled out the special cases, we may now split
# by comma, which is another legal boolean OR indicator. # by comma, which is another legal boolean OR indicator.
foreach my $word (split(/,/, $or_operand)) { # Remove quotes from quoted words, if any.
@words = parse_line(',', 0, $term);
foreach my $word (@words) {
if (!_special_field_syntax($word, $negate)) { if (!_special_field_syntax($word, $negate)) {
_default_quicksearch_word($word, $negate); _default_quicksearch_word($word, $negate);
} }
_handle_urls($word, $negate); _handle_urls($word, $negate);
} }
} }
}
}
$chart++; $chart++;
$and = 0; $and = 0;
$or = 0; $or = 0;
} # foreach (@words) }
# Inform user about any unknown fields # Inform user about any unknown fields
if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) { if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) {
...@@ -302,7 +344,7 @@ sub _handle_special_first_chars { ...@@ -302,7 +344,7 @@ sub _handle_special_first_chars {
my $firstChar = substr($qsword, 0, 1); my $firstChar = substr($qsword, 0, 1);
my $baseWord = substr($qsword, 1); my $baseWord = substr($qsword, 1);
my @subWords = split(/[\|,]/, $baseWord); my @subWords = split(/,/, $baseWord);
if ($firstChar eq '#') { if ($firstChar eq '#') {
addChart('short_desc', 'substring', $baseWord, $negate); addChart('short_desc', 'substring', $baseWord, $negate);
...@@ -343,31 +385,39 @@ sub _handle_field_names { ...@@ -343,31 +385,39 @@ sub _handle_field_names {
return 1; return 1;
} }
# generic field1,field2,field3:value1,value2 notation # Generic field1,field2,field3:value1,value2 notation.
if ($or_operand =~ /^([^:]+):([^:]+)$/) { # We have to correctly ignore commas and colons in quotes.
my @fields = split(/,/, $1); my @field_values = parse_line(':', 1, $or_operand);
my @values = split(/,/, $2); if (scalar @field_values == 2) {
my @fields = parse_line(',', 1, $field_values[0]);
my @values = parse_line(',', 1, $field_values[1]);
foreach my $field (@fields) { foreach my $field (@fields) {
my $translated = _translate_field_name($field); my $translated = _translate_field_name($field);
# Skip and record any unknown fields # Skip and record any unknown fields
if (!defined $translated) { if (!defined $translated) {
push(@$unknownFields, $field); push(@$unknownFields, $field);
next;
} }
# If we got back an array, that means the substring is # If we got back an array, that means the substring is
# ambiguous and could match more than field name # ambiguous and could match more than field name
elsif (ref $translated) { elsif (ref $translated) {
$ambiguous_fields->{$field} = $translated; $ambiguous_fields->{$field} = $translated;
next;
} }
else {
foreach my $value (@values) { foreach my $value (@values) {
my $operator = FIELD_OPERATOR->{$translated} || 'substring'; my $operator = FIELD_OPERATOR->{$translated} || 'substring';
# If the string was quoted to protect some special
# characters such as commas and colons, we need
# to remove quotes.
if ($value =~ /^(["'])(.+)\g1$/) {
$value = $2;
$value =~ s/\\(["'])/$1/g;
}
addChart($translated, $operator, $value, $negate); addChart($translated, $operator, $value, $negate);
} }
} }
}
return 1; return 1;
} }
return 0; return 0;
} }
...@@ -500,41 +550,6 @@ sub _handle_urls { ...@@ -500,41 +550,6 @@ sub _handle_urls {
# Helpers # Helpers
########################################################################### ###########################################################################
# Split string on whitespace, retaining quoted strings as one
sub splitString {
my $string = shift;
my @quoteparts;
my @parts;
my $i = 0;
# Now split on quote sign; be tolerant about unclosed quotes
@quoteparts = split(/"/, $string);
foreach my $part (@quoteparts) {
# After every odd quote, quote special chars
if ($i++ %2) {
$part = url_quote($part);
# Protect the minus sign from being considered
# as negation, in quotes.
$part =~ s/(?<=^)\-/%2D/;
}
}
# Join again
$string = join('"', @quoteparts);
# Now split on unescaped whitespace
@parts = split(/\s+/, $string);
foreach (@parts) {
# Protect plus signs from becoming a blank.
# If "+" appears as the first character, leave it alone
# as it has a special meaning. Strings which start with
# "+" must be quoted.
s/(?<!^)\+/%2B/g;
# Remove quotes
s/"//g;
}
return @parts;
}
# Quote and escape a phrase appropriately for a "content matches" search. # Quote and escape a phrase appropriately for a "content matches" search.
sub _matches_phrase { sub _matches_phrase {
my ($phrase) = @_; my ($phrase) = @_;
...@@ -600,7 +615,7 @@ sub makeChart { ...@@ -600,7 +615,7 @@ sub makeChart {
my $cgi = Bugzilla->cgi; my $cgi = Bugzilla->cgi;
$cgi->param("field$expr", $field); $cgi->param("field$expr", $field);
$cgi->param("type$expr", $type); $cgi->param("type$expr", $type);
$cgi->param("value$expr", url_decode($value)); $cgi->param("value$expr", $value);
} }
1; 1;
...@@ -12,7 +12,7 @@ use strict; ...@@ -12,7 +12,7 @@ use strict;
use base qw(Exporter); use base qw(Exporter);
@Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed @Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed
html_quote url_quote xml_quote html_quote url_quote xml_quote
css_class_quote html_light_quote url_decode css_class_quote html_light_quote
i_am_cgi correct_urlbase remote_ip i_am_cgi correct_urlbase remote_ip
do_ssl_redirect_if_required use_attachbase do_ssl_redirect_if_required use_attachbase
diff_arrays on_main_db say diff_arrays on_main_db say
...@@ -220,14 +220,6 @@ sub xml_quote { ...@@ -220,14 +220,6 @@ sub xml_quote {
return $var; return $var;
} }
sub url_decode {
my ($todecode) = (@_);
$todecode =~ tr/+/ /; # pluses become spaces
$todecode =~ s/%([0-9a-fA-F]{2})/pack("c",hex($1))/ge;
utf8::decode($todecode) if Bugzilla->params->{'utf8'};
return $todecode;
}
sub i_am_cgi { sub i_am_cgi {
# I use SERVER_SOFTWARE because it's required to be # I use SERVER_SOFTWARE because it's required to be
# defined for all requests in the CGI spec. # defined for all requests in the CGI spec.
...@@ -751,9 +743,6 @@ Bugzilla::Util - Generic utility functions for bugzilla ...@@ -751,9 +743,6 @@ Bugzilla::Util - Generic utility functions for bugzilla
xml_quote($var); xml_quote($var);
email_filter($var); email_filter($var);
# Functions for decoding
$rv = url_decode($var);
# Functions that tell you about your environment # Functions that tell you about your environment
my $is_cgi = i_am_cgi(); my $is_cgi = i_am_cgi();
my $urlbase = correct_urlbase(); my $urlbase = correct_urlbase();
...@@ -866,10 +855,6 @@ This is similar to C<html_quote>, except that ' is escaped to &apos;. This ...@@ -866,10 +855,6 @@ This is similar to C<html_quote>, except that ' is escaped to &apos;. This
is kept separate from html_quote partly for compatibility with previous code is kept separate from html_quote partly for compatibility with previous code
(for &apos;) and partly for future handling of non-ASCII characters. (for &apos;) and partly for future handling of non-ASCII characters.
=item C<url_decode($val)>
Converts the %xx encoding from the given URL back to its original form.
=item C<email_filter> =item C<email_filter>
Removes the hostname from email addresses in the string, if the user Removes the hostname from email addresses in the string, if the user
......
...@@ -1436,6 +1436,27 @@ ...@@ -1436,6 +1436,27 @@
The name of the query must be less than [% constants.MAX_LEN_QUERY_NAME FILTER html %] The name of the query must be less than [% constants.MAX_LEN_QUERY_NAME FILTER html %]
characters long. characters long.
[% ELSIF error == "quicksearch_invalid_query" %]
[% title = "Invalid Query" %]
Your query is invalid.
[% IF operators %]
The [% operators.shift FILTER html %] operator cannot be followed by
[%+ operators.shift FILTER html %].
[% ELSE %]
A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
They are reserved operators and must be quoted if you want to look for
these strings.
[% END %]
[% ELSIF error == "quicksearch_unbalanced_quotes" %]
[% title = "Badly Formatted Query" %]
[% terms.Bugzilla %] is unable to parse your query correctly:
<em>[% string FILTER html %]</em>.<br>
If you use quotes to enclose strings, make sure both quotes are present.
If you really want to look for a quote in a string, type \" instead of ".
For instance, <em>"I'm so \"special\", really"</em> (with quotes) will be
interpreted as <em>I'm so "special", really</em>.
[% ELSIF error == "quicksearch_unknown_field" %] [% ELSIF error == "quicksearch_unknown_field" %]
[% title = "QuickSearch Error" %] [% title = "QuickSearch Error" %]
There is a problem with your search: There is a problem with your search:
......
...@@ -25,7 +25,16 @@ ...@@ -25,7 +25,16 @@
<input type="submit" value="Search" id="find"> <input type="submit" value="Search" id="find">
</form> </form>
<h2>The Basics</h2> <ul>
<li><a href="#basics">The Basics</a></li>
<li><a href="#basic_examples">Examples of Simple Queries</a></li>
<li><a href="#fields">Fields You Can Search On</a></li>
<li><a href="#advanced_features">Advanced Features</a></li>
<li><a href="#shortcuts">Advanced Shortcuts</a></li>
<li><a href="#advanced_examples">Examples of Complex Queries</a></li>
</ul>
<h2 id="basics">The Basics</h2>
<ul class="qs_help"> <ul class="qs_help">
<li>If you just put a word or series of words in the search box, <li>If you just put a word or series of words in the search box,
...@@ -73,8 +82,32 @@ ...@@ -73,8 +82,32 @@
<em>any</em> of those values will be searched for.</li> <em>any</em> of those values will be searched for.</li>
</ul> </ul>
<p>You may also want to read up on the <a href="#advanced">Advanced <h2 id="basic_examples">Examples of Simple Queries</h2>
Features</a>.</p>
<p>Here are some examples of how to write some simple queries.
<a href="#advanced_examples">Examples for more complex queries</a> can be
found lower in this page.</p>
<ul class="qs_help">
<li>All open [% terms.bugs %] where userA@company.com is in the CC list
(no need to mention open [% terms.bugs %], this is the default):<br>
<kbd>cc:userA@company.com</kbd></li>
<li>All unconfirmed [% terms.bugs %] in product productA (putting the
[%+ terms.bug %] status at the first position make it being automagically
considered as [% terms.abug %] status):<br>
<kbd>UNCONFIRMED product:productA</kbd>
<li>All open and closed [% terms.bugs %] reported by userB@company.com
(we must specify ALL as the first word, else only open [% terms.bugs %]
are taken into account):<br>
<kbd>ALL reporter:userB@company.com</kbd>
<li>All open [% terms.bugs %] with severity blocker or critical with the
target milestone set to 2.5:<br>
<kbd>severity:blocker,critical milestone:2.5</kbd>
<li>All open [% terms.bugs %] in the component Research & Development
with priority P1 or P2 (we must use quotes for the component as its name
contains whitespaces):<br>
<kbd>component:"Research & Development" priority:P1,P2</kbd></li>
</ul>
<h2 id="fields">Fields You Can Search On</h2> <h2 id="fields">Fields You Can Search On</h2>
...@@ -127,15 +160,18 @@ ...@@ -127,15 +160,18 @@
</tbody> </tbody>
</table> </table>
<h2 id="advanced">Advanced Features</h2> <h2 id="advanced_features">Advanced Features</h2>
<ul class="qs_help"> <ul class="qs_help">
<li>If you want to search for a <strong>phrase</strong> or something that <li>If you want to search for a <strong>phrase</strong> or something that
contains spaces, you can put it in quotes, like: contains spaces, commas, colons or quotes, you must put it in quotes, like:
<kbd>"this is a phrase"</kbd>. You can also use quotes to search for <kbd>"yes, this is a phrase"</kbd>. You must also use quotes to search for
characters that would otherwise be interpreted specially by quicksearch. characters that would otherwise be interpreted specially by quicksearch.
For example, <kbd>"this|thing"</kbd> would search for the literal phrase For example, <kbd>"this|that"</kbd> would search for the literal string
<em>this|thing</em>.</li> <em>this|that</em> and would not be parsed as <kbd>"this OR that"</kbd>.
Also, <kbd>"-field:value"</kbd> would search for the literal phrase
<em>-field:value</em> and would not be parsed as
<kbd>"NOT field:value"</kbd>.</li>
<li>You can use <strong>AND</strong>, <strong>NOT</strong>, <li>You can use <strong>AND</strong>, <strong>NOT</strong>,
and <strong>OR</strong> in searches. and <strong>OR</strong> in searches.
...@@ -165,6 +201,12 @@ ...@@ -165,6 +201,12 @@
</li> </li>
</ul> </ul>
<p>You cannot use | nor OR to enumerate possible values for a given field.
You must use commas instead. So <kbd>field:value1,value2</kbd> does what
you expect, but <kbd>field:value1|value2</kbd> would be treated as
<kbd>field:value1 OR value2</kbd>, which means value2 is not bound to
the given field.</p>
<p>OR has higher precedence than AND; AND is the top level operation. <p>OR has higher precedence than AND; AND is the top level operation.
For example:</p> For example:</p>
<p>Searching for <em><kbd>url|location bar|field -focus</kbd></em> means <p>Searching for <em><kbd>url|location bar|field -focus</kbd></em> means
...@@ -255,4 +297,29 @@ ...@@ -255,4 +297,29 @@
</tbody> </tbody>
</table> </table>
<h2 id="advanced_examples">Examples of Complex Queries</h2>
<p>It is pretty easy to write rather complex queries without too much effort.
For very complex queries, you have to use the
<a href="query.cgi?format=advanced">Advanced Search</a> form.</p>
<ul class="qs_help">
<li>All [% terms.bugs %] reported by userA@company.com or assigned to him
(the initial @ is a shortcut for the assignee, see the
<a href="#shortcuts">Advanced Shortcuts</a> section above):<br>
<kbd>ALL @userA@company.com OR reporter:userA@company.com</kbd></li>
<li>All open [% terms.bugs %] in product productA with either severity
blocker, critical or major, or with priority P1, or with the blocker+
flag set, and which are neither assigned to userB@company.com nor to
userC@company.com (we make the assumption that there are only two users
matching userB and userC, else we would write the whole login name):<br>
<kbd>:productA sev:blocker,critical,major OR pri:P1 OR flag:blocker+ -assign:userB,userC</kbd></li>
<li>All FIXED [% terms.bugs %] with the blocker+ flag set, but without
the approval+ nor approval? flags set:<br>
<kbd>FIXED flag:blocker+ -flag:approval+ -flag:approval?</kbd></li>
<li>[% terms.Bugs %] with <em>That's a "unusual" issue</em> in the
[%+ terms.bug %] summary (double quotes are escaped using <em>\"</em>):<br>
<kbd>summary:"That's a \"unusual\" issue"</kbd></li>
</ul>
[% PROCESS global/footer.html.tmpl %] [% PROCESS global/footer.html.tmpl %]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment