Commit f21583b6 authored by Frédéric Buclin's avatar Frédéric Buclin

Bug 554819: Quicksearch should be using Text::ParseWords instead of custom code in splitString

Also fixes QS with accented characters (bug 730207) r=dkl a=LpSolit
parent b7fb96fa
......@@ -19,6 +19,7 @@ use Bugzilla::Util;
use List::Util qw(min max);
use List::MoreUtils qw(firstidx);
use Text::ParseWords qw(parse_line);
use base qw(Exporter);
@Bugzilla::Search::Quicksearch::EXPORT = qw(quicksearch);
......@@ -129,54 +130,95 @@ sub quicksearch {
$searchstring =~ s/(^[\s,]+|[\s,]+$)//g;
ThrowUserError('buglist_parameters_required') unless ($searchstring);
$fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
if ($searchstring =~ m/^[0-9,\s]*$/) {
_bug_numbers_only($searchstring);
}
else {
_handle_alias($searchstring);
# Globally translate " AND ", " OR ", " NOT " to space, pipe, dash.
$searchstring =~ s/\s+AND\s+/ /g;
$searchstring =~ s/\s+OR\s+/|/g;
$searchstring =~ s/\s+NOT\s+/ -/g;
# Retain backslashes and quotes, to know which strings are quoted,
# and which ones are not.
my @words = parse_line('\s+', 1, $searchstring);
# If parse_line() returns no data, this means strings are badly quoted.
# Rather than trying to guess what the user wanted to do, we throw an error.
scalar(@words)
|| ThrowUserError('quicksearch_unbalanced_quotes', {string => $searchstring});
# A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
ThrowUserError('quicksearch_invalid_query')
if ($words[0] =~ /^(?:AND|OR)$/ || $words[$#words] =~ /^(?:AND|OR|NOT)$/);
my (@qswords, @or_group);
while (scalar @words) {
my $word = shift @words;
# AND is the default word separator, similar to a whitespace,
# but |a AND OR b| is not a valid combination.
if ($word eq 'AND') {
ThrowUserError('quicksearch_invalid_query', {operators => ['AND', 'OR']})
if $words[0] eq 'OR';
}
# |a OR AND b| is not a valid combination.
# |a OR OR b| is equivalent to |a OR b| and so is harmless.
elsif ($word eq 'OR') {
ThrowUserError('quicksearch_invalid_query', {operators => ['OR', 'AND']})
if $words[0] eq 'AND';
}
# NOT negates the following word.
# |NOT AND| and |NOT OR| are not valid combinations.
# |NOT NOT| is fine but has no effect as they cancel themselves.
elsif ($word eq 'NOT') {
$word = shift @words;
next if $word eq 'NOT';
if ($word eq 'AND' || $word eq 'OR') {
ThrowUserError('quicksearch_invalid_query', {operators => ['NOT', $word]});
}
unshift(@words, "-$word");
}
else {
# OR groups words together, as OR has higher precedence than AND.
push(@or_group, $word);
# If the next word is not OR, then we are not in a OR group,
# or we are leaving it.
if (!defined $words[0] || $words[0] ne 'OR') {
push(@qswords, join('|', @or_group));
@or_group = ();
}
}
}
my @words = splitString($searchstring);
_handle_status_and_resolution(\@words);
_handle_status_and_resolution(\@qswords);
my (@unknownFields, %ambiguous_fields);
$fulltext = Bugzilla->user->setting('quicksearch_fulltext') eq 'on' ? 1 : 0;
# Loop over all main-level QuickSearch words.
foreach my $qsword (@words) {
my $negate = substr($qsword, 0, 1) eq '-';
if ($negate) {
$qsword = substr($qsword, 1);
}
foreach my $qsword (@qswords) {
my @or_operand = parse_line('\|', 1, $qsword);
foreach my $term (@or_operand) {
my $negate = substr($term, 0, 1) eq '-';
if ($negate) {
$term = substr($term, 1);
}
# No special first char
if (!_handle_special_first_chars($qsword, $negate)) {
# Split by '|' to get all operands for a boolean OR.
foreach my $or_operand (split(/\|/, $qsword)) {
if (!_handle_field_names($or_operand, $negate,
\@unknownFields,
\%ambiguous_fields))
{
# Having ruled out the special cases, we may now split
# by comma, which is another legal boolean OR indicator.
foreach my $word (split(/,/, $or_operand)) {
if (!_special_field_syntax($word, $negate)) {
_default_quicksearch_word($word, $negate);
}
_handle_urls($word, $negate);
}
next if _handle_special_first_chars($term, $negate);
next if _handle_field_names($term, $negate, \@unknownFields,
\%ambiguous_fields);
# Having ruled out the special cases, we may now split
# by comma, which is another legal boolean OR indicator.
# Remove quotes from quoted words, if any.
@words = parse_line(',', 0, $term);
foreach my $word (@words) {
if (!_special_field_syntax($word, $negate)) {
_default_quicksearch_word($word, $negate);
}
_handle_urls($word, $negate);
}
}
$chart++;
$and = 0;
$or = 0;
} # foreach (@words)
}
# Inform user about any unknown fields
if (scalar(@unknownFields) || scalar(keys %ambiguous_fields)) {
......@@ -302,7 +344,7 @@ sub _handle_special_first_chars {
my $firstChar = substr($qsword, 0, 1);
my $baseWord = substr($qsword, 1);
my @subWords = split(/[\|,]/, $baseWord);
my @subWords = split(/,/, $baseWord);
if ($firstChar eq '#') {
addChart('short_desc', 'substring', $baseWord, $negate);
......@@ -334,7 +376,7 @@ sub _handle_special_first_chars {
sub _handle_field_names {
my ($or_operand, $negate, $unknownFields, $ambiguous_fields) = @_;
# Flag and requestee shortcut
if ($or_operand =~ /^(?:flag:)?([^\?]+\?)([^\?]*)$/) {
addChart('flagtypes.name', 'substring', $1, $negate);
......@@ -342,32 +384,40 @@ sub _handle_field_names {
addChart('requestees.login_name', 'substring', $2, $negate);
return 1;
}
# generic field1,field2,field3:value1,value2 notation
if ($or_operand =~ /^([^:]+):([^:]+)$/) {
my @fields = split(/,/, $1);
my @values = split(/,/, $2);
# Generic field1,field2,field3:value1,value2 notation.
# We have to correctly ignore commas and colons in quotes.
my @field_values = parse_line(':', 1, $or_operand);
if (scalar @field_values == 2) {
my @fields = parse_line(',', 1, $field_values[0]);
my @values = parse_line(',', 1, $field_values[1]);
foreach my $field (@fields) {
my $translated = _translate_field_name($field);
# Skip and record any unknown fields
if (!defined $translated) {
push(@$unknownFields, $field);
next;
}
# If we got back an array, that means the substring is
# ambiguous and could match more than field name
elsif (ref $translated) {
$ambiguous_fields->{$field} = $translated;
next;
}
foreach my $value (@values) {
my $operator = FIELD_OPERATOR->{$translated} || 'substring';
addChart($translated, $operator, $value, $negate);
else {
foreach my $value (@values) {
my $operator = FIELD_OPERATOR->{$translated} || 'substring';
# If the string was quoted to protect some special
# characters such as commas and colons, we need
# to remove quotes.
if ($value =~ /^(["'])(.+)\g1$/) {
$value = $2;
$value =~ s/\\(["'])/$1/g;
}
addChart($translated, $operator, $value, $negate);
}
}
}
return 1;
}
return 0;
}
......@@ -500,41 +550,6 @@ sub _handle_urls {
# Helpers
###########################################################################
# Split string on whitespace, retaining quoted strings as one
sub splitString {
my $string = shift;
my @quoteparts;
my @parts;
my $i = 0;
# Now split on quote sign; be tolerant about unclosed quotes
@quoteparts = split(/"/, $string);
foreach my $part (@quoteparts) {
# After every odd quote, quote special chars
if ($i++ %2) {
$part = url_quote($part);
# Protect the minus sign from being considered
# as negation, in quotes.
$part =~ s/(?<=^)\-/%2D/;
}
}
# Join again
$string = join('"', @quoteparts);
# Now split on unescaped whitespace
@parts = split(/\s+/, $string);
foreach (@parts) {
# Protect plus signs from becoming a blank.
# If "+" appears as the first character, leave it alone
# as it has a special meaning. Strings which start with
# "+" must be quoted.
s/(?<!^)\+/%2B/g;
# Remove quotes
s/"//g;
}
return @parts;
}
# Quote and escape a phrase appropriately for a "content matches" search.
sub _matches_phrase {
my ($phrase) = @_;
......@@ -600,7 +615,7 @@ sub makeChart {
my $cgi = Bugzilla->cgi;
$cgi->param("field$expr", $field);
$cgi->param("type$expr", $type);
$cgi->param("value$expr", url_decode($value));
$cgi->param("value$expr", $value);
}
1;
......@@ -12,7 +12,7 @@ use strict;
use base qw(Exporter);
@Bugzilla::Util::EXPORT = qw(trick_taint detaint_natural detaint_signed
html_quote url_quote xml_quote
css_class_quote html_light_quote url_decode
css_class_quote html_light_quote
i_am_cgi correct_urlbase remote_ip
do_ssl_redirect_if_required use_attachbase
diff_arrays on_main_db say
......@@ -220,14 +220,6 @@ sub xml_quote {
return $var;
}
sub url_decode {
my ($todecode) = (@_);
$todecode =~ tr/+/ /; # pluses become spaces
$todecode =~ s/%([0-9a-fA-F]{2})/pack("c",hex($1))/ge;
utf8::decode($todecode) if Bugzilla->params->{'utf8'};
return $todecode;
}
sub i_am_cgi {
# I use SERVER_SOFTWARE because it's required to be
# defined for all requests in the CGI spec.
......@@ -751,9 +743,6 @@ Bugzilla::Util - Generic utility functions for bugzilla
xml_quote($var);
email_filter($var);
# Functions for decoding
$rv = url_decode($var);
# Functions that tell you about your environment
my $is_cgi = i_am_cgi();
my $urlbase = correct_urlbase();
......@@ -866,10 +855,6 @@ This is similar to C<html_quote>, except that ' is escaped to &apos;. This
is kept separate from html_quote partly for compatibility with previous code
(for &apos;) and partly for future handling of non-ASCII characters.
=item C<url_decode($val)>
Converts the %xx encoding from the given URL back to its original form.
=item C<email_filter>
Removes the hostname from email addresses in the string, if the user
......
......@@ -1436,6 +1436,27 @@
The name of the query must be less than [% constants.MAX_LEN_QUERY_NAME FILTER html %]
characters long.
[% ELSIF error == "quicksearch_invalid_query" %]
[% title = "Invalid Query" %]
Your query is invalid.
[% IF operators %]
The [% operators.shift FILTER html %] operator cannot be followed by
[%+ operators.shift FILTER html %].
[% ELSE %]
A query cannot start with AND or OR, nor can it end with AND, OR or NOT.
They are reserved operators and must be quoted if you want to look for
these strings.
[% END %]
[% ELSIF error == "quicksearch_unbalanced_quotes" %]
[% title = "Badly Formatted Query" %]
[% terms.Bugzilla %] is unable to parse your query correctly:
<em>[% string FILTER html %]</em>.<br>
If you use quotes to enclose strings, make sure both quotes are present.
If you really want to look for a quote in a string, type \" instead of ".
For instance, <em>"I'm so \"special\", really"</em> (with quotes) will be
interpreted as <em>I'm so "special", really</em>.
[% ELSIF error == "quicksearch_unknown_field" %]
[% title = "QuickSearch Error" %]
There is a problem with your search:
......
......@@ -25,7 +25,16 @@
<input type="submit" value="Search" id="find">
</form>
<h2>The Basics</h2>
<ul>
<li><a href="#basics">The Basics</a></li>
<li><a href="#basic_examples">Examples of Simple Queries</a></li>
<li><a href="#fields">Fields You Can Search On</a></li>
<li><a href="#advanced_features">Advanced Features</a></li>
<li><a href="#shortcuts">Advanced Shortcuts</a></li>
<li><a href="#advanced_examples">Examples of Complex Queries</a></li>
</ul>
<h2 id="basics">The Basics</h2>
<ul class="qs_help">
<li>If you just put a word or series of words in the search box,
......@@ -73,8 +82,32 @@
<em>any</em> of those values will be searched for.</li>
</ul>
<p>You may also want to read up on the <a href="#advanced">Advanced
Features</a>.</p>
<h2 id="basic_examples">Examples of Simple Queries</h2>
<p>Here are some examples of how to write some simple queries.
<a href="#advanced_examples">Examples for more complex queries</a> can be
found lower in this page.</p>
<ul class="qs_help">
<li>All open [% terms.bugs %] where userA@company.com is in the CC list
(no need to mention open [% terms.bugs %], this is the default):<br>
<kbd>cc:userA@company.com</kbd></li>
<li>All unconfirmed [% terms.bugs %] in product productA (putting the
[%+ terms.bug %] status at the first position make it being automagically
considered as [% terms.abug %] status):<br>
<kbd>UNCONFIRMED product:productA</kbd>
<li>All open and closed [% terms.bugs %] reported by userB@company.com
(we must specify ALL as the first word, else only open [% terms.bugs %]
are taken into account):<br>
<kbd>ALL reporter:userB@company.com</kbd>
<li>All open [% terms.bugs %] with severity blocker or critical with the
target milestone set to 2.5:<br>
<kbd>severity:blocker,critical milestone:2.5</kbd>
<li>All open [% terms.bugs %] in the component Research & Development
with priority P1 or P2 (we must use quotes for the component as its name
contains whitespaces):<br>
<kbd>component:"Research & Development" priority:P1,P2</kbd></li>
</ul>
<h2 id="fields">Fields You Can Search On</h2>
......@@ -127,15 +160,18 @@
</tbody>
</table>
<h2 id="advanced">Advanced Features</h2>
<h2 id="advanced_features">Advanced Features</h2>
<ul class="qs_help">
<li>If you want to search for a <strong>phrase</strong> or something that
contains spaces, you can put it in quotes, like:
<kbd>"this is a phrase"</kbd>. You can also use quotes to search for
contains spaces, commas, colons or quotes, you must put it in quotes, like:
<kbd>"yes, this is a phrase"</kbd>. You must also use quotes to search for
characters that would otherwise be interpreted specially by quicksearch.
For example, <kbd>"this|thing"</kbd> would search for the literal phrase
<em>this|thing</em>.</li>
For example, <kbd>"this|that"</kbd> would search for the literal string
<em>this|that</em> and would not be parsed as <kbd>"this OR that"</kbd>.
Also, <kbd>"-field:value"</kbd> would search for the literal phrase
<em>-field:value</em> and would not be parsed as
<kbd>"NOT field:value"</kbd>.</li>
<li>You can use <strong>AND</strong>, <strong>NOT</strong>,
and <strong>OR</strong> in searches.
......@@ -165,6 +201,12 @@
</li>
</ul>
<p>You cannot use | nor OR to enumerate possible values for a given field.
You must use commas instead. So <kbd>field:value1,value2</kbd> does what
you expect, but <kbd>field:value1|value2</kbd> would be treated as
<kbd>field:value1 OR value2</kbd>, which means value2 is not bound to
the given field.</p>
<p>OR has higher precedence than AND; AND is the top level operation.
For example:</p>
<p>Searching for <em><kbd>url|location bar|field -focus</kbd></em> means
......@@ -255,4 +297,29 @@
</tbody>
</table>
<h2 id="advanced_examples">Examples of Complex Queries</h2>
<p>It is pretty easy to write rather complex queries without too much effort.
For very complex queries, you have to use the
<a href="query.cgi?format=advanced">Advanced Search</a> form.</p>
<ul class="qs_help">
<li>All [% terms.bugs %] reported by userA@company.com or assigned to him
(the initial @ is a shortcut for the assignee, see the
<a href="#shortcuts">Advanced Shortcuts</a> section above):<br>
<kbd>ALL @userA@company.com OR reporter:userA@company.com</kbd></li>
<li>All open [% terms.bugs %] in product productA with either severity
blocker, critical or major, or with priority P1, or with the blocker+
flag set, and which are neither assigned to userB@company.com nor to
userC@company.com (we make the assumption that there are only two users
matching userB and userC, else we would write the whole login name):<br>
<kbd>:productA sev:blocker,critical,major OR pri:P1 OR flag:blocker+ -assign:userB,userC</kbd></li>
<li>All FIXED [% terms.bugs %] with the blocker+ flag set, but without
the approval+ nor approval? flags set:<br>
<kbd>FIXED flag:blocker+ -flag:approval+ -flag:approval?</kbd></li>
<li>[% terms.Bugs %] with <em>That's a "unusual" issue</em> in the
[%+ terms.bug %] summary (double quotes are escaped using <em>\"</em>):<br>
<kbd>summary:"That's a \"unusual\" issue"</kbd></li>
</ul>
[% PROCESS global/footer.html.tmpl %]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment