Commit 4acb2424 authored by Max Kanat-Alexander

Bug 22353: Automatic duplicate bug detection on enter_bug.cgi

r=glob, a=mkanat
parent 601bda78
......@@ -19,6 +19,7 @@
......@@ -49,7 +49,7 @@ use Bugzilla::Group;
use Bugzilla::Status;
use Bugzilla::Comment;
use List::MoreUtils qw(firstidx);
use List::MoreUtils qw(firstidx uniq);
use List::Util qw(min first);
use Storable qw(dclone);
use URI;
......@@ -446,6 +446,87 @@ sub match {
return $class->SUPER::match(@_);
# Finds existing bugs whose summaries resemble a proposed summary, so that
# likely duplicates can be shown before a new bug is filed.
#
# Params (hashref):
#   summary  - The summary text to search for. Each whitespace-separated
#              chunk contributes its first run of word characters; runs of
#              fewer than three characters are ignored.
#   products - Optional. One product object, or an arrayref of product
#              objects (anything with an ->id method), to restrict the
#              search to.
#   limit    - Optional. Maximum number of bugs to return; defaults to, and
#              is capped at, MAX_POSSIBLE_DUPLICATES.
#
# Returns: an empty arrayref when the summary yields no usable words;
# otherwise whatever $class->new_from_list returns for those matching bugs
# that the current user can see.
#
# Throws: a 'param_must_be_numeric' code error when limit is not a
# non-negative integer.
sub possible_duplicates {
    my ($class, $params) = @_;
    my $short_desc = $params->{summary};
    my $products = $params->{products} || [];
    my $limit = $params->{limit} || MAX_POSSIBLE_DUPLICATES;

    # Accept a single product as well as an arrayref. This has to be "ne":
    # "!ref($products) eq 'ARRAY'" parses as "(!ref($products)) eq 'ARRAY'",
    # which is never true, so a lone product was never wrapped.
    $products = [$products] if ref($products) ne 'ARRAY';

    # NOTE(review): the expression on the left of this check was not visible
    # in the reviewed excerpt. The capture below validates the value and
    # yields an untainted copy -- confirm it matches the file's own helper.
    my $orig_limit = $limit;
    ($limit) = $orig_limit =~ /\A(\d+)\z/
        or ThrowCodeError('param_must_be_numeric',
                          { function => 'possible_duplicates',
                            param    => $orig_limit });
    # MAX_POSSIBLE_DUPLICATES is documented as the largest number of bugs
    # this method returns, so never let a caller ask for more.
    $limit = MAX_POSSIBLE_DUPLICATES if $limit > MAX_POSSIBLE_DUPLICATES;

    my $dbh = Bugzilla->dbh;
    my $user = Bugzilla->user;

    # Split on whitespace only: inside a character class "\b" means
    # backspace, not a word boundary.
    my @words = split(/\s+/, $short_desc || '');
    # Exclude punctuation from the array. Only use $1 when the match
    # succeeded, so a stale capture can never leak in as a search term.
    @words = map { /(\w+)/ ? $1 : () } @words;
    # And make sure that each word is longer than 2 characters.
    @words = grep { length($_) > 2 } @words;

    return [] if !@words;

    my ($where_sql, $relevance_sql);
    if ($dbh->FULLTEXT_OR) {
        # The database can OR terms inside a single fulltext expression.
        my $joined_terms = join($dbh->FULLTEXT_OR, @words);
        # NOTE(review): the start of this call was not visible in the
        # reviewed excerpt; it is reconstructed from the per-word call in
        # the else branch -- confirm.
        ($where_sql, $relevance_sql) =
            $dbh->sql_fulltext_search('bugs_fulltext.short_desc',
                                      $joined_terms, 1);
        $relevance_sql ||= $where_sql;
    }
    else {
        # No OR support: run one fulltext term per word, OR the conditions
        # together and add up the relevance expressions.
        my (@where, @relevance);
        my $count = 0;
        foreach my $word (@words) {
            # NOTE(review): the excerpt never changes $count; the increment
            # is assumed so that each term gets a distinct third argument
            # -- confirm.
            $count++;
            my ($term, $rel_term) = $dbh->sql_fulltext_search(
                'bugs_fulltext.short_desc', $word, $count);
            push(@where, $term);
            push(@relevance, $rel_term || $term);
        }
        $where_sql = join(' OR ', @where);
        $relevance_sql = join(' + ', @relevance);
    }

    my $product_ids = join(',', map { $_->id } @$products);
    my $product_sql = $product_ids ? "AND product_id IN ($product_ids)" : "";

    # Because we collapse duplicates, we want to get slightly more bugs
    # than were actually asked for.
    my $sql_limit = $limit + 5;

    my $possible_dupes = $dbh->selectall_arrayref(
        "SELECT bugs.bug_id AS bug_id, bugs.resolution AS resolution,
                ($relevance_sql) AS relevance
           FROM bugs
                INNER JOIN bugs_fulltext ON bugs.bug_id = bugs_fulltext.bug_id
          WHERE ($where_sql) $product_sql
       ORDER BY relevance DESC, bug_id DESC
          LIMIT $sql_limit", {Slice=>{}});

    my @actual_dupe_ids;
    # Resolve duplicates into their ultimate target duplicates.
    foreach my $bug (@$possible_dupes) {
        my $push_id = $bug->{bug_id};
        if ($bug->{resolution} && $bug->{resolution} eq 'DUPLICATE') {
            $push_id = _resolve_ultimate_dup_id($bug->{bug_id});
        }
        push(@actual_dupe_ids, $push_id);
    }
    # Several matches may resolve to the same target; keep the first
    # (most relevant) occurrence of each id.
    @actual_dupe_ids = uniq @actual_dupe_ids;
    if (scalar @actual_dupe_ids > $limit) {
        @actual_dupe_ids = @actual_dupe_ids[0..($limit-1)];
    }

    my $visible = $user->visible_bugs(\@actual_dupe_ids);
    return $class->new_from_list($visible);
}
# Docs for create() (there's no POD in this file yet, but we very
# much need this documented right now):
......@@ -1426,23 +1507,7 @@ sub _check_dup_id {
# Make sure a loop isn't created when marking this bug
# as duplicate.
my %dupes;
my $this_dup = $dupe_of;
my $sth = $dbh->prepare('SELECT dupe_of FROM duplicates WHERE dupe = ?');
while ($this_dup) {
if ($this_dup == $self->id) {
ThrowUserError('dupe_loop_detected', { bug_id => $self->id,
dupe_of => $dupe_of });
# If $dupes{$this_dup} is already set to 1, then a loop
# already exists which does not involve this bug.
# As the user is not responsible for this loop, do not
# prevent him from marking this bug as a duplicate.
last if exists $dupes{$this_dup};
$dupes{$this_dup} = 1;
$this_dup = $dbh->selectrow_array($sth, undef, $this_dup);
_resolve_ultimate_dup_id($self->id, $dupe_of, 1);
my $cur_dup = $self->dup_id || 0;
if ($cur_dup != $dupe_of && Bugzilla->params->{'commentonduplicate'}
......@@ -2843,6 +2908,38 @@ sub dup_id {
return $self->{'dup_id'};
# Walks the "duplicates" table from a bug to the end of its duplicate chain.
#
# Params:
#   $bug_id             - The bug the walk is about.
#   $dupe_of            - Optional. The first target to follow; when false,
#                         the target stored for $bug_id is looked up.
#   $loops_are_an_error - When true, a chain that leads back to $bug_id
#                         raises the 'dupe_loop_detected' user error.
#
# Returns: the id of the last bug reached. That is $bug_id itself when it
# has no target, and the last bug visited before a loop when one is found.
sub _resolve_ultimate_dup_id {
    my ($bug_id, $dupe_of, $loops_are_an_error) = @_;
    my $dbh = Bugzilla->dbh;
    my $next_target_sth =
        $dbh->prepare('SELECT dupe_of FROM duplicates WHERE dupe = ?');

    my $ultimate_id = $bug_id;
    my $current_id  = $dupe_of
        || $dbh->selectrow_array($next_target_sth, undef, $bug_id);

    my %visited;
    while ($current_id) {
        if ($current_id == $bug_id) {
            # The chain has come back round to the bug we started from.
            return $ultimate_id unless $loops_are_an_error;
            ThrowUserError('dupe_loop_detected', { bug_id => $bug_id,
                                                   dupe_of => $dupe_of });
        }

        # Reaching a bug for the second time means a loop already exists
        # that does not involve this bug. The user did not cause it, so
        # stop quietly instead of raising an error.
        if (exists $visited{$current_id}) {
            return $ultimate_id;
        }

        $visited{$current_id} = 1;
        $ultimate_id = $current_id;
        $current_id =
            $dbh->selectrow_array($next_target_sth, undef, $ultimate_id);
    }

    return $ultimate_id;
}
sub actual_time {
my ($self) = @_;
return $self->{'actual_time'} if exists $self->{'actual_time'};
......@@ -175,6 +175,7 @@ use File::Basename;
......@@ -527,6 +528,10 @@ use constant MAX_FREETEXT_LENGTH => 255;
# The longest a bug URL in a BUG_URLS field can be.
use constant MAX_BUG_URL_LENGTH => 255;
# The largest number of possible duplicates that Bug::possible_duplicates
# will return.
use constant MAX_POSSIBLE_DUPLICATES => 25;
# This is the name of the algorithm used to hash passwords before storing
# them in the database. This can be any string that is valid to pass to
# Perl's "Digest" module. Note that if you change this, it won't take
......@@ -73,6 +73,11 @@ use constant ENUM_DEFAULTS => {
# The character that means "OR" in a boolean fulltext search. If empty,
# the database doesn't support OR searches in fulltext searches.
# Used by Bugzilla::Bug::possible_duplicates.
use constant FULLTEXT_OR => '';
# Connection Methods
......@@ -40,8 +40,8 @@ For interface details see L<Bugzilla::DB> and L<DBI>.
package Bugzilla::DB::Mysql;
use strict;
use base qw(Bugzilla::DB);
use Bugzilla::Constants;
use Bugzilla::Install::Util qw(install_string);
......@@ -57,8 +57,7 @@ use Text::ParseWords;
use constant MAX_COMMENTS => 50;
# This module extends the DB interface via inheritance
use base qw(Bugzilla::DB);
use constant FULLTEXT_OR => '|';
sub new {
my ($class, $params) = @_;
......@@ -35,16 +35,14 @@ For interface details see L<Bugzilla::DB> and L<DBI>.
package Bugzilla::DB::Oracle;
use strict;
use base qw(Bugzilla::DB);
use DBD::Oracle;
use DBD::Oracle qw(:ora_types);
use Bugzilla::Constants;
use Bugzilla::Error;
use Bugzilla::Util;
# This module extends the DB interface via inheritance
use base qw(Bugzilla::DB);
# Constants
......@@ -52,6 +50,7 @@ use base qw(Bugzilla::DB);
use constant EMPTY_STRING => '__BZ_EMPTY_STR__';
use constant BLOB_TYPE => { ora_type => ORA_BLOB };
use constant FULLTEXT_OR => ' OR ';
sub new {
my ($class, $params) = @_;
......@@ -900,7 +900,7 @@ sub can_enter_product {
$product && grep($_->name eq $product->name,
@{ $self->get_enterable_products });
return 1 if $can_enter;
return $product if $can_enter;
return 0 unless $warn == THROW_ERROR;
......@@ -36,6 +36,7 @@ use Bugzilla::Util qw(trick_taint trim);
use Bugzilla::Version;
use Bugzilla::Milestone;
use Bugzilla::Status;
use Bugzilla::Token qw(issue_hash_token);
# Constants #
......@@ -322,7 +323,7 @@ sub get {
else {
$bug = Bugzilla::Bug->check($bug_id);
push(@bugs, $self->_bug_to_hash($bug));
push(@bugs, $self->_bug_to_hash($bug, $params));
return { bugs => \@bugs, faults => \@faults };
......@@ -421,7 +422,28 @@ sub search {
my $bugs = Bugzilla::Bug->match($params);
my $visible = Bugzilla->user->visible_bugs($bugs);
my @hashes = map { $self->_bug_to_hash($_) } @$visible;
my @hashes = map { $self->_bug_to_hash($_, $params) } @$visible;
return { bugs => \@hashes };
# WebService method Bug.possible_duplicates: returns existing bugs whose
# summaries resemble the one supplied.
#
# Params (after validate(), which is asked to treat 'product' as a list):
#   summary - Required. A 'param_required' code error is thrown when it is
#             missing or false.
#   product - Optional list of product names. Each one is passed to
#             can_enter_product() with THROW_ERROR.
#   limit   - Optional. Passed through to Bugzilla::Bug->possible_duplicates.
#
# Returns: { bugs => [...] }, one _bug_to_hash() result per bug found, with
# $params handed to _bug_to_hash() as its filter argument.
sub possible_duplicates {
    my ($self, $params) = validate(@_, 'product');
    my $user = Bugzilla->user;

    # A summary is mandatory; there is nothing to search for without one.
    ThrowCodeError('param_required',
                   { function => 'Bug.possible_duplicates',
                     param    => 'summary' })
        unless $params->{summary};

    # Turn each product name into whatever can_enter_product() returns for
    # it (called in scalar context).
    my $product_names = $params->{'product'} || [];
    my @products;
    for my $product_name (@$product_names) {
        push(@products,
             scalar($user->can_enter_product($product_name, THROW_ERROR)));
    }

    my %search_args = (
        summary  => $params->{summary},
        products => \@products,
        limit    => $params->{limit},
    );
    my $candidates = Bugzilla::Bug->possible_duplicates(\%search_args);

    my @bug_hashes = map { $self->_bug_to_hash($_, $params) } @$candidates;
    return { bugs => \@bug_hashes };
}
......@@ -617,7 +639,7 @@ sub attachments {
# A helper for get() and search().
sub _bug_to_hash {
my ($self, $bug) = @_;
my ($self, $bug, $filters) = @_;
# Timetracking fields are deleted if the user doesn't belong to
# the corresponding group.
......@@ -646,6 +668,11 @@ sub _bug_to_hash {
$item{'component'} = $self->type('string', $bug->component);
$item{'dupe_of'} = $self->type('int', $bug->dup_id);
if (Bugzilla->user->id) {
my $token = issue_hash_token([$bug->id, $bug->delta_ts]);
$item{'update_token'} = $self->type('string', $token);
# if we do not delete this key, additional user info, including their
# real name, etc, will wind up in the 'internals' hashref
delete $item{internals}->{assigned_to_obj};
......@@ -659,7 +686,7 @@ sub _bug_to_hash {
$item{'alias'} = undef;
return \%item;
return filter $filters, \%item;
sub _attachment_to_hash {
/* The contents of this file are subject to the Mozilla Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
* The Original Code is the Bugzilla Bug Tracking System.
* The Initial Developer of the Original Code is Everything Solved, Inc.
* Portions created by Everything Solved are Copyright (C) 2010 Everything
* Solved, Inc. All Rights Reserved.
* Contributor(s): Max Kanat-Alexander <>
/* This library assumes that the needed YUI libraries have been loaded
already. */
YAHOO.bugzilla.dupTable = {
counter: 0,
dataSource: null,
updateTable: function(dataTable, product_name, summary_field) {
if (summary_field.value.length < 4) return;
YAHOO.bugzilla.dupTable.counter = YAHOO.bugzilla.dupTable.counter + 1;
YAHOO.util.Connect.setDefaultPostHeader('application/json', true);
var json_object = {
version : "1.1",
method : "Bug.possible_duplicates",
id : YAHOO.bugzilla.dupTable.counter,
params : {
product : product_name,
summary : summary_field.value,
limit : 7,
include_fields : [ "id", "summary", "status", "resolution",
"update_token" ]
var post_data = YAHOO.lang.JSON.stringify(json_object);
var callback = {
success: dataTable.onDataReturnInitializeTable,
failure: dataTable.onDataReturnInitializeTable,
scope: dataTable,
argument: dataTable.getState()
dataTable.getDataSource().sendRequest(post_data, callback);
formatBugLink: function(el, oRecord, oColumn, oData) {
el.innerHTML = '<a href="show_bug.cgi?id=' + oData + '">'
+ oData + '</a>';
formatStatus: function(el, oRecord, oColumn, oData) {
var resolution = oRecord.getData('resolution');
if (resolution) {
el.innerHTML = oData + ' ' + resolution;
else {
el.innerHTML = oData;
// DataTable cell formatter: builds a button that sends the browser to
// process_bug.cgi with addselfcc=1 for the bug in this row.
//   el      - the cell element being formatted
//   oRecord - the row's record; its 'id' field supplies the bug id
//   oData   - the cell value, sent as the "token" URL parameter
// NOTE(review): none of the lines visible here attach the button to el --
// confirm that the full file appends it.
// NOTE(review): escape() is deprecated and leaves "+" unencoded;
// encodeURIComponent() is the usual choice for a query-string value.
formatCcButton: function(el, oRecord, oColumn, oData) {
var url = 'process_bug.cgi?id=' + oRecord.getData('id')
+ '&addselfcc=1&token=' + escape(oData);
var button = document.createElement('button');
// type="button" rather than the default, so that the button does not act
// as a submit control.
button.setAttribute('type', 'button');
button.innerHTML = YAHOO.bugzilla.dupTable.addCcMessage;
// Navigate on click; returning false suppresses the default action.
button.onclick = function() { window.location = url; return false; };
init_ds: function() {
var new_ds = new YAHOO.util.XHRDataSource("jsonrpc.cgi");
new_ds.connTimeout = 30000;
new_ds.connMethodPost = true;
new_ds.connXhrMode = "cancelStaleRequests";
new_ds.maxCacheEntries = 3;
new_ds.responseSchema = {
resultsList : "result.bugs",
metaFields : { error: "error", jsonRpcId: "id" },
// DataSource can't understand a JSON-RPC error response, so
// we have to modify the result data if we get one.
new_ds.doBeforeParseData =
function(oRequest, oFullResponse, oCallback) {
if (oFullResponse.error) {
oFullResponse.result = {};
oFullResponse.result.bugs = [];
if (console) {
console.log("JSON-RPC error:", oFullResponse.error);
return oFullResponse;
this.dataSource = new_ds;
init: function(data) {
if (this.dataSource == null) this.init_ds();
data.options.initialLoad = false;
var dt = new YAHOO.widget.DataTable(data.container, data.columns,
this.dataSource, data.options);
YAHOO.util.Event.on(data.summary_field, 'blur',
function(e) {
YAHOO.bugzilla.dupTable.updateTable(dt, data.product_name,
/* The contents of this file are subject to the Mozilla Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
* The Original Code is the Bugzilla Bug Tracking System.
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are Copyright (C) 1998
* Netscape Communications Corporation. All Rights Reserved.
* Contributor(s): Byron Jones <>
* Christian Reis <>
* Vitaly Harisov <>
* Svetlana Harisova <>
* Marc Schumann <>
* Pascal Held <>
* Max Kanat-Alexander <>
/* These are specified using the class instead of the id so that they
don't override the YUI CSS. */
.enter_bug_form table {
border-spacing: 0;
border-width: 0;
.enter_bug_form td, .enter_bug_form th { padding: .25em; }
.enter_bug_form th { text-align: right; }
/* This makes the "component" column as small as possible (since it
* contains only fixed-width content) and the Reporter column
* as large as possible, which makes the form not jump around
* when the Component Description changes size. This works
* pretty well on all browsers except IE 8.
#Create #field_container_component { width: 1px; }
#Create #field_container_reporter { width: 100%; }
#Create .comment {
vertical-align: top;
overflow: auto;
color: green;
#Create #comp_desc_container td { padding: 0; }
#Create #comp_desc { height: 11ex; }
#Create #os_guess_note {
padding-top: 0;
#Create #os_guess_note div {
max-width: 35em;
/* The Possible Duplicates table on enter_bug. */
#possible_duplicates th {
text-align: center;
background: none;
border-collapse: collapse;
/* Make the Add Me to CC button never wrap. */
#possible_duplicates .yui-dt-col-update_token { white-space: nowrap; }
......@@ -504,45 +504,6 @@ input.required, select.required, span.required_explanation {
list-style-type: none;
/* enter_bug */
form#Create table {
border-spacing: 0;
border-width: 0;
form#Create td, form#Create th {
padding: .25em;
form#Create th {
text-align: right;
/* This makes the "component" column as small as possible (since it
* contains only fixed-width content) and the Reporter column
* as large as possible, which makes the form not jump around
* when the Component Description changes size. This works
* pretty well on all browsers except IE 8.
form#Create #field_container_component { width: 1px; }
form#Create #field_container_reporter { width: 100%; }
form#Create .comment {
vertical-align: top;
overflow: auto;
color: green;
form#Create #comp_desc_container td { padding: 0; }
form#Create #comp_desc { height: 11ex; }
form#Create #os_guess_note {
padding-top: 0;
form#Create #os_guess_note div {
max-width: 35em;
.image_button {
background-repeat: no-repeat;
background-position: center center;
......@@ -30,10 +30,11 @@
[% PROCESS global/header.html.tmpl
title = title
yui = [ 'autocomplete', 'calendar' ]
style_urls = [ 'skins/standard/attachment.css' ]
yui = [ 'autocomplete', 'calendar', 'datatable' ]
style_urls = [ 'skins/standard/attachment.css',
'skins/standard/enter_bug.css' ]
javascript_urls = [ "js/attachment.js", "js/util.js",
"js/field.js", "js/TUI.js" ]
"js/field.js", "js/TUI.js", "js/bug.js" ]
onload = 'set_assign_to();'
......@@ -169,7 +170,7 @@ TUI_hide_default('expert_fields');
<form name="Create" id="Create" method="post" action="post_bug.cgi"
class="enter_bug_form" enctype="multipart/form-data">
<input type="hidden" name="product" value="[% FILTER html %]">
<input type="hidden" name="token" value="[% token FILTER html %]">
......@@ -508,13 +509,48 @@ TUI_hide_default('expert_fields');
<td colspan="3">
<input name="short_desc" size="70" value="[% short_desc FILTER html %]"
maxlength="255" spellcheck="true" aria-required="true"
class="required" id="short_desc">
[% IF feature_enabled('jsonrpc') AND !cloned_bug_id %]
<tr id="possible_duplicates_container" class="bz_default_hidden">
<td colspan="3">
<div id="possible_duplicates"></div>
<script type="text/javascript">
var dt_columns = [
{ key: "id", label: "[% field_descs.bug_id FILTER js %]",
formatter: YAHOO.bugzilla.dupTable.formatBugLink },
{ key: "summary",
label: "[% field_descs.short_desc FILTER js %]" },
{ key: "status",
label: "[% field_descs.bug_status FILTER js %]",
formatter: YAHOO.bugzilla.dupTable.formatStatus },
{ key: "update_token", label: '',
formatter: YAHOO.bugzilla.dupTable.formatCcButton }
YAHOO.bugzilla.dupTable.addCcMessage = "Add Me to the CC List";
container: 'possible_duplicates',
columns: dt_columns,
product_name: '[% FILTER js %]',
summary_field: 'short_desc',
options: {
MSG_LOADING: 'Searching for possible duplicates...',
MSG_EMPTY: 'No possible duplicates found.',
SUMMARY: 'Possible Duplicates'
[% END %]
<td colspan="3">
[% defaultcontent = BLOCK %]
[% IF cloned_bug_id %]
+++ This [% terms.bug %] was initially created as a clone of [% terms.Bug %] #[% cloned_bug_id %] +++
......@@ -52,6 +52,7 @@
[% SET yui_css = {
autocomplete => 1,
calendar => 1,
datatable => 1,
} %]
[%# Note: This is simple dependency resolution--you can't have dependencies
......@@ -60,6 +61,7 @@
[% SET yui_deps = {
autocomplete => ['json', 'connection', 'datasource'],
datatable => ['json', 'connection', 'datasource', 'element'],
} %]
......@@ -99,7 +101,20 @@
[% END %]
[% style_urls.unshift('skins/standard/global.css') %]
[%# YUI dependency resolution %]
[%# We have to do this in a separate array, because modifying the
# existing array by unshift'ing dependencies confuses FOREACH.
[% SET yui_resolved = [] %]
[% FOREACH yui_name = yui %]
[% FOREACH yui_dep = yui_deps.${yui_name}.reverse %]
[% yui_resolved.push(yui_dep) IF NOT yui_resolved.contains(yui_dep) %]
[% END %]
[% yui_resolved.push(yui_name) IF NOT yui_resolved.contains(yui_name) %]
[% END %]
[% SET yui = yui_resolved %]
[%# YUI CSS %]
[% FOREACH yui_name = yui %]
[% IF yui_css.$yui_name %]
<link rel="stylesheet" type="text/css"
......@@ -218,12 +233,6 @@
<script src="js/yui/yahoo-dom-event/yahoo-dom-event.js"
<script src="js/yui/cookie/cookie-min.js" type="text/javascript"></script>
[%# Resolve YUI dependencies. Note that CSS was already done above. %]
[% FOREACH yui_name = yui %]
[% IF yui_deps.$yui_name %]
[% yui = yui_deps.${yui_name}.merge(yui) %]
[% END %]
[% END %]
[% FOREACH yui_name = yui %]
<script type="text/javascript"
src="js/yui/[% yui_name FILTER html %]/
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment