Commit b4e1dbdc authored by Frédéric Buclin's avatar Frédéric Buclin

Bug 703788: Improve performance of diff_arrays() with large arrays

r/a=mkanat
parent 8e99c55e
...@@ -305,36 +305,39 @@ sub use_attachbase { ...@@ -305,36 +305,39 @@ sub use_attachbase {
sub diff_arrays { sub diff_arrays {
my ($old_ref, $new_ref, $attrib) = @_; my ($old_ref, $new_ref, $attrib) = @_;
my @old = @$old_ref;
my @new = @$new_ref;
$attrib ||= 'name'; $attrib ||= 'name';
# For each pair of (old, new) entries: my (%counts, %pos);
# If object arrays were passed then an attribute should be defined; # We are going to alter the old array.
# If they're equal, set them to empty. When done, @old contains entries my @old = @$old_ref;
# that were removed; @new contains ones that got added. my $i = 0;
foreach my $oldv (@old) {
foreach my $newv (@new) { # $counts{foo}-- means old, $counts{foo}++ means new.
next if ($newv eq '' or $oldv eq ''); # If $counts{foo} becomes positive, then we are adding new items,
if (blessed($oldv) and blessed($newv)) { # else we simply cancel one old existing item. Remaining items
if ($oldv->$attrib eq $newv->$attrib) { # in the old list have been removed.
$newv = $oldv = ''; foreach (@old) {
next unless defined $_;
my $value = blessed($_) ? $_->$attrib : $_;
$counts{$value}--;
push @{$pos{$value}}, $i++;
} }
my @added;
foreach (@$new_ref) {
next unless defined $_;
my $value = blessed($_) ? $_->$attrib : $_;
if (++$counts{$value} > 0) {
# Ignore empty strings, but objects having an empty string
# as attribute are fine.
push(@added, $_) unless ($value eq '' && !blessed($_));
} }
else { else {
if ($oldv eq $newv) { my $old_pos = shift @{$pos{$value}};
$newv = $oldv = '' $old[$old_pos] = undef;
}
} }
} }
} # Ignore cancelled items as well as empty strings.
my @removed = grep { defined $_ && $_ ne '' } @old;
my @removed;
my @added;
@removed = grep { $_ ne '' } @old;
@added = grep { $_ ne '' } @new;
return (\@removed, \@added); return (\@removed, \@added);
} }
......
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
# #
# Contributor(s): Zach Lipton <zach@zachlipton.com> # Contributor(s): Zach Lipton <zach@zachlipton.com>
# Max Kanat-Alexander <mkanat@bugzilla.org> # Max Kanat-Alexander <mkanat@bugzilla.org>
# Frédéric Buclin <LpSolit@gmail.com>
################# #################
#Bugzilla Test 7# #Bugzilla Test 7#
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
use lib 't'; use lib 't';
use Support::Files; use Support::Files;
use Test::More tests => 13; use Test::More tests => 15;
BEGIN { BEGIN {
use_ok(Bugzilla); use_ok(Bugzilla);
...@@ -72,3 +72,16 @@ foreach my $input (keys %email_strings) { ...@@ -72,3 +72,16 @@ foreach my $input (keys %email_strings) {
is(Bugzilla::Util::email_filter($input), $email_strings{$input}, is(Bugzilla::Util::email_filter($input), $email_strings{$input},
"email_filter('$input')"); "email_filter('$input')");
} }
# diff_arrays():
my @old_array = qw(alpha beta alpha gamma gamma beta alpha delta epsilon gamma);
my @new_array = qw(alpha alpha beta gamma epsilon delta beta delta);
# The order is not relevant when comparing both arrays for matching items,
# i.e. (foo bar) and (bar foo) are the same arrays (same items).
# But when returning data, we try to respect the initial order.
# We remove the leftmost items first, and return what's left. This means:
# Removed (in this order): gamma alpha gamma.
# Added (in this order): delta
my ($removed, $added) = diff_arrays(\@old_array, \@new_array);
is_deeply($removed, [qw(gamma alpha gamma)], 'diff_array(\@old, \@new) (check removal)');
is_deeply($added, [qw(delta)], 'diff_array(\@old, \@new) (check addition)');
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment