Commit 3bc08bc5 authored by mkanat%bugzilla.org's avatar mkanat%bugzilla.org

Bug 286625: Make collectstats.pl --regenerate WAY faster and make it correctly…

Bug 286625: Make collectstats.pl --regenerate WAY faster and make it correctly count the empty resolution. Patch by Max Kanat-Alexander <mkanat@bugzilla.org> r=LpSolit, a=LpSolit
parent b3782f8d
......@@ -30,12 +30,14 @@
# To run new charts for a specific date, pass it in on the command line in
# ISO (2004-08-14) format.
use AnyDBM_File;
use strict;
use lib qw(. lib);
use AnyDBM_File;
use IO::Handle;
use List::Util qw(first);
use Cwd;
use lib qw(. lib);
use Bugzilla;
use Bugzilla::Constants;
......@@ -111,6 +113,37 @@ my @resolutions = @{$fields->{'resolution'}};
# Exclude "" from the resolution list.
@resolutions = grep {$_} @resolutions;
# --regenerate was taking an enormous amount of time to query everything
# per bug, per day. Instead, we now just get all the data out of the DB
# at once and stuff it into some data structures.
my (%bug_status, %bug_resolution, %removed);
if ($regenerate) {
%bug_resolution = @{ $dbh->selectcol_arrayref(
'SELECT bug_id, resolution FROM bugs', {Columns=>[1,2]}) };
%bug_status = @{ $dbh->selectcol_arrayref(
'SELECT bug_id, bug_status FROM bugs', {Columns=>[1,2]}) };
my $removed_sth = $dbh->prepare(
q{SELECT bugs_activity.bug_id, bugs_activity.removed,}
. $dbh->sql_to_days('bugs_activity.bug_when')
. q{FROM bugs_activity
WHERE bugs_activity.fieldid = ?
ORDER BY bugs_activity.bug_when});
%removed = (bug_status => {}, resolution => {});
foreach my $field (qw(bug_status resolution)) {
my $field_id = Bugzilla::Field->check($field)->id;
my $rows = $dbh->selectall_arrayref($removed_sth, undef, $field_id);
my $hash = $removed{$field};
foreach my $row (@$rows) {
my ($bug_id, $removed, $when) = @$row;
$hash->{$bug_id} ||= [];
push(@{ $hash->{$bug_id} }, { when => int($when),
removed => $removed });
}
}
}
my $tstart = time;
foreach (@myproducts) {
my $dir = "$datadir/mining";
......@@ -118,7 +151,7 @@ foreach (@myproducts) {
&check_data_dir ($dir);
if ($regenerate) {
&regenerate_stats($dir, $_);
regenerate_stats($dir, $_, \%bug_resolution, \%bug_status, \%removed);
} else {
&collect_stats($dir, $_);
}
......@@ -343,8 +376,7 @@ sub calculate_dupes {
# This regenerates all statistics from the database.
sub regenerate_stats {
my $dir = shift;
my $product = shift;
my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_;
my $dbh = Bugzilla->dbh;
my $when = localtime(time());
......@@ -356,8 +388,6 @@ sub regenerate_stats {
$file_product =~ s/\//-/gs;
my $file = join '/', $dir, $file_product;
my @bugs;
my $and_product = "";
my $from_product = "";
......@@ -387,7 +417,6 @@ sub regenerate_stats {
}
if (open DATA, ">$file") {
DATA->autoflush(1);
my $fields = join('|', ('DATE', @statuses, @resolutions));
print DATA <<FIN;
# Bugzilla Daily Bug Stats
......@@ -400,6 +429,7 @@ sub regenerate_stats {
FIN
# For each day, generate a line of statistics.
my $total_days = $end - $start;
my @bugs;
for (my $day = $start + 1; $day <= $end; $day++) {
# Some output feedback
my $percent_done = ($day - $start - 1) * 100 / $total_days;
......@@ -416,56 +446,25 @@ FIN
$and_product . q{ ORDER BY bug_id};
my $bug_ids = $dbh->selectcol_arrayref($query, undef, @values);
push(@bugs, @$bug_ids);
# For each bug that existed on that day, determine its status
# at the beginning of the day. If there were no status
# changes on or after that day, the status was the same as it
# is today, which can be found in the bugs table. Otherwise,
# the status was equal to the first "previous value" entry in
# the bugs_activity table for that bug made on or after that
# day.
my %bugcount;
foreach (@statuses) { $bugcount{$_} = 0; }
foreach (@resolutions) { $bugcount{$_} = 0; }
# Get information on bug states and resolutions.
$query = qq{SELECT bugs_activity.removed
FROM bugs_activity
INNER JOIN fielddefs
ON bugs_activity.fieldid = fielddefs.id
WHERE fielddefs.name = ?
AND bugs_activity.bug_id = ?
AND bugs_activity.bug_when >= } .
$dbh->sql_from_days($day) .
" ORDER BY bugs_activity.bug_when " .
$dbh->sql_limit(1);
my $sth_bug = $dbh->prepare($query);
my $sth_status = $dbh->prepare(q{SELECT bug_status
FROM bugs
WHERE bug_id = ?});
my $sth_reso = $dbh->prepare(q{SELECT resolution
FROM bugs
WHERE bug_id = ?});
for my $bug (@bugs) {
my $status = $dbh->selectrow_array($sth_bug, undef,
'bug_status', $bug);
unless ($status) {
$status = $dbh->selectrow_array($sth_status, undef, $bug);
}
my $status = _get_value(
$removed->{'bug_status'}->{$bug},
$bug_status, $day, $bug);
if (defined $bugcount{$status}) {
$bugcount{$status}++;
}
my $resolution = $dbh->selectrow_array($sth_bug, undef,
'resolution', $bug);
unless ($resolution) {
$resolution = $dbh->selectrow_array($sth_reso, undef, $bug);
}
my $resolution = _get_value(
$removed->{'resolution'}->{$bug},
$bug_resolution, $day, $bug);
if (defined $bugcount{$resolution}) {
$bugcount{$resolution}++;
}
......@@ -490,6 +489,24 @@ FIN
}
}
# A helper for --regenerate.
# For each bug that exists on a day, we determine its status/resolution
# at the beginning of the day. If there were no status/resolution
# changes on or after that day, the status was the same as it
# is today (the "current" value). Otherwise, the status was equal to the
# first "previous value" entry in the bugs_activity table for that
# bug made on or after that day.
sub _get_value {
my ($removed, $current, $day, $bug) = @_;
# Get the first change that's on or after this day.
my $item = first { $_->{when} >= $day } @{ $removed || [] };
# If there's no change on or after this day, then we just return the
# current value.
return $item ? $item->{removed} : $current->{$bug};
}
sub today {
my ($dom, $mon, $year) = (localtime(time))[3, 4, 5];
return sprintf "%04d%02d%02d", 1900 + $year, ++$mon, $dom;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment