etersoft / bugzilla

Commit 841b5d39
Authored Sep 06, 2009 by mkanat%bugzilla.org

Bug 176002: Move duplicate statistics into the db
Patch by Max Kanat-Alexander <mkanat@bugzilla.org> r=LpSolit, a=LpSolit

Parent: 283d5785

Showing 7 changed files with 92 additions and 214 deletions
Bugzilla/DB/Schema.pm                                     +2   -1
Bugzilla/Install/DB.pm                                    +4   -0
Bugzilla/Install/Filesystem.pm                            +5   -2
collectstats.pl                                           +0   -80
duplicates.cgi                                            +81  -101
template/en/default/global/user-error.html.tmpl           +0   -25
template/en/default/reports/duplicates-table.html.tmpl    +0   -5
Bugzilla/DB/Schema.pm

@@ -352,7 +352,7 @@ use constant ABSTRACT_SCHEMA => {
             fieldid   => {TYPE => 'INT3', NOTNULL => 1,
                           REFERENCES => {TABLE  => 'fielddefs',
                                          COLUMN => 'id'}},
-            added     => {TYPE => 'TINYTEXT'},
+            added     => {TYPE => 'varchar(255)'},
             removed   => {TYPE => 'TINYTEXT'},
         ],
         INDEXES => [
@@ -360,6 +360,7 @@ use constant ABSTRACT_SCHEMA => {
             bugs_activity_who_idx      => ['who'],
             bugs_activity_bug_when_idx => ['bug_when'],
             bugs_activity_fieldid_idx  => ['fieldid'],
+            bugs_activity_added_idx    => ['added'],
         ],
     },
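For orientation, the column change and the new index roughly correspond to the statements below on a MySQL backend. This is a hypothetical sketch only: checksetup.pl generates the real DDL through Bugzilla::DB::Schema, and the exact types differ per database. The switch away from TINYTEXT presumably exists because MySQL cannot put an ordinary index on a TEXT-type column without a prefix length.

use Bugzilla;

# Hypothetical MySQL-flavoured equivalent of the schema change above
# (illustrative only; checksetup.pl generates the real statements
# through Bugzilla::DB::Schema).
my $dbh = Bugzilla->dbh;
$dbh->do("ALTER TABLE bugs_activity MODIFY added varchar(255)");
$dbh->do("CREATE INDEX bugs_activity_added_idx ON bugs_activity (added)");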
Bugzilla/Install/DB.pm

@@ -574,6 +574,10 @@ sub update_table_definitions {
     _convert_disallownew_to_isactive();

+    $dbh->bz_alter_column('bugs_activity', 'added',
+                          { TYPE => 'varchar(255)' });
+    $dbh->bz_add_index('bugs_activity', 'bugs_activity_added_idx', ['added']);
+
     ################################################################
     # New --TABLE-- changes should go *** A B O V E *** this point #
     ################################################################
Bugzilla/Install/Filesystem.pm

@@ -157,8 +157,6 @@ sub FILESYSTEM {
         # Readable directories
         "$datadir/mining"     => { files => $ws_readable,
                                    dirs  => $ws_dir_readable },
-        "$datadir/duplicates" => { files => $ws_readable,
-                                   dirs  => $ws_dir_readable },
         "$libdir/Bugzilla"    => { files => $ws_readable,
                                    dirs  => $ws_dir_readable },
         $extlib               => { files => $ws_readable,
@@ -379,6 +377,11 @@ EOT
         unlink "$datadir/duplicates.rdf";
         unlink "$datadir/duplicates-old.rdf";
     }
+
+    if (-e "$datadir/duplicates") {
+        print "Removing duplicates directory...\n";
+        rmtree("$datadir/duplicates");
+    }
 }

 # A simple helper for creating "empty" CSS files.
collectstats.pl

@@ -33,12 +30,9 @@
 use strict;
 use lib qw(. lib);

-use AnyDBM_File;
 use IO::Handle;
 use List::Util qw(first);
 use Cwd;

 use Bugzilla;
 use Bugzilla::Constants;
 use Bugzilla::Error;
@@ -160,8 +157,6 @@ my $tend = time;
 # Uncomment the following line for performance testing.
 #print "Total time taken " . delta_time($tstart, $tend) . "\n";

-&calculate_dupes();
-
 CollectSeriesData();

 sub check_data_dir {
@@ -299,81 +294,6 @@ sub get_old_data {
     return @data;
 }

-sub calculate_dupes {
-    my $dbh = Bugzilla->dbh;
-    my $rows = $dbh->selectall_arrayref("SELECT dupe_of, dupe FROM duplicates");
-
-    my %dupes;
-    my %count;
-    my $key;
-    my $changed = 1;
-
-    my $today = &today_dash;
-
-    # Save % count here in a date-named file
-    # so we can read it back in to do changed counters
-    # First, delete it if it exists, so we don't add to the contents of an old file
-    my $datadir = bz_locations()->{'datadir'};
-
-    if (my @files = <$datadir/duplicates/dupes$today*>) {
-        map { trick_taint($_) } @files;
-        unlink @files;
-    }
-
-    dbmopen(%count, "$datadir/duplicates/dupes$today", 0644)
-        || die "Can't open DBM dupes file: $!";
-
-    # Create a hash with key "a bug number", value "bug which that bug is a
-    # direct dupe of" - straight from the duplicates table.
-    foreach my $row (@$rows) {
-        my ($dupe_of, $dupe) = @$row;
-        $dupes{$dupe} = $dupe_of;
-    }
-
-    # Total up the number of bugs which are dupes of a given bug
-    # count will then have key = "bug number",
-    # value = "number of immediate dupes of that bug".
-    foreach $key (keys(%dupes)) {
-        my $dupe_of = $dupes{$key};
-
-        if (!defined($count{$dupe_of})) {
-            $count{$dupe_of} = 0;
-        }
-
-        $count{$dupe_of}++;
-    }
-
-    # Now we collapse the dupe tree by iterating over %count until
-    # there is no further change.
-    while ($changed == 1) {
-        $changed = 0;
-
-        foreach $key (keys(%count)) {
-            # if this bug is actually itself a dupe, and has a count...
-            if (defined($dupes{$key}) && $count{$key} > 0) {
-                # add that count onto the bug it is a dupe of,
-                # and zero the count; the check is to avoid
-                # loops
-                if ($count{$dupes{$key}} != 0) {
-                    $count{$dupes{$key}} += $count{$key};
-                    $count{$key} = 0;
-                    $changed = 1;
-                }
-            }
-        }
-    }
-
-    # Remove the values for which the count is zero
-    foreach $key (keys(%count)) {
-        if ($count{$key} == 0) {
-            delete $count{$key};
-        }
-    }
-
-    dbmclose(%count);
-}
-
 # This regenerates all statistics from the database.
 sub regenerate_stats {
     my ($dir, $product, $bug_resolution, $bug_status, $removed) = @_;
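The deleted calculate_dupes() above was the dbm-file half of the old report: it read the duplicates table, then repeatedly folded the count of each bug that is itself a duplicate onto its target until nothing changed. A small self-contained sketch of that fixed-point collapse, on hypothetical toy data (not part of the patch):

#!/usr/bin/perl
# Toy illustration of the fixed-point collapse the removed calculate_dupes()
# performed (hypothetical data, not part of the patch).
use strict;
use warnings;

# Bug 3 duplicates bug 2, bug 2 duplicates bug 1, bug 5 duplicates bug 4.
my %dupes = (3 => 2, 2 => 1, 5 => 4);

# Count the direct duplicates of each target bug.
my %count;
$count{$_}++ foreach values %dupes;          # 1 => 1, 2 => 1, 4 => 1

# Push counts up each dupe chain until nothing moves.
my $changed = 1;
while ($changed) {
    $changed = 0;
    foreach my $key (keys %count) {
        # Only move a count if this bug is itself a dupe and its target
        # already has a count (the same cycle guard the original used).
        if (defined $dupes{$key} && $count{$key} > 0
            && ($count{$dupes{$key}} || 0) != 0)
        {
            $count{$dupes{$key}} += $count{$key};
            $count{$key} = 0;
            $changed = 1;
        }
    }
}

# Drop entries whose count was moved away entirely.
delete $count{$_} foreach grep { !$count{$_} } keys %count;

# Prints: bug 1 has 2 dupes (bugs 2 and 3), bug 4 has 1 dupe (bug 5).
print "$_ => $count{$_}\n" foreach sort { $a <=> $b } keys %count;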
duplicates.cgi

@@ -18,15 +18,11 @@
 # Copyright (C) 1998 Netscape Communications Corporation. All
 # Rights Reserved.
 #
-# Contributor(s): Gervase Markham <gerv@gerv.net>
-#
-# Generates mostfreq list from data collected by collectstats.pl.
+# Contributor(s):
+#   Gervase Markham <gerv@gerv.net>
+#   Max Kanat-Alexander <mkanat@bugzilla.org>

 use strict;

-use AnyDBM_File;
-
 use lib qw(. lib);

 use Bugzilla;
@@ -34,28 +30,52 @@ use Bugzilla::Constants;
 use Bugzilla::Util;
 use Bugzilla::Error;
 use Bugzilla::Search;
 use Bugzilla::Field;
 use Bugzilla::Product;

+###############
+# Subroutines #
+###############
+
+# $counts is a count of exactly how many direct duplicates there are for
+# each bug we're considering. $dups is a map of duplicates, from one
+# bug_id to another. We go through the duplicates map ($dups) and if one bug
+# in $count is a duplicate of another bug in $count, we add their counts
+# together under the target bug.
+sub add_indirect_dups {
+    my ($counts, $dups) = @_;
+
+    foreach my $add_from (keys %$dups) {
+        my $add_to     = walk_dup_chain($dups, $add_from);
+        my $add_amount = delete $counts->{$add_from} || 0;
+        $counts->{$add_to} += $add_amount;
+    }
+}
+
+sub walk_dup_chain {
+    my ($dups, $from_id) = @_;
+    my $to_id = $dups->{$from_id};
+    while (my $bug_id = $dups->{$to_id}) {
+        last if $bug_id == $from_id; # avoid duplicate loops
+        $to_id = $bug_id;
+    }
+    # Optimize for future calls to add_indirect_dups.
+    $dups->{$from_id} = $to_id;
+    return $to_id;
+}
+
+###############
+# Main Script #
+###############
+
 my $cgi = Bugzilla->cgi;
 my $template = Bugzilla->template;
 my $vars = {};

-# collectstats.pl uses duplicates.cgi to generate the RDF duplicates stats.
-# However, this conflicts with requirelogin if it's enabled; so we make
-# logging-in optional if we are running from the command line.
-if ($::ENV{'GATEWAY_INTERFACE'} eq "cmdline") {
-    Bugzilla->login(LOGIN_OPTIONAL);
-}
-else {
-    Bugzilla->login();
-}
+Bugzilla->login();

 my $dbh = Bugzilla->switch_to_shadow_db();

-my %dbmcount;
-my %count;
-my %before;
-
 # Get params from URL
 sub formvalue {
     my ($name, $default) = (@_);
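A quick illustration of what these two new helpers do, on hypothetical data and assuming the subroutines above are in scope: counts attached to bugs in the middle of a duplicate chain are folded onto the bug at the end of the chain, and the chain map is flattened along the way.

# Bug 30 duplicates bug 20, which in turn duplicates bug 10.
my %dups   = (30 => 20, 20 => 10);
# Direct dupe counts per bug, as the GROUP BY query further down would return.
my %counts = (10 => 4, 20 => 2, 30 => 1);

add_indirect_dups(\%counts, \%dups);

# %counts is now (10 => 7): the counts for 20 and 30 were rolled into bug 10.
# walk_dup_chain() also rewrote $dups{30} to point straight at 10.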
@@ -70,6 +90,10 @@ my $reverse = formvalue("reverse") ? 1 : 0;
 my @query_products = $cgi->param('product');
 my $sortvisible = formvalue("sortvisible");
 my @buglist = (split(/[:,]/, formvalue("bug_id")));
+detaint_natural($_) foreach @buglist;
+# If we got any non-numeric items, they will now be undef. Remove them from
+# the list.
+@buglist = grep($_, @buglist);

 # Make sure all products are valid.
 foreach my $p (@query_products) {
@@ -79,54 +103,6 @@ foreach my $p (@query_products) {
 # Small backwards-compatibility hack, dated 2002-04-10.
 $sortby = "count" if $sortby eq "dup_count";

-# Open today's record of dupes
-my $today = days_ago(0);
-my $yesterday = days_ago(1);
-
-# We don't know the exact file name, because the extension depends on the
-# underlying dbm library, which could be anything. We can't glob, because
-# perl < 5.6 considers if (<*>) { ... } to be tainted
-# Instead, just check the return value for today's data and yesterday's,
-# and ignore file not found errors
-use Errno;
-use Fcntl;
-my $datadir = bz_locations()->{'datadir'};
-if (!tie(%dbmcount, 'AnyDBM_File', "$datadir/duplicates/dupes$today",
-         O_RDONLY, 0644)) {
-    if ($!{ENOENT}) {
-        if (!tie(%dbmcount, 'AnyDBM_File', "$datadir/duplicates/dupes$yesterday",
-                 O_RDONLY, 0644)) {
-            my $vars = { today => $today };
-            if ($!{ENOENT}) {
-                ThrowUserError("no_dupe_stats", $vars);
-            }
-            else {
-                $vars->{'error_msg'} = $!;
-                ThrowUserError("no_dupe_stats_error_yesterday", $vars);
-            }
-        }
-    }
-    else {
-        ThrowUserError("no_dupe_stats_error_today", { error_msg => $! });
-    }
-}
-
-# Copy hash (so we don't mess up the on-disk file when we remove entries)
-%count = %dbmcount;
-
-# Remove all those dupes under the threshold parameter.
-# We do this, before the sorting, for performance reasons.
-my $threshold = Bugzilla->params->{"mostfreqthreshold"};
-while (my ($key, $value) = each %count) {
-    delete $count{$key} if ($value < $threshold);
-
-    # If there's a buglist, restrict the bugs to that list.
-    delete $count{$key}
-        if $sortvisible && (lsearch(\@buglist, $key) == -1);
-}

 my $origmaxrows = $maxrows;
 detaint_natural($maxrows)
   || ThrowUserError("invalid_maxrows", { maxrows => $origmaxrows });
@@ -136,34 +112,45 @@ detaint_natural($changedsince)
   || ThrowUserError("invalid_changedsince",
                     { changedsince => $origchangedsince });

-# Try and open the database from "changedsince" days ago
-my $dobefore = 0;
-my %delta;
-my $whenever = days_ago($changedsince);
-if (!tie(%before, 'AnyDBM_File', "$datadir/duplicates/dupes$whenever",
-         O_RDONLY, 0644)) {
-    # Ignore file not found errors
-    if (!$!{ENOENT}) {
-        ThrowUserError("no_dupe_stats_error_whenever",
-                       { error_msg => $!,
-                         changedsince => $changedsince,
-                         whenever => $whenever,
-                       });
-    }
-}
-else {
-    # Calculate the deltas
-    ($delta{$_} = $count{$_} - ($before{$_} || 0)) foreach (keys(%count));
-    $dobefore = 1;
-}
+my %total_dups = @{ $dbh->selectcol_arrayref(
+    "SELECT dupe_of, COUNT(dupe)
+       FROM duplicates
+   GROUP BY dupe_of", {Columns => [1, 2]}) };
+
+my %dupe_relation = @{ $dbh->selectcol_arrayref(
+    "SELECT dupe, dupe_of FROM duplicates
+      WHERE dupe IN (SELECT dupe_of FROM duplicates)",
+    {Columns => [1, 2]}) };
+add_indirect_dups(\%total_dups, \%dupe_relation);
+
+my $reso_field_id = get_field_id('resolution');
+my %since_dups = @{ $dbh->selectcol_arrayref(
+    "SELECT dupe_of, COUNT(dupe)
+       FROM duplicates INNER JOIN bugs_activity
+                       ON bugs_activity.bug_id = duplicates.dupe
+      WHERE added = 'DUPLICATE' AND fieldid = ? AND "
+        . $dbh->sql_to_days('bug_when') . " >= ("
+        . $dbh->sql_to_days('NOW()') . " - ?)
+   GROUP BY dupe_of", {Columns => [1, 2]},
+    $reso_field_id, $changedsince) };
+add_indirect_dups(\%since_dups, \%dupe_relation);
+
+my (@bugs, @bug_ids);
+foreach my $id (keys %total_dups) {
+    if ($total_dups{$id} < Bugzilla->params->{'mostfreqthreshold'}) {
+        delete $total_dups{$id};
+        next;
+    }
+    if ($sortvisible and @buglist and !grep($_ == $id, @buglist)) {
+        delete $total_dups{$id};
+    }
+}

-my @bugs;
-my @bug_ids;
-
-if (scalar(%count)) {
+if (scalar %total_dups) {
     # use Bugzilla::Search so that we get the security checking
-    my $params = new Bugzilla::CGI({ 'bug_id' => [keys %count] });
+    my $params = new Bugzilla::CGI({ 'bug_id' => [keys %total_dups] });

     if ($openonly) {
         $params->param('resolution', '---');
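The {Columns => [1, 2]} attribute used above is a stock DBI feature: selectcol_arrayref() then returns both columns of every row as one flat list, so assigning the result to a hash produces a key/value map in a single step. A minimal sketch of the idiom, reusing the first query from the patch inside the usual Bugzilla context:

use Bugzilla;

# Build a bug_id => direct-dupe-count map straight from the duplicates table,
# using the same selectcol_arrayref() idiom as the code above.
my $dbh = Bugzilla->dbh;
my %direct_dups = @{ $dbh->selectcol_arrayref(
    "SELECT dupe_of, COUNT(dupe) FROM duplicates GROUP BY dupe_of",
    { Columns => [1, 2] }) };

printf "bug %d has %d direct duplicates\n", $_, $direct_dups{$_}
    foreach sort { $a <=> $b } keys %direct_dups;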
@@ -221,8 +208,8 @@ if (scalar(%count)) {
         $short_desc, $bug_status, $resolution) = @$result;

         push (@bugs, { id => $id,
-                       count => $count{$id},
-                       delta => $delta{$id},
+                       count => $total_dups{$id},
+                       delta => $since_dups{$id} || 0,
                        component => $component,
                        bug_severity => $bug_severity,
                        op_sys => $op_sys,
@@ -237,7 +224,6 @@ if (scalar(%count)) {
 $vars->{'bugs'} = \@bugs;
 $vars->{'bug_ids'} = \@bug_ids;
-$vars->{'dobefore'} = $dobefore;
 $vars->{'sortby'} = $sortby;
 $vars->{'sortvisible'} = $sortvisible;
 $vars->{'changedsince'} = $changedsince;
@@ -264,9 +250,3 @@ print $cgi->header(
 # Generate and return the UI (HTML page) from the appropriate template.
 $template->process($format->{'template'}, $vars)
   || ThrowTemplateError($template->error());
-
-sub days_ago {
-    my ($dom, $mon, $year) =
-        (localtime(time - ($_[0] * 24 * 60 * 60)))[3, 4, 5];
-    return sprintf "%04d-%02d-%02d", 1900 + $year, ++$mon, $dom;
-}
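The removed days_ago() helper produced a "YYYY-MM-DD" string for a date N days in the past; that cutoff now lives in SQL via sql_to_days(). For reference, an equivalent helper written with the core POSIX module would look like this (a sketch, not part of the patch):

use strict;
use warnings;
use POSIX qw(strftime);

# Same output as the removed days_ago(): "YYYY-MM-DD" for $days days ago.
sub days_ago_strftime {
    my ($days) = @_;
    return strftime('%Y-%m-%d', localtime(time - $days * 24 * 60 * 60));
}

print days_ago_strftime(7), "\n";    # e.g. the date one week ago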
template/en/default/global/user-error.html.tmpl

@@ -1168,31 +1168,6 @@
     [% title = "No Tag Selected" %]
     You didn't select a tag from which to remove [% terms.bugs %].

-  [% ELSIF error == "no_dupe_stats" %]
-    [% title = "Cannot Find Duplicate Statistics" %]
-    [% admindocslinks = {'extraconfig.html' => 'Setting up the collecstats.pl job'} %]
-    There are no duplicate statistics for today ([% today FILTER html %])
-    or yesterday.
-
-  [% ELSIF error == "no_dupe_stats_error_today" %]
-    [% title = "Error Reading Today's Dupes File" %]
-    [% admindocslinks = {'extraconfig.html' => 'Setting up the collecstats.pl job'} %]
-    An error occurred opening today's dupes file: [% error_msg FILTER html %].
-
-  [% ELSIF error == "no_dupe_stats_error_whenever" %]
-    [% title = "Error Reading Previous Dupes File" %]
-    [% admindocslinks = {'extraconfig.html' => 'Setting up the collecstats.pl job'} %]
-    An error occurred opening [% changedsince FILTER html %] days ago
-    ([% whenever FILTER html %])'s dupes file:
-    [% error_msg FILTER html %].
-
-  [% ELSIF error == "no_dupe_stats_error_yesterday" %]
-    [% title = "Error Reading Yesterday's Dupes File" %]
-    [% admindocslinks = {'extraconfig.html' => 'Setting up the collecstats.pl job'} %]
-    There are no duplicate statistics for today ([% today FILTER html %]),
-    and an error occurred opening yesterday's dupes file: [% error_msg FILTER html %].
-
   [% ELSIF error == "no_initial_bug_status" %]
     [% title = "No Initial $terms.Bug Status" %]
     No [% terms.bug %] status is available on [% terms.bug %] creation.
template/en/default/reports/duplicates-table.html.tmpl

@@ -61,9 +61,6 @@
      { name => "short_desc", description => "Summary" } ]
 %]

-  [%# Small hack to keep delta column out if we don't need it %]
-  [% NEXT IF column.name == "delta" AND NOT dobefore %]
-
   <th>
     [% bug_ids_string = bug_ids.join(',') %]
     <a href="duplicates.cgi?sortby=[% column.name %]
@@ -127,9 +124,7 @@
       </center>
     </td>

-    [% IF dobefore %]
       <td><center>[% bug.delta %]</center></td>
-    [% END %]

     <td>[% bug.component FILTER html %]</td>
     <td><center>[% bug.bug_severity FILTER html %]</center></td>