Bug 347439: Implement support for referential integrity in Bugzilla::DB::Schema…

Bug 347439: Implement support for referential integrity in Bugzilla::DB::Schema and implement it on profiles_activity Patch By Max Kanat-Alexander <mkanat@bugzilla.org> (module owner) a=mkanat

Bug 347439: Implement support for referential integrity in Bugzilla::DB::Schema…
3bc6ea42 · mkanat%bugzilla.org · 3a943fe0 · 3bc6ea42 · 3bc6ea42 · 3bc6ea42
Commit 3bc6ea42 authored Mar 10, 2007 by mkanat%bugzilla.org
Hide whitespace changes
Inline Side-by-side

Showing with 297 additions and 29 deletions

DB.pm Bugzilla/DB.pm +3 -2

Schema.pm Bugzilla/DB/Schema.pm +218 -27

Mysql.pm Bugzilla/DB/Schema/Mysql.pm +29 -0

DB.pm Bugzilla/Install/DB.pm +47 -0

No files found.
--- a/Bugzilla/DB.pm
+++ b/Bugzilla/DB.pm
@@ -515,10 +515,11 @@ sub bz_alter_column_raw {
    my @statements = $self->_bz_real_schema->get_alter_column_ddl(
        $table, $name, $new_def,
        defined $set_nulls_to ? $self->quote($set_nulls_to) : undef);
-    my $new_ddl = $self->_bz_schema->get_type_ddl($new_def);
+    my $new_ddl = $self->_bz_schema->get_display_ddl($table, $name, $new_def);
    print "Updating column $name in table $table ...\n";
    if (defined $current_def) {
-        my $old_ddl = $self->_bz_schema->get_type_ddl($current_def);
+        my $old_ddl = $self->_bz_schema->get_display_ddl($table, $name, 
+                                                         $current_def);
        print "Old: $old_ddl\n";
    }
    print "New: $new_ddl\n";

--- a/Bugzilla/DB/Schema.pm
+++ b/Bugzilla/DB/Schema.pm
@@ -96,7 +96,21 @@ more about how this integrates into the rest of Bugzilla.
 =head1 CONSTANTS
-=over 4
+=over
+=item C<TABLES_FIRST>
+Because of L</"Referential Integrity">, certain tables must be created
+before other tables. L</ABSTRACT_SCHEMA> can't specify this, because
+it's a hash. So we specify it here. When calling C<get_table_list()>,
+these tables will be returned before the other tables.
+=cut
+use constant TABLES_FIRST => qw(
+    fielddefs
+    profiles
+);
 =item C<SCHEMA_VERSION>
@@ -157,6 +171,49 @@ which can be used to specify the type of index such as UNIQUE or FULLTEXT.
 =back
+=head2 Referential Integrity
+Bugzilla::DB::Schema supports "foreign keys", a way of saying
+that "Column X may only contain values from Column Y in Table Z".
+For example, in Bugzilla, bugs.resolution should only contain
+values from the resolution.values field.
+It does this by adding an additional item to a column, called C<REFERENCES>.
+This is a hash with the following members:
+=over
+=item C<TABLE>
+The table the foreign key points at
+=item C<COLUMN>
+The column pointed at in that table.
+=item C<DELETE>
+What to do if the row in the parent table is deleted. Choices are
+C<RESTRICT> or C<CASCADE>. 
+C<RESTRICT> means the deletion of the row in the parent table will 
+be forbidden by the database if there is a row in I<this> table that 
+still refers to it. This is the default, if you don't specify
+C<DELETE>.
+C<CASCADE> means that this row will be deleted along with that row.
+=item C<UPDATE>
+What to do if the value in the parent table is updated. It has the
+same choices as L</DELETE>, except it defaults to C<CASCADE>, which means
+"also update this column in this table."
+=back
+Note that not all our supported databases actually enforce C<RESTRICT>
+and C<CASCADE>, so don't depend on them.
 =cut
 use constant SCHEMA_VERSION  => '2.00';
@@ -645,10 +702,17 @@ use constant ABSTRACT_SCHEMA => {
    profiles_activity => {
        FIELDS => [
-            userid        => {TYPE => 'INT3', NOTNULL => 1},
+            userid        => {TYPE => 'INT3', NOTNULL => 1,
-            who           => {TYPE => 'INT3', NOTNULL => 1},
+                              REFERENCES => {TABLE  => 'profiles', 
+                                             COLUMN => 'userid',
+                                             DELETE => 'CASCADE'}},
+            who           => {TYPE => 'INT3', NOTNULL => 1,
+                              REFERENCES => {TABLE  => 'profiles',
+                                             COLUMN => 'userid'}},
            profiles_when => {TYPE => 'DATETIME', NOTNULL => 1},
-            fieldid       => {TYPE => 'INT3', NOTNULL => 1},
+            fieldid       => {TYPE => 'INT3', NOTNULL => 1,
+                              REFERENCES => {TABLE  => 'fielddefs',
+                                             COLUMN => 'id'}},
            oldvalue      => {TYPE => 'TINYTEXT'},
            newvalue      => {TYPE => 'TINYTEXT'},
        ],
@@ -1281,23 +1345,34 @@ sub get_type_ddl {
 =item C<get_type_ddl>
- Description: Public method to convert abstract (database-generic) field
+=over
-              specifiers to database-specific data types suitable for use
-              in a C<CREATE TABLE> or C<ALTER TABLE> SQL statment. If no
+=item B<Description>
-              database-specific field type has been defined for the given
-              field type, then it will just return the same field type.
+Public method to convert abstract (database-generic) field specifiers to
- Parameters:  a hash or a reference to a hash of a field containing the
+database-specific data types suitable for use in a C<CREATE TABLE> or 
-              following keys: C<TYPE> (required), C<NOTNULL> (optional),
+C<ALTER TABLE> SQL statment. If no database-specific field type has been
-              C<DEFAULT> (optional), C<PRIMARYKEY> (optional), C<REFERENCES>
+defined for the given field type, then it will just return the same field type.
-              (optional)
- Returns:     a DDL string suitable for describing a field in a
+=item B<Parameters>
-              C<CREATE TABLE> or C<ALTER TABLE> SQL statement
+=over
+=item C<$def> - A reference to a hash of a field containing the following keys:
+C<TYPE> (required), C<NOTNULL> (optional), C<DEFAULT> (optional), 
+C<PRIMARYKEY> (optional), C<REFERENCES> (optional)
+=back
+-item B<Returns>
+A DDL string suitable for describing a field in a C<CREATE TABLE> or 
+C<ALTER TABLE> SQL statement
 =cut
    my $self = shift;
    my $finfo = (@_ == 1 && ref($_[0]) eq 'HASH') ? $_[0] : { @_ };
    my $type = $finfo->{TYPE};
    die "A valid TYPE was not specified for this column." unless ($type);
@@ -1308,19 +1383,91 @@ sub get_type_ddl {
        $default = $self->{db_specific}->{$default};
    }
-    my $fkref = $self->{enable_references} ? $finfo->{REFERENCES} : undef;
    my $type_ddl = $self->convert_type($type);
    # DEFAULT attribute must appear before any column constraints
    # (e.g., NOT NULL), for Oracle
    $type_ddl .= " DEFAULT $default" if (defined($default));
    $type_ddl .= " NOT NULL" if ($finfo->{NOTNULL});
    $type_ddl .= " PRIMARY KEY" if ($finfo->{PRIMARYKEY});
-    $type_ddl .= "\n\t\t\t\tREFERENCES $fkref" if $fkref;
    return($type_ddl);
 } #eosub--get_type_ddl
+# Used if you want to display the DDL to the user, as opposed to actually
+# use it in the database.
+sub get_display_ddl {
+    my ($self, $table, $column, $def) = @_;
+    my $ddl = $self->get_type_ddl($def);
+    if ($def->{REFERENCES}) {
+        $ddl .= $self->get_fk_ddl($table, $column, $def->{REFERENCES});
+    }
+    return $ddl;
+}
+sub get_fk_ddl {
+=item C<_get_fk_ddl>
+=over
+=item B<Description>
+Protected method. Translates the C<REFERENCES> item of a column into SQL.
+=item B<Params>
+=over
+=item C<$table>  - The name of the table the reference is from.
+=item C<$column> - The name of the column the reference is from
+=item C<$references> - The C<REFERENCES> hashref from a column.
+=back
+Returns:     SQL for to define the foreign key, or an empty string
+             if C<$references> is undefined.
+=cut
+    my ($self, $table, $column, $references) = @_;
+    return "" if !$references;
+    my $update    = $references->{UPDATE} || 'CASCADE';
+    my $delete    = $references->{DELETE} || 'RESTRICT';
+    my $to_table  = $references->{TABLE}  || die "No table in reference";
+    my $to_column = $references->{COLUMN} || die "No column in reference";
+    my $fk_name   = $self->_get_fk_name($table, $column, $references);
+    return "\n     CONSTRAINT $fk_name FOREIGN KEY ($column)\n"
+         . "     REFERENCES $to_table($to_column)\n"
+         . "      ON UPDATE $update ON DELETE $delete";
+}
+# Generates a name for a Foreign Key. It's separate from get_fk_ddl
+# so that certain databases can override it (for shorter identifiers or
+# other reasons).
+sub _get_fk_name {
+    my ($self, $table, $column, $references) = @_;
+    my $to_table  = $references->{TABLE}; 
+    my $to_column = $references->{COLUMN};
+    return "fk_${table}_${column}_${to_table}_${to_column}";
+}
+sub _get_add_fk_sql {
+    my ($self, $table, $column, $new_def) = @_;
+    my $fk_string = $self->get_fk_ddl($table, $column, $new_def->{REFERENCES});
+    return ("ALTER TABLE $table ADD $fk_string");
+}
+sub _get_drop_fk_sql { 
+    my ($self, $table, $column, $old_def) = @_;
+    my $fk_name = $self->_get_fk_name($table, $column, $old_def->{REFERENCES});
+    return ("ALTER TABLE $table DROP CONSTRAINT $fk_name");
+}
 sub convert_type {
 =item C<convert_type>
@@ -1363,17 +1510,27 @@ sub get_table_list {
 Description: Public method for discovering what tables should exist in the
              Bugzilla database.
 Parameters:  none
- Returns:     an array of table names
+ Returns:     An array of table names. The tables specified 
+              in L</TABLES_FIRST> will come first, in order. The
+              rest of the tables will be in random order.
 =cut
    my $self = shift;
-    return(sort(keys %{ $self->{schema} }));
+    my %schema = %{$self->{schema}};
+    my @tables;
+    foreach my $table (TABLES_FIRST) {
+        push(@tables, $table);
+        delete $schema{$table};
+    }
+    push(@tables, keys %schema);
+    return @tables;   
+}
-} #eosub--get_table_list
-#--------------------------------------------------------------------------
 sub get_table_columns {
 =item C<get_table_columns>
@@ -1440,6 +1597,14 @@ sub get_table_ddl {
        push(@ddl, $index_sql) if $index_sql;
    }
+    my %fields = @{$self->{schema}{$table}{FIELDS}};
+    foreach my $col (keys %fields) {
+        my $def = $fields{$col};
+        if ($def->{REFERENCES}) {
+            push(@ddl, $self->_get_add_fk_sql($table, $col, $def));
+        }
+    }
    push(@ddl, @{ $self->{schema}{$table}{DB_EXTRAS} })
      if (ref($self->{schema}{$table}{DB_EXTRAS}));
@@ -1653,6 +1818,15 @@ sub get_alter_column_ddl {
        push(@statements, "ALTER TABLE $table DROP PRIMARY KEY");
    }
+    # If we went from not having an FK to having an FK
+    # XXX For right now, you can't change an FK's actual definition.
+    if (!$old_def->{REFERENCES} && $new_def->{REFERENCES}) {
+        push(@statements, $self->_get_add_fk_sql($table, $column, $new_def));
+    }
+    elsif ($old_def->{REFERENCES} && !$new_def->{REFERENCES}) {
+        push(@statements, $self->_get_drop_fk_sql($table, $column, $old_def));
+    }
    return @statements;
 }
@@ -2040,14 +2214,31 @@ sub columns_equal {
    $col_one->{TYPE} = uc($col_one->{TYPE});
    $col_two->{TYPE} = uc($col_two->{TYPE});
-    my @col_one_array = %$col_one;
+    # It doesn't work to compare the two REFERENCES items, because
-    my @col_two_array = %$col_two;
+    # they look like 'HASH(0xaf3c434)' to diff_arrays--they'll never
+    # be equal.
+    my %col_one_def = %$col_one;
+    delete $col_one_def{REFERENCES};
+    my %col_two_def = %$col_two;
+    delete $col_two_def{REFERENCES};
+    my @col_one_array = %col_one_def;
+    my @col_two_array = %col_two_def;
    my ($removed, $added) = diff_arrays(\@col_one_array, \@col_two_array);
-    # If there are no differences between the arrays,
+    # If there are no differences between the arrays, then they are equal.
-    # then they are equal.
+    my $defs_identical = !scalar(@$removed) && !scalar(@$added);
-    return !scalar(@$removed) && !scalar(@$added) ? 1 : 0;
+    # If the basic definitions are identical, we still want to check
+    # if the two foreign key definitions are different.
+    my @fk_array_one = %{$col_one->{REFERENCES} || {}};
+    my @fk_array_two = %{$col_two->{REFERENCES} || {}};
+    my ($fk_removed, $fk_added) = 
+        diff_arrays(\@fk_array_one, \@fk_array_two);
+    my $fk_identical = !scalar(@$fk_removed) && !scalar(@$fk_added);
+    return $defs_identical && $fk_identical ? 1 : 0;
 }

--- a/Bugzilla/DB/Schema/Mysql.pm
+++ b/Bugzilla/DB/Schema/Mysql.pm
@@ -176,9 +176,17 @@ sub get_alter_column_ddl {
        # keys are not allowed.
        delete $new_def_copy{PRIMARYKEY};
    }
+    # CHANGE COLUMN doesn't support REFERENCES
+    delete $new_def_copy{REFERENCES};
    my $new_ddl = $self->get_type_ddl(\%new_def_copy);
    my @statements;
+    # Drop the FK if the new definition doesn't have one.
+    if ($old_def->{REFERENCES} && !$new_def->{REFERENCES}) {
+        push(@statements, $self->_get_drop_fk_sql($table, $column, $old_def));
+    }
    push(@statements, "UPDATE $table SET $column = $set_nulls_to
                        WHERE $column IS NULL") if defined $set_nulls_to;
    push(@statements, "ALTER TABLE $table CHANGE COLUMN 
@@ -187,9 +195,30 @@ sub get_alter_column_ddl {
        # Dropping a PRIMARY KEY needs an explicit DROP PRIMARY KEY
        push(@statements, "ALTER TABLE $table DROP PRIMARY KEY");
    }
+    # Add the FK if the new definition has one and the old definition doesn't.
+    if ($new_def->{REFERENCES} && !$old_def->{REFERENCES}) {
+        push(@statements, $self->_get_add_fk_sql($table, $column, $new_def));
+    }
    return @statements;
 }
+sub _get_drop_fk_sql {
+    my ($self, $table, $column, $old_def) = @_;
+    my $fk_name = $self->_get_fk_name($table, $column, $old_def->{REFERENCES});
+    my @sql = ("ALTER TABLE $table DROP FOREIGN KEY $fk_name");
+    my $dbh = Bugzilla->dbh;
+    # MySQL requires, and will create, an index on any column with
+    # an FK. It will name it after the fk, which we never do.
+    # So if there's an index named after the fk, we also have to delete it. 
+    if ($dbh->bz_index_info_real($table, $fk_name)) {
+        push(@sql, $self->get_drop_index_ddl($table, $fk_name));
+    }
+    return @sql;
+}
 sub get_drop_index_ddl {
    my ($self, $table, $name) = @_;
    return ("DROP INDEX \`$name\` ON $table");

--- a/Bugzilla/Install/DB.pm
+++ b/Bugzilla/Install/DB.pm
@@ -49,6 +49,39 @@ sub indicate_progress {
    }
 }
+# This is used before adding a foreign key to a column, to make sure
+# that the database won't fail adding the key.
+sub check_references {
+    my ($table, $column, $foreign_table, $foreign_column) = @_;
+    my $dbh = Bugzilla->dbh;
+    my $bad_values = $dbh->selectcol_arrayref(
+        "SELECT DISTINCT $table.$column 
+           FROM $table LEFT JOIN $foreign_table
+                ON $table.$column = $foreign_table.$foreign_column
+          WHERE $foreign_table.$foreign_column IS NULL");
+    if (@$bad_values) {
+        my $values = join(', ', @$bad_values);
+        print <<EOT;
+ERROR: There are invalid values for the $column column in the $table 
+table. (These values do not exist in the $foreign_table table, in the 
+$foreign_column column.)
+Before continuing with checksetup, you will need to fix these values,
+either by deleting these rows from the database, or changing the values
+of $column in $table to point to valid values in $foreign_table.$foreign_column.
+The bad values from the $table.$column column are:
+$values
+EOT
+        # I just picked a number above 2, to be considered "abnormal exit."
+        exit 3;
+    }
+}
 # NOTE: This is NOT the function for general table updates. See
 # update_table_definitions for that. This is only for the fielddefs table.
 sub update_fielddefs_definition {
@@ -519,6 +552,20 @@ sub update_table_definitions {
    $dbh->bz_add_column('milestones', 'id',
        {TYPE => 'MEDIUMSERIAL', NOTNULL => 1, PRIMARYKEY => 1});
+    # Referential Integrity begins here
+    check_references('profiles_activity', 'userid', 'profiles', 'userid');
+    $dbh->bz_alter_column('profiles_activity', 'userid',
+        {TYPE => 'INT3', NOTNULL => 1,  REFERENCES =>
+        {TABLE  => 'profiles', COLUMN => 'userid', DELETE => 'CASCADE'}});
+    check_references('profiles_activity', 'who', 'profiles', 'userid');
+    $dbh->bz_alter_column('profiles_activity', 'who',
+        {TYPE => 'INT3', NOTNULL => 1, REFERENCES =>
+        {TABLE  => 'profiles', COLUMN => 'userid'}});
+    check_references('profiles_activity', 'fieldid', 'fielddefs', 'id');
+    $dbh->bz_alter_column('profiles_activity', 'fieldid',
+        {TYPE => 'INT3', NOTNULL => 1, REFERENCES =>
+        {TABLE  => 'fielddefs', COLUMN => 'id'}});
    ################################################################
    # New --TABLE-- changes should go *** A B O V E *** this point #
    ################################################################