#!/usr/local/bin/perl -w
########################################################################
#  Copyright (c) 2000 by cisco Systems, Inc.
#  All Rights Reserved
#
########################################################################

=head1 check_cvs.pl

    Script to check the integrity of the Repository

=head1 SYNOPSIS

    check_cvs.pl

=head1 DESCRIPTION

    This script will search through a repository and determine if
    any of the files in it are corrupted.

    Please do not run this script inside of the repository itself,
    it will cause it too fail.

=head1 OPTIONS

    There are no options.

=head1 EXAMPLES

    setenv CVSROOT /release/111/cvs

    # To see more verbose output
    setenv CVSDEBUGEDIT 1

    check_cvs.pl

=head1 SEE ALSO

    None

=cut

######################################################################
#                    MODULES                                         #
######################################################################
use strict;

use File::Find;
use File::Basename;
use File::Path;
use Cwd;

######################################################################
#                    GLOBALS                                         #
######################################################################

my @list_of_broken_files;
my @extra_files;
my $verbose = 0;

my $total_revisions;
my $total_interesting_revisions;
my $total_files;

######################################################################
#                    SUBROUTINES                                     #
######################################################################

######################################################################
#
#    NAME :
#      main
#
#    PURPOSE :
#      To search the repository for broken files
#
#    PARAMETERS :
#      NONE
#
#    GLOBALS :
#      $ENV{ CVSROOT }       - The CVS repository to search through
#      $ENV{ CVSDEBUGEDIT }  - Turn on Debugging.
#      @list_of_broken_files - The list of files that need to
#                              be fixed.
#      $verbose              - is verbose mode on?
#      $total_revisions      - The number of revisions considered
#      $total_interesting_revisions - The number of revisions used
#      $total_files          - The total number of files looked at.
#
#    RETURNS :
#      A list of broken files
#
#    COMMENTS :
#      Do not run this script inside the repository.  Choose
#      a nice safe spot( like /tmp ) outside of the repository.
#
######################################################################
my $directory_to_look_at;

$total_revisions = 0;
$total_interesting_revisions = 0;
$total_files = 0;

if( !exists( $ENV{ CVSROOT } ) )
{
    die( "The script should be run with the CVSROOT environment variable set" );
}

if( exists( $ENV{ CVSDEBUGEDIT } ) )
{
    $verbose = 1;
    print( "Verbose Mode Turned On\n" );
}

$directory_to_look_at = $ENV{ CVSROOT };
if( -l $directory_to_look_at )
{
    $directory_to_look_at = readlink( $directory_to_look_at );
}

print( "Processing: $directory_to_look_at\n" ) if( $verbose );
find( \&process_file, $directory_to_look_at );

my $num_files = @list_of_broken_files;
print( "List of corrupted files\n" ) if( $num_files > 0 );
foreach my $broken ( @list_of_broken_files )
{
    print( "**** File: $broken\n" );
}

$num_files = @extra_files;
print( "List of Files That Don't belong in Repository:\n" ) if( $num_files > 0 );
foreach my $extra ( @extra_files )
{
    print( "**** File: $extra\n" );
}
print( "Total Files: $total_files\n" );
print( "Total Revisions: $total_revisions Interesting Revisions: $total_interesting_revisions\n" );

######################################################################
#
#    NAME :
#      process_file
#
#    PURPOSE :
#      This function is called by the find function, it's purpose
#      is to decide if it is important to look at a file or not.
#      We only care about files that have the ,v at the end.
#
#    PARAMETERS :
#      NONE
#
#    GLOBALS :
#      $ENV{ CVSROOT } - The CVS repository to search through
#
#    RETURNS :
#      NONE
#
#    COMMENTS :
#      NONE
#
######################################################################
sub process_file
{
    my $path = $File::Find::name;

    $total_files += 1;
    $path =~ s/^$directory_to_look_at\///;

    print( "\tProcessing File: $path\n" ) if( $verbose );
    if( $path =~ /,v$/ )
    {
        $path =~ s/,v$//;
        look_at_cvs_file( $path );
    }
    elsif( ! -d $File::Find::name )
    {
        my $save = 0;
        my @ignore_files = ( 'CVS\/fileattr$',
                             '^CVSROOT\/modules',
                             '^CVSROOT\/loginfo',
                             '^CVSROOT\/rcsinfo',
                             '^CVSROOT\/editinfo',
                             '^CVSROOT\/commitinfo',
                             '^CVSROOT\/taginfo',
                             '^CVSROOT\/notify',
                             '^CVSROOT\/checkoutlist',
                             '^CVSROOT\/cvswrappers',
                             '^CVSROOT\/val-tags',
                             '^RestrictedToBugs',
                             '^AdminLock',
                             '^CHANGES',
                             '^SyncUsers',
                             '^UnAdminLock' );

        foreach my $ignore ( @ignore_files )
        {
            if( $path =~ /$ignore/ )
            {
                $save = 1;
                last;
            }
        }

        if( !$save )
        {
            push( @extra_files, $path );
        }
    }
}

######################################################################
#
#    NAME :
#      look_at_cvs_file
#
#    PURPOSE :
#      To decide if a file is broken or not.  The algorithm is:
#      a)  Get the revision history for the file.
#              - If that fails the file is broken, save the fact
#                and continue processing other files.
#              - If that succeeds we have a list of revisions.
#      b)  For Each revision try to retrieve that version
#              - If that fails the file is broken, save the fact
#                and continue processing other files.
#      c)  Continue on 
#
#    PARAMETERS :
#      $file - The file to look at.
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      NONE
#
#    COMMENTS :
#      We have to handle Attic files in a special manner.
#      Basically remove the Attic from the string if it
#      exists at the end of the $path variable.
#
######################################################################
sub look_at_cvs_file
{
    my( $file ) = @_;
    my( $name, $path, $suffix ) = fileparse( $file );

    if( $path =~ s/Attic\/$// )
    {
        $file = $path . $name;
    }

    my $revisions = get_history( $name );

    if( !defined( $revisions ) )
    {
        print( "\t$file is corrupted, this was determined via a cvs log command\n" ) if( $verbose );
        push( @list_of_broken_files, $file );
        return();
    }

    my @int_revisions = find_interesting_revisions( @$revisions );

    foreach my $revision ( @int_revisions )
    {
        print( "\t\tLooking at Revision: $revision\n" ) if( $verbose );
        if( !check_revision( $file, $revision ) )
        {
            print( "\t$file is corrupted in revision: $revision\n" ) if( $verbose );
            push( @list_of_broken_files, $file );
            return();
        }
    }

}

######################################################################
#
#    NAME :
#      get_history
#
#    PURPOSE :
#      To retrieve a array of revision numbers.
#
#    PARAMETERS :
#      $file - The file to retrieve the revision numbers for
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      On Success - Reference to the list of revision numbers
#      On Failure - undef.
#
#    COMMENTS :
#      The $_ is saved off because The File::find functionality
#      expects the $_ to not have been changed.
#      The -N option for the rlog command means to spit out 
#      tags or branch names.
#
######################################################################
sub get_history
{
    my( $file ) = @_;
    my @revisions;
    my $revision;

    my $save_ = $_;

    open( FILE, "rlog -N '$file' 2>&1 |" ) or die( "unable to run rlog, help" );

    while( <FILE> )
    {
        if( ( $revision ) = /^revision (.*)$/  )
        {
           push( @revisions, $revision );
        }
    }

    $_ = $save_;

    if( !close( FILE ) )
    {
        return( undef );
    }

    return( \@revisions );
}

######################################################################
#
#    NAME :
#      check_revision
#
#    PURPOSE :
#      Given a file and a revision number ensure that we can 
#      check out that file
#
#    PARAMETERS :
#      $file     - The file to look at.
#      $revision - The revision to look at.
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      If we can get the File - 1
#      If we can not get the File - 0
#
#    COMMENTS :
#      cvs command line options are as followed:
#        -l - Do not log this command in the history file.  I am
#             doing this because we will literaly be looking at 
#             thousands of files with *lots* of revisions.
#        -n - Do not run any checkout program as specified by the -o
#             option in the modules file
#        -p - Put all output to standard out.
#        -r - The revision of the file that we would like to look at.
#      Please note that cvs will return 0 for being able to successfully
#      read the file and 1 for failure to read the file.
#
######################################################################
sub check_revision
{
    my( $file, $revision ) = @_;

    my $cwd = getcwd();
    chdir( "/tmp" );

    my $ret_code = 0xffff & system( "cvs -l co -n -p -r $revision '$file' > /dev/null 2>&1" );

    chdir( $cwd );
    return( 1 ) if ( $ret_code == 0 );
    return( 0 );

    return( $ret_code );
}

######################################################################
#
#    NAME :
#      find_interesting_revisions
#
#    PURPOSE :
#      CVS stores information in a logical manner.  We only really
#      need to look at some interestin revisions.  These are:
#      The first version
#      And the last version on every branch.
#      This is because cvs stores changes descending from 
#      main line. ie suppose the last version on mainline is 1.6
#      version 1.6 of the file is stored in toto.  version 1.5
#      is stored as a diff between 1.5 and 1.6.  1.4 is stored 
#      as a diff between 1.5 and 1.4.
#      branches are stored a little differently.  They are 
#      stored in ascending order.  Suppose there is a branch
#      on 1.4 of the file.  The first branches revision number
#      would be 1.4.1.1.  This is stored as a diff between 
#      version 1.4 and 1.4.1.1.  The 1.4.1.2 version is stored
#      as a diff between 1.4.1.1 and 1.4.1.2.  Therefore
#      we are only interested in the earliest revision number
#      and the highest revision number on a branch.
#
#    PARAMETERS :
#      @revisions - The list of revisions to find interesting ones
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      @new_revisions - The list of revisions that we find interesting
#
#    COMMENTS :
#
######################################################################
sub find_interesting_revisions
{
    my( @revisions ) = @_;
    my @new_revisions;
    my %branch_revision;
    my $branch_number;
    my $branch_rev;
    my $key;
    my $value;

    START_OVER:
    foreach my $revision( @revisions )
    {
        my $start_over = 0;
        ( $branch_number, $branch_rev ) = branch_split( $revision );

        #if the number of elements in the branch is 1
        #and the new branch is less than the old branch
        if( elements_in_branch( $branch_number ) == 1 )
        {
            ( $start_over,
              %branch_revision ) = find_int_mainline_revision( $branch_number,
                                                               $branch_rev,
                                                               %branch_revision );
            next START_OVER if( $start_over );
        }

        %branch_revision = find_int_branch_revision( $branch_number,
                                                     $branch_rev,
                                                     %branch_revision );

    }

    %branch_revision = remove_duplicate_branches( %branch_revision );

    while( ( $key, $value ) = each ( %branch_revision ) )
    {
        push( @new_revisions, $key . "." . $value );
    }

    my $nrc;
    my $rc;

    $rc = @revisions;
    $nrc = @new_revisions;

    $total_revisions += $rc;
    $total_interesting_revisions += $nrc;

    print( "\t\tTotal Revisions: $rc Interesting Revisions: $nrc\n" ) if( $verbose );

    return( @new_revisions );
}

########################################################################
#
#    NAME :
#      remove_duplicate_branches
#
#    PURPOSE :
#      To remove from the list of branches that we are interested
#      in duplication that will cause cvs to check a revision multiple
#      times.  For Instance revision 1.1.1.1 should be prefered
#      to be checked over revision 1.1, as that v1.1.1.1 can
#      only be retrieved by going through v1.1.  Therefore
#      we should remove v1.1 from the list of branches that
#      are interesting.
#
#    PARAMETERS :
#      %branch_revisions - The hash of the interesting revisions
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      %branch_revisions - The hash of the modified interesting revisions
#
#    COMMENTS :
#      NONE
#
########################################################################
sub remove_duplicate_branches
{
    my( %branch_revisions ) = @_;
    my $key;
    my $value;
    my $branch_comp;
    my $branch;


  RESTART:
    {
        my @keys = keys( %branch_revisions );
        while( ( $key, $value ) = each ( %branch_revisions ) )
        {
            $branch_comp = $key . "." . $value;
            foreach $branch ( @keys )
            {
                if( $branch eq $key )
                {
                    next;
                }
                if( elements_in_branch( $branch_comp ) ==
                    elements_in_branch( $branch ) - 1 )
                {
                    if( $branch =~ /^$branch_comp/ )
                    {
                        delete( $branch_revisions{ $key } );
                        goto RESTART;
                    }
                }
            }
        }
    }

    return( %branch_revisions );
}

######################################################################
#
#    NAME :
#      find_int_branch_revision
#
#    PURPOSE :
#      To Find a interesting branch revision.
#      Algorithm:
#        If the $branch_revision exists in the interesting branch
#        hash and the new $branch_rev is less than currently saved
#        one replace it with the new $branch_rev.
#        else if the $branch_revision doesn't exist in the interesting
#        branch hash, then just store the $branch_number and $branch_rev
#
#    PARAMETERS :
#      $branch_number - The branch that we are looking at
#      $branch_rev    - The particular revision we are looking
#                       at on the $branch_number.
#      %branch_revision - The hash storing the interesting branches
#                         and the revisions on them.
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      %branch_revision - The modified hash that stores interesting
#                         branches.
#
#    COMMENTS :
#      NONE
#
######################################################################
sub find_int_branch_revision
{
    my( $branch_number, $branch_rev, %branch_revision ) = @_;

    if( exists( $branch_revision{ $branch_number } ) )
    {
        if( $branch_rev > $branch_revision{ $branch_number } )
        {
            $branch_revision{ $branch_number } = $branch_rev;
        }
    }
    else
    {
        $branch_revision{ $branch_number } = $branch_rev;
    }

    return( %branch_revision );
}

######################################################################
#
#    NAME :
#      find_int_mainline_revision
#
#    PURPOSE :
#      To Find a interesting mainline revision.
#      Algorithm:
#        if the $branch_number is less then a branch number
#        with one element in it, then delete the old branch_number
#        and return.
#        if the $branch_number is greater than a branch number
#        then return, and tell the calling function that we
#        should skip this element, as that it's not important.
#        if the $branch_number is the same as a branch number
#        with one element in it, then check to see if the
#        $branch_rev is less than the stored branch rev if
#        it is replace with new $branch_rev.  Else ignore revision
#
#    PARAMETERS :
#      $branch_number - The branch that we are looking at
#      $branch_rev    - The particular revision we are looking
#                       at on the $branch_number.
#      %branch_revision - The hash storing the interesting branches
#                         and the revisions on them.
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      ( $skip, %branch_revision ) -
#      $skip - 1 if we need to ignore this particular $branch_number
#              $branch_rev combo.  Else 0.
#      %branch_revision - The modified hash that stores interesting
#                         branches.
#
#    COMMENTS :
#      NONE
#
######################################################################
sub find_int_mainline_revision
{
    my( $branch_number, $branch_rev, %branch_revision ) = @_;

    foreach my $key ( keys %branch_revision )
    {
        if( elements_in_branch( $key ) == 1 )
        {
            if( $branch_number < $key )
            {
                delete( $branch_revision{ $key } );
                next;
            }

            if( $branch_number > $key )
            {
                return( 1, %branch_revision );
            }
            if( ( exists( $branch_revision{ $branch_number } ) ) &&
                ( $branch_rev < $branch_revision{ $branch_number } ) )
            {
                $branch_revision{ $branch_number } = $branch_rev;
                return( 1, %branch_revision );
            }
        }
    }

    return( 0, %branch_revision );
}

######################################################################
#
#    NAME :
#      elements_in_branch
#
#    PURPOSE :
#      Determine the number of elements in a revision number
#      Elements are defined by numbers seperated by ".".
#      the revision 1.2.3.4 would have 4 elements
#      the revision 1.2.4.5.6.7 would have 6 elements
#
#    PARAMETERS :
#      $branch - The revision to look at.
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      $count - The number of elements
#
#    COMMENTS :
#      NONE
#
######################################################################
sub elements_in_branch
{
    my( $branch ) = @_;
    my @split_rev;

    @split_rev = split /\./, $branch;

    my $count = @split_rev;
    return( $count );
}

######################################################################
#
#    NAME :
#      branch_split
#
#    PURPOSE :
#      To split up a revision number up into the branch part and
#      the number part.  For Instance:
#      1.1.1.1 - is split 1.1.1 and 1
#      2.1     - is split 2 and 1
#      1.3.4.5.7.8 - is split 1.3.4.5.7 and 8
#
#    PARAMETERS :
#      $revision - The revision to look at.
#
#    GLOBALS :
#      NONE
#
#    RETURNS :
#      ( $branch, $revision ) - 
#      $branch - The branch part of the revision number 
#      $revision - The revision part of the revision number
#
#    COMMENTS :
#      NONE
#
######################################################################
sub branch_split
{
    my( $revision ) = @_;
    my $branch;
    my $version;
    my @split_rev;
    my $count;

    @split_rev = split /\./, $revision;

    my $numbers = @split_rev;     
    @split_rev = reverse( @split_rev );
    $branch = pop( @split_rev );
    for( $count = 0; $count < $numbers - 2 ; $count++ )
    {
        $branch .= "." . pop( @split_rev );
    }

    return( $branch, pop( @split_rev ) );
}
