Home

Perl Scripts to Extract Rows and Columns from Many Excel Files

I had to aggregate rows and columns from about 100 MS Excel spreadsheet files from different sheets in the spreadsheets, they were reports filed by a hundred different offices, all the same format. Each extract was in a tab delimited format to throw into another spreadsheet. Why? Well it is a cheap report and analysis tool, everyone knows Excel.

The code is quick, dirty and ugly to boot. But it worked, in a couple hours of scripting I saved some people many many hours of screwing with the 100 spreadsheets.

# Get rows from many Excel spreadsheets in a directory
###################################
#! /usr/local/bin/perl -w
    use strict;
    use Spreadsheet::ParseExcel::Simple;


my $excel_directory = 'Budget';
my $out_directory = 'xxxout';
opendir(EXCELDIR, $excel_directory) || die ("no excel directory");
my @excelfiles = readdir(EXCELDIR);
closedir(EXCELDIR);

chdir($excel_directory);
       my $LPHname;    # String to hold Local Public Health Name.
       my @sheetarray; # Array to hold the row.
       my $sheetcount; # Array element in the row.
       my $sheetname; # Name of the Excel spreadsheet.
       my $sheettemp;  # Temporary string to hold row for join.
       my $cellnumber;  # Cell number in the row.
       my $cellwanted;  # Cell number in the row.
       my $rowwanted;  # Row number wanted.
       my $county_namecell;  # Cell for county name.
       my $county_namerow;  # Row for county name.
foreach my $exxfilename (@excelfiles){
    if ($exxfilename =~ /^\.+.*/) { next; }
    my $xls = Spreadsheet::ParseExcel::Simple->read($exxfilename);
    foreach my $sheet ($xls->sheets) {
       $sheetname= $sheet->{sheet}->{Name}; # Sheet Name


       if ($sheetname !~ '2007 Budget') { next; }
       $sheetcount=0;
       $county_namecell=11;
       $county_namerow=1;
#       $cellwanted=4;
       $rowwanted=11;

       while ($sheet->has_data) {
            my @data = $sheet->next_row;
            $sheetcount++;
         if ($sheetcount==$county_namerow){
            $cellnumber=0;
            foreach my $ttcell (@data) {
                $cellnumber++;
                if ($cellnumber != $county_namecell ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
#                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
                 $LPHname=$ttcell;
            }
          } # Get the name of the Local Health

#     if (($sheetcount < ($rowwanted-1)) || ($sheetcount > ($rowwanted+7))){next
;}
            if ($sheetcount != $rowwanted){next;};
            $cellnumber=0;
     $sheetarray[$sheetcount]=join("\t",$sheettemp,$LPHname);
            foreach my $ttcell (@data) {
                $cellnumber++;
#                if ($cellnumber != $cellwanted ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
            }
       }
    }
    foreach my $sheetline (@sheetarray){
        print $sheetline,"\n";
    }
}
#    foreach my $sheetline (@sheetarray){
#        print $sheetline,"\n";
#    }
exit


###############################################################
# Column extract.
# Get columns from many Excel spreadsheets in a directory
###############################################################

#! /usr/local/bin/perl -w
    use strict;
    use Spreadsheet::ParseExcel::Simple;


my $excel_directory = 'TEST';
opendir(EXCELDIR, $excel_directory) || die ("no excel directory");
my @excelfiles = readdir(EXCELDIR);
closedir(EXCELDIR);

chdir($excel_directory);
       my @sheetarray; # Array to hold the row.
       my $sheetcount; # Array element in the row.
       my $sheetname; # Name of the Excel spreadsheet.
       my $sheettemp;  # Temporary string to hold row for join.
       my $cellnumber;  # cell number in the row.
       my $cellwanted;  # cell number in the row.
       my $rowwanted;  # row number wanted.
       my $county_namecell;  # cell for county name.
       my $county_namerow;  # row for county name.
foreach my $exxfilename (@excelfiles){
    if ($exxfilename =~ /^\.+.*/) { next; }
    my $xls = Spreadsheet::ParseExcel::Simple->read($exxfilename);
    foreach my $sheet ($xls->sheets) {
       $sheetname= $sheet->{sheet}->{Name};


# name the sheet to take stuff out of.
       if ($sheetname !~ '2007 Budget') { next; }
       $sheetcount=0;
$county_namecell=11;
$county_namerow=1;
       $cellwanted=2;
       $rowwanted=5;

       while ($sheet->has_data) {
            my @data = $sheet->next_row;
            $sheetcount++;
if ($sheetcount==$county_namerow){
            $cellnumber=0;
            foreach my $ttcell (@data) {
                $cellnumber++;
                if ($cellnumber != $county_namecell ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
            }
}
     if (($sheetcount < ($rowwanted)) || ($sheetcount > ($rowwanted+5))){next;}
#column boundary starting from rowwanted and getting cellwanted column.
#            if ($sheetcount != $rowwanted){next;};
            $cellnumber=0;
            foreach my $ttcell (@data) {
                $cellnumber++;
                if ($cellnumber != $cellwanted ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
            }
       }
    }
}
    foreach my $sheetline (@sheetarray){
        print $sheetline,"\n";
    }
exit