#!/usr/bin/perl -w
#
# Extract specific col,row entries from a table.
#
# Row and columns patterns may include perl regular expressions.
#
# Example usages:
#
# To extract binding p-values for INO4 to 3 genes (YOL144W, GCN4, RFX1)
# use this command:
#
# extract_from_table.pl BLA_MMS_pvalues.tab INO4 YOL144W GCN4 RFX1
#
# ---------------------------------------------------------------------
# To extract binding p-values for any TF that starts with "A"
# to any gene that matches "Y.*145C" use this command:
#
# extract_from_table.pl BLA_MMS_pvalues.tab "^A" "Y.*145C"
#
if(scalar(@ARGV) < 3)
{
die "usage: $0
\n";
}
my ($TABLE, $COL, @ROWS) = @ARGV;
my $DELIM = qr/\t/; ## assume tab-delimited tables
my @ROW_PATTERNS;
foreach my $r (@ROWS)
{
push @ROW_PATTERNS, qr/$r/; ## precompile the row patterns
}
my @col_index;
open(IN, $TABLE) || die "Error opening [$TABLE]: $!\n";
while()
{
chomp;
# we expect the first line to be a header
# columns must be tab delimited
# find the indicies of any columns that match the $COL pattern
if($. == 1)
{
my @header = split(/$DELIM/);
for my $x (0..$#header)
{
if($header[$x] =~ /$COL/o)
{
push @col_index, $x;
}
}
print join("\t",
@header[0,1],
map { $header[$_] . "[$_]"} @col_index) . "\n";
next;
}
# now find rows that match any of the ROW_PATTERNS
# and print the appropriate columns
my @rowdata = split(/$DELIM/);
foreach $re (@ROW_PATTERNS)
{
if(/$re/)
{
print join("\t", @rowdata[0,1], @rowdata[@col_index]) . "\n";
}
}
}
close IN;