Skip to content

Commit 083f1c5

Browse files
committed
Adding the gene list and structure list options
1 parent 4bbb532 commit 083f1c5

File tree

5 files changed

+148
-26
lines changed

5 files changed

+148
-26
lines changed
8.38 MB
Binary file not shown.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Usage
1010

1111
Stable: v0.6.0
1212

13-
Beta: up to v1.2.2
13+
Beta: up to v1.3.0
1414
1515
Author: Beifang Niu, John Wallis, Adam D Scott, Sohini Sengupta, & Amila Weerasinghe
1616

bin/hotspot3d

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
use strict;
99
use warnings;
1010

11-
our $VERSION = 'V1.2.2';
11+
our $VERSION = 'V1.3.0';
1212

1313
use Carp;
1414
use FileHandle;

dist.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = HotSpot3D
22
author = Beifang Niu, John Wallis, Adam D Scott, Sohini Sengupta , & Amila Weerasinghe from McDonnell Genome Institute of Washington University at St. Louis
3-
version = 1.2.2
3+
version = 1.3.0
44
license = Perl_5
55
copyright_holder = McDonnell Genome Institute at Washington University
66
copyright_year = 2013

lib/TGI/Mutpro/Main/Cluster.pm

Lines changed: 145 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ sub new {
8484
$this->{'subunit_dependence'} = undef;
8585
#TODO add meric based on protein or gene id, if protein, need hugo.uniprot.pdb.transcript.csv file
8686
$this->{'meric_type'} = undef;
87+
88+
$this->{'gene_list_file'} = undef;
89+
$this->{'structure_list_file'} = undef;
90+
$this->{'listOption'} = undef;
8791

8892
$this->{'processed'} = undef;
8993
$this->{'distance_matrix'} = undef;
@@ -175,6 +179,12 @@ sub setOptions {
175179
'parallel=s' => \$this->{'parallel'},
176180
'max-processes=i' => \$this->{'max_processes'},
177181
'meric-type=s' => \$this->{'meric_type'},
182+
'gene-list-file=s' => \$this->{'gene_list_file'},
183+
'structure-list-file=s' => \$this->{'structure_list_file'},
184+
'epsilon=f' => \$this->{'Epsilon'},
185+
'minPts=f' => \$this->{'MinPts'},
186+
'number-of-runs=f' => \$this->{'number_of_runs'},
187+
'probability-cut-off=f' => \$this->{'probability_cut_off'},
178188

179189
'help' => \$help,
180190
);
@@ -276,6 +286,25 @@ sub setOptions {
276286
if ( not exists $tempMericHash->{$this->{'meric_type'}} ) {
277287
die "Error: meric-type should be one of the following: intra, monomer, homomer, inter, heteromer, multimer, unspecified\n";
278288
}
289+
290+
##### START gene-list and structure-list options
291+
if ( defined $this->{'gene_list_file'} ) {
292+
if ( not -e $this->{'gene_list_file'} ) {
293+
warn "The input gene list file (".$this->{'gene_list_file'}.") does not exist! ", "\n";
294+
die $this->help_text();
295+
}
296+
}
297+
298+
if ( defined $this->{'structure_list_file'} ) {
299+
if ( not -e $this->{'structure_list_file'} ) {
300+
warn "The input structure list file (".$this->{'structure_list_file'}.") does not exist! ", "\n";
301+
die $this->help_text();
302+
}
303+
}
304+
305+
$this->initializeLists();
306+
##### END
307+
279308
print STDOUT "=====Parameters=====\n";
280309
print STDOUT " p-value-cutoff = ".$this->{'p_value_cutoff'}."\n";
281310
print STDOUT " 3d-distance-cutoff = ".$this->{'3d_distance_cutoff'}."\n";
@@ -294,6 +323,46 @@ sub setOptions {
294323
return;
295324
}
296325

326+
sub initializeLists {
327+
my $this = shift;
328+
#print "at initializeLists\n";
329+
330+
if ( defined $this->{'gene_list_file'} ) {
331+
my $fh = new FileHandle;
332+
unless( $fh->open( $this->{'gene_list_file'} , "r" ) ) { die "Could not open gene list file $! \n" };
333+
while ( my $gene = <$fh> ) {
334+
chomp( $gene );
335+
$this->{'providedGenes'}->{$gene} = 0; # hash with provided gene names as keys, will be used later in filterByProvidedLists
336+
}
337+
}
338+
if ( defined $this->{'structure_list_file'} ) {
339+
my $fh = new FileHandle;
340+
unless( $fh->open( $this->{'structure_list_file'} , "r" ) ) { die "Could not open structures list file $! \n" };
341+
while ( my $structure = <$fh> ) {
342+
chomp( $structure );
343+
$this->{'providedStructures'}->{$structure} = 0; # hash with provided structure names as keys, will be used later in filterByProvidedLists
344+
}
345+
}
346+
##
347+
if ( (defined $this->{'gene_list_file'}) && (not defined $this->{'structure_list_file'}) ) {
348+
$this->{'listOption'} = "GeneListOnly";
349+
#print "gene list only\n";
350+
}
351+
if ( (not defined $this->{'gene_list_file'}) && (defined $this->{'structure_list_file'}) ) {
352+
$this->{'listOption'} = "StructureListOnly";
353+
warn "HotSpot3D::Cluster::setOptions warning: only the mutations present in the given structures are being considered\n";
354+
#print "structure list only\n";
355+
}
356+
if ( (defined $this->{'gene_list_file'}) && (defined $this->{'structure_list_file'}) ) {
357+
$this->{'listOption'} = "BothLists";
358+
#print "both lists\n";
359+
}
360+
if ( (not defined $this->{'gene_list_file'}) && (not defined $this->{'structure_list_file'}) ) {
361+
$this->{'listOption'} = "None";
362+
#print "no lists\n";
363+
}
364+
}
365+
297366
sub launchClustering {
298367
my $this = shift; # removed my ( $this, $help ) = @_;
299368
if ( $this->{'clustering'} eq $NETWORK ) {
@@ -449,29 +518,35 @@ sub readPairwise { # shared
449518
$gene2 , $chromosome2 , $start2 , $stop2 , $aa_2 , $chain2 , $loc_2 , $domain2 , $cosmic2 ,
450519
$linearDistance , $infos ) = split /\t/ , $_;
451520

452-
if ( $this->checkMeric( $gene1 , $gene2 , $chain1 , $chain2 ) ) {
453-
#print $_."\n";
454-
$chain1 =~ s/\[(\w)\]/$1/g;
455-
$chain2 =~ s/\[(\w)\]/$1/g;
456-
my $proteinMutation = TGI::ProteinVariant->new();
457-
my $mutation1 = TGI::Variant->new();
458-
$mutation1->gene( $gene1 );
459-
$mutation1->chromosome( $chromosome1 );
460-
$mutation1->start( $start1 );
461-
$mutation1->stop( $stop1 );
462-
$proteinMutation->aminoAcidChange( $aa_1 );
463-
$mutation1->addProteinVariant( $proteinMutation );
464-
465-
my $mutation2 = TGI::Variant->new();
466-
$mutation2->gene( $gene2 );
467-
$mutation2->chromosome( $chromosome2 );
468-
$mutation2->start( $start2 );
469-
$mutation2->stop( $stop2 );
470-
$proteinMutation->aminoAcidChange( $aa_2 );
471-
$mutation2->addProteinVariant( $proteinMutation );
472-
# print "pair structures ".join( " " , ( $infos , $chain1 , $chain2 ) )."\n";
473-
$this->setDistance( $distance_matrix , $mutation1 , $mutation2 ,
474-
$chain1 , $chain2 , $infos , $pdbCount );
521+
if ( $this->checkByProvidedLists( $gene1, $gene2, $infos ) ) { # check to filter out mutation pairs according to a given gene list or structures list or both
522+
if ( defined $this->{'structure_list_file'} ) { # if true, get only the pdbIDs which are in this list
523+
$infos = $this->filterInfosByGivenStructures( $infos );
524+
#print "new infos= $infos\n";
525+
}
526+
if ( $this->checkMeric( $gene1 , $gene2 , $chain1 , $chain2 ) ) {
527+
#print $_."\n";
528+
$chain1 =~ s/\[(\w)\]/$1/g;
529+
$chain2 =~ s/\[(\w)\]/$1/g;
530+
my $proteinMutation = TGI::ProteinVariant->new();
531+
my $mutation1 = TGI::Variant->new();
532+
$mutation1->gene( $gene1 );
533+
$mutation1->chromosome( $chromosome1 );
534+
$mutation1->start( $start1 );
535+
$mutation1->stop( $stop1 );
536+
$proteinMutation->aminoAcidChange( $aa_1 );
537+
$mutation1->addProteinVariant( $proteinMutation );
538+
539+
my $mutation2 = TGI::Variant->new();
540+
$mutation2->gene( $gene2 );
541+
$mutation2->chromosome( $chromosome2 );
542+
$mutation2->start( $start2 );
543+
$mutation2->stop( $stop2 );
544+
$proteinMutation->aminoAcidChange( $aa_2 );
545+
$mutation2->addProteinVariant( $proteinMutation );
546+
# print "pair structures ".join( " " , ( $infos , $chain1 , $chain2 ) )."\n";
547+
$this->setDistance( $distance_matrix , $mutation1 , $mutation2 ,
548+
$chain1 , $chain2 , $infos , $pdbCount );
549+
}
475550
}
476551
} $fh->getlines;
477552
$fh->close();
@@ -486,6 +561,53 @@ sub readPairwise { # shared
486561
return;
487562
}
488563

564+
sub checkByProvidedLists {
565+
my ( $this, $gene1, $gene2, $infos ) = @_;
566+
#print join( " " , ( $gene1 , $gene2 , $infos , "" ) );
567+
if ( $this->{'listOption'} eq "None" ) { # no lists given, proceed as usual
568+
#print "no lists: process as usual\n";
569+
return 1;
570+
571+
} elsif ( $this->{'listOption'} eq "GeneListOnly" ) {
572+
if ( (exists $this->{'providedGenes'}->{$gene1}) && (exists $this->{'providedGenes'}->{$gene2}) ) {
573+
#print "gene check okay\n";
574+
return 1;
575+
} else { return 0; }
576+
577+
} elsif ( $this->{'listOption'} eq "StructureListOnly" ) {
578+
my $newInfos = $this->filterInfosByGivenStructures( $infos );
579+
if ( defined $newInfos ) {
580+
#print "structure check okay\n";
581+
return 1;
582+
}else { return 0; }
583+
584+
} elsif ( $this->{'listOption'} eq "BothLists" ) {
585+
my $newInfos = filterInfosByGivenStructures( $this, $infos );
586+
if ( (defined $newInfos) && (exists $this->{'providedGenes'}->{$gene1}) && (exists $this->{'providedGenes'}->{$gene2}) ) {
587+
#print "both checks okay\n";
588+
return 1;
589+
} else { return 0; }
590+
}
591+
}
592+
593+
sub filterInfosByGivenStructures { # takes the infos column from pairwise file and filter out the structures that are not present in the structures list
594+
my ( $this, $infos ) = @_;
595+
my $newInfos = undef;
596+
my @infos = split /\|/ , $infos;
597+
foreach my $info ( @infos ) {
598+
chomp( $info );
599+
next if ( $info eq "" );
600+
my ( $distance , $pdbID , $pvalue ) = split / / , $info;
601+
next if ( not exists $this->{'providedStructures'}->{$pdbID} );
602+
if ( defined $newInfos ) {
603+
$newInfos = join ( "\|", $newInfos, $info );
604+
} else {
605+
$newInfos = $info;
606+
}
607+
}
608+
return $newInfos;
609+
}
610+
489611
sub checkMeric {
490612
my ( $this , $gene1 , $gene2 , $chain1 , $chain2 ) = @_;
491613
# print join( " " , ( $gene1 , $chain1 , $gene2 , $chain2 , "" ) );

0 commit comments

Comments
 (0)