@@ -84,6 +84,10 @@ sub new {
84
84
$this -> {' subunit_dependence' } = undef ;
85
85
# TODO add meric based on protein or gene id, if protein, need hugo.uniprot.pdb.transcript.csv file
86
86
$this -> {' meric_type' } = undef ;
87
+
88
+ $this -> {' gene_list_file' } = undef ;
89
+ $this -> {' structure_list_file' } = undef ;
90
+ $this -> {' listOption' } = undef ;
87
91
88
92
$this -> {' processed' } = undef ;
89
93
$this -> {' distance_matrix' } = undef ;
@@ -175,6 +179,12 @@ sub setOptions {
175
179
' parallel=s' => \$this -> {' parallel' },
176
180
' max-processes=i' => \$this -> {' max_processes' },
177
181
' meric-type=s' => \$this -> {' meric_type' },
182
+ ' gene-list-file=s' => \$this -> {' gene_list_file' },
183
+ ' structure-list-file=s' => \$this -> {' structure_list_file' },
184
+ ' epsilon=f' => \$this -> {' Epsilon' },
185
+ ' minPts=f' => \$this -> {' MinPts' },
186
+ ' number-of-runs=f' => \$this -> {' number_of_runs' },
187
+ ' probability-cut-off=f' => \$this -> {' probability_cut_off' },
178
188
179
189
' help' => \$help ,
180
190
);
@@ -276,6 +286,25 @@ sub setOptions {
276
286
if ( not exists $tempMericHash -> {$this -> {' meric_type' }} ) {
277
287
die " Error: meric-type should be one of the following: intra, monomer, homomer, inter, heteromer, multimer, unspecified\n " ;
278
288
}
289
+
290
+ # #### START gene-list and structure-list options
291
+ if ( defined $this -> {' gene_list_file' } ) {
292
+ if ( not -e $this -> {' gene_list_file' } ) {
293
+ warn " The input gene list file (" .$this -> {' gene_list_file' }." ) does not exist! " , " \n " ;
294
+ die $this -> help_text();
295
+ }
296
+ }
297
+
298
+ if ( defined $this -> {' structure_list_file' } ) {
299
+ if ( not -e $this -> {' structure_list_file' } ) {
300
+ warn " The input structure list file (" .$this -> {' structure_list_file' }." ) does not exist! " , " \n " ;
301
+ die $this -> help_text();
302
+ }
303
+ }
304
+
305
+ $this -> initializeLists();
306
+ # #### END
307
+
279
308
print STDOUT " =====Parameters=====\n " ;
280
309
print STDOUT " p-value-cutoff = " .$this -> {' p_value_cutoff' }." \n " ;
281
310
print STDOUT " 3d-distance-cutoff = " .$this -> {' 3d_distance_cutoff' }." \n " ;
@@ -294,6 +323,46 @@ sub setOptions {
294
323
return ;
295
324
}
296
325
326
+ sub initializeLists {
327
+ my $this = shift ;
328
+ # print "at initializeLists\n";
329
+
330
+ if ( defined $this -> {' gene_list_file' } ) {
331
+ my $fh = new FileHandle;
332
+ unless ( $fh -> open ( $this -> {' gene_list_file' } , " r" ) ) { die " Could not open gene list file $! \n " };
333
+ while ( my $gene = <$fh > ) {
334
+ chomp ( $gene );
335
+ $this -> {' providedGenes' }-> {$gene } = 0; # hash with provided gene names as keys, will be used later in filterByProvidedLists
336
+ }
337
+ }
338
+ if ( defined $this -> {' structure_list_file' } ) {
339
+ my $fh = new FileHandle;
340
+ unless ( $fh -> open ( $this -> {' structure_list_file' } , " r" ) ) { die " Could not open structures list file $! \n " };
341
+ while ( my $structure = <$fh > ) {
342
+ chomp ( $structure );
343
+ $this -> {' providedStructures' }-> {$structure } = 0; # hash with provided structure names as keys, will be used later in filterByProvidedLists
344
+ }
345
+ }
346
+ # #
347
+ if ( (defined $this -> {' gene_list_file' }) && (not defined $this -> {' structure_list_file' }) ) {
348
+ $this -> {' listOption' } = " GeneListOnly" ;
349
+ # print "gene list only\n";
350
+ }
351
+ if ( (not defined $this -> {' gene_list_file' }) && (defined $this -> {' structure_list_file' }) ) {
352
+ $this -> {' listOption' } = " StructureListOnly" ;
353
+ warn " HotSpot3D::Cluster::setOptions warning: only the mutations present in the given structures are being considered\n " ;
354
+ # print "structure list only\n";
355
+ }
356
+ if ( (defined $this -> {' gene_list_file' }) && (defined $this -> {' structure_list_file' }) ) {
357
+ $this -> {' listOption' } = " BothLists" ;
358
+ # print "both lists\n";
359
+ }
360
+ if ( (not defined $this -> {' gene_list_file' }) && (not defined $this -> {' structure_list_file' }) ) {
361
+ $this -> {' listOption' } = " None" ;
362
+ # print "no lists\n";
363
+ }
364
+ }
365
+
297
366
sub launchClustering {
298
367
my $this = shift ; # removed my ( $this, $help ) = @_;
299
368
if ( $this -> {' clustering' } eq $NETWORK ) {
@@ -449,29 +518,35 @@ sub readPairwise { # shared
449
518
$gene2 , $chromosome2 , $start2 , $stop2 , $aa_2 , $chain2 , $loc_2 , $domain2 , $cosmic2 ,
450
519
$linearDistance , $infos ) = split /\t/ , $_ ;
451
520
452
- if ( $this -> checkMeric( $gene1 , $gene2 , $chain1 , $chain2 ) ) {
453
- # print $_."\n";
454
- $chain1 =~ s /\[ (\w )\] / $1 / g ;
455
- $chain2 =~ s /\[ (\w )\] / $1 / g ;
456
- my $proteinMutation = TGI::ProteinVariant-> new();
457
- my $mutation1 = TGI::Variant-> new();
458
- $mutation1 -> gene( $gene1 );
459
- $mutation1 -> chromosome( $chromosome1 );
460
- $mutation1 -> start( $start1 );
461
- $mutation1 -> stop( $stop1 );
462
- $proteinMutation -> aminoAcidChange( $aa_1 );
463
- $mutation1 -> addProteinVariant( $proteinMutation );
464
-
465
- my $mutation2 = TGI::Variant-> new();
466
- $mutation2 -> gene( $gene2 );
467
- $mutation2 -> chromosome( $chromosome2 );
468
- $mutation2 -> start( $start2 );
469
- $mutation2 -> stop( $stop2 );
470
- $proteinMutation -> aminoAcidChange( $aa_2 );
471
- $mutation2 -> addProteinVariant( $proteinMutation );
472
- # print "pair structures ".join( " " , ( $infos , $chain1 , $chain2 ) )."\n";
473
- $this -> setDistance( $distance_matrix , $mutation1 , $mutation2 ,
474
- $chain1 , $chain2 , $infos , $pdbCount );
521
+ if ( $this -> checkByProvidedLists( $gene1 , $gene2 , $infos ) ) { # check to filter out mutation pairs according to a given gene list or structures list or both
522
+ if ( defined $this -> {' structure_list_file' } ) { # if true, get only the pdbIDs which are in this list
523
+ $infos = $this -> filterInfosByGivenStructures( $infos );
524
+ # print "new infos= $infos\n";
525
+ }
526
+ if ( $this -> checkMeric( $gene1 , $gene2 , $chain1 , $chain2 ) ) {
527
+ # print $_."\n";
528
+ $chain1 =~ s /\[ (\w )\] / $1 / g ;
529
+ $chain2 =~ s /\[ (\w )\] / $1 / g ;
530
+ my $proteinMutation = TGI::ProteinVariant-> new();
531
+ my $mutation1 = TGI::Variant-> new();
532
+ $mutation1 -> gene( $gene1 );
533
+ $mutation1 -> chromosome( $chromosome1 );
534
+ $mutation1 -> start( $start1 );
535
+ $mutation1 -> stop( $stop1 );
536
+ $proteinMutation -> aminoAcidChange( $aa_1 );
537
+ $mutation1 -> addProteinVariant( $proteinMutation );
538
+
539
+ my $mutation2 = TGI::Variant-> new();
540
+ $mutation2 -> gene( $gene2 );
541
+ $mutation2 -> chromosome( $chromosome2 );
542
+ $mutation2 -> start( $start2 );
543
+ $mutation2 -> stop( $stop2 );
544
+ $proteinMutation -> aminoAcidChange( $aa_2 );
545
+ $mutation2 -> addProteinVariant( $proteinMutation );
546
+ # print "pair structures ".join( " " , ( $infos , $chain1 , $chain2 ) )."\n";
547
+ $this -> setDistance( $distance_matrix , $mutation1 , $mutation2 ,
548
+ $chain1 , $chain2 , $infos , $pdbCount );
549
+ }
475
550
}
476
551
} $fh -> getlines;
477
552
$fh -> close ();
@@ -486,6 +561,53 @@ sub readPairwise { # shared
486
561
return ;
487
562
}
488
563
564
+ sub checkByProvidedLists {
565
+ my ( $this , $gene1 , $gene2 , $infos ) = @_ ;
566
+ # print join( " " , ( $gene1 , $gene2 , $infos , "" ) );
567
+ if ( $this -> {' listOption' } eq " None" ) { # no lists given, proceed as usual
568
+ # print "no lists: process as usual\n";
569
+ return 1;
570
+
571
+ } elsif ( $this -> {' listOption' } eq " GeneListOnly" ) {
572
+ if ( (exists $this -> {' providedGenes' }-> {$gene1 }) && (exists $this -> {' providedGenes' }-> {$gene2 }) ) {
573
+ # print "gene check okay\n";
574
+ return 1;
575
+ } else { return 0; }
576
+
577
+ } elsif ( $this -> {' listOption' } eq " StructureListOnly" ) {
578
+ my $newInfos = $this -> filterInfosByGivenStructures( $infos );
579
+ if ( defined $newInfos ) {
580
+ # print "structure check okay\n";
581
+ return 1;
582
+ }else { return 0; }
583
+
584
+ } elsif ( $this -> {' listOption' } eq " BothLists" ) {
585
+ my $newInfos = filterInfosByGivenStructures( $this , $infos );
586
+ if ( (defined $newInfos ) && (exists $this -> {' providedGenes' }-> {$gene1 }) && (exists $this -> {' providedGenes' }-> {$gene2 }) ) {
587
+ # print "both checks okay\n";
588
+ return 1;
589
+ } else { return 0; }
590
+ }
591
+ }
592
+
593
+ sub filterInfosByGivenStructures { # takes the infos column from pairwise file and filter out the structures that are not present in the structures list
594
+ my ( $this , $infos ) = @_ ;
595
+ my $newInfos = undef ;
596
+ my @infos = split /\|/ , $infos ;
597
+ foreach my $info ( @infos ) {
598
+ chomp( $info );
599
+ next if ( $info eq "" );
600
+ my ( $distance , $pdbID , $pvalue ) = split / / , $info ;
601
+ next if ( not exists $this -> {' providedStructures' }-> {$pdbID } );
602
+ if ( defined $newInfos ) {
603
+ $newInfos = join ( " \| " , $newInfos , $info );
604
+ } else {
605
+ $newInfos = $info ;
606
+ }
607
+ }
608
+ return $newInfos ;
609
+ }
610
+
489
611
sub checkMeric {
490
612
my ( $this , $gene1 , $gene2 , $chain1 , $chain2 ) = @_ ;
491
613
# print join( " " , ( $gene1 , $chain1 , $gene2 , $chain2 , "" ) );
0 commit comments