Skip to content

Commit 9479a10

Browse files
amilacswAdamDS
authored andcommitted
Adding the density module (#7)
* added the density module
1 parent a0301c0 commit 9479a10

File tree

2 files changed

+214
-1
lines changed

2 files changed

+214
-1
lines changed

bin/hotspot3d

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use TGI::Mutpro::Main::Cluster;
2222
use TGI::Mutpro::Main::Significance;
2323
use TGI::Mutpro::Main::Summary;
2424
use TGI::Mutpro::Main::Visual;
25+
use TGI::Mutpro::Main::Cluster::Density;
2526
use TGI::Mutpro::Preprocess::Drugport;
2627
use TGI::Mutpro::Preprocess::Uppro;
2728
use TGI::Mutpro::Preprocess::Calpro;
@@ -36,7 +37,7 @@ use TGI::Mutpro::Preprocess::AllPreprocess;
3637

3738
my $subCmd = shift;
3839
## Add module option here
39-
my %cmds = map{ ($_, 1) } qw( search post visual cluster sigclus summary drugport uppro calpro calroi statis anno trans homo cosmic prior prep help );
40+
my %cmds = map{ ($_, 1) } qw( search post visual cluster sigclus summary drugport uppro calpro calroi statis anno trans homo cosmic prior prep density help );
4041
unless (defined $subCmd) { die help_text(); };
4142
unless (exists $cmds{$subCmd}) {
4243
warn ' Please give valid sub command ! ', "\n";
@@ -61,6 +62,7 @@ SWITCH:{
6162
$subCmd eq 'cosmic' && do { TGI::Mutpro::Preprocess::Cosmic->new(); last SWITCH; };
6263
$subCmd eq 'prior' && do { TGI::Mutpro::Preprocess::Prior->new(); last SWITCH; };
6364
$subCmd eq 'prep' && do { TGI::Mutpro::Preprocess::AllPreprocess->new(); last SWITCH; };
65+
$subCmd eq 'density' && do { TGI::Mutpro::Main::Cluster::Density->new(); last SWITCH; };
6466
$subCmd eq 'help' && do { die help_text(); last SWITCH; };
6567
}
6668
sub help_text {
@@ -92,6 +94,7 @@ Version: $VERSION
9294
sigclus -- 3) Determine significance of clusters
9395
summary -- 4) Summarize clusters
9496
visual -- 5) Visulization of 3D proximity
97+
density -- 6) Determine mutation-mutation and mutation-drug density-based clusters
9598
9699
help -- this message
97100
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
package TGI::Mutpro::Main::Cluster::Density;
2+
3+
use strict;
4+
use warnings;
5+
6+
use LWP::Simple;
7+
use FileHandle;
8+
use Data::Dumper qw(Dumper);
9+
use List::Util qw[min max];
10+
use Getopt::Long;
11+
12+
my $EPSILONDEFAULT = 4;
13+
my $MINPTSDEFAULT = 3;
14+
my $CUTOFFSTARTDEFAULT = 2;
15+
my $CUTOFFENDDEFAULT = 4;
16+
17+
sub new {
18+
my $class = shift;
19+
my $this = {};
20+
$this->{'pairwise_file'} = '3D_Proximity.pairwise';
21+
$this->{'epsilon'} = undef;
22+
$this->{'minpts'} = undef;
23+
$this->{'cutoffstart'} = undef;
24+
$this->{'cutoffend'} = undef;
25+
bless $this, $class;
26+
$this->process();
27+
return $this;
28+
}
29+
30+
sub process {
31+
my $this = shift;
32+
my ( $help, $options );
33+
unless( @ARGV ) { die $this->help_text(); }
34+
$options = GetOptions (
35+
'pairwise-file=s' => \$this->{'pairwise_file'},
36+
'epsilon=f' => \$this->{'epsilon'},
37+
'minpts=f' => \$this->{'minpts'},
38+
'cut-off-start=f' => \$this->{'cut_off_start'},
39+
'cut-off-end=f' => \$this->{'cut_off_end'},
40+
'help' => \$help,
41+
);
42+
if ( $help ) { print STDERR help_text(); exit 0; }
43+
unless( $options ) { die $this->help_text(); }
44+
unless( $this->{'pairwise_file'} ) { warn 'You must provide a pairwise file! ', "\n"; die $this->help_text(); }
45+
unless( -e $this->{'pairwise_file'} ) { warn "The input pairwise file (".$this->{'pairwise_file'}.") does not exist! ", "\n"; die $this->help_text(); }
46+
if ( not defined $this->{'epsilon'} ) {
47+
warn "HotSpot3D::Cluster warning: no Epsilon value given, setting to default Epsilon value = 4\n";
48+
$this->{'epsilon'} = $EPSILONDEFAULT;
49+
}
50+
if ( not defined $this->{'minpts'} ) {
51+
warn "HotSpot3D::Cluster warning: no MinPts value given, setting to default MinPts value = 3\n";
52+
$this->{'minpts'} = $MINPTSDEFAULT;
53+
}
54+
if ( not defined $this->{'cut-off-start'} ) {
55+
warn "HotSpot3D::Cluster warning: no cut_off_start given, setting to default value = 2\n";
56+
$this->{'cut-off-start'} = $CUTOFFSTARTDEFAULT;
57+
}
58+
if ( not defined $this->{'cut-off-end'} ) {
59+
warn "HotSpot3D::Cluster warning: no cut_off_end given, setting to default value = 4\n";
60+
$this->{'cut-off-end'} = $CUTOFFENDDEFAULT;
61+
}
62+
print STDOUT "Preparing Density-based Clusters...\n";
63+
##################
64+
# OPTICS #
65+
##################
66+
my %SetOfNodes; # Hash containing all the nodes
67+
my $file = "$this->{'pairwise_file'}";
68+
open( IN, "<$file" ) || die "Could not open the pairwise file $!\n";
69+
70+
while ( my $line = <IN> ) {
71+
chomp $line;
72+
my @tabs = split(/\t/,$line);
73+
my @char19 = split("",$tabs[19]);
74+
my $dis = $char19[0].$char19[1].$char19[2].$char19[3].$char19[4];
75+
my $key1 = CombineWords($tabs[0],$tabs[4]);
76+
my $value1 = CombineWords($tabs[9],$tabs[13]);
77+
78+
$SetOfNodes{$key1}{distances}{$value1} = $dis;
79+
$SetOfNodes{$value1}{distances}{$key1} = $dis;
80+
}
81+
82+
close (IN);
83+
84+
foreach my $i ( keys %SetOfNodes ) {
85+
$SetOfNodes{$i}{processInfo} = "False";
86+
}
87+
88+
my @OrderedNodes;
89+
90+
################# Main OPTICS function ####################
91+
92+
foreach my $p ( keys %SetOfNodes ) {
93+
if ( $SetOfNodes{$p}{processInfo} =~ "False" ) {
94+
########## Expand Cluster Order ###########
95+
my %neighbors; # is a hash with keys neigbor indices whose values are mutual separations
96+
my %OrderSeeds; # is a hash to add seeds
97+
%neighbors = %{GetNeighbors($p,\%SetOfNodes)};
98+
$SetOfNodes{$p}{processInfo} = "True"; # set as processed
99+
my $RD = undef;
100+
my $CD;
101+
$CD = GetCoreDistance(\%neighbors,$this);
102+
push @OrderedNodes, [$p,$RD,$CD]; # write to the file
103+
if (defined $CD) {
104+
OrderSeedsUpdate(\%neighbors,$p,$CD, \%OrderSeeds, \%SetOfNodes);
105+
while (scalar keys %OrderSeeds != 0) {
106+
my @SeedKeys = sort { $OrderSeeds{$a} <=> $OrderSeeds{$b} } keys %OrderSeeds;
107+
my @SeedValues = @OrderSeeds{@SeedKeys};
108+
my $CurrentObject = $SeedKeys[0]; # CurrentObject is the object having the least RD in OrderSeeds
109+
%neighbors = %{GetNeighbors($CurrentObject,\%SetOfNodes)};
110+
$SetOfNodes{$CurrentObject}{processInfo} = "True"; # set as processed
111+
$RD = $SeedValues[0];
112+
$CD = GetCoreDistance(\%neighbors,$this);
113+
push @OrderedNodes, [$CurrentObject,$RD,$CD]; # write to the file
114+
delete $OrderSeeds{$CurrentObject};
115+
if (defined $CD) {
116+
OrderSeedsUpdate(\%neighbors,$CurrentObject,$CD, \%OrderSeeds, \%SetOfNodes);
117+
}
118+
} # loop through nodes in the OrderedNodes
119+
} # if the node is a core
120+
} # if the node is not processed
121+
} # get nodes from the pool
122+
123+
##############################################################
124+
125+
my $OrderedFile = "RD.$this->{'epsilon'}.$this->{'minpts'}.$this->{'pairwise_file'}.out";
126+
open (OUT, ">$OrderedFile");
127+
foreach my $x (1...scalar keys %SetOfNodes) {
128+
if (defined $OrderedNodes[$x-1][1]) {
129+
print OUT "$OrderedNodes[$x-1][0]\t $OrderedNodes[$x-1][1]\n";
130+
}
131+
else {
132+
print OUT "$OrderedNodes[$x-1][0]\t 10\n";
133+
}
134+
}
135+
close (OUT);
136+
print STDOUT "Done.\n";
137+
}
138+
###################
139+
# Sub Functions #
140+
###################
141+
sub GetNeighbors {
142+
my ($Obj, $Set_ref)=@_;
143+
my %neighborHash;
144+
foreach my $i (keys %{$Set_ref->{$Obj}->{distances}}) {
145+
$neighborHash{$i} = "$Set_ref->{$Obj}->{distances}->{$i}";
146+
}
147+
return \%neighborHash;
148+
}
149+
150+
sub GetCoreDistance {
151+
my ($neighbors_ref, $this)=@_;
152+
my @keys = sort { $neighbors_ref->{$a} <=> $neighbors_ref->{$b} } keys %{$neighbors_ref}; # sort keys according to distances
153+
my @vals = @{$neighbors_ref}{@keys};
154+
my $CoreDist;
155+
if (scalar keys %{$neighbors_ref} >= $this->{'minpts'}){
156+
$CoreDist = $vals[$this->{'minpts'}-1]; # MinPt^th-distance
157+
}
158+
else {
159+
$CoreDist = undef;
160+
}
161+
return $CoreDist;
162+
}
163+
164+
sub OrderSeedsUpdate {
165+
my ($neighbors_ref, $CenterObject, $CD, $OrderSeeds_ref, $Set_ref) = @_;
166+
my $c_dist = $CD;
167+
my %neighborsHash = % { $neighbors_ref };
168+
my %OrderSeedsHash = % { $OrderSeeds_ref};
169+
foreach my $q (keys %{$neighbors_ref}) {
170+
if (${$Set_ref}{$q}{processInfo} =~ "False") {
171+
my $new_r_dist = max ($c_dist,${$neighbors_ref}{$q});
172+
if (exists ${$OrderSeeds_ref}{$q}) {
173+
if ($new_r_dist < ${$OrderSeeds_ref}{$q}) {
174+
${$OrderSeeds_ref}{$q}="$new_r_dist";
175+
}
176+
}
177+
else {
178+
${$OrderSeeds_ref}{$q}="$new_r_dist";
179+
}
180+
}
181+
}
182+
}
183+
184+
sub CombineWords {
185+
my ($word1,$word2)=@_;
186+
return $word1.":".$word2;
187+
}
188+
189+
sub help_text{
190+
my $this = shift;
191+
return <<HELP
192+
193+
Usage: hotspot3d density [options]
194+
195+
REQUIRED
196+
--pairwise-file 3D pairwise data file
197+
198+
OPTIONAL
199+
--epsilon Epsilon value, default: 4
200+
--minpts MinPts, default: 3
201+
--cut-off-start Starting Epsilon-prime value to look for clusters, default: 2
202+
--cut-off-end Ending Epsilon-prime value to look for clusters, default: 4
203+
204+
--help this message
205+
206+
HELP
207+
208+
}
209+
210+
1;

0 commit comments

Comments
 (0)