diff --git a/.github/workflows/task.yml b/.github/workflows/task.yml index ac09089acb..f71e27a41d 100644 --- a/.github/workflows/task.yml +++ b/.github/workflows/task.yml @@ -21,5 +21,5 @@ jobs: uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop needs: test with: - cli: python3 ./build/cloud/docker/docker-build.py push --images base --tag ${{ github.ref_name }} + cli: python3 ./build/cloud/docker/docker-build.py push --images base,builder --tag ${{ github.ref_name }} secrets: inherit diff --git a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile index 6e1657d1bf..bcb2de9cb8 100644 --- a/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile +++ b/cellbase-app/app/cloud/docker/cellbase-builder/Dockerfile @@ -11,7 +11,7 @@ LABEL org.label-schema.vendor="OpenCB" \ ## We need to be root to install dependencies USER root RUN apt-get update -y && \ - apt-get install -y git default-mysql-client libjson-perl libdbi-perl libdbd-mysql-perl libdbd-mysql-perl libtry-tiny-perl && \ + apt-get install -y git default-mysql-client libjson-perl libdbi-perl libdbd-mysql-perl libtry-tiny-perl libxml-simple-perl liblog-log4perl-perl libxml-parser-perl libxml-dom-perl && \ mkdir /opt/ensembl && chown cellbase:cellbase /opt/ensembl && \ rm -rf /var/lib/apt/lists/* @@ -26,6 +26,10 @@ RUN cd /opt/ensembl && \ git clone https://github.com/Ensembl/ensembl-variation.git && \ git clone https://github.com/Ensembl/ensembl-funcgen.git && \ git clone https://github.com/Ensembl/ensembl-compara.git && \ - git clone https://github.com/Ensembl/ensembl-io.git + git clone https://github.com/Ensembl/ensembl-io.git && \ + git clone --branch cvs/release-0_7 https://github.com/biomart/biomart-perl -ENV 
PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts +## Give writing permissions to allow the script ensembl_canonical.pl to create sub-folder for cache purposes +RUN chmod -R 777 /opt/cellbase/scripts/ensembl-scripts/ + +ENV PERL5LIB=$PERL5LIB:/opt/ensembl/bioperl-live:/opt/ensembl/ensembl/modules:/opt/ensembl/ensembl-variation/modules:/opt/ensembl/ensembl-funcgen/modules:/opt/ensembl/ensembl-compara/modules:/opt/ensembl/lib/perl/5.18.2:/opt/cellbase/scripts/ensembl-scripts:/opt/ensembl/biomart-perl/lib diff --git a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm index 70865465e9..90f2f8208e 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm +++ b/cellbase-app/app/scripts/ensembl-scripts/DB_CONFIG.pm @@ -134,16 +134,16 @@ our $ENSEMBL_GENOMES_PORT = "4157"; our $ENSEMBL_GENOMES_USER = "anonymous"; ## Vertebrates -our $HOMO_SAPIENS_CORE = "homo_sapiens_core_110_38"; -our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_110_38"; -our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_110_38"; -our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_110_38"; +our $HOMO_SAPIENS_CORE = "homo_sapiens_core_111_38"; +our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_111_38"; +our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_111_38"; +our $HOMO_SAPIENS_COMPARA = "homo_sapiens_compara_111_38"; #our $HOMO_SAPIENS_CORE = "homo_sapiens_core_78_38"; #our $HOMO_SAPIENS_VARIATION = "homo_sapiens_variation_78_38"; #our $HOMO_SAPIENS_FUNCTIONAL = "homo_sapiens_funcgen_78_38"; -our $MUS_MUSCULUS_CORE = "mus_musculus_core_78_38"; -our $MUS_MUSCULUS_VARIATION = "mus_musculus_variation_78_38"; -our $MUS_MUSCULUS_FUNCTIONAL = "mus_musculus_funcgen_78_38"; +our $MUS_MUSCULUS_CORE = "mus_musculus_core_111_39"; 
+our $MUS_MUSCULUS_VARIATION = "mus_musculus_variation_111_39"; +our $MUS_MUSCULUS_FUNCTIONAL = "mus_musculus_funcgen_111_39"; our $RATTUS_NORVEGICUS_CORE = "rattus_norvegicus_core_78_5"; our $RATTUS_NORVEGICUS_VARIATION = "rattus_norvegicus_variation_78_5"; our $RATTUS_NORVEGICUS_FUNCTIONAL = "rattus_norvegicus_funcgen_78_5"; diff --git a/cellbase-app/app/scripts/ensembl-scripts/ensembl_canonical.pl b/cellbase-app/app/scripts/ensembl-scripts/ensembl_canonical.pl new file mode 100755 index 0000000000..bed648e2d0 --- /dev/null +++ b/cellbase-app/app/scripts/ensembl-scripts/ensembl_canonical.pl @@ -0,0 +1,61 @@ +#!/usr/bin/env perl + +use strict; +use Getopt::Long; +use Data::Dumper; +use JSON; +use DB_CONFIG; + +use BioMart::Initializer; +use BioMart::Query; +use BioMart::QueryRunner; + +## Default values +my $species = 'hsapiens'; +my $outdir = "./"; + +## Parsing command line +GetOptions ('species=s' => \$species, 'outdir=s' => \$outdir); + + +my $confFile = "/opt/cellbase/scripts/ensembl-scripts/martURLLocation.xml"; + +# NB: change action to 'clean' if you wish to start a fresh configuration +# and to 'cached' if you want to skip configuration step on subsequent runs from the same registry +my $action='clean'; +my $initializer = BioMart::Initializer->new('registryFile'=>$confFile, 'action'=>$action); +my $registry = $initializer->getRegistry; + +my $query = BioMart::Query->new('registry'=>$registry,'virtualSchemaName'=>'default'); + +$query->setDataset($species."_gene_ensembl"); + +$query->addAttribute("ensembl_gene_id"); +$query->addAttribute("ensembl_transcript_id"); +$query->addAttribute("transcript_is_canonical"); + +$query->formatter("TSV"); + +# Open the file for writing +open(my $fh, '>', "$outdir/ensembl_canonical.txt") or die "Cannot open ensembl_canonical.txt file: $!"; + +# Save the original stdout +my $original_stdout = *STDOUT; +open(STDOUT, '>&', $fh) or die "Can't redirect STDOUT: $!"; + +my $query_runner = BioMart::QueryRunner->new(); + +# to 
obtain unique rows only +$query_runner->uniqueRowsOnly(1); +$query_runner->execute($query); +#$query_runner->printHeader(); +#print ENSEMBL_CANONICAL $query_runner->printResults(); +# Call printResults which prints to STDOUT (now redirected to the file) +$query_runner->printResults(); +#$query_runner->printFooter(); + +# Restore the original stdout +open(STDOUT, '>&', $original_stdout) or die "Can't restore STDOUT: $!"; + +# Close the filehandle +close($fh) or die "Failed to close file: $!"; \ No newline at end of file diff --git a/cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl b/cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl index 5e3aa9c46a..22b6a825b2 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl +++ b/cellbase-app/app/scripts/ensembl-scripts/gene_extra_info.pl @@ -16,7 +16,9 @@ #################################################################### ## Parsing command line options #################################### #################################################################### -# USAGE: ./gene_extra_info.pl --species "Homo sapiens" --outdir ../../appl_db/ird_v1/hsa ... +##docker run -it --mount type=bind,source=/tmp,target=/tmp opencb/cellbase-builder:6.2.0-SNAPSHOT /opt/cellbase/scripts/ensembl-scripts/gene_extra_info.pl -s "Mus musculus" -o /tmp + +# USAGE: ./gene_extra_info.pl --species "Homo sapiens" --assembly "GRCh38" --outdir ../../appl_db/ird_v1/hsa ... ## Parsing command line GetOptions ('species=s' => \$species, 'assembly=s' => \$assembly, 'outdir=s' => \$outdir, 'phylo=s' => \$phylo, @@ -50,8 +52,8 @@ if ($phylo eq "" || $phylo eq "vertebrate") { print ("In vertebrates section\n"); - if ($species eq "Homo sapiens" && $assembly eq "GRCh38") { - print ("Human selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n"); + if ($species eq "Homo sapiens" || $species eq "Mus musculus") { + print ($species." selected, assembly ".$assembly." 
selected, connecting to port ".$ENSEMBL_PORT."\n"); Bio::EnsEMBL::Registry->load_registry_from_db( -host => $ENSEMBL_HOST, -user => $ENSEMBL_USER, diff --git a/cellbase-app/app/scripts/ensembl-scripts/genome_info.pl b/cellbase-app/app/scripts/ensembl-scripts/genome_info.pl index 50520f1f92..8ecf3d7c8f 100755 --- a/cellbase-app/app/scripts/ensembl-scripts/genome_info.pl +++ b/cellbase-app/app/scripts/ensembl-scripts/genome_info.pl @@ -17,7 +17,9 @@ #################################################################### ## Parsing command line options #################################### #################################################################### -# USAGE: ./genome_info.pl --species "Homo sapiens" --outfile ../../appl_db/ird_v1/hsa ... +##docker run -it --mount type=bind,source=/tmp,target=/tmp opencb/cellbase-builder:6.2.0-SNAPSHOT /opt/cellbase/scripts/ensembl-scripts/genome_info.pl --species "Mus musculus" --assembly GRCm39 --outfile /tmp + +# USAGE: ./genome_info.pl --species "Homo sapiens" --assembly GRCh38 --outfile ../../appl_db/ird_v1/hsa ... ## Parsing command line GetOptions ('species=s' => \$species, 'assembly=s' => \$assembly, 'o|outfile=s' => \$outfile, 'phylo=s' => \$phylo, @@ -29,7 +31,6 @@ if ($outfile eq "") { $outfile = "/ensembl-data/genome_info.json"; - # $outfile = "/ensembl-data/$species.json"; } #################################################################### @@ -42,17 +43,13 @@ # Bio::EnsEMBL::Registry->load_all("$ENSEMBL_REGISTRY"); if($phylo eq "" || $phylo eq "vertebrate") { print ("In vertebrates section\n"); - if ($species eq "Homo sapiens" && $assembly eq "GRCh38") { - print ("Human selected, assembly ".$assembly." selected, connecting to port ".$ENSEMBL_PORT."\n"); - Bio::EnsEMBL::Registry->load_registry_from_db( - -host => $ENSEMBL_HOST, - -user => $ENSEMBL_USER, - -port => $ENSEMBL_PORT, - -verbose => $verbose - ); - } else { - print ("Human selected, assembly ".$assembly." 
no supported\n"); - } + print ("Species: ".$species.", assembly ".$assembly.", connecting to: ".$ENSEMBL_HOST.":".$ENSEMBL_PORT."\n"); + Bio::EnsEMBL::Registry->load_registry_from_db( + -host => $ENSEMBL_HOST, + -user => $ENSEMBL_USER, + -port => $ENSEMBL_PORT, + -verbose => $verbose + ); } else { print ("In no-vertebrates section\n"); Bio::EnsEMBL::Registry->load_registry_from_db( @@ -64,7 +61,6 @@ my $slice_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "Slice"); my $karyotype_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "KaryotypeBand"); -# my $gene_adaptor = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "Gene"); #################################################################### my %info_stats = (); @@ -81,12 +77,10 @@ $chromosome{'start'} = int($chrom->start()); $chromosome{'end'} = int($chrom->end()); $chromosome{'size'} = int($chrom->seq_region_length()); -# $chromosome{'numberGenes'} = scalar @{$chrom->get_all_Genes()}; $chromosome{'isCircular'} = $chrom->is_circular(); my @cytobands = (); foreach my $cyto(@{$karyotype_adaptor->fetch_all_by_chr_name($chrom->seq_region_name)}) { -# print $cytoband->name."\n"; my %cytoband = (); $cytoband{'name'} = $cyto->name(); $cytoband{'start'} = int($cyto->start()); @@ -96,7 +90,7 @@ push(@cytobands, \%cytoband); } - ## check if any cytoband has been added + ## Check if any cytoband has been added ## If not a unique cytoband covering all chromosome is added. 
if(@cytobands == 0) { my %cytoband = (); @@ -110,7 +104,6 @@ $chromosome{'cytobands'} = \@cytobands; push(@chromosomes, \%chromosome); -# push(@chrom_ids, $chrom->seq_region_name); } $info_stats{'chromosomes'} = \@chromosomes; @@ -124,7 +117,6 @@ $supercontig{'start'} = int($supercon->start()); $supercontig{'end'} = int($supercon->end()); $supercontig{'size'} = int($supercon->seq_region_length()); -# $supercontig{'numberGenes'} = scalar @{$supercon->get_all_Genes()}; $supercontig{'isCircular'} = $supercon->is_circular(); ## Adding an unique cytoband covering all chromosome is added. @@ -151,7 +143,7 @@ sub print_parameters { print "Parameters: "; - print "species: $species, outfile: $outfile, "; + print "species: $species, assembly: $assembly, outfile: $outfile, "; print "ensembl-registry: $ENSEMBL_REGISTRY, "; print "ensembl-host: $ENSEMBL_HOST, ensembl-port: $ENSEMBL_PORT, "; print "ensembl-user: $ENSEMBL_USER, verbose: $verbose, help: $help"; diff --git a/cellbase-app/app/scripts/ensembl-scripts/martURLLocation.xml b/cellbase-app/app/scripts/ensembl-scripts/martURLLocation.xml new file mode 100644 index 0000000000..a710368f8f --- /dev/null +++ b/cellbase-app/app/scripts/ensembl-scripts/martURLLocation.xml @@ -0,0 +1,19 @@ + + + + + \ No newline at end of file diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java index 088db087f0..a71663f19f 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java @@ -66,8 +66,8 @@ public class CommonCommandOptions { description = "Set the logging level, accepted values are: debug, info, warn, error and fatal") public String logLevel = "info"; - @Parameter(names = {"-C", "--config"}, arity = 1, - description = "Path to CellBase configuration.yml file") + @Deprecated + @Parameter(names = {"-C", 
"--config"}, arity = 1, hidden = true, description = "Path to CellBase configuration.yml file") public String conf; } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CommandExecutor.java index 39018bf170..64dcc05bfb 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CommandExecutor.java @@ -35,18 +35,12 @@ import java.nio.file.Path; import java.nio.file.Paths; -/** - * Created by imedina on 03/02/15. - */ + public abstract class CommandExecutor { protected String logLevel; -// protected boolean verbose; protected String conf; - @Deprecated - protected String configFile; - protected String appHome; protected CellBaseConfiguration configuration; @@ -55,35 +49,13 @@ public abstract class CommandExecutor { protected Logger logger; public CommandExecutor() { - } public CommandExecutor(String logLevel, String conf) { this.logLevel = logLevel; this.conf = conf; - /** - * System property 'app.home' is set up by cellbase.sh. If by any reason this is null - * then CELLBASE_HOME environment variable is used instead. - */ - this.appHome = System.getProperty("app.home", System.getenv("CELLBASE_HOME")); - - if (StringUtils.isEmpty(conf)) { - this.conf = this.appHome + "/conf"; - } - - if (logLevel != null && !logLevel.isEmpty()) { - // We must call to this method - setLogLevel(logLevel); - } - } - - public CommandExecutor(String logLevel, boolean verbose, String conf) { - this.logLevel = logLevel; -// this.verbose = verbose; - this.conf = conf; - - /** + /* * System property 'app.home' is set up by cellbase.sh. If by any reason this is null * then CELLBASE_HOME environment variable is used instead. 
*/ @@ -124,29 +96,16 @@ public void setLogLevel(String logLevel) { this.logLevel = logLevel; } -// public boolean isVerbose() { -// return verbose; -// } -// -// public void setVerbose(boolean verbose) { -// this.verbose = verbose; -// } - - public String getConfigFile() { - return configFile; - } - - public void setConfigFile(String configFile) { - this.configFile = configFile; - } - public Logger getLogger() { return logger; } - /* + /** * This method attempts to first data configuration from CLI parameter, if not present then uses * the configuration from installation directory, if not exists then loads JAR configuration.json or yml. + * + * @throws URISyntaxException If any URI problem occurs + * @throws IOException If any IO problem occurs */ public void loadCellBaseConfiguration() throws URISyntaxException, IOException { Path confPath = Paths.get(this.conf); @@ -154,11 +113,13 @@ public void loadCellBaseConfiguration() throws URISyntaxException, IOException { if (Files.exists(confPath.resolve("configuration.json"))) { logger.debug("Loading configuration from '{}'", confPath.resolve("configuration.json").toAbsolutePath()); - this.configuration = CellBaseConfiguration.load(new FileInputStream(confPath.resolve("configuration.json").toFile()), - CellBaseConfiguration.ConfigurationFileFormat.JSON); + this.configuration = CellBaseConfiguration + .load(Files.newInputStream(confPath.resolve("configuration.json").toFile().toPath()), + CellBaseConfiguration.ConfigurationFileFormat.JSON); } else if (Files.exists(Paths.get(this.appHome + "/conf/configuration.yml"))) { logger.debug("Loading configuration from '{}'", this.appHome + "/conf/configuration.yml"); - this.configuration = CellBaseConfiguration.load(new FileInputStream(new File(this.appHome + "/conf/configuration.yml"))); + this.configuration = CellBaseConfiguration + .load(Files.newInputStream(new File(this.appHome + "/conf/configuration.yml").toPath())); } else { InputStream inputStream = 
CellBaseConfiguration.class.getClassLoader().getResourceAsStream("conf/configuration.json"); String configurationFilePath = "conf/configuration.json"; @@ -198,10 +159,4 @@ public void loadClientConfiguration() throws IOException { } } } - - protected void makeDir(Path folderPath) throws IOException { - if (!Files.exists(folderPath)) { - Files.createDirectories(folderPath); - } - } } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java index 4a5f2c085f..ec1d8503de 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminCliOptionsParser.java @@ -19,15 +19,12 @@ import com.beust.jcommander.*; import org.opencb.cellbase.app.cli.CliOptionsParser; import org.opencb.cellbase.core.api.key.ApiKeyQuota; -import org.opencb.cellbase.lib.EtlCommons; import java.util.HashMap; import java.util.List; import java.util.Map; -/** - * Created by imedina on 03/02/15. 
- */ + public class AdminCliOptionsParser extends CliOptionsParser { private final CommonCommandOptions commonCommandOptions; @@ -35,13 +32,13 @@ public class AdminCliOptionsParser extends CliOptionsParser { private DownloadCommandOptions downloadCommandOptions; private BuildCommandOptions buildCommandOptions; + private DataListCommandOptions dataListCommandOptions; private DataReleaseCommandOptions dataReleaseCommandOptions; private ApiKeyCommandOptions apiKeyCommandOptions; private LoadCommandOptions loadCommandOptions; private ExportCommandOptions exportCommandOptions; private CustomiseCommandOptions customiseCommandOptions; private IndexCommandOptions indexCommandOptions; - private InstallCommandOptions installCommandOptions; private ServerCommandOptions serverCommandOptions; private ValidationCommandOptions validationCommandOptions; @@ -52,25 +49,25 @@ public AdminCliOptionsParser() { downloadCommandOptions = new DownloadCommandOptions(); buildCommandOptions = new BuildCommandOptions(); + dataListCommandOptions = new DataListCommandOptions(); dataReleaseCommandOptions = new DataReleaseCommandOptions(); apiKeyCommandOptions = new ApiKeyCommandOptions(); loadCommandOptions = new LoadCommandOptions(); exportCommandOptions = new ExportCommandOptions(); customiseCommandOptions = new CustomiseCommandOptions(); indexCommandOptions = new IndexCommandOptions(); - installCommandOptions = new InstallCommandOptions(); serverCommandOptions = new ServerCommandOptions(); validationCommandOptions = new ValidationCommandOptions(); jCommander.addCommand("download", downloadCommandOptions); jCommander.addCommand("build", buildCommandOptions); + jCommander.addCommand("data-list", dataListCommandOptions); jCommander.addCommand("data-release", dataReleaseCommandOptions); jCommander.addCommand("api-key", apiKeyCommandOptions); jCommander.addCommand("load", loadCommandOptions); jCommander.addCommand("export", exportCommandOptions); jCommander.addCommand("customise", 
customiseCommandOptions); jCommander.addCommand("index", indexCommandOptions); - jCommander.addCommand("install", installCommandOptions); jCommander.addCommand("server", serverCommandOptions); jCommander.addCommand("validate", validationCommandOptions); } @@ -80,7 +77,8 @@ public void parse(String[] args) throws ParameterException { jCommander.parse(args); } - @Parameters(commandNames = {"download"}, commandDescription = "Download all different data sources provided in the configuration.yml file") + @Parameters(commandNames = {"download"}, commandDescription = "Download all different data sources provided in the configuration.yml" + + " file") public class DownloadCommandOptions { @ParametersDelegate @@ -89,16 +87,13 @@ public class DownloadCommandOptions { @ParametersDelegate public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download:" - + EtlCommons.GENOME_DATA + ", " + EtlCommons.GENE_DATA + ", " + EtlCommons.VARIATION_DATA + ", " - + EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA + ", " + EtlCommons.MISSENSE_VARIATION_SCORE_DATA + ", " - + EtlCommons.REGULATION_DATA + ", " + EtlCommons.PROTEIN_DATA + ", " + EtlCommons.CONSERVATION_DATA + ", " - + EtlCommons.CLINICAL_VARIANTS_DATA + ", " + EtlCommons.REPEATS_DATA + ", " + EtlCommons.OBO_DATA + ", " - + EtlCommons.PUBMED_DATA + ", " + EtlCommons.PHARMACOGENOMICS_DATA + "; and 'all' to download everything", - required = true, arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to download, it depends on the species; use the" + + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to download" + + " everything", required = true, arity = 1) public String data; - @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1) 
+ @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, + arity = 1) public String outputDirectory; } @@ -108,18 +103,21 @@ public class BuildCommandOptions { @ParametersDelegate public CommonCommandOptions commonOptions = commonCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to build: genome, genome_info, " - + "gene, variation, variation_functional_score, regulation, protein, ppi, conservation, drug, " - + "clinical_variants, repeats, svs, splice_score, pubmed. 'all' builds everything.", required = true, arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to build, it depends on the species; use the" + + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to build" + + " everything", required = true, arity = 1) public String data; - @Parameter(names = {"-s", "--species"}, description = "Name of the species to be built, valid formats include 'Homo sapiens' or 'hsapiens'", required = false, arity = 1) + @Parameter(names = {"-s", "--species"}, description = "Name of the species to be built, valid formats include 'Homo sapiens' or" + + " 'hsapiens'", arity = 1) public String species = "Homo sapiens"; - @Parameter(names = {"-a", "--assembly"}, description = "Name of the assembly, if empty the first assembly in configuration.yml will be used", required = false, arity = 1) + @Parameter(names = {"-a", "--assembly"}, description = "Name of the assembly, if empty the first assembly in configuration.yml" + + " will be used", arity = 1) public String assembly; - @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, arity = 1) + @Parameter(names = {"-o", "--outdir"}, description = "Downloaded files will be saved in this directory.", required = true, + arity = 1) public String outputDirectory; 
@Parameter(names = {"--skip-normalize"}, description = "Skip normalization of clinical variants. Normalization" @@ -137,6 +135,17 @@ public class BuildCommandOptions { } + @Parameters(commandNames = {"data-list"}, commandDescription = "List the data supported by the given species") + public class DataListCommandOptions { + + @ParametersDelegate + public CommonCommandOptions commonOptions = commonCommandOptions; + + @Parameter(names = {"-s", "--species"}, description = "Name of the species to list the data, valid formats include 'Homo sapiens'" + + " or 'hsapiens'", arity = 1) + public String species = "Homo sapiens"; + } + @Parameters(commandNames = {"data-release"}, commandDescription = "Manage data releases in order to support multiple versions of data") public class DataReleaseCommandOptions { @@ -155,11 +164,13 @@ public class DataReleaseCommandOptions { @Parameter(names = {"--update"}, description = "Data release to be updated by adding CellBase vesions", arity = 1) public int update; - @Parameter(names = {"--add-versions"}, description = "CellBase versions separated by commas, e.g.: v5.2,v5.3. This parameter has to be used together to the parameter --update", arity = 1) + @Parameter(names = {"--add-versions"}, description = "CellBase versions separated by commas, e.g.: v5.2,v5.3. 
This parameter has" + + " to be used together to the parameter --update", arity = 1) public String versions; } - @Parameters(commandNames = {"api-key"}, commandDescription = "Manage API keys in order to access to restricted/licensed data sources and set quota") + @Parameters(commandNames = {"api-key"}, commandDescription = "Manage API keys in order to access to restricted/licensed data sources" + + " and set quota") public class ApiKeyCommandOptions { @ParametersDelegate @@ -168,9 +179,9 @@ public class ApiKeyCommandOptions { @Parameter(names = {"--create-api-key"}, description = "Create an API key", arity = 0) public boolean createApiKey; - @Parameter(names = {"--licensed-data-sources"}, description = "Use this parameter in conjunction with --create-api-key to specify the" - + " licensed data sources separated by commas and optionally the expiration date: source[:dd/mm/yyyy]. e.g.:" - + " cosmic:31/01/2025,hgmd", arity = 1) + @Parameter(names = {"--licensed-data-sources"}, description = "Use this parameter in conjunction with --create-api-key to" + +" specify the licensed data sources separated by commas and optionally the expiration date: source[:dd/mm/yyyy]. e.g.:" + + " spliceai:31/01/2025,hgmd", arity = 1) public String dataSources; @Parameter(names = {"--expiration"}, description = "Use this parameter in conjunction with --create-api-key to specify the" @@ -195,9 +206,9 @@ public class LoadCommandOptions { @ParametersDelegate public CommonCommandOptions commonOptions = commonCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation," - + " conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed, pharmacogenomics." 
- + " 'all' loads everything", required = true, arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to load, it depends on the species; use the" + + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to load" + + " everything", required = true, arity = 1) public String data; @Parameter(names = {"-i", "--input"}, required = true, arity = 1, @@ -242,9 +253,9 @@ public class ExportCommandOptions { @ParametersDelegate public CommonCommandOptions commonOptions = commonCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Data model type to be loaded: genome, gene, variation, " - + "conservation, regulation, protein, clinical_variants, repeats, regulatory_pfm, splice_score, pubmed. 'all' " - + " loads everything", required = true, arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to export, it depends on the species; use the" + + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to export" + + " everything", required = true, arity = 1) public String data; @Parameter(names = {"--db", "--database"}, description = "Database name, e.g., cellbase_hsapiens_grch38_v5", required = true, @@ -304,10 +315,9 @@ public class IndexCommandOptions { @ParametersDelegate public CommonCommandOptions commonOptions = commonCommandOptions; - @Parameter(names = {"-d", "--data"}, description = "Data model type to be indexed: genome, gene, variation, " - + "regulation, protein, ontology, clinical_variants, repeats, refseq and missense_variation_functional_score. 
'all' " - + "indexes everything", required = true, - arity = 1) + @Parameter(names = {"-d", "--data"}, description = "Comma separated list of data to index, it depends on the species; use the" + + " command 'cellbase-admin.sh data-list' to know the data list available for each species; or use 'all' to index" + + " everything", required = true, arity = 1) public String data; @Parameter(names = {"--db", "--database"}, description = "Database name.", required = true, arity = 1) @@ -321,16 +331,6 @@ public class IndexCommandOptions { public boolean validate; } - @Parameters(commandNames = {"install"}, commandDescription = "Set up sharding for CellBase") - public class InstallCommandOptions { - - @ParametersDelegate - public CommonCommandOptions commonOptions = commonCommandOptions; - - @ParametersDelegate - public SpeciesAndAssemblyCommandOptions speciesAndAssemblyOptions = speciesAndAssemblyCommandOptions; - } - @Parameters(commandNames = {"server"}, commandDescription = "Manage REST server") public class ServerCommandOptions { @@ -353,16 +353,20 @@ public class ValidationCommandOptions { @ParametersDelegate public CommonCommandOptions commonOptions = commonCommandOptions; - @Parameter(names = {"-s", "--species"}, description = "Name of the species to be downloaded, valid format include 'Homo sapiens' or 'hsapiens'", arity = 1) + @Parameter(names = {"-s", "--species"}, description = "Name of the species to be downloaded, valid format include 'Homo sapiens'" + + " or 'hsapiens'", arity = 1) public String species = "Homo sapiens"; - @Parameter(names = {"-a", "--assembly"}, description = "Name of the assembly, if empty the first assembly in configuration.json will be used", required = false, arity = 1) + @Parameter(names = {"-a", "--assembly"}, description = "Name of the assembly, if empty the first assembly in configuration.json" + + " will be used", arity = 1) public String assembly = "GRCh38"; - @Parameter(names = {"--data-release"}, description = "Data release. 
To use the default data release, please, set this parameter to 0", required = false, arity = 1) + @Parameter(names = {"--data-release"}, description = "Data release. To use the default data release, please, set this parameter" + + " to 0", arity = 1) public int dataRelease = 0; - @Parameter(names = {"--api-key"}, description = "API key to get access to licensed/restricted data sources such as COSMIC or HGMD", required = false, arity = 1) + @Parameter(names = {"--api-key"}, description = "API key to get access to licensed/restricted data sources such as SpliceAI or" + + " HGMD", arity = 1) public String apiKey; @Parameter(names = {"-i", "--input-file"}, description = "Full path to VCF", required = true, arity = 1) @@ -371,8 +375,7 @@ public class ValidationCommandOptions { @Parameter(names = {"-V", "--vep-file"}, description = "Full path to VEP annotation JSON file", required = true, arity = 1) public String vepFile; - @Parameter(names = {"-o", "--output-dir"}, description = "Output directory where the comparison report is saved", required = false, - arity = 1) + @Parameter(names = {"-o", "--output-dir"}, description = "Output directory where the comparison report is saved", arity = 1) public String outputDirectory = "/tmp"; @Parameter(names = {"-t", "--type"}, description = "Which type to analyse: 'Protein', 'Transcript' or 'Both'", required = @@ -410,6 +413,10 @@ public BuildCommandOptions getBuildCommandOptions() { return buildCommandOptions; } + public DataListCommandOptions getDataListCommandOptions() { + return dataListCommandOptions; + } + public DataReleaseCommandOptions getDataReleaseCommandOptions() { return dataReleaseCommandOptions; } @@ -424,8 +431,6 @@ public IndexCommandOptions getIndexCommandOptions() { return indexCommandOptions; } - public InstallCommandOptions getInstallCommandOptions() { return installCommandOptions; } - public ServerCommandOptions getServerCommandOptions() { return serverCommandOptions; } public ValidationCommandOptions 
getValidationCommandOptions() { return validationCommandOptions; } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java index 10c43d637c..d46d32709f 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/AdminMain.java @@ -25,9 +25,7 @@ import java.io.IOException; import java.net.URISyntaxException; -/** - * Created by imedina on 03/02/15. - */ + public class AdminMain { public static void main(String[] args) { @@ -63,30 +61,30 @@ public static void main(String[] args) { case "build": commandExecutor = new BuildCommandExecutor(cliOptionsParser.getBuildCommandOptions()); break; + case "load": + commandExecutor = new LoadCommandExecutor(cliOptionsParser.getLoadCommandOptions()); + break; + case "data-list": + commandExecutor = new DataListCommandExecutor(cliOptionsParser.getDataListCommandOptions()); + break; case "data-release": commandExecutor = new DataReleaseCommandExecutor(cliOptionsParser.getDataReleaseCommandOptions()); break; case "api-key": commandExecutor = new ApiKeyCommandExecutor(cliOptionsParser.getApiKeyCommandOptions()); break; - case "load": - commandExecutor = new LoadCommandExecutor(cliOptionsParser.getLoadCommandOptions()); - break; case "export": commandExecutor = new ExportCommandExecutor(cliOptionsParser.getExportCommandOptions()); break; case "index": commandExecutor = new IndexCommandExecutor(cliOptionsParser.getIndexCommandOptions()); break; - case "install": - commandExecutor = new InstallCommandExecutor(cliOptionsParser.getInstallCommandOptions()); + case "validate": + commandExecutor = new ValidationCommandExecutor(cliOptionsParser.getValidationCommandOptions()); break; case "server": commandExecutor = new ServerCommandExecutor(cliOptionsParser.getServerCommandOptions()); break; - case "validate": - commandExecutor = new 
ValidationCommandExecutor(cliOptionsParser.getValidationCommandOptions()); - break; default: break; } @@ -98,10 +96,10 @@ public static void main(String[] args) { commandExecutor.execute(); } catch (IOException | URISyntaxException | CellBaseException e) { commandExecutor.getLogger().error("Error: " + e.getMessage()); + e.printStackTrace(); System.exit(1); } } } } - } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java index 16db1f82bc..542cc3e129 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/BuildCommandExecutor.java @@ -17,11 +17,14 @@ package org.opencb.cellbase.app.cli.admin.executors; import com.beust.jcommander.ParameterException; -import org.apache.commons.lang.StringUtils; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.lang3.StringUtils; import org.opencb.cellbase.app.cli.CommandExecutor; import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer; import org.opencb.cellbase.core.serializer.CellBaseSerializer; @@ -31,59 +34,74 @@ import org.opencb.cellbase.lib.builders.*; import org.opencb.cellbase.lib.builders.clinical.variant.ClinicalVariantBuilder; -import java.io.File; import java.io.IOException; import java.nio.file.*; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; import static 
org.opencb.cellbase.lib.EtlCommons.*; +import static org.opencb.cellbase.lib.builders.AbstractBuilder.BUILDING_DONE_LOG_MESSAGE; +import static org.opencb.cellbase.lib.builders.AbstractBuilder.BUILDING_LOG_MESSAGE; +import static org.opencb.cellbase.lib.builders.EnsemblGeneBuilder.ENSEMBL_GENE_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.GenomeSequenceFastaBuilder.GENOME_JSON_FILENAME; +import static org.opencb.cellbase.lib.builders.OntologyBuilder.OBO_OUTPUT_BASENAME; +import static org.opencb.cellbase.lib.builders.ProteinBuilder.PROTEIN_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.RefSeqGeneBuilder.REFSEQ_GENE_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.RegulatoryFeatureBuilder.*; +import static org.opencb.cellbase.lib.builders.RepeatsBuilder.REPEATS_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.VariationBuilder.VARIATION_CHR_PREFIX; +import static org.opencb.cellbase.lib.download.GenomeDownloadManager.GENOME_INFO_FILENAME; -/** - * Created by imedina on 03/02/15. 
- */ public class BuildCommandExecutor extends CommandExecutor { - private AdminCliOptionsParser.BuildCommandOptions buildCommandOptions; - private Path output; - private Path buildFolder = null; // /_/generated-json - private Path downloadFolder = null; // /_/download + private final AdminCliOptionsParser.BuildCommandOptions buildCommandOptions; + private final Path outputDirectory; + + private Path buildFolder = null; + private Path downloadFolder = null; private boolean normalize = true; - private File ensemblScriptsFolder; + private SpeciesConfiguration speciesConfiguration; + private SpeciesConfiguration.Assembly assembly; + private String ensemblRelease; private boolean flexibleGTFParsing; - private SpeciesConfiguration speciesConfiguration; + +// private SpeciesConfiguration speciesConfiguration; public BuildCommandExecutor(AdminCliOptionsParser.BuildCommandOptions buildCommandOptions) { super(buildCommandOptions.commonOptions.logLevel, buildCommandOptions.commonOptions.conf); this.buildCommandOptions = buildCommandOptions; - this.output = Paths.get(buildCommandOptions.outputDirectory); + this.outputDirectory = Paths.get(buildCommandOptions.outputDirectory); normalize = !buildCommandOptions.skipNormalize; - this.ensemblScriptsFolder = new File(System.getProperty("basedir") + "/bin/ensembl-scripts/"); this.flexibleGTFParsing = buildCommandOptions.flexibleGTFParsing; } - /** * Parse specific 'build' command options. 
+ * + * @throws CellBaseException Exception */ - public void execute() { + public void execute() throws CellBaseException { try { // Output directory need to be created if it doesn't exist - if (!Files.exists(output)) { - Files.createDirectories(output); + if (!Files.exists(outputDirectory)) { + Files.createDirectories(outputDirectory); } - speciesConfiguration = SpeciesUtils.getSpeciesConfiguration(configuration, buildCommandOptions.species); + // Get the species + String species = buildCommandOptions.species; + speciesConfiguration = SpeciesUtils.getSpeciesConfiguration(configuration, species); if (speciesConfiguration == null) { throw new CellBaseException("Invalid species: '" + buildCommandOptions.species + "'"); } - SpeciesConfiguration.Assembly assembly = null; - if (!StringUtils.isEmpty(buildCommandOptions.assembly)) { + + // Get the assembly + if (StringUtils.isNotEmpty(buildCommandOptions.assembly)) { assembly = SpeciesUtils.getAssembly(speciesConfiguration, buildCommandOptions.assembly); if (assembly == null) { throw new CellBaseException("Invalid assembly: '" + buildCommandOptions.assembly + "'"); @@ -92,294 +110,383 @@ public void execute() { assembly = SpeciesUtils.getDefaultAssembly(speciesConfiguration); } + String ensemblVersion = assembly.getEnsemblVersion(); + ensemblRelease = "release-" + ensemblVersion.split("_")[0]; + String spShortName = SpeciesUtils.getSpeciesShortname(speciesConfiguration); String spAssembly = assembly.getName().toLowerCase(); - Path spFolder = output.resolve(spShortName + "_" + spAssembly); - // /_/download - downloadFolder = output.resolve(spFolder + "/download"); + Path spFolder = outputDirectory.resolve(spShortName + "_" + spAssembly); + downloadFolder = outputDirectory.resolve(spFolder + "/download"); if (!Files.exists(downloadFolder)) { throw new CellBaseException("Download folder not found '" + spShortName + "_" + spAssembly + "/download'"); } - // /_/generated_json - buildFolder = output.resolve(spFolder + 
"/generated_json"); - if (!buildFolder.toFile().exists()) { - makeDir(buildFolder); + buildFolder = outputDirectory.resolve(spFolder + "/generated_json"); + if (!Files.exists(buildFolder)) { + Files.createDirectories(buildFolder); } - if (buildCommandOptions.data != null) { - String[] buildOptions; - if (buildCommandOptions.data.equals("all")) { - buildOptions = speciesConfiguration.getData().toArray(new String[0]); - } else { - buildOptions = buildCommandOptions.data.split(","); + // Check data sources + List dataList = getDataList(species, speciesConfiguration); + AbstractBuilder parser; + for (String data : dataList) { + switch (data) { + case GENOME_DATA: + parser = buildGenomeSequence(); + break; + case CONSERVATION_DATA: + parser = buildConservation(); + break; + case REPEATS_DATA: + parser = buildRepeats(); + break; + case GENE_DATA: + parser = buildGene(); + break; + case PROTEIN_DATA: + parser = buildProtein(); + break; + case VARIATION_DATA: + parser = buildVariation(); + break; + case REGULATION_DATA: + parser = buildRegulation(); + break; + case VARIATION_FUNCTIONAL_SCORE_DATA: + parser = buildCadd(); + break; + case MISSENSE_VARIATION_SCORE_DATA: + parser = buildRevel(); + break; + case CLINICAL_VARIANT_DATA: + parser = buildClinicalVariants(); + break; + case SPLICE_SCORE_DATA: + parser = buildSplice(); + break; + case ONTOLOGY_DATA: + parser = buildObo(); + break; + case PUBMED_DATA: + parser = buildPubMed(); + break; + case PHARMACOGENOMICS_DATA: + parser = buildPharmacogenomics(); + break; + case PGS_DATA: + parser = buildPolygenicScores(); + break; + default: + throw new IllegalArgumentException("Data parameter '" + data + "' is not allowed for '" + species + "'. " + + "Valid values are: " + StringUtils.join(speciesConfiguration.getData(), ",") + + ". 
You can use data parameter 'all' to download everything"); } - for (int i = 0; i < buildOptions.length; i++) { - String buildOption = buildOptions[i]; - - logger.info("Building '{}' data", buildOption); - CellBaseBuilder parser = null; - switch (buildOption) { -// case EtlCommons.GENOME_INFO_DATA: -// buildGenomeInfo(); -// break; - case EtlCommons.GENOME_DATA: - parser = buildGenomeSequence(); - break; - case EtlCommons.GENE_DATA: - parser = buildGene(); - break; - case EtlCommons.REFSEQ_DATA: - parser = buildRefSeq(); - break; - case EtlCommons.VARIATION_DATA: - parser = buildVariation(); - break; - case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: - parser = buildCadd(); - break; - case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: - parser = buildRevel(); - break; - case EtlCommons.REGULATION_DATA: - parser = buildRegulation(); - break; - case EtlCommons.PROTEIN_DATA: - parser = buildProtein(); - break; -// case EtlCommons.PPI_DATA: -// parser = getInteractionParser(); -// break; - case EtlCommons.CONSERVATION_DATA: - parser = buildConservation(); - break; - case EtlCommons.CLINICAL_VARIANTS_DATA: - parser = buildClinicalVariants(); - break; - case EtlCommons.REPEATS_DATA: - parser = buildRepeats(); - break; - case EtlCommons.OBO_DATA: - parser = buildObo(); - break; - case EtlCommons.SPLICE_SCORE_DATA: - parser = buildSplice(); - break; - case EtlCommons.PUBMED_DATA: - parser = buildPubMed(); - break; - case EtlCommons.PHARMACOGENOMICS_DATA: - parser = buildPharmacogenomics(); - break; - default: - logger.error("Build option '" + buildCommandOptions.data + "' is not valid"); - break; - } - - if (parser != null) { - try { - parser.parse(); - } catch (Exception e) { - logger.error("Error executing 'build' command " + buildCommandOptions.data + ": " + e.getMessage(), e); - } - parser.disconnect(); - } + if (parser != null) { + parser.parse(); + parser.disconnect(); + logger.info(BUILDING_DONE_LOG_MESSAGE); } } - } catch (ParameterException e) { - logger.error("Error 
parsing build command line parameters: " + e.getMessage(), e); - } catch (IOException | CellBaseException e) { - logger.error(e.getMessage()); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("Error executing command line 'build': " + e.getMessage(), e); + } catch (Exception e) { + throw new CellBaseException("Error executing command line 'build': " + e.getMessage(), e); } } - private CellBaseBuilder buildRepeats() { - Path repeatsFilesDir = downloadFolder.resolve(EtlCommons.REPEATS_FOLDER); - copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.TRF_VERSION_FILE))); - copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.GSD_VERSION_FILE))); - copyVersionFiles(Arrays.asList(repeatsFilesDir.resolve(EtlCommons.WM_VERSION_FILE))); - // TODO: chunk size is not really used in ConvervedRegionParser, remove? - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, EtlCommons.REPEATS_JSON); - return new RepeatsBuilder(repeatsFilesDir, serializer); - } + private AbstractBuilder buildGenomeSequence() throws CellBaseException { + logger.info(BUILDING_LOG_MESSAGE, getDataName(GENOME_DATA)); - private CellBaseBuilder buildObo() { - Path oboDir = downloadFolder.resolve(EtlCommons.OBO_DATA); - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, EtlCommons.OBO_JSON); - return new OntologyBuilder(oboDir, serializer); - } + Path genomeDownloadFolder = downloadFolder.resolve(GENOME_DATA); + Path genomeBuildFolder = buildFolder.resolve(GENOME_DATA); + + if (Files.exists(genomeBuildFolder.resolve(GENOME_JSON_FILENAME)) + && Files.exists(genomeBuildFolder.resolve(GENOME_INFO_FILENAME)) + && Files.exists(genomeBuildFolder.resolve(getDataVersionFilename(GENOME_DATA)))) { + logger.warn(DATA_ALREADY_BUILT, getDataName(GENOME_DATA)); + return null; + } - private void copyVersionFiles(List pathList) { - for (Path path : 
pathList) { + // Sanity check + if (!Files.exists(genomeDownloadFolder.resolve(GENOME_INFO_FILENAME))) { + throw new CellBaseException("Genome info file " + GENOME_INFO_FILENAME + " does not exist at " + genomeDownloadFolder); + } + + // Copy files if necessary + if (!Files.exists(genomeBuildFolder.resolve(getDataVersionFilename(GENOME_DATA)))) { + Path genomeVersionPath = genomeDownloadFolder.resolve(getDataVersionFilename(GENOME_DATA)); + copyVersionFiles(Collections.singletonList(genomeVersionPath), buildFolder.resolve(GENOME_DATA)); + } + + if (!Files.exists(genomeBuildFolder.resolve(GENOME_INFO_FILENAME))) { try { - Files.copy(path, downloadFolder.resolve(path.getFileName()), StandardCopyOption.REPLACE_EXISTING); + Files.copy(genomeDownloadFolder.resolve(GENOME_INFO_FILENAME), genomeBuildFolder.resolve(GENOME_INFO_FILENAME)); } catch (IOException e) { - logger.warn("Version file {} not found - skipping", path.toString()); + throw new CellBaseException("Error copying file " + GENOME_INFO_FILENAME, e); } } - } -// private void buildGenomeInfo() { -// /** -// * To get some extra info about the genome such as chromosome length or cytobands -// * we execute the following script. -// */ -// try { -// String outputFileName = downloadFolder.resolve("genome_info.json").toAbsolutePath().toString(); -// List args = new ArrayList<>(); -// args.addAll(Arrays.asList("--species", speciesConfigurathtion.getScientificName(), -// "--assembly", buildCommandOptions.assembly == null ? 
getDefaultHumanAssembly() : buildCommandOptions.assembly, -// "-o", outputFileName, -// "--ensembl-libs", configuration.getDownload().getEnsembl().getLibs())); -// if (!configuration.getSpecies().getVertebrates().contains(speciesConfiguration) -// && !speciesConfiguration.getScientificName().equals("Drosophila melanogaster")) { -// args.add("--phylo"); -// args.add("no-vertebrate"); -// } -// -// String geneInfoLogFileName = downloadFolder.resolve("genome_info.log").toAbsolutePath().toString(); -// -// boolean downloadedGenomeInfo; -// downloadedGenomeInfo = EtlCommons.runCommandLineProcess(ensemblScriptsFolder, "./genome_info.pl", args, geneInfoLogFileName); -// -// if (downloadedGenomeInfo) { -// logger.info(outputFileName + " created OK"); -// } else { -// logger.error("Genome info for " + speciesConfiguration.getScientificName() + " cannot be downloaded"); -// } -// } catch (IOException | InterruptedException e) { -// e.printStackTrace(); -// } -// } + // Parse file + if (!Files.exists(genomeBuildFolder.resolve(GENOME_JSON_FILENAME))) { + // Get FASTA path + Path fastaPath = getFastaReferenceGenome(); - private CellBaseBuilder buildGenomeSequence() { - copyVersionFiles(Collections.singletonList(downloadFolder.resolve("genome/genomeVersion.json"))); - Path fastaFile = getFastaReferenceGenome(); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "genome_sequence"); - return new GenomeSequenceFastaBuilder(fastaFile, serializer); + // Create serializer and return the genome builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(genomeBuildFolder, GENOME_DATA); + return new GenomeSequenceFastaBuilder(fastaPath, serializer); + } + return null; } - private CellBaseBuilder buildGene() throws CellBaseException { - Path geneFolderPath = downloadFolder.resolve("gene"); - copyVersionFiles(Arrays.asList(geneFolderPath.resolve("dgidbVersion.json"), - geneFolderPath.resolve("ensemblCoreVersion.json"), 
geneFolderPath.resolve("uniprotXrefVersion.json"), - geneFolderPath.resolve("geneExpressionAtlasVersion.json"), - geneFolderPath.resolve("hpoVersion.json"), geneFolderPath.resolve("disgenetVersion.json"), - geneFolderPath.resolve("gnomadVersion.json"))); - Path genomeFastaFilePath = getFastaReferenceGenome(); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "gene"); - return new GeneBuilder(geneFolderPath, genomeFastaFilePath, speciesConfiguration, flexibleGTFParsing, serializer); + private AbstractBuilder buildGene() throws CellBaseException { + logger.info(BUILDING_LOG_MESSAGE, getDataName(GENE_DATA)); + + // Sanity check + Path geneDownloadPath = downloadFolder.resolve(GENE_DATA); + Path geneBuildPath = buildFolder.resolve(GENE_DATA); + + List versionFiles = new ArrayList<>(Arrays.asList( + geneDownloadPath.resolve(ENSEMBL_DATA).resolve(getDataVersionFilename(ENSEMBL_DATA)), + geneDownloadPath.resolve(REFSEQ_DATA).resolve(getDataVersionFilename(REFSEQ_DATA)))); + List dataList = GeneBuilder.getCommonDataSources(speciesConfiguration, configuration); + for (String data : dataList) { + Path versionFile; + switch (data) { + case MIRTARBASE_DATA: + versionFile = downloadFolder.resolve(REGULATION_DATA).resolve(MIRTARBASE_DATA).resolve(getDataVersionFilename(data)); + break; + case MIRBASE_DATA: + versionFile = downloadFolder.resolve(REGULATION_DATA).resolve(MIRBASE_DATA).resolve(getDataVersionFilename(data)); + break; + default: + versionFile = downloadFolder.resolve(GERP_DATA).resolve(getDataVersionFilename(data)); + break; + } + versionFiles.add(versionFile); + } + + List filesToCheck = new ArrayList<>(Arrays.asList(geneBuildPath.resolve(ENSEMBL_GENE_OUTPUT_FILENAME), + geneBuildPath.resolve(REFSEQ_GENE_OUTPUT_FILENAME))); + for (Path versionFile : versionFiles) { + filesToCheck.add(geneBuildPath.resolve(versionFile.getFileName())); + } + filesToCheck.addAll(versionFiles); + + if (AbstractBuilder.existFiles(filesToCheck)) { + 
logger.warn(DATA_ALREADY_BUILT, getDataName(ENSEMBL_DATA) + " and " + getDataName(REFSEQ_DATA) + " genes"); + return null; + } + + copyVersionFiles(versionFiles, geneBuildPath); + + return new GeneBuilder(geneDownloadPath, geneBuildPath, speciesConfiguration, flexibleGTFParsing, configuration); } - private CellBaseBuilder buildRefSeq() { - Path refseqFolderPath = downloadFolder.resolve("refseq"); - copyVersionFiles(Arrays.asList(refseqFolderPath.resolve("refSeqVersion.json"))); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "refseq"); - return new RefSeqGeneBuilder(refseqFolderPath, speciesConfiguration, serializer); + private AbstractBuilder buildRepeats() throws CellBaseException { + logger.info(BUILDING_LOG_MESSAGE, getDataName(REPEATS_DATA)); + + // Sanity check + Path repeatsDownloadPath = downloadFolder.resolve(REPEATS_DATA); + Path repeatsBuildPath = buildFolder.resolve(REPEATS_DATA); + List dataList = EtlCommons.getDataList(REPEATS_DATA, configuration, speciesConfiguration); + List filesToCheck = new ArrayList<>(Arrays.asList(repeatsBuildPath.resolve(REPEATS_OUTPUT_FILENAME))); + for (String data : dataList) { + filesToCheck.add(repeatsBuildPath.resolve(getDataVersionFilename(data))); + } + if (AbstractBuilder.existFiles(filesToCheck)) { + logger.warn(DATA_ALREADY_BUILT, getDataName(REPEATS_DATA)); + return null; + } + for (String data : dataList) { + checkVersionFiles(Collections.singletonList(repeatsDownloadPath.resolve(data).resolve(getDataVersionFilename(data)))); + } + for (String data : dataList) { + copyVersionFiles(Collections.singletonList(repeatsDownloadPath.resolve(data).resolve(getDataVersionFilename(data))), + repeatsBuildPath); + } + + // Create serializer and return the repeats builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder.resolve(REPEATS_DATA), REPEATS_DATA); + return new RepeatsBuilder(dataList, repeatsDownloadPath, serializer, configuration); } - private 
CellBaseBuilder buildVariation() throws IOException { - Path downloadVariationPath = downloadFolder.resolve(VARIATION_DATA); - Path buildVariationPath = buildFolder.resolve(VARIATION_DATA); - if (!buildVariationPath.toFile().exists()) { - buildVariationPath.toFile().mkdirs(); + private AbstractBuilder buildObo() throws CellBaseException { + // Sanity check + Path oboDownloadPath = downloadFolder.resolve(ONTOLOGY_DATA); + Path oboBuildPath = buildFolder.resolve(ONTOLOGY_DATA); + List filesToCheck = new ArrayList<>(Arrays.asList(oboBuildPath.resolve(OBO_OUTPUT_BASENAME))); + List dataList = new ArrayList<>(Arrays.asList(GO_OBO_DATA)); + if (speciesConfiguration.getScientificName().equalsIgnoreCase(HOMO_SAPIENS)) { + dataList.add(HPO_OBO_DATA); + dataList.add(DOID_OBO_DATA); + dataList.add(MONDO_OBO_DATA); } - CellBaseFileSerializer variationSerializer = new CellBaseJsonFileSerializer(buildVariationPath); + for (String data : dataList) { + filesToCheck.add(oboBuildPath.resolve(data).resolve(getDataVersionFilename(data))); + } - // Currently, only dbSNP data - Files.copy(downloadVariationPath.resolve(DBSNP_VERSION_FILENAME), buildVariationPath.resolve(DBSNP_VERSION_FILENAME), - StandardCopyOption.REPLACE_EXISTING); - return new VariationBuilder(downloadVariationPath, variationSerializer, configuration); + if (AbstractBuilder.existFiles(filesToCheck)) { + logger.warn(DATA_ALREADY_BUILT, getDataName(ONTOLOGY_DATA)); + return null; + } + + for (String data : dataList) { + checkVersionFiles(Collections.singletonList(oboDownloadPath.resolve(data).resolve(getDataVersionFilename(data)))); + } + for (String data : dataList) { + copyVersionFiles(Collections.singletonList(oboDownloadPath.resolve(data).resolve(getDataVersionFilename(data))), + oboBuildPath); + } + + // Create serializer and return the ontology builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(oboBuildPath, OBO_OUTPUT_BASENAME); + return new OntologyBuilder(oboDownloadPath, 
speciesConfiguration, serializer); } - private CellBaseBuilder buildCadd() { - Path variationFunctionalScorePath = downloadFolder.resolve("variation_functional_score"); - copyVersionFiles(Arrays.asList(variationFunctionalScorePath.resolve("caddVersion.json"))); - Path caddFilePath = variationFunctionalScorePath.resolve("whole_genome_SNVs.tsv.gz"); - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "cadd"); - return new CaddScoreBuilder(caddFilePath, serializer); + private AbstractBuilder buildCadd() throws CellBaseException { + // Sanity check + Path caddDownloadPath = downloadFolder.resolve(VARIATION_FUNCTIONAL_SCORE_DATA).resolve(CADD_DATA); + Path caddBuildPath = buildFolder.resolve(VARIATION_FUNCTIONAL_SCORE_DATA).resolve(CADD_DATA); + copyVersionFiles(Collections.singletonList(caddDownloadPath.resolve(getDataVersionFilename(CADD_DATA))), caddBuildPath); + + // Create the file serializer and the protein builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(caddBuildPath, CADD_DATA); + return new CaddScoreBuilder(caddDownloadPath, serializer); } - private CellBaseBuilder buildRevel() { - Path missensePredictionScorePath = downloadFolder.resolve(EtlCommons.MISSENSE_VARIATION_SCORE_DATA); - copyVersionFiles(Arrays.asList(missensePredictionScorePath.resolve("revelVersion.json"))); - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, EtlCommons.MISSENSE_VARIATION_SCORE_DATA); - return new RevelScoreBuilder(missensePredictionScorePath, serializer); + private AbstractBuilder buildRevel() throws CellBaseException { + // Sanity check + Path revelDownloadPath = downloadFolder.resolve(MISSENSE_VARIATION_SCORE_DATA).resolve(REVEL_DATA); + Path revelBuildPath = buildFolder.resolve(MISSENSE_VARIATION_SCORE_DATA).resolve(REVEL_DATA); + copyVersionFiles(Collections.singletonList(revelDownloadPath.resolve(getDataVersionFilename(REVEL_DATA))), revelBuildPath); + + // Create the file serializer and 
the regulatory feature builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(revelBuildPath, REVEL_DATA); + return new RevelScoreBuilder(revelDownloadPath, serializer); } - private CellBaseBuilder buildRegulation() { - Path regulatoryRegionFilesDir = downloadFolder.resolve("regulation"); - copyVersionFiles(Collections.singletonList(regulatoryRegionFilesDir.resolve("ensemblRegulationVersion.json"))); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "regulatory_region"); - return new RegulatoryFeatureBuilder(regulatoryRegionFilesDir, serializer); + private AbstractBuilder buildRegulation() throws CellBaseException { + logger.info(BUILDING_LOG_MESSAGE, getDataName(REGULATION_DATA)); + + // Sanity check + Path regulationDownloadPath = downloadFolder.resolve(REGULATION_DATA); + Path regulationBuildPath = buildFolder.resolve(REGULATION_DATA); + List filesToCheck = Arrays.asList(regulationBuildPath.resolve(REGULATORY_REGION_OUTPUT_FILENAME), + regulationBuildPath.resolve(REGULATORY_PFM_OUTPUT_FILENAME), + regulationBuildPath.resolve(getDataVersionFilename(REGULATORY_BUILD_DATA)), + regulationBuildPath.resolve(getDataVersionFilename(MOTIF_FEATURES_DATA))); + if (AbstractBuilder.existFiles(filesToCheck)) { + logger.warn(DATA_ALREADY_BUILT, getDataName(REGULATION_DATA)); + return null; + } + + copyVersionFiles(Arrays.asList(regulationDownloadPath.resolve(REGULATORY_BUILD_DATA).resolve(getDataVersionFilename( + REGULATORY_BUILD_DATA)), regulationDownloadPath.resolve(MOTIF_FEATURES_DATA).resolve(getDataVersionFilename( + MOTIF_FEATURES_DATA))), regulationBuildPath); + + // Create the file serializer and the regulatory feature builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(regulationBuildPath, REGULATORY_REGION_BASENAME); + return new RegulatoryFeatureBuilder(regulationDownloadPath, serializer); } - private CellBaseBuilder buildProtein() { - Path proteinFolder = downloadFolder.resolve("protein"); - 
copyVersionFiles(Arrays.asList(proteinFolder.resolve("uniprotVersion.json"), - proteinFolder.resolve("interproVersion.json"))); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "protein"); - return new ProteinBuilder(proteinFolder.resolve("uniprot_chunks"), - downloadFolder.resolve("protein").resolve("protein2ipr.dat.gz"), speciesConfiguration.getScientificName(), serializer); + private AbstractBuilder buildProtein() throws CellBaseException { + logger.info(BUILDING_LOG_MESSAGE, getDataName(PROTEIN_DATA)); + + // Sanity check + Path proteinDownloadPath = downloadFolder.resolve(PROTEIN_DATA); + Path proteinBuildPath = buildFolder.resolve(PROTEIN_DATA); + List filesToCheck = Arrays.asList(proteinBuildPath.resolve(PROTEIN_OUTPUT_FILENAME), + proteinBuildPath.resolve(getDataVersionFilename(INTERPRO_DATA)), + proteinBuildPath.resolve(getDataVersionFilename(INTACT_DATA)), + proteinBuildPath.resolve(getDataVersionFilename(UNIPROT_DATA))); + if (AbstractBuilder.existFiles(filesToCheck)) { + logger.warn(DATA_ALREADY_BUILT, getDataName(PROTEIN_DATA)); + return null; + } + + copyVersionFiles(Arrays.asList(proteinDownloadPath.resolve(INTERPRO_DATA).resolve(getDataVersionFilename( + INTERPRO_DATA)), proteinDownloadPath.resolve(INTACT_DATA).resolve(getDataVersionFilename( + INTACT_DATA)), proteinDownloadPath.resolve(UNIPROT_DATA).resolve(getDataVersionFilename( + UNIPROT_DATA))), proteinBuildPath); + + // Create the file serializer and the protein builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(proteinBuildPath, PROTEIN_DATA); + return new ProteinBuilder(proteinDownloadPath, speciesConfiguration.getScientificName(), serializer); } - private void getProteinFunctionPredictionMatrices(SpeciesConfiguration sp, Path geneFolder) - throws IOException, InterruptedException { - logger.info("Downloading protein function prediction matrices ..."); + private AbstractBuilder buildVariation() throws CellBaseException, IOException { + 
logger.info(BUILDING_LOG_MESSAGE, getDataName(VARIATION_DATA)); - // run protein_function_prediction_matrices.pl - String proteinFunctionProcessLogFile = geneFolder.resolve("protein_function_prediction_matrices.log").toString(); - List args = Arrays.asList("--species", sp.getScientificName(), "--outdir", geneFolder.toString(), - "--ensembl-libs", configuration.getDownload().getEnsembl().getLibs()); + // Sanity check + Path variationDownloadPath = downloadFolder.resolve(VARIATION_DATA); + Path variationBuildPath = buildFolder.resolve(VARIATION_DATA); - boolean proteinFunctionPredictionMatricesObtaines = EtlCommons.runCommandLineProcess(ensemblScriptsFolder, - "./protein_function_prediction_matrices.pl", - args, - proteinFunctionProcessLogFile); + if (Files.exists(variationBuildPath)) { + List filesToCheck = new ArrayList<>(); + if (!speciesConfiguration.getId().equalsIgnoreCase(HSAPIENS)) { + filesToCheck.add(variationBuildPath.resolve(getDataVersionFilename(VARIATION_DATA))); + } - // check output - if (proteinFunctionPredictionMatricesObtaines) { - logger.info("Protein function prediction matrices created OK"); - } else { - logger.error("Protein function prediction matrices for " + sp.getScientificName() + " cannot be downloaded"); + try (DirectoryStream vcfPaths = Files.newDirectoryStream(variationBuildPath, + entry -> entry.getFileName().toString().startsWith(VARIATION_CHR_PREFIX))) { + if (AbstractBuilder.existFiles(filesToCheck) && vcfPaths.iterator().hasNext()) { + logger.warn(DATA_ALREADY_BUILT, getDataName(VARIATION_DATA)); + return null; + } + } } - } - private CellBaseBuilder getInteractionParser() { - Path proteinFolder = downloadFolder.resolve("protein"); - Path psimiTabFile = proteinFolder.resolve("intact.txt"); - copyVersionFiles(Arrays.asList(proteinFolder.resolve("intactVersion.json"))); - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, "protein_protein_interaction"); - return new InteractionBuilder(psimiTabFile, 
speciesConfiguration.getScientificName(), serializer); + // Copy version files + if (!speciesConfiguration.getId().equalsIgnoreCase(HSAPIENS)) { + copyVersionFiles(Arrays.asList(variationDownloadPath.resolve(getDataVersionFilename(VARIATION_DATA))), variationBuildPath); + } + + // Create the file serializer and the variation builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(variationBuildPath); + return new VariationBuilder(variationDownloadPath, speciesConfiguration.getScientificName(), serializer, configuration); } - private CellBaseBuilder buildConservation() { - Path conservationFilesDir = downloadFolder.resolve("conservation"); - copyVersionFiles(Arrays.asList(conservationFilesDir.resolve("gerpVersion.json"), - conservationFilesDir.resolve("phastConsVersion.json"), - conservationFilesDir.resolve("phyloPVersion.json"))); - // TODO: chunk size is not really used in ConvervedRegionParser, remove? + private AbstractBuilder buildConservation() throws CellBaseException { + logger.info(BUILDING_LOG_MESSAGE, getDataName(CONSERVATION_DATA)); + + // Sanity check + Path conservationDownloadPath = downloadFolder.resolve(CONSERVATION_DATA); + Path conservationBuildPath = buildFolder.resolve(CONSERVATION_DATA); + List dataList = Arrays.asList(GERP_DATA, PHASTCONS_DATA, PHYLOP_DATA); + for (String data : dataList) { + checkVersionFiles(Collections.singletonList(conservationDownloadPath.resolve(data).resolve(getDataVersionFilename(data)))); + } + copyVersionFiles(Arrays.asList(conservationDownloadPath.resolve(GERP_DATA).resolve(getDataVersionFilename(GERP_DATA)), + conservationDownloadPath.resolve(PHASTCONS_DATA).resolve(getDataVersionFilename(PHASTCONS_DATA)), + conservationDownloadPath.resolve(PHYLOP_DATA).resolve(getDataVersionFilename(PHYLOP_DATA))), conservationBuildPath); + int conservationChunkSize = MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(buildFolder); - 
return new ConservationBuilder(conservationFilesDir, conservationChunkSize, serializer); + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(conservationBuildPath); + return new ConservationBuilder(conservationDownloadPath, conservationChunkSize, serializer); } - private CellBaseBuilder buildClinicalVariants() { - Path clinicalVariantFolder = downloadFolder.resolve(EtlCommons.CLINICAL_VARIANTS_FOLDER); - copyVersionFiles(Arrays.asList(clinicalVariantFolder.resolve("clinvarVersion.json"))); - copyVersionFiles(Arrays.asList(clinicalVariantFolder.resolve("gwasVersion.json"))); - - CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, - EtlCommons.CLINICAL_VARIANTS_JSON_FILE.replace(".json.gz", ""), true); - return new ClinicalVariantBuilder(clinicalVariantFolder, normalize, getFastaReferenceGenome(), + private AbstractBuilder buildClinicalVariants() throws CellBaseException { + // Sanity check + Path clinicalDownloadPath = downloadFolder.resolve(CLINICAL_VARIANT_DATA); + Path clinicalBuildPath = buildFolder.resolve(CLINICAL_VARIANT_DATA); + copyVersionFiles(Arrays.asList(clinicalDownloadPath.resolve(getDataVersionFilename(CLINVAR_DATA)), + clinicalDownloadPath.resolve(getDataVersionFilename(COSMIC_DATA)), + clinicalDownloadPath.resolve(getDataVersionFilename(HGMD_DATA)), + clinicalDownloadPath.resolve(getDataVersionFilename(GWAS_DATA))), clinicalBuildPath); + + // Create the file serializer and the clinical variants builder + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(clinicalBuildPath, CLINICAL_VARIANTS_BASENAME, true); + return new ClinicalVariantBuilder(clinicalDownloadPath, normalize, getFastaReferenceGenome(), buildCommandOptions.assembly == null ? 
getDefaultHumanAssembly() : buildCommandOptions.assembly, - serializer); + configuration, serializer); } private String getDefaultHumanAssembly() { for (SpeciesConfiguration species : configuration.getSpecies().getVertebrates()) { - if (species.getId().equals("hsapiens")) { + if (species.getId().equals(HSAPIENS)) { return species.getAssemblies().get(0).getName(); } } @@ -388,31 +495,26 @@ private String getDefaultHumanAssembly() { + "configuration file. No hsapiens data found within the configuration.json file"); } - private Path getFastaReferenceGenome() { - Path fastaFile = null; - try { - DirectoryStream stream = Files.newDirectoryStream(downloadFolder.resolve("genome"), entry -> { - return entry.toString().endsWith(".fa"); - }); - for (Path entry : stream) { - fastaFile = entry; - } - } catch (IOException e) { - e.printStackTrace(); - } - return fastaFile; + private Path getFastaReferenceGenome() throws CellBaseException { + // Check FASTA and unzip if necessary + String ensemblUrl = getEnsemblUrl(configuration.getDownload().getEnsembl(), ensemblRelease, ENSEMBL_PRIMARY_FA_FILE_ID, + SpeciesUtils.getSpeciesShortname(speciesConfiguration), assembly.getName(), null); + String fastaFilename = Paths.get(ensemblUrl).getFileName().toString(); + Path gzFastaPath = downloadFolder.resolve(GENOME_DATA).resolve(fastaFilename); + + return EtlCommons.getFastaPath(gzFastaPath); } - private CellBaseBuilder buildSplice() throws IOException { + private AbstractBuilder buildSplice() throws IOException, CellBaseException { Path spliceInputFolder = downloadFolder.resolve(EtlCommons.SPLICE_SCORE_DATA); Path spliceOutputFolder = buildFolder.resolve(EtlCommons.SPLICE_SCORE_DATA); if (!spliceOutputFolder.toFile().exists()) { spliceOutputFolder.toFile().mkdirs(); } - if (spliceInputFolder.resolve(EtlCommons.MMSPLICE_VERSION_FILENAME).toFile().exists()) { - Files.copy(spliceInputFolder.resolve(EtlCommons.MMSPLICE_VERSION_FILENAME), - 
spliceOutputFolder.resolve(EtlCommons.MMSPLICE_VERSION_FILENAME), + if (spliceInputFolder.resolve(getDataVersionFilename(MMSPLICE_DATA)).toFile().exists()) { + Files.copy(spliceInputFolder.resolve(getDataVersionFilename(MMSPLICE_DATA)), + spliceOutputFolder.resolve(EtlCommons.getDataVersionFilename(MMSPLICE_DATA)), StandardCopyOption.REPLACE_EXISTING); } @@ -420,39 +522,130 @@ private CellBaseBuilder buildSplice() throws IOException { return new SpliceBuilder(spliceInputFolder, serializer); } - private CellBaseBuilder buildPubMed() throws IOException { - Path pubmedInputFolder = downloadFolder.resolve(EtlCommons.PUBMED_DATA); - Path pubmedOutputFolder = buildFolder.resolve(EtlCommons.PUBMED_DATA); - if (!pubmedOutputFolder.toFile().exists()) { - pubmedOutputFolder.toFile().mkdirs(); - } + private AbstractBuilder buildPubMed() throws CellBaseException { + // Sanity check + Path pubMedDownloadPath = downloadFolder.resolve(PUBMED_DATA); + Path pubMedBuildPath = buildFolder.resolve(PUBMED_DATA); + copyVersionFiles(Collections.singletonList(pubMedDownloadPath.resolve(getDataVersionFilename(PUBMED_DATA))), pubMedBuildPath); - logger.info("Copying PubMed version file..."); - if (pubmedInputFolder.resolve(EtlCommons.PUBMED_VERSION_FILENAME).toFile().exists()) { - Files.copy(pubmedInputFolder.resolve(EtlCommons.PUBMED_VERSION_FILENAME), - pubmedOutputFolder.resolve(EtlCommons.PUBMED_VERSION_FILENAME), - StandardCopyOption.REPLACE_EXISTING); + // Create the file serializer and the PubMed builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pubMedBuildPath); + return new PubMedBuilder(pubMedDownloadPath, serializer, configuration); + } + + private AbstractBuilder buildPharmacogenomics() throws CellBaseException { + // Sanity check + Path pharmGkbDownloadPath = downloadFolder.resolve(PHARMACOGENOMICS_DATA).resolve(PHARMGKB_DATA); + Path pharmGkbBuildPath = buildFolder.resolve(PHARMACOGENOMICS_DATA).resolve(PHARMGKB_DATA); + 
copyVersionFiles(Collections.singletonList(pharmGkbDownloadPath.resolve(getDataVersionFilename(PHARMGKB_DATA))), pharmGkbBuildPath); + + // Create the file serializer and the PharmGKB builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pharmGkbBuildPath); + return new PharmGKBBuilder(pharmGkbDownloadPath, serializer); + } + + private AbstractBuilder buildPolygenicScores() throws CellBaseException { + Path pgsDownloadPath = downloadFolder.resolve(EtlCommons.PGS_DATA); + Path pgsBuildPath = buildFolder.resolve(EtlCommons.PGS_DATA); + copyVersionFiles(Collections.singletonList(pgsDownloadPath.resolve(getDataVersionFilename(PGS_CATALOG_DATA))), pgsBuildPath); + + // Create the file serializer and the PGS builder + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pgsBuildPath, PGS_VARIANT_COLLECTION); + return new PolygenicScoreBuilder(pgsDownloadPath, serializer); + } + + private void checkVersionFiles(List versionPaths) throws CellBaseException { + ObjectReader dataSourceReader = new ObjectMapper().readerFor(DataSource.class); + for (Path versionPath : versionPaths) { + if (!versionPath.toFile().exists()) { + throw new CellBaseException("Version file " + versionPath + " does not exist: this file is mandatory for version control"); + } + try { + DataSource dataSource = dataSourceReader.readValue(versionPath.toFile()); + if (StringUtils.isEmpty(dataSource.getVersion())) { + throw new CellBaseException("Version missing version in file " + versionPath + ": a version must be specified in the" + + " file"); + } + } catch (IOException e) { + throw new CellBaseException("Error parsing the version file " + versionPath, e); + } } + } - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(pubmedOutputFolder); - return new PubMedBuilder(pubmedInputFolder, serializer); + private void copyVersionFiles(List versionPaths, Path targetPath) throws CellBaseException { + // Check version files before copying them + 
checkVersionFiles(versionPaths); + copyFiles(versionPaths, targetPath); } - private CellBaseBuilder buildPharmacogenomics() throws IOException { - Path inFolder = downloadFolder.resolve(EtlCommons.PHARMACOGENOMICS_DATA); - Path outFolder = buildFolder.resolve(EtlCommons.PHARMACOGENOMICS_DATA); - if (!outFolder.toFile().exists()) { - outFolder.toFile().mkdirs(); + private void copyFiles(List versionPaths, Path targetPath) throws CellBaseException { + if (!Files.exists(targetPath)) { + try { + Files.createDirectories(targetPath); + } catch (IOException e) { + throw new CellBaseException("Error creating folder " + targetPath, e); + } } - logger.info("Copying PharmGKB version file..."); - if (inFolder.resolve(PHARMGKB_DATA).resolve(EtlCommons.PHARMGKB_VERSION_FILENAME).toFile().exists()) { - Files.copy(inFolder.resolve(PHARMGKB_DATA).resolve(EtlCommons.PHARMGKB_VERSION_FILENAME), - outFolder.resolve(EtlCommons.PHARMGKB_VERSION_FILENAME), - StandardCopyOption.REPLACE_EXISTING); + for (Path versionPath : versionPaths) { + try { + Files.copy(versionPath, targetPath.resolve(versionPath.getFileName()), StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + throw new CellBaseException("Error copying version file " + versionPath + " to " + targetPath, e); + } + // Sanity check after copying + if (!targetPath.resolve(versionPath.getFileName()).toFile().exists()) { + throw new CellBaseException("Something wrong happened when copying version file " + versionPath + " to " + targetPath); + } } + } - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(outFolder); - return new PharmGKBBuilder(inFolder, serializer); +//<<<<<<< HEAD +// private List checkDataSources() { +// if (StringUtils.isEmpty(buildCommandOptions.data)) { +// throw new IllegalArgumentException("Missing data parameter. 
Valid values are: " +// + StringUtils.join(VALID_SOURCES_TO_BUILD, ",") + "; or use 'all' to download everything"); +// } +// List dataList = Arrays.asList(buildCommandOptions.data.split(",")); +// for (String data : dataList) { +// switch (data) { +// case GENOME_DATA: +// case GENE_DATA: +// case REFSEQ_DATA: +// case VARIATION_FUNCTIONAL_SCORE_DATA: +// case MISSENSE_VARIATION_SCORE_DATA: +// case REGULATION_DATA: +// case PROTEIN_DATA: +// case CONSERVATION_DATA: +// case CLINICAL_VARIANT_DATA: +// case REPEATS_DATA: +// case ONTOLOGY_DATA: +// case SPLICE_SCORE_DATA: +// case PUBMED_DATA: +// case PHARMACOGENOMICS_DATA: +// case PGS_DATA: +// break; +// default: +// throw new IllegalArgumentException("Value '" + data + "' is not allowed for the data parameter. Valid values are: " +// + StringUtils.join(VALID_SOURCES_TO_BUILD, ",") + "; or use 'all' to build everything"); +//======= + private List getDataList(String species, SpeciesConfiguration speciesConfig) throws CellBaseException { + // No need to check if 'data' exists since it is declared as required in JCommander + List dataList; + if ("all".equalsIgnoreCase(buildCommandOptions.data)) { + // Download all data sources for the species in the configuration.yml file + dataList = speciesConfig.getData(); + } else { + // Check if the data sources requested are valid for the species + dataList = Arrays.asList(buildCommandOptions.data.split(",")); + for (String data : dataList) { + if (!speciesConfig.getData().contains(data)) { + throw new CellBaseException("Data parameter '" + data + "' does not exist or it is not allowed for '" + species + "'. " + + "Valid values are: " + StringUtils.join(speciesConfig.getData(), ",") + ". 
" + + "You can use data parameter 'all' to build everything"); + } + } + } + return dataList; } } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DataListCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DataListCommandExecutor.java new file mode 100644 index 0000000000..8ec6a5e421 --- /dev/null +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DataListCommandExecutor.java @@ -0,0 +1,56 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.app.cli.admin.executors; + +import org.apache.commons.lang3.StringUtils; +import org.opencb.cellbase.app.cli.CommandExecutor; +import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser; +import org.opencb.cellbase.core.config.SpeciesConfiguration; +import org.opencb.cellbase.core.utils.SpeciesUtils; + +import java.util.List; + +public class DataListCommandExecutor extends CommandExecutor { + + private AdminCliOptionsParser.DataListCommandOptions dataListCommandOptions; + + public DataListCommandExecutor(AdminCliOptionsParser.DataListCommandOptions dataListCommandOptions) { + super(dataListCommandOptions.commonOptions.logLevel, dataListCommandOptions.commonOptions.conf); + + this.dataListCommandOptions = dataListCommandOptions; + } + + + /** + * Execute one of the selected actions according to the input parameters. 
+ */ + public void execute() { + SpeciesConfiguration speciesConfiguration = SpeciesUtils.getSpeciesConfiguration(configuration, dataListCommandOptions.species); + if (speciesConfiguration == null) { + System.out.println("Unknown species: " + dataListCommandOptions.species); + System.out.println("Available species:"); + List allSpecies = SpeciesUtils.getAllSpecies(configuration); + for (SpeciesConfiguration species : allSpecies) { + System.out.println("\t- " + species.getScientificName() + " (" + species.getId() + ")"); + } + return; + } + + System.out.println("Species: " + dataListCommandOptions.species); + System.out.println("Available data: " + StringUtils.join(speciesConfiguration.getData(), ",")); + } +} diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java index abb0629374..f2c3de6e6a 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/DownloadCommandExecutor.java @@ -16,29 +16,22 @@ package org.opencb.cellbase.app.cli.admin.executors; -import com.beust.jcommander.ParameterException; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; -import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.cellbase.app.cli.CommandExecutor; import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.utils.SpeciesUtils; -import org.opencb.cellbase.lib.EtlCommons; -import org.opencb.cellbase.lib.download.AbstractDownloadManager; -import org.opencb.cellbase.lib.download.DownloadFile; -import org.opencb.cellbase.lib.download.Downloader; +import 
org.opencb.cellbase.lib.download.*; -import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.util.*; + +import static org.opencb.cellbase.lib.EtlCommons.*; + -/** - * Created by imedina on 03/02/15. - */ public class DownloadCommandExecutor extends CommandExecutor { private AdminCliOptionsParser.DownloadCommandOptions downloadCommandOptions; @@ -52,88 +45,123 @@ public DownloadCommandExecutor(AdminCliOptionsParser.DownloadCommandOptions down } /** - * Execute specific 'download' command options. + * Process CellBase command 'download'. + * + * @throws CellBaseException Exception */ - public void execute() { + public void execute() throws CellBaseException { try { + // Get the species and the assembly String species = downloadCommandOptions.speciesAndAssemblyOptions.species; String assembly = downloadCommandOptions.speciesAndAssemblyOptions.assembly; + + // Get the valid list of data sources + SpeciesConfiguration speciesConfiguration = SpeciesUtils.getSpeciesConfiguration(configuration, species); + if (speciesConfiguration == null) { + throw new CellBaseException("Invalid species: '" + downloadCommandOptions.speciesAndAssemblyOptions.species + "'"); + } + List dataList = getDataList(species, speciesConfiguration); + logger.info("Downloading the following data sources: {}", CollectionUtils.isEmpty(dataList) + ? 
Collections.emptyList() + : StringUtils.join(dataList, ",")); + List downloadFiles = new ArrayList<>(); - List dataList = getDataList(species); - Downloader downloader = new Downloader(species, assembly, outputDirectory, configuration); + AbstractDownloadManager downloader = null; for (String data : dataList) { switch (data) { - case EtlCommons.GENOME_DATA: - downloadFiles.addAll(downloader.downloadGenome()); + case GENOME_DATA: + downloader = new GenomeDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.GENE_DATA: - downloadFiles.addAll(downloader.downloadGene()); + case CONSERVATION_DATA: + downloader = new ConservationDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.VARIATION_DATA: - downloadFiles.addAll(downloader.downloadVariation()); + case REPEATS_DATA: + downloader = new RepeatsDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: - downloadFiles.addAll(downloader.downloadCaddScores()); + case GENE_DATA: + downloader = new GeneDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: - downloadFiles.addAll(downloader.downloadPredictionScores()); + case PROTEIN_DATA: + downloader = new ProteinDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.REGULATION_DATA: - downloadFiles.addAll(downloader.downloadRegulation()); + case REGULATION_DATA: + downloader = new RegulationDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.PROTEIN_DATA: - downloadFiles.addAll(downloader.downloadProtein()); + case VARIATION_DATA: + downloader = new VariationDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.CONSERVATION_DATA: - downloadFiles.addAll(downloader.downloadConservation()); + case VARIATION_FUNCTIONAL_SCORE_DATA: + downloader = 
new CaddDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.CLINICAL_VARIANTS_DATA: - downloadFiles.addAll(downloader.downloadClinicalVariants()); + case MISSENSE_VARIATION_SCORE_DATA: + downloader = new MissenseScoresDownloadManager(species, assembly, outputDirectory, configuration); break; -// case EtlCommons.STRUCTURAL_VARIANTS_DATA: -// downloadFiles.add(downloadManager.downloadStructuralVariants()); -// break; - case EtlCommons.REPEATS_DATA: - downloadFiles.addAll(downloader.downloadRepeats()); + case CLINICAL_VARIANT_DATA: + downloader = new ClinicalDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.OBO_DATA: - downloadFiles.addAll(downloader.downloadOntologies()); + case SPLICE_SCORE_DATA: + downloader = new SpliceScoreDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.PUBMED_DATA: - downloadFiles.addAll(downloader.downloadPubMed()); + case ONTOLOGY_DATA: + downloader = new OntologyDownloadManager(species, assembly, outputDirectory, configuration); break; - case EtlCommons.PHARMACOGENOMICS_DATA: - downloadFiles.addAll(downloader.downloadPharmKGB()); + case PUBMED_DATA: + downloader = new PubMedDownloadManager(species, assembly, outputDirectory, configuration); break; - default: - System.out.println("Value \"" + data + "\" is not allowed for the data parameter. Allowed values" - + " are: {genome, gene, gene_disease_association, variation, variation_functional_score," - + " regulation, protein, conservation, clinical_variants, ontology, pubmed}"); + case PHARMACOGENOMICS_DATA: + downloader = new PharmGKBDownloadManager(species, assembly, outputDirectory, configuration); + break; + case PGS_DATA: + downloader = new PgsDownloadManager(species, assembly, outputDirectory, configuration); break; + default: + throw new IllegalArgumentException("Data parameter '" + data + "' is not allowed for '" + species + "'. 
" + + "Valid values are: " + StringUtils.join(speciesConfiguration.getData(), ",") + + ". You can use data parameter 'all' to download everything"); } - } - AbstractDownloadManager.writeDownloadLogFile(outputDirectory, downloadFiles); - } catch (ParameterException | IOException | CellBaseException | InterruptedException | NoSuchMethodException - | FileFormatException e) { - logger.error("Error in 'download' command line: " + e.getMessage()); - } - } - private List getDataList(String species) throws CellBaseException { - if (StringUtils.isEmpty(downloadCommandOptions.data) || downloadCommandOptions.data.equals("all")) { - return SpeciesUtils.getSpeciesConfiguration(configuration, species).getData(); - } else { - return Arrays.asList(downloadCommandOptions.data.split(",")); + // Call to download method and add the files to the list + downloadFiles.addAll(downloader.download()); + } + if (downloader != null) { + Map params = new HashMap<>(); + params.put("species", species); + params.put("assembly", assembly); + params.put("data", dataList); + params.put("outDir", outputDirectory); + downloader.writeDownloadLogFile(params, downloadFiles); + } else { + logger.warn("Impossible to write log summary: downloader is null"); + } + } catch (InterruptedException e) { + // Restore interrupted state... 
+ Thread.currentThread().interrupt(); + throw new CellBaseException("Error executing command line 'download': " + e.getMessage(), e); + } catch (Exception e) { + throw new CellBaseException("Error executing command line 'download': " + e.getMessage(), e); } } - @Deprecated - private List getDataList(SpeciesConfiguration sp) { + private List getDataList(String species, SpeciesConfiguration speciesConfig) throws CellBaseException { + // No need to check if 'data' exists since it is declared as required in JCommander List dataList; - if (downloadCommandOptions.data.equals("all")) { - dataList = sp.getData(); + if ("all".equalsIgnoreCase(downloadCommandOptions.data)) { + // Download all data sources for the species in the configuration.yml file + dataList = speciesConfig.getData(); } else { + // Check if the data sources requested are valid for the species dataList = Arrays.asList(downloadCommandOptions.data.split(",")); + Set invalidData = new HashSet<>(); + for (String data : dataList) { + if (!speciesConfig.getData().contains(data)) { + invalidData.add(data); + } + } + if (!CollectionUtils.isEmpty(invalidData)) { + throw new CellBaseException("Data '" + StringUtils.join(invalidData, ",") + "' not supported by species '" + species + "'." + + "Valid values are: " + StringUtils.join(speciesConfig.getData(), ",") + ". 
Our use data parameter 'all' to" + + " download everything"); + } } return dataList; } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java index 72f992f344..4fba479a36 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ExportCommandExecutor.java @@ -85,8 +85,8 @@ public ExportCommandExecutor(AdminCliOptionsParser.ExportCommandOptions exportCo this.dataToExport = new String[]{EtlCommons.GENOME_DATA, EtlCommons.GENE_DATA, EtlCommons.REFSEQ_DATA, EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA, EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA, - EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA, - OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PHARMACOGENOMICS_DATA}; + EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANT_DATA, EtlCommons.REPEATS_DATA, + ONTOLOGY_DATA, MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PHARMACOGENOMICS_DATA}; } else { this.dataToExport = exportCommandOptions.data.split(","); } @@ -293,7 +293,7 @@ public void execute() throws CellBaseException { counterMsg = counter + " protein functional predictions"; break; } - case EtlCommons.CLINICAL_VARIANTS_DATA: { + case EtlCommons.CLINICAL_VARIANT_DATA: { counter = exportClinicalVariantData(regions); counterMsg = counter + " clinical variants"; break; @@ -309,7 +309,7 @@ public void execute() throws CellBaseException { counterMsg = counter + " repeats"; break; } - case OBO_DATA: { + case ONTOLOGY_DATA: { counter = exportOntologyData(); counterMsg = counter + " ontology items"; break; @@ -424,7 +424,7 @@ private 
String exportPharmacogenomicsData(List genes) private int exportClinicalVariantData(List regions) throws CellBaseException, QueryException, IllegalAccessException, IOException { - String baseFilename = CLINICAL_VARIANTS_DATA + ".full"; + String baseFilename = CLINICAL_VARIANT_DATA + ".full"; CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output, baseFilename); ClinicalManager clinicalManager = managerFactory.getClinicalManager(species, assembly); ClinicalVariantQuery query = new ClinicalVariantQuery(); @@ -449,7 +449,7 @@ private int exportClinicalVariantData(List regions) throws CellBaseExcep private int exportOntologyData() throws CellBaseException, IOException { int counter = 0; - CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output, OBO_DATA); + CellBaseFileSerializer serializer = new CellBaseJsonFileSerializer(output, ONTOLOGY_DATA); OntologyManager ontologyManager = managerFactory.getOntologyManager(species, assembly); CellBaseIterator iterator = ontologyManager.iterator(new OntologyQuery()); while (iterator.hasNext()) { diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/InstallCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/InstallCommandExecutor.java deleted file mode 100644 index 70849eb924..0000000000 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/InstallCommandExecutor.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2015-2020 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.cellbase.app.cli.admin.executors; - -import org.opencb.cellbase.app.cli.CommandExecutor; -import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser; -import org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.lib.install.InstallManager; - -public class InstallCommandExecutor extends CommandExecutor { - - private AdminCliOptionsParser.InstallCommandOptions installCommandOptions; - - public InstallCommandExecutor(AdminCliOptionsParser.InstallCommandOptions installCommandOptions) { - super(installCommandOptions.commonOptions.logLevel, installCommandOptions.commonOptions.conf); - - this.installCommandOptions = installCommandOptions; - } - - public void execute() throws CellBaseException { - try { - logger.info("Starting installation ..."); - InstallManager installManager = new InstallManager(configuration); - installManager.install(installCommandOptions.speciesAndAssemblyOptions.species, - installCommandOptions.speciesAndAssemblyOptions.assembly); - } catch (CellBaseException e) { - logger.error("Error installing:" + e.toString()); - } - } -} diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java index 97460d5a71..b155d2cfcf 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java @@ -20,9 +20,12 @@ import org.apache.commons.lang3.StringUtils; import org.opencb.cellbase.app.cli.CommandExecutor; import org.opencb.cellbase.app.cli.admin.AdminCliOptionsParser; +import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; import 
org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.core.utils.DatabaseNameUtils; +import org.opencb.cellbase.core.utils.SpeciesUtils; import org.opencb.cellbase.lib.EtlCommons; import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor; import org.opencb.cellbase.lib.indexer.IndexManager; @@ -38,25 +41,33 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; +import java.util.*; import java.util.concurrent.ExecutionException; import static org.opencb.cellbase.lib.EtlCommons.*; +import static org.opencb.cellbase.lib.builders.DbSnpBuilder.DBSNP_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.EnsemblGeneBuilder.ENSEMBL_GENE_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.GenomeSequenceFastaBuilder.GENOME_JSON_FILENAME; +import static org.opencb.cellbase.lib.builders.OntologyBuilder.OBO_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.PolygenicScoreBuilder.PGS_COMMON_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.PolygenicScoreBuilder.PGS_VARIANT_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.ProteinBuilder.PROTEIN_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.RefSeqGeneBuilder.REFSEQ_GENE_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.RegulatoryFeatureBuilder.*; +import static org.opencb.cellbase.lib.builders.RepeatsBuilder.REPEATS_OUTPUT_FILENAME; +import static org.opencb.cellbase.lib.builders.VariationBuilder.VARIATION_CHR_PREFIX; +import static org.opencb.cellbase.lib.download.GenomeDownloadManager.GENOME_INFO_FILENAME; /** * Created by imedina on 03/02/15. 
*/ public class LoadCommandExecutor extends CommandExecutor { - private static final String METADATA = "metadata"; private LoadRunner loadRunner; private AdminCliOptionsParser.LoadCommandOptions loadCommandOptions; private Path input; - private String[] loadOptions; + private List dataList; private int dataRelease; private String database; @@ -68,35 +79,45 @@ public class LoadCommandExecutor extends CommandExecutor { private IndexManager indexManager; private DataReleaseManager dataReleaseManager; + private static final String AUTHENTICATION_DATABASE = "authenticationDatabase"; + + private static final String LOADING_FILE_LOG_MESSAGE = "Loading file '{}'"; + private static final String ERROR_LOADING_FILE_LOG_MESSAGE = "Error loading file '{}': {}"; + private static final String ERROR_LOADING_DATA = "Error loading data in collection "; + private static final String LOADING_JSON_IN_COLLECTION_MSG = "Loading JSON file '{}' in collection '{}' for data release '{}' ..."; + public LoadCommandExecutor(AdminCliOptionsParser.LoadCommandOptions loadCommandOptions) { super(loadCommandOptions.commonOptions.logLevel, loadCommandOptions.commonOptions.conf); this.loadCommandOptions = loadCommandOptions; - - input = Paths.get(loadCommandOptions.input); - if (loadCommandOptions.database != null) { - database = loadCommandOptions.database; - } - if (loadCommandOptions.data.equals("all")) { - loadOptions = new String[]{EtlCommons.GENOME_DATA, EtlCommons.GENE_DATA, EtlCommons.REFSEQ_DATA, - EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA, - EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA, - EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.REPEATS_DATA, - EtlCommons.OBO_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, EtlCommons.PUBMED_DATA, - EtlCommons.PHARMACOGENOMICS_DATA}; - } else { - loadOptions = loadCommandOptions.data.split(","); - } - if 
(loadCommandOptions.field != null) { - field = loadCommandOptions.field; - } - if (loadCommandOptions.innerFields != null) { - innerFields = loadCommandOptions.innerFields.split(","); - } - if (loadCommandOptions.loader != null) { - loader = loadCommandOptions.loader; - } - createIndexes = !loadCommandOptions.skipIndex; +//<<<<<<< HEAD +// +// input = Paths.get(loadCommandOptions.input); +// if (loadCommandOptions.database != null) { +// database = loadCommandOptions.database; +// } +// if (loadCommandOptions.data.equals("all")) { +// loadOptions = new String[]{EtlCommons.GENOME_DATA, EtlCommons.GENE_DATA, EtlCommons.REFSEQ_DATA, +// EtlCommons.CONSERVATION_DATA, EtlCommons.REGULATION_DATA, EtlCommons.PROTEIN_DATA, +// EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA, EtlCommons.VARIATION_DATA, +// EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, EtlCommons.CLINICAL_VARIANT_DATA, EtlCommons.REPEATS_DATA, +// EtlCommons.ONTOLOGY_DATA, EtlCommons.MISSENSE_VARIATION_SCORE_DATA, EtlCommons.SPLICE_SCORE_DATA, +// EtlCommons.PUBMED_DATA, EtlCommons.PHARMACOGENOMICS_DATA, EtlCommons.PGS_DATA}; +// } else { +// loadOptions = loadCommandOptions.data.split(","); +// } +// if (loadCommandOptions.field != null) { +// field = loadCommandOptions.field; +// } +// if (loadCommandOptions.innerFields != null) { +// innerFields = loadCommandOptions.innerFields.split(","); +// } +// if (loadCommandOptions.loader != null) { +// loader = loadCommandOptions.loader; +// } +// createIndexes = !loadCommandOptions.skipIndex; +//======= +//>>>>>>> TASK-5564 } /** @@ -105,17 +126,14 @@ public LoadCommandExecutor(AdminCliOptionsParser.LoadCommandOptions loadCommandO * @throws CellBaseException CellBase exception */ public void execute() throws CellBaseException { - // Init release manager - dataReleaseManager = new DataReleaseManager(database, configuration); - checkParameters(); logger.info("Loading in data release {}", dataRelease); - if (loadCommandOptions.data != null) { + if 
(CollectionUtils.isNotEmpty(dataList)) { // If 'authenticationDatabase' is not passed by argument then we read it from configuration.json - if (loadCommandOptions.loaderParams.containsKey("authenticationDatabase")) { - configuration.getDatabases().getMongodb().getOptions().put("authenticationDatabase", - loadCommandOptions.loaderParams.get("authenticationDatabase")); + if (loadCommandOptions.loaderParams.containsKey(AUTHENTICATION_DATABASE)) { + configuration.getDatabases().getMongodb().getOptions().put(AUTHENTICATION_DATABASE, + loadCommandOptions.loaderParams.get(AUTHENTICATION_DATABASE)); } loadRunner = new LoadRunner(loader, database, numThreads, dataReleaseManager, configuration); if (createIndexes) { @@ -123,167 +141,73 @@ public void execute() throws CellBaseException { indexManager = new IndexManager(database, indexFile, configuration); } - for (String loadOption : loadOptions) { + for (String data : dataList) { try { - switch (loadOption) { + switch (data) { case EtlCommons.GENOME_DATA: { - // Load data - if (input.resolve("genome_info.json").toFile().exists()) { - loadIfExists(input.resolve("genome_info.json"), "genome_info"); - } else { - loadIfExists(input.resolve("genome_info.json.gz"), "genome_info"); - } - loadIfExists(input.resolve("genome_sequence.json.gz"), "genome_sequence"); - - // Create index - createIndex("genome_info"); - createIndex("genome_sequence"); - - // Update release (collection and sources) - List sources = new ArrayList<>(Arrays.asList( - input.resolve("genomeVersion.json") - )); - dataReleaseManager.update(dataRelease, "genome_info", EtlCommons.GENOME_DATA, sources); - dataReleaseManager.update(dataRelease, "genome_sequence", null, null); + loadGenome(); break; } case EtlCommons.GENE_DATA: { - // Load data - loadIfExists(input.resolve("gene.json.gz"), "gene"); - - // Create index - createIndex("gene"); - - // Update release (collection and sources) - List sources = new ArrayList<>(Arrays.asList( - 
input.resolve("dgidbVersion.json"), - input.resolve("ensemblCoreVersion.json"), - input.resolve("uniprotXrefVersion.json"), - input.resolve("geneExpressionAtlasVersion.json"), - input.resolve("hpoVersion.json"), - input.resolve("disgenetVersion.json"), - input.resolve("gnomadVersion.json") - )); - dataReleaseManager.update(dataRelease, "gene", EtlCommons.GENE_DATA, sources); - break; - } - case EtlCommons.REFSEQ_DATA: { - // Load data - loadIfExists(input.resolve("refseq.json.gz"), "refseq"); - - // Create index - createIndex("refseq"); - - // Update release (collection and sources) - List sources = new ArrayList<>( - Collections.singletonList(input.resolve("refseqVersion.json"))); - dataReleaseManager.update(dataRelease, "refseq", EtlCommons.REFSEQ_DATA, sources); + loadGene(); break; } case EtlCommons.VARIATION_DATA: { - // Load data, create index and update release - loadVariationData(); + loadVariation(); break; } case EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA: { // Load data - loadIfExists(input.resolve("cadd.json.gz"), "variation_functional_score"); + loadIfExists(input.resolve("cadd.json.gz"), VARIATION_FUNCTIONAL_SCORE_DATA); // Create index - createIndex("variation_functional_score"); + createIndex(VARIATION_FUNCTIONAL_SCORE_DATA); // Update release (collection and sources) List sources = new ArrayList<>(Collections.singletonList(input.resolve("caddVersion.json"))); - dataReleaseManager.update(dataRelease, "variation_functional_score", - EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, sources); + dataReleaseManager.update(dataRelease, VARIATION_FUNCTIONAL_SCORE_DATA, sources); break; } case EtlCommons.MISSENSE_VARIATION_SCORE_DATA: { // Load data - loadIfExists(input.resolve("missense_variation_functional_score.json.gz"), - "missense_variation_functional_score"); + loadIfExists(input.resolve("missense_variation_functional_score.json.gz"), MISSENSE_VARIATION_SCORE_DATA); // Create index - createIndex("missense_variation_functional_score"); + 
createIndex(MISSENSE_VARIATION_SCORE_DATA); // Update release (collection and sources) List sources = new ArrayList<>(Collections.singletonList(input.resolve("revelVersion.json"))); - dataReleaseManager.update(dataRelease, "missense_variation_functional_score", - EtlCommons.MISSENSE_VARIATION_SCORE_DATA, sources); + dataReleaseManager.update(dataRelease, MISSENSE_VARIATION_SCORE_DATA, sources); break; } case EtlCommons.CONSERVATION_DATA: { - // Load data, create index and update release loadConservation(); break; } case EtlCommons.REGULATION_DATA: { - // Load data (regulatory region and regulatory PFM)) - loadIfExists(input.resolve("regulatory_region.json.gz"), "regulatory_region"); - loadIfExists(input.resolve("regulatory_pfm.json.gz"), "regulatory_pfm"); - - // Create index - createIndex("regulatory_region"); - createIndex("regulatory_pfm"); - - // Update release (collection and sources) - List sources = new ArrayList<>(Collections.singletonList(input.resolve("ensemblRegulationVersion.json"))); - dataReleaseManager.update(dataRelease, "regulatory_region", EtlCommons.REGULATION_DATA, sources); - dataReleaseManager.update(dataRelease, "regulatory_pfm", null, null); + loadRegulation(); break; } case EtlCommons.PROTEIN_DATA: { - // Load data - loadIfExists(input.resolve("protein.json.gz"), "protein"); - - // Create index - createIndex("protein"); - - // Update release (collection and sources) - List sources = new ArrayList<>(Arrays.asList( - input.resolve("uniprotVersion.json"), - input.resolve("interproVersion.json") - )); - dataReleaseManager.update(dataRelease, "protein", EtlCommons.PROTEIN_DATA, sources); + loadProtein(); break; } -// case EtlCommons.PPI_DATA: -// loadIfExists(input.resolve("protein_protein_interaction.json.gz"), "protein_protein_interaction"); -// loadIfExists(input.resolve("intactVersion.json"), METADATA); -// createIndex("protein_protein_interaction"); -// break; case EtlCommons.PROTEIN_FUNCTIONAL_PREDICTION_DATA: { // Load data, create index 
and update release loadProteinFunctionalPrediction(); break; } - case EtlCommons.CLINICAL_VARIANTS_DATA: { + case EtlCommons.CLINICAL_VARIANT_DATA: { // Load data, create index and update release loadClinical(); break; } case EtlCommons.REPEATS_DATA: { - // Load data, create index and update release loadRepeats(); break; } -// case EtlCommons.STRUCTURAL_VARIANTS_DATA: -// loadStructuralVariants(); -// break; - case EtlCommons.OBO_DATA: { - // Load data - loadIfExists(input.resolve("ontology.json.gz"), "ontology"); - - // Create index - createIndex("ontology"); - - // Update release (collection and sources) - List sources = new ArrayList<>(Arrays.asList( - input.resolve(EtlCommons.HPO_VERSION_FILE), - input.resolve(EtlCommons.GO_VERSION_FILE), - input.resolve(EtlCommons.DO_VERSION_FILE) - )); - dataReleaseManager.update(dataRelease, "ontology", EtlCommons.OBO_DATA, sources); + case EtlCommons.ONTOLOGY_DATA: { + loadOntology(); break; } case EtlCommons.SPLICE_SCORE_DATA: { @@ -291,7 +215,7 @@ public void execute() throws CellBaseException { loadSpliceScores(); break; } - case EtlCommons.PUBMED_DATA: { + case PUBMED_DATA: { // Load data, create index and update release loadPubMed(); break; @@ -301,32 +225,27 @@ public void execute() throws CellBaseException { loadPharmacogenomica(); break; } + case EtlCommons.PGS_DATA: { + // Load data, create index and update release + loadPolygenicScores(); + break; + } default: - logger.warn("Not valid 'data'. We should not reach this point"); + logger.warn("Not valid data: {}. 
We should not reach this point", data); break; } } catch (IllegalAccessException | InstantiationException | InvocationTargetException | ExecutionException - | NoSuchMethodException | InterruptedException | ClassNotFoundException | LoaderException | IOException e) { - e.printStackTrace(); + | NoSuchMethodException | ClassNotFoundException | LoaderException | IOException e) { + logger.error(Arrays.toString(e.getStackTrace())); + } catch (InterruptedException e) { + logger.error(Arrays.toString(e.getStackTrace())); + // Restore interrupted state... + Thread.currentThread().interrupt(); } } } } -// private void loadStructuralVariants() { -// Path path = input.resolve(EtlCommons.STRUCTURAL_VARIANTS_JSON + ".json.gz"); -// if (Files.exists(path)) { -// try { -// logger.debug("Loading '{}' ...", path.toString()); -// loadRunner.load(path, EtlCommons.STRUCTURAL_VARIANTS_DATA); -// loadIfExists(input.resolve(EtlCommons.DGV_VERSION_FILE), "metadata"); -// } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException -// | IllegalAccessException | ExecutionException | IOException | InterruptedException e) { -// logger.error(e.toString()); -// } -// } -// } - private void loadIfExists(Path path, String collection) throws NoSuchMethodException, InterruptedException, ExecutionException, InstantiationException, IOException, IllegalAccessException, InvocationTargetException, ClassNotFoundException, LoaderException, CellBaseException { @@ -343,132 +262,132 @@ private void loadIfExists(Path path, String collection) throws NoSuchMethodExcep } private void checkParameters() throws CellBaseException { - if (loadCommandOptions.numThreads > 1) { - numThreads = loadCommandOptions.numThreads; + // Input folder + if (!Files.exists(Paths.get(loadCommandOptions.input))) { + throw new CellBaseException("Input path '" + loadCommandOptions.input + "' does not exist"); + } + if (!Files.isDirectory(Paths.get(loadCommandOptions.input))) { + throw new 
CellBaseException("Input path '" + loadCommandOptions.input + "' is not a directory"); + } + input = Paths.get(loadCommandOptions.input); + + // Database + if (StringUtils.isEmpty(loadCommandOptions.database)) { + throw new CellBaseException("Missing database"); + } + database = loadCommandOptions.database; + + // Data + if (StringUtils.isEmpty(loadCommandOptions.data)) { + throw new CellBaseException("Missing data. Please, specify a list of data separated by commas, or use 'all' to load" + + " everything"); + } + String species = DatabaseNameUtils.getSpeciesFromDatabaseName(database); + SpeciesConfiguration speciesConfiguration = SpeciesUtils.getSpeciesConfiguration(configuration, species); + if (speciesConfiguration == null) { + throw new CellBaseException("Species '" + species + "' not supported (database name '" + database + "')"); + } + if (loadCommandOptions.data.equals("all")) { + dataList = speciesConfiguration.getData(); } else { - numThreads = 1; - logger.warn("Incorrect number of numThreads, it must be a positive value. This has been set to '{}'", numThreads); + dataList = Arrays.asList(loadCommandOptions.data.split(",")); + if (CollectionUtils.isEmpty(dataList)) { + throw new CellBaseException("Missing data. Please, specify a list of data separated by commas, or use 'all' to load" + + " everything"); + } + Set invalidData = new HashSet<>(); + for (String data : dataList) { + if (!speciesConfiguration.getData().contains(data)) { + invalidData.add(data); + } + } + if (!CollectionUtils.isEmpty(invalidData)) { + throw new CellBaseException("Data '" + StringUtils.join(invalidData, ",") + "' not supported by species '" + species + "'"); + } } - if (field != null) { - if (loadCommandOptions.data == null) { - logger.error("--data option cannot be empty.
Please provide a valid value for the --data parameter."); - } else if (!Files.exists(input)) { - logger.error("Input parameter {} does not exist", input); + // Field + if (StringUtils.isNotEmpty(loadCommandOptions.field)) { + field = loadCommandOptions.field; + } + + // Inner fields + if (StringUtils.isNotEmpty(loadCommandOptions.innerFields)) { + innerFields = loadCommandOptions.innerFields.split(","); + } + + // Loader + if (StringUtils.isNotEmpty(loadCommandOptions.loader)) { + loader = loadCommandOptions.loader; + try { + Class.forName(loader); + } catch (ClassNotFoundException e) { + throw new CellBaseException("Loader Java class '" + loader + "' does not exist", e); } - } else if (!Files.exists(input) || !Files.isDirectory(input)) { - logger.error("Input parameter {} does not exist or is not a directory", input); } - try { - Class.forName(loader); - } catch (ClassNotFoundException e) { - logger.error("Loader Java class '{}' does not exist", loader); - e.printStackTrace(); - System.exit(-1); + + // Skip indexes + createIndexes = !loadCommandOptions.skipIndex; + + // Num. threads + if (loadCommandOptions.numThreads > 1) { + numThreads = loadCommandOptions.numThreads; + } else { + numThreads = 1; + logger.warn("Incorrect number of numThreads, it must be a positive value. 
This has been set to '{}'", numThreads); } - // Check data release + // Data release + dataReleaseManager = new DataReleaseManager(database, configuration); dataRelease = getDataReleaseForLoading(dataReleaseManager).getRelease(); } - private void loadVariationData() throws NoSuchMethodException, InterruptedException, ExecutionException, + private void loadVariation() throws NoSuchMethodException, InterruptedException, ExecutionException, InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException, IOException, LoaderException, CellBaseException { - Path variationPath = input.resolve(VARIATION_DATA); - // First load data if (field == null) { - // Common loading process from CellBase variation data models - DirectoryStream stream = Files.newDirectoryStream(variationPath, - entry -> entry.getFileName().toString().startsWith("variation_chr")); + Path variationPath = input.resolve(VARIATION_DATA); - int numLoadings = 0; - for (Path entry : stream) { - logger.info("Loading file '{}'", entry); - loadRunner.load(variationPath.resolve(entry.getFileName()), "variation", dataRelease); - numLoadings++; + // Loading variant_chrXXX files, if necessary + File[] chrFiles = variationPath.toFile().listFiles((dir, name) -> name.startsWith(VARIATION_CHR_PREFIX)); + if (chrFiles.length > 0) { + // Common loading process from CellBase variation data models + loadData(variationPath, VARIATION_DATA, VARIATION_CHR_PREFIX); } - if (numLoadings > 0) { - // Create index - createIndex("variation"); - - // Update release (collection and sources) - List sources = new ArrayList<>(Arrays.asList( - variationPath.resolve("ensemblVariationVersion.json") - )); - dataReleaseManager.update(dataRelease, "variation", EtlCommons.VARIATION_DATA, sources); - } else { - logger.info("Any variation file 'variation_chr...' 
found within folder '{}'", variationPath); - } + // Loading dbSNP file, if necessary + HashMap collectionMap = new HashMap<>(); + collectionMap.put(SNP_DATA, DBSNP_OUTPUT_FILENAME); + loadData(variationPath.resolve(DBSNP_DATA), collectionMap); } else { // Custom update required e.g. population freqs loading - logger.info("Loading file '{}'", variationPath); - loadRunner.load(variationPath, "variation", dataRelease, field, innerFields); - } - - // Load dbSNP - Path dbSnpFilePath = variationPath.resolve(DBSNP_NAME + ".json.gz"); - if (dbSnpFilePath.toFile().exists()) { - if (variationPath.resolve(DBSNP_VERSION_FILENAME).toFile().exists()) { - logger.info("Loading dbSNP file '{}'", dbSnpFilePath); - loadRunner.load(dbSnpFilePath, SNP_COLLECTION_NAME, dataRelease); - - // Create index - createIndex(SNP_COLLECTION_NAME); - - // Update release (collection and sources) - List sources = Collections.singletonList(variationPath.resolve(DBSNP_VERSION_FILENAME)); - dataReleaseManager.update(dataRelease, SNP_COLLECTION_NAME, EtlCommons.VARIATION_DATA, sources); - } else { - logger.warn("In order to load the dbSNP file you need the version file {} within the folder '{}'", DBSNP_VERSION_FILENAME, - variationPath); - } - } else { - logger.warn("Any dbSNP file found within the folder '{}'", variationPath); + logger.info(LOADING_FILE_LOG_MESSAGE, input); + loadRunner.load(input, VARIATION_DATA, dataRelease, field, innerFields); } } - private void loadConservation() throws NoSuchMethodException, InterruptedException, ExecutionException, - InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException, - IOException, CellBaseException, LoaderException { - // Load data - DirectoryStream stream = Files.newDirectoryStream(input, - entry -> entry.getFileName().toString().startsWith("conservation_")); - - for (Path entry : stream) { - logger.info("Loading file '{}'", entry); - loadRunner.load(input.resolve(entry.getFileName()), "conservation", 
dataRelease); - } - - // Create index - createIndex("conservation"); - - // Update release (collection and sources) - List sources = new ArrayList<>(Arrays.asList( - input.resolve("gerpVersion.json"), - input.resolve("phastConsVersion.json"), - input.resolve("phyloPVersion.json") - )); - dataReleaseManager.update(dataRelease, "conservation", EtlCommons.CONSERVATION_DATA, sources); + private void loadConservation() throws IOException, CellBaseException { + loadData(input.resolve(CONSERVATION_DATA), CONSERVATION_DATA, "conservation_"); } private void loadProteinFunctionalPrediction() throws NoSuchMethodException, InterruptedException, ExecutionException, InstantiationException, IllegalAccessException, InvocationTargetException, ClassNotFoundException, IOException, CellBaseException, LoaderException { // Load data - DirectoryStream stream = Files.newDirectoryStream(input, - entry -> entry.getFileName().toString().startsWith("prot_func_pred_")); + try (DirectoryStream stream = Files.newDirectoryStream(input, + entry -> entry.getFileName().toString().startsWith("prot_func_pred_"))) { - for (Path entry : stream) { - logger.info("Loading file '{}'", entry); - loadRunner.load(input.resolve(entry.getFileName()), "protein_functional_prediction", dataRelease); - } + for (Path entry : stream) { + logger.info(LOADING_FILE_LOG_MESSAGE, entry); + loadRunner.load(input.resolve(entry.getFileName()), PROTEIN_FUNCTIONAL_PREDICTION_DATA, dataRelease); + } - // Create index - createIndex("protein_functional_prediction"); + // Create index + createIndex(PROTEIN_FUNCTIONAL_PREDICTION_DATA); - // Update release (collection and sources) - dataReleaseManager.update(dataRelease, "protein_functional_prediction", null, null); + // Update release (collection and sources) + dataReleaseManager.update(dataRelease, PROTEIN_FUNCTIONAL_PREDICTION_DATA, null); + } } private void loadClinical() throws FileNotFoundException { @@ -477,10 +396,10 @@ private void loadClinical() throws FileNotFoundException 
{ try { // Load data logger.info("Loading '{}' ...", path); - loadRunner.load(path, "clinical_variants", dataRelease); + loadRunner.load(path, CLINICAL_VARIANT_DATA, dataRelease); // Create index - createIndex("clinical_variants"); + createIndex(CLINICAL_VARIANT_DATA); // Update release (collection and sources) List sources = new ArrayList<>(Arrays.asList( @@ -488,46 +407,63 @@ private void loadClinical() throws FileNotFoundException { input.resolve("cosmicVersion.json"), input.resolve("gwasVersion.json") )); - dataReleaseManager.update(dataRelease, "clinical_variants", EtlCommons.CLINICAL_VARIANTS_DATA, sources); + dataReleaseManager.update(dataRelease, CLINICAL_VARIANT_DATA, sources); } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException - | IllegalAccessException | ExecutionException | IOException | InterruptedException | CellBaseException e) { - logger.error(e.toString()); - } catch (LoaderException e) { - e.printStackTrace(); + | IllegalAccessException | ExecutionException | IOException | LoaderException | CellBaseException e) { + logger.error(Arrays.toString(e.getStackTrace())); + } catch (InterruptedException e) { + logger.error(Arrays.toString(e.getStackTrace())); + // Restore interrupted state... 
+ Thread.currentThread().interrupt(); } } else { throw new FileNotFoundException("File " + path + " does not exist"); } } - private void loadRepeats() { - Path path = input.resolve(EtlCommons.REPEATS_JSON + ".json.gz"); - if (Files.exists(path)) { - try { - // Load data - logger.debug("Loading '{}' ...", path); - loadRunner.load(path, "repeats", dataRelease); + private void loadGenome() throws CellBaseException { + HashMap collectionMap = new HashMap<>(); + collectionMap.put(GENOME_SEQUENCE_COLLECTION_NAME, GENOME_JSON_FILENAME); + collectionMap.put(GENOME_INFO_DATA, GENOME_INFO_FILENAME); - // Create index - createIndex("repeats"); + loadData(input.resolve(GENOME_DATA), collectionMap); + } - // Update release (collection and sources) - List sources = new ArrayList<>(Arrays.asList( - input.resolve(EtlCommons.TRF_VERSION_FILE), - input.resolve(EtlCommons.GSD_VERSION_FILE), - input.resolve(EtlCommons.WM_VERSION_FILE) - )); - dataReleaseManager.update(dataRelease, "repeats", EtlCommons.REPEATS_DATA, sources); - } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException - | IllegalAccessException | ExecutionException | IOException | InterruptedException | CellBaseException e) { - logger.error(e.toString()); - } catch (LoaderException e) { - e.printStackTrace(); - } - } else { - logger.warn("Repeats file {} not found", path); - logger.warn("No repeats data will be loaded"); - } + private void loadGene() throws CellBaseException { + HashMap collectionMap = new HashMap<>(); + collectionMap.put(GENE_DATA, ENSEMBL_GENE_OUTPUT_FILENAME); + collectionMap.put(REFSEQ_DATA, REFSEQ_GENE_OUTPUT_FILENAME); + + loadData(input.resolve(GENE_DATA), collectionMap); + } + + private void loadRepeats() throws CellBaseException { + HashMap collectionMap = new HashMap<>(); + collectionMap.put(REPEATS_DATA, REPEATS_OUTPUT_FILENAME); + + loadData(input.resolve(REPEATS_DATA), collectionMap); + } + + private void loadOntology() throws 
CellBaseException { + HashMap collectionMap = new HashMap<>(); + collectionMap.put(ONTOLOGY_DATA, OBO_OUTPUT_FILENAME); + + loadData(input.resolve(ONTOLOGY_DATA), collectionMap); + } + + private void loadRegulation() throws CellBaseException { + HashMap collectionMap = new HashMap<>(); + collectionMap.put(REGULATORY_REGION_BASENAME, REGULATORY_REGION_OUTPUT_FILENAME); + collectionMap.put(REGULATORY_PFM_BASENAME, REGULATORY_PFM_OUTPUT_FILENAME); + + loadData(input.resolve(REGULATION_DATA), collectionMap); + } + + private void loadProtein() throws CellBaseException { + HashMap collectionMap = new HashMap<>(); + collectionMap.put(PROTEIN_DATA, PROTEIN_OUTPUT_FILENAME); + + loadData(input.resolve(PROTEIN_DATA), collectionMap); } private void loadSpliceScores() throws NoSuchMethodException, InterruptedException, ExecutionException, InstantiationException, @@ -535,57 +471,62 @@ private void loadSpliceScores() throws NoSuchMethodException, InterruptedExcepti // Load data logger.info("Loading splice scores from '{}'", input); // MMSplice scores - loadSpliceScores(input.resolve(EtlCommons.SPLICE_SCORE_DATA + "/" + EtlCommons.MMSPLICE_SUBDIRECTORY)); + loadSpliceScores(input.resolve(SPLICE_SCORE_DATA + "/" + MMSPLICE_DATA)); // SpliceAI scores - loadSpliceScores(input.resolve(EtlCommons.SPLICE_SCORE_DATA + "/" + EtlCommons.SPLICEAI_SUBDIRECTORY)); + loadSpliceScores(input.resolve(SPLICE_SCORE_DATA + "/" + SPLICEAI_DATA)); // Create index createIndex("splice_score"); // Update release (collection and sources) List sources = new ArrayList<>(Arrays.asList( - input.resolve(EtlCommons.SPLICE_SCORE_DATA + "/" + EtlCommons.MMSPLICE_VERSION_FILENAME), - input.resolve(EtlCommons.SPLICE_SCORE_DATA + "/" + EtlCommons.SPLICEAI_VERSION_FILENAME) + input.resolve(SPLICE_SCORE_DATA + "/" + getDataVersionFilename(MMSPLICE_DATA)), + input.resolve(SPLICE_SCORE_DATA + "/" + getDataVersionFilename(SPLICEAI_DATA)) )); - dataReleaseManager.update(dataRelease, "splice_score", 
EtlCommons.SPLICE_SCORE_DATA, sources); + dataReleaseManager.update(dataRelease, SPLICE_SCORE_DATA, sources); } private void loadSpliceScores(Path spliceFolder) throws IOException, ExecutionException, InterruptedException, ClassNotFoundException, InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException, LoaderException, CellBaseException { // Get files from folder - DirectoryStream stream = Files.newDirectoryStream(spliceFolder, - entry -> entry.getFileName().toString().startsWith("splice_score_")); + try (DirectoryStream stream = Files.newDirectoryStream(spliceFolder, + entry -> entry.getFileName().toString().startsWith("splice_score_"))) { - // Load from JSON files - for (Path entry : stream) { - logger.info("Loading file '{}'", entry); - loadRunner.load(spliceFolder.resolve(entry.getFileName()), "splice_score", dataRelease); + // Load from JSON files + for (Path entry : stream) { + logger.info(LOADING_FILE_LOG_MESSAGE, entry); + loadRunner.load(spliceFolder.resolve(entry.getFileName()), "splice_score", dataRelease); + } } } private void loadPubMed() throws CellBaseException { - Path pubmedPath = input.resolve(EtlCommons.PUBMED_DATA); + Path pubmedPath = input.resolve(PUBMED_DATA); if (Files.exists(pubmedPath)) { // Load data for (File file : pubmedPath.toFile().listFiles()) { if (file.isFile() && (file.getName().endsWith("gz"))) { - logger.info("Loading file '{}'", file.getName()); + logger.info(LOADING_FILE_LOG_MESSAGE, file.getName()); try { - loadRunner.load(file.toPath(), EtlCommons.PUBMED_DATA, dataRelease); + loadRunner.load(file.toPath(), PUBMED_DATA, dataRelease); } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException - | IllegalAccessException | ExecutionException | IOException | InterruptedException | LoaderException e) { - logger.error("Error loading file '{}': {}", file.getName(), e.toString()); + | IllegalAccessException | ExecutionException | IOException | 
LoaderException e) { + logger.error(ERROR_LOADING_FILE_LOG_MESSAGE, file.getName(), Arrays.toString(e.getStackTrace())); + } catch (InterruptedException e) { + logger.error(ERROR_LOADING_FILE_LOG_MESSAGE, file.getName(), Arrays.toString(e.getStackTrace())); + // Restore interrupted state... + Thread.currentThread().interrupt(); } } } // Create index - createIndex(EtlCommons.PUBMED_DATA); + createIndex(PUBMED_DATA); // Update release (collection and sources) - List sources = Collections.singletonList(pubmedPath.resolve(EtlCommons.PUBMED_VERSION_FILENAME)); - dataReleaseManager.update(dataRelease, EtlCommons.PUBMED_DATA, EtlCommons.PUBMED_DATA, sources); + List sources = Collections.singletonList(pubmedPath.resolve(EtlCommons.getDataVersionFilename(PUBMED_DATA))); + dataReleaseManager.update(dataRelease, PUBMED_DATA, sources); } else { logger.warn("PubMed folder {} not found", pubmedPath); } @@ -601,34 +542,126 @@ private void loadPharmacogenomica() throws IOException, CellBaseException { // Load data Path pharmaJsonPath = pharmaPath.resolve(EtlCommons.PHARMACOGENOMICS_DATA + ".json.gz"); - logger.info("Loading file '{}'", pharmaJsonPath.toFile().getName()); + logger.info(LOADING_FILE_LOG_MESSAGE, pharmaJsonPath.toFile().getName()); try { loadRunner.load(pharmaJsonPath, EtlCommons.PHARMACOGENOMICS_DATA, dataRelease); } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException - | IllegalAccessException | ExecutionException | IOException | InterruptedException | CellBaseException - | LoaderException e) { - logger.error("Error loading file '{}': {}", pharmaJsonPath.toFile().getName(), e.toString()); + | IllegalAccessException | ExecutionException | IOException | CellBaseException | LoaderException e) { + logger.error(ERROR_LOADING_FILE_LOG_MESSAGE, pharmaJsonPath.toFile().getName(), Arrays.toString(e.getStackTrace())); + } catch (InterruptedException e) { + logger.error(ERROR_LOADING_FILE_LOG_MESSAGE, 
pharmaJsonPath.toFile().getName(), Arrays.toString(e.getStackTrace())); + // Restore interrupted state... + Thread.currentThread().interrupt(); } - // Create index createIndex(EtlCommons.PHARMACOGENOMICS_DATA); // Update release (collection and sources) - List sources = Collections.singletonList(pharmaPath.resolve(EtlCommons.PHARMGKB_VERSION_FILENAME)); - dataReleaseManager.update(dataRelease, EtlCommons.PHARMACOGENOMICS_DATA, EtlCommons.PHARMACOGENOMICS_DATA, sources); + List sources = Collections.singletonList(pharmaPath.resolve(getDataVersionFilename(PHARMGKB_DATA))); + dataReleaseManager.update(dataRelease, EtlCommons.PHARMACOGENOMICS_DATA, sources); + } + + private void loadPolygenicScores() throws NoSuchMethodException, InterruptedException, ExecutionException, InstantiationException, + IllegalAccessException, InvocationTargetException, ClassNotFoundException, IOException, CellBaseException, LoaderException { + HashMap collectionMap = new HashMap<>(); + collectionMap.put(PGS_COMMON_COLLECTION, PGS_COMMON_OUTPUT_FILENAME); + collectionMap.put(PGS_VARIANT_COLLECTION, PGS_VARIANT_OUTPUT_FILENAME); + + loadData(input.resolve(PGS_DATA), collectionMap); + } + + private void loadData(Path buildPath, Map collectionMap) throws CellBaseException { + // Load data from the different files into the input collections + for (Map.Entry entry : collectionMap.entrySet()) { + Path jsonPath = buildPath.resolve(entry.getValue()); + loadJsonFile(entry.getKey(), jsonPath); + } + + // Load sources + loadSources(buildPath); + } + + private void loadData(Path buildPath, String collection, String prefix) throws CellBaseException, IOException { + // Load data + try (DirectoryStream stream = Files.newDirectoryStream(buildPath, + entry -> entry.getFileName().toString().startsWith(prefix))) { + + for (Path entry : stream) { + logger.info("Loading JSON file '{}' ...", entry); + try { + loadRunner.load(buildPath.resolve(entry.getFileName()), collection, dataRelease); + logger.info(DONE_MSG); 
+ } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } catch (Exception e) { + throw new CellBaseException(ERROR_LOADING_DATA + collection, e); + } + } + + // Create index + createIndex(collection); + + // Update the data release collection + dataReleaseManager.update(dataRelease, collection, getVersionPaths(buildPath)); + } + } + + private void loadJsonFile(String collection, Path jsonPath) throws CellBaseException { + if (!Files.exists(jsonPath)) { + String collectionName = CellBaseDBAdaptor.buildCollectionName(collection, dataRelease); + logger.warn("JSON file '{}' not found. No data will be loaded in collection '{}'.", jsonPath, collectionName); + return; + } + + try { + // Load data + logger.info(LOADING_JSON_IN_COLLECTION_MSG, jsonPath.getFileName(), collection, dataRelease); + loadRunner.load(jsonPath, collection, dataRelease); + logger.info(DONE_MSG); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException(ERROR_LOADING_DATA + collection, e); + } catch (Exception e) { + throw new CellBaseException(ERROR_LOADING_DATA + collection, e); + } + + // Create index + createIndex(collection); + + // Update collection in data release + dataReleaseManager.update(dataRelease, collection); } private void createIndex(String collection) { if (!createIndexes) { return; } - String collectionName = CellBaseDBAdaptor.buildCollectionName(collection, dataRelease); - logger.info("Loading indexes for '{}' collection ...", collectionName); + + String collectionName = null; try { + collectionName = CellBaseDBAdaptor.buildCollectionName(collection, dataRelease); + logger.info("Creating indexes for collection '{}' ...", collectionName); indexManager.createMongoDBIndexes(Collections.singletonList(collectionName), true); + logger.info(DONE_MSG); } catch (IOException e) { - logger.error("Error creating index: {}", e.getMessage()); + logger.error("Error creating indexes for 
collection '{}': {}", collectionName, Arrays.toString(e.getStackTrace())); + } + } + + private void loadSources(Path path) throws CellBaseException { + // Update data source in data release + dataReleaseManager.updateSources(dataRelease, getVersionPaths(path)); + } + + private List getVersionPaths(Path path) { + List sources = new ArrayList<>(); + for (File file : path.toFile().listFiles()) { + if (file.getName().endsWith(SUFFIX_VERSION_FILENAME)) { + sources.add(file.getAbsoluteFile().toPath()); + } } + return sources; } private DataRelease getDataReleaseForLoading(DataReleaseManager dataReleaseManager) throws CellBaseException { @@ -638,11 +671,9 @@ private DataRelease getDataReleaseForLoading(DataReleaseManager dataReleaseManag throw new CellBaseException("No data releases are available"); } DataRelease lastDataRelease = null; - for (DataRelease dataRelease : dataReleaseResults.getResults()) { - if (lastDataRelease == null) { - lastDataRelease = dataRelease; - } else if (dataRelease.getRelease() > lastDataRelease.getRelease()) { - lastDataRelease = dataRelease; + for (DataRelease dr : dataReleaseResults.getResults()) { + if (lastDataRelease == null || dr.getRelease() > lastDataRelease.getRelease()) { + lastDataRelease = dr; } } if (lastDataRelease == null) { diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ValidationCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ValidationCommandExecutor.java index 612e8d6a38..764de7b0df 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ValidationCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/ValidationCommandExecutor.java @@ -78,7 +78,7 @@ public void execute() { DataRelease dataRelease = dataReleaseManager.get(validationCommandOptions.dataRelease); variantAnnotationCalculator = new VariantAnnotationCalculator(validationCommandOptions.species, 
validationCommandOptions.assembly, dataRelease, validationCommandOptions.apiKey, - cellBaseManagerFactory); + cellBaseManagerFactory, configuration); } catch (CellBaseException e) { e.printStackTrace(); return; diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java index d2285d5550..731a7220ca 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/main/executors/VariantAnnotationCommandExecutor.java @@ -207,7 +207,7 @@ private boolean runAnnotation() throws Exception { DataReleaseManager dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager(species, assembly); DataRelease dataRelease = dataReleaseManager.get(variantAnnotationCommandOptions.dataRelease); VariantAnnotationCalculator variantAnnotationCalculator = new VariantAnnotationCalculator(species, assembly, - dataRelease, variantAnnotationCommandOptions.apiKey, cellBaseManagerFactory); + dataRelease, variantAnnotationCommandOptions.apiKey, cellBaseManagerFactory, configuration); List> annotationByVariantList = variantAnnotationCalculator.getAnnotationByVariantList(variants, serverQueryOptions); @@ -485,7 +485,7 @@ private VariantAnnotator createCellBaseAnnotator() throws CellBaseException { DataReleaseManager dataReleaseManager = cellBaseManagerFactory.getDataReleaseManager(species, assembly); DataRelease dataRelease = dataReleaseManager.get(variantAnnotationCommandOptions.dataRelease); return new CellBaseLocalVariantAnnotator(new VariantAnnotationCalculator(species, assembly, dataRelease, - variantAnnotationCommandOptions.apiKey, cellBaseManagerFactory), serverQueryOptions); + variantAnnotationCommandOptions.apiKey, cellBaseManagerFactory, configuration), serverQueryOptions); } else { try { 
ClientConfiguration clientConfiguration = ClientConfiguration.load(getClass() diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/GeneQuery.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/GeneQuery.java index 1451fc213b..b987afa29f 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/GeneQuery.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/GeneQuery.java @@ -42,12 +42,11 @@ public class GeneQuery extends AbstractQuery { @QueryParameter(id = "region") private List regions; - @QueryParameter(id = "transcripts.biotype", alias = {ParamConstants.TRANSCRIPT_BIOTYPES_PARAM, - "transcriptsBiotype"}) + @QueryParameter(id = "transcripts.biotype", alias = {ParamConstants.TRANSCRIPT_BIOTYPES_PARAM, "transcriptsBiotype"}) private List transcriptsBiotype; - @QueryParameter(id = "transcripts.xrefs.id", alias = {ParamConstants.TRANSCRIPT_XREFS_PARAM, "xrefs", "transcriptsXrefsId", - "transcripts.xrefs"}) + @QueryParameter(id = "transcripts.xrefs.id", + alias = {ParamConstants.TRANSCRIPT_XREFS_PARAM, "xrefs", "transcriptsXrefsId", "transcripts.xrefs"}) private List transcriptsXrefs; @QueryParameter(id = "transcripts.id", alias = {ParamConstants.TRANSCRIPT_IDS_PARAM, "transcriptsId"}) private List transcriptsId; @@ -61,22 +60,23 @@ public class GeneQuery extends AbstractQuery { private LogicalList transcriptsTfbsId; @QueryParameter(id = "transcripts.tfbs.pfmId", alias = {ParamConstants.TRANSCRIPT_TFBS_PFMIDS_PARAM, "transcriptsTfbsPfmId"}) private LogicalList transcriptsTfbsPfmId; - @QueryParameter(id = "transcripts.tfbs.transcriptionFactors", alias = {ParamConstants.TRANSCRIPT_TRANSCRIPTION_FACTORS_PARAM, - "transcriptsTfbsTranscriptionFactors"}) + @QueryParameter(id = "transcripts.tfbs.transcriptionFactors", + alias = {ParamConstants.TRANSCRIPT_TRANSCRIPTION_FACTORS_PARAM, "transcriptsTfbsTranscriptionFactors"}) private LogicalList transcriptsTfbsTranscriptionFactors; - @QueryParameter(id = 
ParamConstants.ONTOLOGY_PARAM, alias = {"transcripts.annotation.ontologies.id", - "transcripts.annotation.ontologies.name", "transcriptAnnotationOntologiesId"}) + @QueryParameter(id = ParamConstants.ONTOLOGY_PARAM, + alias = {"transcripts.annotation.ontologies.id", "transcripts.annotation.ontologies.name", "transcriptAnnotationOntologiesId"}) private LogicalList transcriptAnnotationOntologiesId; - @QueryParameter(id = ParamConstants.ANNOTATION_DISEASES_PARAM, alias = {"annotation.diseases.id", "annotation.diseases.name"}) + @QueryParameter(id = ParamConstants.ANNOTATION_DISEASES_PARAM, + alias = {"annotation.diseases.id", "annotation.diseases.name", "annotation.diseases.hpo"}) private LogicalList annotationDiseases; - @QueryParameter(id = "annotation.expression.tissue", alias = {ParamConstants.ANNOTATION_EXPRESSION_TISSUE_PARAM, - "annotationExpressionTissue"}) + @QueryParameter(id = "annotation.expression.tissue", + alias = {ParamConstants.ANNOTATION_EXPRESSION_TISSUE_PARAM, "annotationExpressionTissue"}) private LogicalList annotationExpressionTissue; - @QueryParameter(id = "annotation.expression.value", alias = {ParamConstants.ANNOTATION_EXPRESSION_VALUE_PARAM, - "annotationExpressionValue"}) + @QueryParameter(id = "annotation.expression.value", + alias = {ParamConstants.ANNOTATION_EXPRESSION_VALUE_PARAM, "annotationExpressionValue"}) private LogicalList annotationExpressionValue; - @QueryParameter(id = "annotation.drugs.drugName", alias = {ParamConstants.ANNOTATION_DRUGS_NAME_PARAM, "annotation.drugs.name", - "annotationDrugsName"}) + @QueryParameter(id = "annotation.drugs.drugName", + alias = {ParamConstants.ANNOTATION_DRUGS_NAME_PARAM, "annotation.drugs.name", "annotationDrugsName"}) private LogicalList annotationDrugsName; @QueryParameter(id = "constraints", alias = {ParamConstants.ANNOTATION_CONSTRAINTS_PARAM}) private LogicalList annotationConstraints; diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PolygenicScoreQuery.java 
b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PolygenicScoreQuery.java new file mode 100644 index 0000000000..106b01e1fe --- /dev/null +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/api/PolygenicScoreQuery.java @@ -0,0 +1,98 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.core.api; + +import org.opencb.cellbase.core.api.query.AbstractQuery; +import org.opencb.cellbase.core.api.query.QueryException; +import org.opencb.cellbase.core.api.query.QueryParameter; + +import java.util.List; +import java.util.Map; + +public class PolygenicScoreQuery extends AbstractQuery { + + @QueryParameter(id = "id") + private List ids; + + @QueryParameter(id = "name") + private List names; + + @QueryParameter(id = "source", allowedValues = {"PGS Catalog"}) + private List sources; + + public PolygenicScoreQuery() { + } + + public PolygenicScoreQuery(Map params) throws QueryException { + super(params); + + objectMapper.readerForUpdating(this); + objectMapper.readerFor(PolygenicScoreQuery.class); + objectWriter = objectMapper.writerFor(PolygenicScoreQuery.class); + } + + @Override + protected void validateQuery() throws QueryException { + // Nothing to to + return; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("PolygenicScoreQuery{"); + sb.append("ids=").append(ids); + sb.append(", names=").append(names); + sb.append(", 
sources=").append(sources); + sb.append(", limit=").append(limit); + sb.append(", skip=").append(skip); + sb.append(", count=").append(count); + sb.append(", sort='").append(sort).append('\''); + sb.append(", order=").append(order); + sb.append(", facet='").append(facet).append('\''); + sb.append(", includes=").append(includes); + sb.append(", excludes=").append(excludes); + sb.append('}'); + return sb.toString(); + } + + public List getIds() { + return ids; + } + + public PolygenicScoreQuery setIds(List ids) { + this.ids = ids; + return this; + } + + public List getNames() { + return names; + } + + public PolygenicScoreQuery setNames(List names) { + this.names = names; + return this; + } + + public List getSources() { + return sources; + } + + public PolygenicScoreQuery setSources(List sources) { + this.sources = sources; + return this; + } +} diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/CellBaseConfiguration.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/CellBaseConfiguration.java index de470db66d..c30d3d6bea 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/CellBaseConfiguration.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/CellBaseConfiguration.java @@ -19,7 +19,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.google.common.base.CaseFormat; -import org.apache.commons.lang.StringUtils; import org.opencb.commons.utils.FileUtils; import org.slf4j.LoggerFactory; @@ -27,7 +26,8 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.file.Path; -import java.util.*; +import java.util.HashMap; +import java.util.Map; public class CellBaseConfiguration { @@ -135,10 +135,7 @@ private static DatabaseCredentials secureGetMongodb(CellBaseConfiguration config configuration.setDatabases(new Databases()); } if (configuration.getDatabases().getMongodb() == null) { - 
configuration.getDatabases().setMongodb(new MongoDBDatabaseCredentials()); - } - if (configuration.getDatabases().getMongodb().getShards() == null) { - configuration.getDatabases().getMongodb().setShards(Collections.emptyList()); + configuration.getDatabases().setMongodb(new DatabaseCredentials()); } if (configuration.getDatabases().getMongodb().getOptions() == null) { configuration.getDatabases().getMongodb().setOptions(new HashMap<>()); @@ -228,51 +225,6 @@ public void setSpecies(SpeciesProperties species) { this.species = species; } - /** - * get the config for this species. - * @param id shortName for species, e.g. hsapiens - * @return configuration for this species - */ - public SpeciesConfiguration getSpeciesConfig(String id) { - if (StringUtils.isEmpty(id)) { - return null; - } - List allSpecies = getAllSpecies(); - for (SpeciesConfiguration config : allSpecies) { - if (config.getId().equals(id)) { - return config; - } - } - return null; - } - - public List getAllSpecies() { - List allSpecies = new ArrayList<>(); - if (species.getVertebrates() != null && !species.getVertebrates().isEmpty()) { - allSpecies.addAll(species.getVertebrates()); - } - if (species.getMetazoa() != null && !species.getMetazoa().isEmpty()) { - allSpecies.addAll(species.getMetazoa()); - } - if (species.getFungi() != null && !species.getFungi().isEmpty()) { - allSpecies.addAll(species.getFungi()); - } - if (species.getProtist() != null && !species.getProtist().isEmpty()) { - allSpecies.addAll(species.getProtist()); - } - if (species.getPlants() != null && !species.getPlants().isEmpty()) { - allSpecies.addAll(species.getPlants()); - } - if (species.getVirus() != null && !species.getVirus().isEmpty()) { - allSpecies.addAll(species.getVirus()); - } - if (species.getBacteria() != null && !species.getBacteria().isEmpty()) { - allSpecies.addAll(species.getBacteria()); - } - - return allSpecies; - } - public ServerProperties getServer() { return server; } diff --git 
a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DatabaseCredentials.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DatabaseCredentials.java index 304c191d78..ab9c8a6e94 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DatabaseCredentials.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DatabaseCredentials.java @@ -18,15 +18,13 @@ import java.util.Map; -/** - * Created by imedina on 19/08/16. - */ + public class DatabaseCredentials { - private String host; - private String user; - private String password; - private Map options; + protected String host; + protected String user; + protected String password; + protected Map options; public DatabaseCredentials() { } @@ -40,7 +38,7 @@ public DatabaseCredentials(String host, String user, String password, Map getOptions() { return options; } - public void setOptions(Map options) { + public DatabaseCredentials setOptions(Map options) { this.options = options; + return this; } } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/Databases.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/Databases.java index 4c0cf374c7..905780fcdb 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/Databases.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/Databases.java @@ -16,48 +16,32 @@ package org.opencb.cellbase.core.config; -import java.util.Map; - -/** - * Created by imedina on 16/09/16. 
- */ public class Databases { - private MongoDBDatabaseCredentials mongodb; - private Map neo4j; + private DatabaseCredentials mongodb; public Databases() { } - public Databases(MongoDBDatabaseCredentials mongodb, Map neo4j) { + public Databases(DatabaseCredentials mongodb) { this.mongodb = mongodb; - this.neo4j = neo4j; } @Override public String toString() { final StringBuilder sb = new StringBuilder("Databases{"); sb.append("mongodb=").append(mongodb); - sb.append(", neo4j=").append(neo4j); sb.append('}'); return sb.toString(); } - public MongoDBDatabaseCredentials getMongodb() { + public DatabaseCredentials getMongodb() { return mongodb; } - public Databases setMongodb(MongoDBDatabaseCredentials mongodb) { + public Databases setMongodb(DatabaseCredentials mongodb) { this.mongodb = mongodb; return this; } - public Map getNeo4j() { - return neo4j; - } - - public Databases setNeo4j(Map neo4j) { - this.neo4j = neo4j; - return this; - } } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java index 507e85a75f..915dfa086b 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/DownloadProperties.java @@ -16,7 +16,7 @@ package org.opencb.cellbase.core.config; -import java.util.List; +import java.util.Map; /** * Created by imedina on 19/08/16. 
@@ -28,16 +28,11 @@ public class DownloadProperties { private URLProperties hgnc; private URLProperties cancerHotspot; private URLProperties refSeq; - private URLProperties refSeqFasta; - private URLProperties refSeqProteinFasta; - private URLProperties refSeqCdna; private URLProperties maneSelect; private URLProperties lrg; - private URLProperties geneUniprotXref; private URLProperties geneExpressionAtlas; private URLProperties mirbase; - private URLProperties mirbaseReadme; private URLProperties targetScan; private URLProperties miRTarBase; private URLProperties uniprot; @@ -45,25 +40,20 @@ public class DownloadProperties { private URLProperties intact; private URLProperties interpro; private URLProperties interproRelNotes; - private URLProperties conservation; + private URLProperties phastCons; + private URLProperties phylop; private URLProperties gerp; private URLProperties clinvar; - private URLProperties clinvarVariation; - private URLProperties clinvarSummary; - private URLProperties clinvarVariationAllele; - private URLProperties clinvarEfoTerms; + private URLProperties cosmic; + private URLProperties hgmd; private URLProperties dbSNP; - private URLProperties iarctp53; - private URLProperties docm; - private URLProperties docmVersion; private URLProperties dgv; private URLProperties simpleRepeats; private URLProperties windowMasker; private URLProperties genomicSuperDups; private URLProperties hpo; - private URLProperties disgenet; - private URLProperties disgenetReadme; private URLProperties dgidb; + private URLProperties cancerGeneCensus; private URLProperties gwasCatalog; private URLProperties dbsnp; private URLProperties cadd; @@ -75,8 +65,11 @@ public class DownloadProperties { private URLProperties mondoObo; private URLProperties goAnnotation; private URLProperties revel; + private URLProperties mmSplice; + private URLProperties spliceAi; private URLProperties pubmed; private URLProperties pharmGKB; + private URLProperties pgsCatalog; public 
EnsemblProperties getEnsembl() { return ensembl; @@ -132,15 +125,6 @@ public DownloadProperties setMirbase(URLProperties mirbase) { return this; } - public URLProperties getMirbaseReadme() { - return mirbaseReadme; - } - - public DownloadProperties setMirbaseReadme(URLProperties mirbaseReadme) { - this.mirbaseReadme = mirbaseReadme; - return this; - } - public URLProperties getTargetScan() { return targetScan; } @@ -204,12 +188,21 @@ public DownloadProperties setInterproRelNotes(URLProperties interproRelNotes) { return this; } - public URLProperties getConservation() { - return conservation; + public URLProperties getPhastCons() { + return phastCons; } - public DownloadProperties setConservation(URLProperties conservation) { - this.conservation = conservation; + public DownloadProperties setPhastCons(URLProperties phastCons) { + this.phastCons = phastCons; + return this; + } + + public URLProperties getPhylop() { + return phylop; + } + + public DownloadProperties setPhylop(URLProperties phylop) { + this.phylop = phylop; return this; } @@ -231,38 +224,21 @@ public DownloadProperties setClinvar(URLProperties clinvar) { return this; } - public URLProperties getClinvarVariation() { - return clinvarVariation; - } - - public DownloadProperties setClinvarVariation(URLProperties clinvarVariation) { - this.clinvarVariation = clinvarVariation; - return this; - } - - public URLProperties getClinvarSummary() { - return clinvarSummary; + public URLProperties getCosmic() { + return cosmic; } - public DownloadProperties setClinvarSummary(URLProperties clinvarSummary) { - this.clinvarSummary = clinvarSummary; + public DownloadProperties setCosmic(URLProperties cosmic) { + this.cosmic = cosmic; return this; } - public URLProperties getClinvarVariationAllele() { - return clinvarVariationAllele; - } - - public void setClinvarVariationAllele(URLProperties clinvarVariationAllele) { - this.clinvarVariationAllele = clinvarVariationAllele; - } - - public URLProperties getClinvarEfoTerms() 
{ - return clinvarEfoTerms; + public URLProperties getHgmd() { + return hgmd; } - public DownloadProperties setClinvarEfoTerms(URLProperties clinvarEfoTerms) { - this.clinvarEfoTerms = clinvarEfoTerms; + public DownloadProperties setHgmd(URLProperties hgmd) { + this.hgmd = hgmd; return this; } @@ -275,30 +251,6 @@ public DownloadProperties setDbSNP(URLProperties dbSNP) { return this; } - public URLProperties getIarctp53() { - return iarctp53; - } - - public void setIarctp53(URLProperties iarctp53) { - this.iarctp53 = iarctp53; - } - - public URLProperties getDocm() { - return docm; - } - - public void setDocm(URLProperties docm) { - this.docm = docm; - } - - public URLProperties getDocmVersion() { - return docmVersion; - } - - public void setDocmVersion(URLProperties docmVersion) { - this.docmVersion = docmVersion; - } - public URLProperties getDgv() { return dgv; } @@ -340,30 +292,21 @@ public DownloadProperties setHpo(URLProperties hpo) { return this; } - public URLProperties getDisgenet() { - return disgenet; - } - - public DownloadProperties setDisgenet(URLProperties disgenet) { - this.disgenet = disgenet; - return this; - } - - public URLProperties getDisgenetReadme() { - return disgenetReadme; + public URLProperties getDgidb() { + return dgidb; } - public DownloadProperties setDisgenetReadme(URLProperties disgenetReadme) { - this.disgenetReadme = disgenetReadme; + public DownloadProperties setDgidb(URLProperties dgidb) { + this.dgidb = dgidb; return this; } - public URLProperties getDgidb() { - return dgidb; + public URLProperties getCancerGeneCensus() { + return cancerGeneCensus; } - public DownloadProperties setDgidb(URLProperties dgidb) { - this.dgidb = dgidb; + public DownloadProperties setCancerGeneCensus(URLProperties cancerGeneCensus) { + this.cancerGeneCensus = cancerGeneCensus; return this; } @@ -457,25 +400,30 @@ public DownloadProperties setRefSeq(URLProperties refSeq) { return this; } - public URLProperties getRefSeqFasta() { - return refSeqFasta; 
+ public URLProperties getRevel() { + return revel; } - public DownloadProperties setRefSeqFasta(URLProperties refSeqFasta) { - this.refSeqFasta = refSeqFasta; + public DownloadProperties setRevel(URLProperties revel) { + this.revel = revel; return this; } - public URLProperties getRefSeqProteinFasta() { - return refSeqProteinFasta; + public URLProperties getMmSplice() { + return mmSplice; } - public URLProperties getRevel() { - return revel; + public DownloadProperties setMmSplice(URLProperties mmSplice) { + this.mmSplice = mmSplice; + return this; } - public DownloadProperties setRevel(URLProperties revel) { - this.revel = revel; + public URLProperties getSpliceAi() { + return spliceAi; + } + + public DownloadProperties setSpliceAi(URLProperties spliceAi) { + this.spliceAi = spliceAi; return this; } @@ -497,17 +445,12 @@ public DownloadProperties setPharmGKB(URLProperties pharmGKB) { return this; } - public DownloadProperties setRefSeqProteinFasta(URLProperties refSeqProteinFasta) { - this.refSeqProteinFasta = refSeqProteinFasta; - return this; + public URLProperties getPgsCatalog() { + return pgsCatalog; } - public URLProperties getRefSeqCdna() { - return refSeqCdna; - } - - public DownloadProperties setRefSeqCdna(URLProperties refSeqCdna) { - this.refSeqCdna = refSeqCdna; + public DownloadProperties setPgsCatalog(URLProperties pgsCatalog) { + this.pgsCatalog = pgsCatalog; return this; } @@ -582,7 +525,7 @@ public static class URLProperties { private String host; private String version; - private List files; + private Map files; public String getHost() { return host; @@ -601,14 +544,13 @@ public URLProperties setVersion(String version) { return this; } - public List getFiles() { + public Map getFiles() { return files; } - public URLProperties setFiles(List files) { + public URLProperties setFiles(Map files) { this.files = files; return this; } - } } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/MongoDBDatabaseCredentials.java 
b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/MongoDBDatabaseCredentials.java deleted file mode 100644 index 2582b24115..0000000000 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/MongoDBDatabaseCredentials.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright 2015-2020 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.cellbase.core.config; - -import java.util.List; -import java.util.Map; - -/** - * Created by imedina on 19/08/16. 
- */ -public class MongoDBDatabaseCredentials extends DatabaseCredentials { - - private List shards; - private String host; - private String user; - private String password; - private Map options; - - public MongoDBDatabaseCredentials() { - } - - public MongoDBDatabaseCredentials(String host, String user, String password, List shards, Map options) { - super(host, user, password, options); - this.shards = shards; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("DatabaseProperties{"); - sb.append("host='").append(host).append('\''); - sb.append(", user='").append(user).append('\''); - sb.append(", password='").append(password).append('\''); - sb.append(", replicaSets='").append(shards).append('\''); - sb.append(", options=").append(options); - sb.append('}'); - return sb.toString(); - } - - public List getShards() { - return shards; - } - - public MongoDBDatabaseCredentials setShards(List shards) { - this.shards = shards; - return this; - } - - public static class ReplicaSet { - private String id; - private String nodes; - - /** - * @return the replicaset name, e.g. rs0 - */ - public String getId() { - return id; - } - - /** - * @param id label for the replicaset, e.g. rs0 - * @return the replicaset of interest - */ - public ReplicaSet setId(String id) { - this.id = id; - return this; - } - - /** - * @return nodes for replica set, e.g. cb-mongo-shard1-1:27017,cb-mongo-shard1-2:27017,cb-mongo-shard1-3:27017 - */ - public String getNodes() { - return nodes; - } - - /** - * @param nodes nodes for replica set, e.g. 
cb-mongo-shard1-1:27017,cb-mongo-shard1-2:27017,cb-mongo-shard1-3:27017 - * @return nodes for this replica set - */ - public ReplicaSet setNodes(String nodes) { - this.nodes = nodes; - return this; - } - } -} diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/SpeciesConfiguration.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/SpeciesConfiguration.java index 3a3fae4d9f..5c4976675c 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/config/SpeciesConfiguration.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/config/SpeciesConfiguration.java @@ -18,9 +18,7 @@ import java.util.List; -/** - * Created by imedina on 19/08/16. - */ + public class SpeciesConfiguration { private String id; @@ -28,23 +26,19 @@ public class SpeciesConfiguration { private String commonName; private List assemblies; private List data; - private List shards; public SpeciesConfiguration() { } - public SpeciesConfiguration(String id, String scientificName, String commonName, List assemblies, List data, - List shards) { + public SpeciesConfiguration(String id, String scientificName, String commonName, List assemblies, List data) { this.id = id; this.scientificName = scientificName; this.commonName = commonName; this.assemblies = assemblies; this.data = data; - this.shards = shards; } - @Override public String toString() { final StringBuilder sb = new StringBuilder("Species{"); @@ -53,7 +47,6 @@ public String toString() { sb.append(", commonName='").append(commonName).append('\''); sb.append(", assemblies=").append(assemblies); sb.append(", data=").append(data); - sb.append(", shards=").append(shards); sb.append('}'); return sb.toString(); } @@ -62,40 +55,45 @@ public String getId() { return id; } - public void setId(String id) { + public SpeciesConfiguration setId(String id) { this.id = id; + return this; } public String getScientificName() { return scientificName; } - public void setScientificName(String scientificName) { + 
public SpeciesConfiguration setScientificName(String scientificName) { this.scientificName = scientificName; + return this; } public String getCommonName() { return commonName; } - public void setCommonName(String commonName) { + public SpeciesConfiguration setCommonName(String commonName) { this.commonName = commonName; + return this; } public List getAssemblies() { return assemblies; } - public void setAssemblies(List assemblies) { + public SpeciesConfiguration setAssemblies(List assemblies) { this.assemblies = assemblies; + return this; } public List getData() { return data; } - public void setData(List data) { + public SpeciesConfiguration setData(List data) { this.data = data; + return this; } public static class Assembly { @@ -103,126 +101,51 @@ public static class Assembly { private String ensemblVersion; private String ensemblCollection; // Only for bacteria - public String getName() { - return name; + public Assembly() { } - public void setName(String name) { + public Assembly(String ensemblCollection, String ensemblVersion, String name) { + this.ensemblCollection = ensemblCollection; + this.ensemblVersion = ensemblVersion; this.name = name; } - public String getEnsemblVersion() { - return ensemblVersion; - } - - public void setEnsemblVersion(String ensemblVersion) { - this.ensemblVersion = ensemblVersion; + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Assembly{"); + sb.append("ensemblCollection='").append(ensemblCollection).append('\''); + sb.append(", name='").append(name).append('\''); + sb.append(", ensemblVersion='").append(ensemblVersion).append('\''); + sb.append('}'); + return sb.toString(); } public String getEnsemblCollection() { return ensemblCollection; } - public void setEnsemblCollection(String ensemblCollection) { + public Assembly setEnsemblCollection(String ensemblCollection) { this.ensemblCollection = ensemblCollection; - } - } - - public List getShards() { - return shards; - } - - public 
SpeciesConfiguration setShards(List shards) { - this.shards = shards; - return this; - } - - public static class ShardConfig { - private String collection; - private List key; - private String rangeKey; - private List zones; - - public String getCollection() { - return collection; - } - - public ShardConfig setCollection(String collection) { - this.collection = collection; - return this; - } - - public List getKey() { - return key; - } - - public ShardConfig setKey(List key) { - this.key = key; - return this; - } - - public String getRangeKey() { - return rangeKey; - } - - public ShardConfig setRangeKey(String rangeKey) { - this.rangeKey = rangeKey; return this; } - public List getZones() { - return zones; + public String getEnsemblVersion() { + return ensemblVersion; } - public ShardConfig setZones(List zones) { - this.zones = zones; + public Assembly setEnsemblVersion(String ensemblVersion) { + this.ensemblVersion = ensemblVersion; return this; } - } - - public static class Zone { - private String name; - private List shardRanges; public String getName() { return name; } - public Zone setName(String name) { + public Assembly setName(String name) { this.name = name; return this; } - - public List getShardRanges() { - return shardRanges; - } - - public Zone setShardRanges(List shardRanges) { - this.shardRanges = shardRanges; - return this; - } } - public static class ShardRange { - private String minimum; - private String maximum; - - public String getMinimum() { - return minimum; - } - - public ShardRange setMinimum(String minimum) { - this.minimum = minimum; - return this; - } - - public String getMaximum() { - return maximum; - } - - public ShardRange setMaximum(String maximum) { - this.maximum = maximum; - return this; - } - } } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java index a3b54942d5..422a52b0d4 100644 --- 
a/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/exception/CellBaseException.java @@ -22,7 +22,7 @@ public CellBaseException(String msg) { super(msg); } - public CellBaseException(String msg, Exception e) { + public CellBaseException(String msg, Throwable e) { super(msg, e); } } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataRelease.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataRelease.java index 5674839aa8..47a694c5d8 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataRelease.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataRelease.java @@ -21,14 +21,9 @@ public class DataRelease { private int release; private String date; - /** - * @deprecated it is maintained to back-compatibility with previous CellBase versions to v5.5 - */ - @Deprecated - private boolean active; private List activeByDefaultIn; private Map collections; - private List sources; + private List sources; public DataRelease() { this.activeByDefaultIn = Collections.emptyList(); @@ -37,7 +32,7 @@ public DataRelease() { } public DataRelease(int release, String date, List activeByDefaultIn, Map collections, - List sources) { + List sources) { this.release = release; this.date = date; this.activeByDefaultIn = activeByDefaultIn; @@ -75,15 +70,6 @@ public DataRelease setDate(String date) { return this; } - public boolean isActive() { - return active; - } - - public DataRelease setActive(boolean active) { - this.active = active; - return this; - } - public List getActiveByDefaultIn() { return activeByDefaultIn; } @@ -102,11 +88,11 @@ public DataRelease setCollections(Map collections) { return this; } - public List getSources() { + public List getSources() { return sources; } - public DataRelease setSources(List sources) { + public DataRelease setSources(List sources) { this.sources = sources; return this; } 
diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataReleaseSource.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataReleaseSource.java deleted file mode 100644 index 3a42de9374..0000000000 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataReleaseSource.java +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2015-2020 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.cellbase.core.models; - -import java.util.List; -import java.util.Objects; - -public class DataReleaseSource { - private String name; - private String version; - private String data; - private String date; - private List url; - - public DataReleaseSource() { - } - - public DataReleaseSource(String name, String version, String data, String date, List url) { - this.name = name; - this.version = version; - this.data = data; - this.date = date; - this.url = url; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("DataReleaseSource{"); - sb.append("name='").append(name).append('\''); - sb.append(", version='").append(version).append('\''); - sb.append(", data='").append(data).append('\''); - sb.append(", date='").append(date).append('\''); - sb.append(", url=").append(url); - sb.append('}'); - return sb.toString(); - } - - public String getName() { - return name; - } - - public DataReleaseSource setName(String name) { - this.name = name; - return this; - } - - 
public String getVersion() { - return version; - } - - public DataReleaseSource setVersion(String version) { - this.version = version; - return this; - } - - public String getData() { - return data; - } - - public DataReleaseSource setData(String data) { - this.data = data; - return this; - } - - public String getDate() { - return date; - } - - public DataReleaseSource setDate(String date) { - this.date = date; - return this; - } - - public List getUrl() { - return url; - } - - public DataReleaseSource setUrl(List url) { - this.url = url; - return this; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - DataReleaseSource that = (DataReleaseSource) o; - return Objects.equals(name, that.name) - && Objects.equals(version, that.version) - && Objects.equals(data, that.data) - && Objects.equals(date, that.date) - && Objects.equals(url, that.url); - } - - @Override - public int hashCode() { - return Objects.hash(name, version, data, date, url); - } -} diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataSource.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataSource.java new file mode 100644 index 0000000000..acc134cb63 --- /dev/null +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/models/DataSource.java @@ -0,0 +1,110 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.core.models; + +import java.util.ArrayList; +import java.util.List; + +public class DataSource { + + private String id; + private String name; + private String category; + private String version; + private String downloadDate; + private List urls; + + public DataSource() { + this.urls = new ArrayList<>(); + } + + public DataSource(String id, String name, String category, String version, String downloadDate, List urls) { + this.id = id; + this.name = name; + this.category = category; + this.version = version; + this.downloadDate = downloadDate; + this.urls = urls; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("DataSource{"); + sb.append("id='").append(id).append('\''); + sb.append(", name='").append(name).append('\''); + sb.append(", category='").append(category).append('\''); + sb.append(", version='").append(version).append('\''); + sb.append(", downloadDate='").append(downloadDate).append('\''); + sb.append(", urls=").append(urls); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public DataSource setId(String id) { + this.id = id; + return this; + } + + public String getName() { + return name; + } + + public DataSource setName(String name) { + this.name = name; + return this; + } + + public String getCategory() { + return category; + } + + public DataSource setCategory(String category) { + this.category = category; + return this; + } + + public String getVersion() { + return version; + } + + public DataSource setVersion(String version) { + this.version = version; + return this; + } + + public String getDownloadDate() { + return downloadDate; + } + + public DataSource setDownloadDate(String downloadDate) { + this.downloadDate = downloadDate; + return this; + } + + public List getUrls() { + return urls; + } + + public DataSource 
setUrls(List urls) { + this.urls = urls; + return this; + } +} diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/utils/DatabaseNameUtils.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/utils/DatabaseNameUtils.java new file mode 100644 index 0000000000..12954e950f --- /dev/null +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/utils/DatabaseNameUtils.java @@ -0,0 +1,72 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.core.utils; + +import org.apache.commons.lang3.StringUtils; + +import java.security.InvalidParameterException; +import java.util.Locale; + + +public final class DatabaseNameUtils { + + public static final String DBNAME_PREFIX = "cellbase"; + public static final String DBNAME_SEPARATOR = "_"; + + private DatabaseNameUtils() { + throw new IllegalStateException("Utility class"); + } + + public static String getDatabaseName(String species, String assembly, String version) { + if (StringUtils.isEmpty(species) || StringUtils.isEmpty(assembly)) { + throw new InvalidParameterException("Both species and assembly are required"); + } + + // Remove special characters + String dbnameAssembly = cleanAssembly(assembly); + + // Process version from the configuration file, in order to suffix the database name + // - Production environment, e.g.: if version is "v5", the suffix added wil be "_v5" + // - Test environment, e.g.: if version is "v5.6" or "v5.6.0-SNAPSHOT", the suffix added will be "_v5_6" + String auxVersion = version.replace(".", DBNAME_SEPARATOR).replace("-", DBNAME_SEPARATOR); + String[] split = auxVersion.split(DBNAME_SEPARATOR); + String dbName = DBNAME_PREFIX + DBNAME_SEPARATOR + species.toLowerCase() + DBNAME_SEPARATOR + dbnameAssembly.toLowerCase() + + DBNAME_SEPARATOR + split[0]; + if (split.length > 1) { + dbName += (DBNAME_SEPARATOR + split[1]); + } + return dbName; + } + + public static String cleanAssembly(String assembly) { + if (StringUtils.isEmpty(assembly)) { + throw new InvalidParameterException("Assembly is empty"); + } + + return assembly.replace("\\.", "") + .replace("-", "") + .replace("_", "").toLowerCase(Locale.ROOT); + } + + public static String getSpeciesFromDatabaseName(String databaseName) { + if (StringUtils.isEmpty(databaseName)) { + throw new InvalidParameterException("Database name is empty"); + } + + return databaseName.split(DBNAME_SEPARATOR)[1].toLowerCase(Locale.ROOT); + } +} diff --git 
a/cellbase-core/src/main/java/org/opencb/cellbase/core/utils/SpeciesUtils.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/utils/SpeciesUtils.java index c928f783e4..39c0b7e0f3 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/utils/SpeciesUtils.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/utils/SpeciesUtils.java @@ -16,12 +16,15 @@ package org.opencb.cellbase.core.utils; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.opencb.cellbase.core.common.Species; import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.SpeciesConfiguration; +import org.opencb.cellbase.core.config.SpeciesProperties; import org.opencb.cellbase.core.exception.CellBaseException; +import java.util.ArrayList; import java.util.List; @@ -39,7 +42,7 @@ public class SpeciesUtils { */ public static Species getSpecies(CellBaseConfiguration configuration, String speciesStr, String assemblyStr) throws CellBaseException { Species species = null; - for (SpeciesConfiguration sp : configuration.getAllSpecies()) { + for (SpeciesConfiguration sp : SpeciesUtils.getAllSpecies(configuration)) { if (speciesStr.equalsIgnoreCase(sp.getScientificName()) || speciesStr.equalsIgnoreCase(sp.getCommonName()) || speciesStr.equalsIgnoreCase(sp.getId())) { SpeciesConfiguration.Assembly assembly; @@ -82,7 +85,7 @@ public static boolean validateSpeciesAndAssembly(CellBaseConfiguration configura return false; } - for (SpeciesConfiguration sp : configuration.getAllSpecies()) { + for (SpeciesConfiguration sp : SpeciesUtils.getAllSpecies(configuration)) { if (species.equalsIgnoreCase(sp.getScientificName()) || species.equalsIgnoreCase(sp.getCommonName()) || species.equalsIgnoreCase(sp.getId())) { return getAssembly(sp, assembly) != null; @@ -96,8 +99,9 @@ public static boolean validateSpecies(CellBaseConfiguration configuration, Strin return false; } - for (SpeciesConfiguration 
sp : configuration.getAllSpecies()) { - if (species.equalsIgnoreCase(sp.getScientificName()) || species.equalsIgnoreCase(sp.getCommonName()) + for (SpeciesConfiguration sp : SpeciesUtils.getAllSpecies(configuration)) { + if (species.equalsIgnoreCase(sp.getScientificName()) + || species.equalsIgnoreCase(sp.getCommonName()) || species.equalsIgnoreCase(sp.getId())) { return true; } @@ -108,7 +112,7 @@ public static boolean validateSpecies(CellBaseConfiguration configuration, Strin public static SpeciesConfiguration getSpeciesConfiguration(CellBaseConfiguration configuration, String species) { SpeciesConfiguration speciesConfiguration = null; - for (SpeciesConfiguration sp : configuration.getAllSpecies()) { + for (SpeciesConfiguration sp : SpeciesUtils.getAllSpecies(configuration)) { if (species.equalsIgnoreCase(sp.getScientificName()) || species.equalsIgnoreCase(sp.getCommonName()) || species.equalsIgnoreCase(sp.getId())) { @@ -119,6 +123,11 @@ public static SpeciesConfiguration getSpeciesConfiguration(CellBaseConfiguration return speciesConfiguration; } + public static boolean hasData(CellBaseConfiguration configuration, String species, String data) { + SpeciesConfiguration speciesConfiguration = SpeciesUtils.getSpeciesConfiguration(configuration, species); + return CollectionUtils.isNotEmpty(speciesConfiguration.getData()) && speciesConfiguration.getData().contains(data); + } + /** * Get the default assembly for species. Is naive and just gets the first one. Order not guaranteed, don't rely on this at all. 
* @@ -134,6 +143,34 @@ public static SpeciesConfiguration.Assembly getDefaultAssembly(SpeciesConfigurat return assemblies.get(0); } + public static List getAllSpecies(CellBaseConfiguration cellBaseConfiguration) { + List allSpecies = new ArrayList<>(); + SpeciesProperties species = cellBaseConfiguration.getSpecies(); + if (species.getVertebrates() != null && !species.getVertebrates().isEmpty()) { + allSpecies.addAll(species.getVertebrates()); + } + if (species.getMetazoa() != null && !species.getMetazoa().isEmpty()) { + allSpecies.addAll(species.getMetazoa()); + } + if (species.getFungi() != null && !species.getFungi().isEmpty()) { + allSpecies.addAll(species.getFungi()); + } + if (species.getProtist() != null && !species.getProtist().isEmpty()) { + allSpecies.addAll(species.getProtist()); + } + if (species.getPlants() != null && !species.getPlants().isEmpty()) { + allSpecies.addAll(species.getPlants()); + } + if (species.getVirus() != null && !species.getVirus().isEmpty()) { + allSpecies.addAll(species.getVirus()); + } + if (species.getBacteria() != null && !species.getBacteria().isEmpty()) { + allSpecies.addAll(species.getBacteria()); + } + + return allSpecies; + } + /** * Get the default assembly for species. Is naive and just gets the first one. Order not guaranteed, don't rely on this at all. 
* diff --git a/cellbase-core/src/main/resources/configuration.yml b/cellbase-core/src/main/resources/configuration.yml index 409c66ba1e..deff4e0881 100644 --- a/cellbase-core/src/main/resources/configuration.yml +++ b/cellbase-core/src/main/resources/configuration.yml @@ -16,11 +16,6 @@ databases: host: "${CELLBASE.DB.MONGODB.HOST}" user: "${CELLBASE.DB.USER}" password: "${CELLBASE.DB.PASSWORD}" - shards: - - id: "${CELLBASE.DB.MONGODB.REPLICASET.0.NAME}" - nodes: "${CELLBASE.DB.MONGODB.REPLICASET.0}" - - id: "${CELLBASE.DB.MONGODB.REPLICASET.1.NAME}" - nodes: "${CELLBASE.DB.MONGODB.REPLICASET.1}" options: authenticationDatabase: "${CELLBASE.DB.MONGODB.AUTHENTICATIONDATABASE}" authenticationMechanism: "${CELLBASE.DB.MONGODB.AUTHENTICATION_MECHANISM}" @@ -28,23 +23,15 @@ databases: replicaSet: "${CELLBASE.DB.MONGODB.REPLICASET}" connectionsPerHost: 20 sslEnabled: false -# sslInvalidCertificatesAllowed: true -# sslInvalidHostnameAllowed: true + # sslInvalidCertificatesAllowed: true + # sslInvalidHostnameAllowed: true enableSharding: false - neo4j: - hsapiens: - host: "${CELLBASE.DB.NEO4J.HOST}" - user: "${CELLBASE.DB.USER}" - password: "${CELLBASE.DB.PASSWORD}" - mmusculus: - host: "${CELLBASE.DB.NEO4J.HOST}" - user: "${CELLBASE.DB.USER}" - password: "${CELLBASE.DB.PASSWORD}" server: rest: port: "${CELLBASE.SERVER.REST.PORT}" defaultOutdir: "/tmp" download: + ## Genomic and Gene information ensembl: database: host: ensembldb.ensembl.org:3306 @@ -52,7 +39,23 @@ download: password: '' libs: "${CELLBASE.ENSEMBL.LIBS}" url: - host: ftp://ftp.ensembl.org/pub + host: https://ftp.ensembl.org/pub/ + files: + # New Homo sapiens assemblies contain too many ALT regions, so we download 'primary_assembly' file instead + PRIMARY_FA: "release-put_release_here/fasta/put_species_here/dna/put_capital_species_here.put_assembly_here.dna.primary_assembly.fa.gz" + GTF: "release-put_release_here/gtf/put_species_here/put_capital_species_here.put_assembly_here.put_release_here.gtf.gz" + 
PEP_FA: "release-put_release_here/fasta/put_species_here/pep/put_capital_species_here.put_assembly_here.pep.all.fa.gz" + CDNA_FA: "release-put_release_here/fasta/put_species_here/cdna/put_capital_species_here.put_assembly_here.cdna.all.fa.gz" + REGULATORY_BUILD: "release-put_release_here/regulation/put_species_here/put_species_here.put_assembly_here.Regulatory_Build.regulatory_features.20221007.gff.gz" + MOTIF_FEATURES: "release-put_release_here/regulation/put_species_here/MotifFeatures/put_species_here.put_assembly_here.motif_features.gff.gz" + MOTIF_FEATURES_INDEX: "release-put_release_here/regulation/put_species_here/MotifFeatures/put_species_here.put_assembly_here.motif_features.gff.gz.tbi" + DESCRIPTION: "script:gene_extra_info.pl@description.txt" + XREFS: "script:gene_extra_info.pl@xrefs.txt" + CANONICAL: "script:ensembl_canonical.pl@ensembl_canonical.txt" + GENOME_INFO: "script:genome_info.pl@genome_info.json" + MMUSCULUS_VARIATION: "release-put_release_here/variation/vcf/put_species_here/put_species_here.vcf.gz" + MMUSCULUS_STRUCTURAL_VARIATIONS: "release-put_release_here/variation/vcf/put_species_here/put_species_here_structural_variations.vcf.gz" + ensemblGenomes: database: host: mysql-eg-publicsql.ebi.ac.uk:4157 @@ -61,237 +64,342 @@ download: libs: "${CELLBASE.ENSEMBL.LIBS}" url: host: ftp://ftp.ensemblgenomes.org/pub - hgnc: - host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt - version: 2023-11-01 - cancerHotspot: - host: https://www.cancerhotspots.org/files/hotspots_v2.xls - version: "v2" refSeq: - host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz - refSeqFasta: - host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz - refSeqProteinFasta: - host: 
https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_protein.faa.gz - refSeqCdna: - host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz + host: https://ftp.ncbi.nih.gov/refseq/ + version: "2023-10-11" + files: + GENOMIC_GTF: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz + GENOMIC_FNA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz + PROTEIN_FAA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_protein.faa.gz + RNA_FNA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz + MMUSCULUS_GENOMIC_GTF: M_musculus/annotation_releases/GCF_000001635.27-RS_2024_02/GCF_000001635.27_GRCm39_genomic.gtf.gz + MMUSCULUS_GENOMIC_FNA: M_musculus/annotation_releases/GCF_000001635.27-RS_2024_02/GCF_000001635.27_GRCm39_genomic.fna.gz + MMUSCULUS_PROTEIN_FAA: M_musculus/annotation_releases/GCF_000001635.27-RS_2024_02/GCF_000001635.27_GRCm39_protein.faa.gz + MMUSCULUS_RNA_FNA: M_musculus/annotation_releases/GCF_000001635.27-RS_2024_02/GCF_000001635.27_GRCm39_rna.fna.gz + RNORVEGICUS_GENOMIC_GTF: R_norvegicus/annotation_releases/GCF_036323735.1-RS_2024_02/GCF_036323735.1_GRCr8_genomic.gtf.gz + RNORVEGICUS_GENOMIC_FNA: R_norvegicus/annotation_releases/GCF_036323735.1-RS_2024_02/GCF_036323735.1_GRCr8_genomic.fna.gz + RNORVEGICUS_PROTEIN_FAA: R_norvegicus/annotation_releases/GCF_036323735.1-RS_2024_02/GCF_036323735.1_GRCr8_protein.faa.gz + RNORVEGICUS_RNA_FNA: R_norvegicus/annotation_releases/GCF_036323735.1-RS_2024_02/GCF_036323735.1_GRCr8_rna.fna.gz + BTAURUS_GENOMIC_GTF: B_taurus/annotation_releases/GCF_002263795.3-RS_2023_09/GCF_002263795.3_ARS-UCD2.0_genomic.gtf.gz + BTAURUS_GENOMIC_FNA: B_taurus/annotation_releases/GCF_002263795.3-RS_2023_09/GCF_002263795.3_ARS-UCD2.0_genomic.fna.gz + BTAURUS_PROTEIN_FAA: 
B_taurus/annotation_releases/GCF_002263795.3-RS_2023_09/GCF_002263795.3_ARS-UCD2.0_protein.faa.gz + BTAURUS_RNA_FNA: B_taurus/annotation_releases/GCF_002263795.3-RS_2023_09/GCF_002263795.3_ARS-UCD2.0_rna.fna.gz maneSelect: -# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_0.93/MANE.GRCh38.v0.93.summary.txt.gz -# host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.0/MANE.GRCh38.v1.0.summary.txt.gz - host: https://ftp.ncbi.nlm.nih.gov/refseq/MANE/MANE_human/release_1.1/MANE.GRCh38.v1.1.summary.txt.gz - version: "1.1" + host: https://ftp.ncbi.nlm.nih.gov/refseq/ + version: "1.2" + files: + MANE_SELECT: MANE/MANE_human/release_1.2/MANE.GRCh38.v1.2.summary.txt.gz lrg: - host: http://ftp.ebi.ac.uk/pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt + host: http://ftp.ebi.ac.uk/ version: "2021-03-30" + files: + LRG: pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt + hgnc: + host: https://ftp.ebi.ac.uk/ + version: "2024-04-01" + files: + HGNC: pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2024-04-01.txt + cancerHotspot: + host: https://www.cancerhotspots.org/ + version: "v2" + files: + CANCER_HOTSPOT: files/hotspots_v2.xls + dgidb: + host: https://dgidb.org/ + version: "DGIdb v.5.0.7 (07/06/2024)" + files: + DGIDB: data/latest/interactions.tsv geneUniprotXref: host: http://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ - version: "2023-11-08" + version: "2024-03-27" + files: + UNIPROT_XREF: HUMAN_9606_idmapping_selected.tab.gz + MMUSCULUS_UNIPROT_XREF: MOUSE_10090_idmapping_selected.tab.gz + RNORVEGICUS_UNIPROT_XREF: RAT_10116_idmapping_selected.tab.gz + DRERIO_UNIPROT_XREF: DANRE_7955_idmapping_selected.tab.gz + DMELOANOGASTER_UNIPROT_XREF: DROME_7227_idmapping_selected.tab.gz + SCEREVISIAE_UNIPROT_XREF: YEAST_559292_idmapping_selected.tab.gz + CELEGANS_UNIPROT_XREF: CAEEL_6239_idmapping_selected.tab.gz geneExpressionAtlas: - host: 
ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz + host: https://ftp.ebi.ac.uk/ + version: "2.0.14" + files: + GENE_EXPRESSION_ATLAS: pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz + hpo: + ## NOTE: Download manually from here now + host: https://hpo.jax.org/app/data/annotations/ + version: "2024-04-26" + files: + HPO: "manual@phenotype_to_genes.txt" + gnomadConstraints: + host: https://storage.googleapis.com/ + version: "2.1.1" + files: + GNOMAD_CONSTRAINTS: gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz + goAnnotation: + host: http://geneontology.org/ + files: + GO_ANNOTATION: gene-associations/goa_human.gaf.gz + MMUSCULUS_GO_ANNOTATION: gene-associations/mgi.gaf.gz + cancerGeneCensus: + ## To be downloaded manually + host: https://cancer.sanger.ac.uk/census/ + version: "v99" + files: + CANCER_GENE_CENSUS: "manual@cancer-gene-census.tsv" + pgsCatalog: + host: https://www.pgscatalog.org/ + version: "Dec. 
15, 2023" + files: + PGS_CATALOG: https://ftp.ebi.ac.uk/pub/databases/spot/pgs/metadata/pgs_all_metadata_scores.csv + + ## Regulation mirbase: - host: ftp://mirbase.org/pub/mirbase/CURRENT/miRNA.xls.gz - mirbaseReadme: - host: ftp://mirbase.org/pub/mirbase/CURRENT/README + host: https://www.mirbase.org/ + version: "22.1" + files: + MIRBASE: download/miRNA.dat targetScan: host: http://hgdownload.cse.ucsc.edu/goldenPath/ miRTarBase: - host: https://mirtarbase.cuhk.edu.cn/~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx + host: https://mirtarbase.cuhk.edu.cn/ version: "9.0" + files: + MIRTARBASE: ~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx + MMUSCULUS_MIRTARBASE: ~miRTarBase/miRTarBase_2022/cache/download/9.0/mmu_MTI.xlsx + RNORVEGICUS_MIRTARBASE: ~miRTarBase/miRTarBase_2022/cache/download/9.0/rno_MTI.xlsx ## Protein Data uniprot: - host: https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz - version: "2023-11-08" - uniprotRelNotes: - host: https://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt - version: "2023-11-08" + host: https://ftp.uniprot.org/ + version: "2024-03-27" + files: + UNIPROT: pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz interpro: - host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/protein2ipr.dat.gz - version: "2023-11-08" - interproRelNotes: - host: https://ftp.ebi.ac.uk/pub/databases/interpro/current_release/release_notes.txt + host: https://ftp.ebi.ac.uk/ + version: "2024-03-27" + files: + INTERPRO: pub/databases/interpro/current_release/protein2ipr.dat.gz intact: - host: https://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt - version: "2023-10-07" + host: https://ftp.ebi.ac.uk/ + version: "2024-02-16" + files: + INTACT: pub/databases/intact/current/psimitab/intact.txt ## Conservation Scores - conservation: - host: https://hgdownload.cse.ucsc.edu/goldenPath/ + phastCons: + ## The CellBase 
downloader will change put_assembly_here by the assembly, e.g. hg38; and put_chromosome_here by the chromosomes: 1,2,..X,Y,M + host: https://hgdownload.cse.ucsc.edu/ version: "2022-08-30" + files: + PHASTCONS: goldenPath/hg38/phastCons470way/hg38.470way.phastCons/ + MMUSCULUS_PHASTCONS: goldenPath/mm39/phastCons35way/mm39.35way.phastCons/ + phylop: + ## The CellBase downloader will change put_assembly_here by the assembly, e.g. hg38; and put_chromosome_here by the chromosomes: 1,2,..X,Y,M + host: https://hgdownload.cse.ucsc.edu/ + version: "2022-08-30" + files: + PHYLOP: goldenPath/hg38/phyloP470way/hg38.470way.phyloP/ + MMUSCULUS_PHYLOP: goldenPath/mm39/phyloP35way/mm39.35way.phyloP/ gerp: - host: http://ftp.ensembl.org/pub/release-110/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + host: http://ftp.ensembl.org/ version: "2023-05-17" + files: + GERP: pub/release-111/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + MMUSCULUS_GERP: pub/release-111/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.mus_musculus.GRCm39.bw + + ## Clinical Variant clinvar: -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2021-07.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-02.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2022-11.xml.gz - host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2023-12.xml.gz - version: "2023-12-01" - clinvarVariation: -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2021-07.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-02.xml.gz -# host: https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2022-11.xml.gz - host: 
https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_2023-12.xml.gz - clinvarSummary: - host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz - version: "2023-12-01" - clinvarVariationAllele: - host: http://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz - version: "2023-12-01" - clinvarEfoTerms: - host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv + host: https://ftp.ncbi.nlm.nih.gov/ + version: "2024-02" + files: + FULL_RELEASE: pub/clinvar/xml/RCV_xml_old_format/ClinVarFullRelease_2024-02.xml.gz + SUMMARY: pub/clinvar/tab_delimited/variant_summary.txt.gz + ALLELE: pub/clinvar/tab_delimited/variation_allele.txt.gz + EFO_TERMS: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv + cosmic: + ## To be downloaded manually + host: https://cancer.sanger.ac.uk/cosmic/ + version: "v99" + files: + COSMIC: CosmicMutantExport.tsv.gz + hgmd: + ## To be downloaded manually + host: https://www.hgmd.cf.ac.uk/ + version: "2020-03" + files: + HGMD: hgmd.vcf + gwasCatalog: + ## Download file from https://www.ebi.ac.uk/gwas/docs/file-downloads to find the real version, which is 'e111_r2024-04-22' + host: https://ftp.ebi.ac.uk/ + version: "2024-04-22" + files: + GWAS: pub/databases/gwas/releases/2024/04/22/gwas-catalog-associations_ontology-annotated.tsv + DBSNP: All.vcf.gz + dbSNP: host: https://ftp.ncbi.nih.gov/snp/latest_release/VCF/GCF_000001405.40.gz version: "156" - iarctp53: - host: http://p53.iarc.fr/ajax/Zipper.ashx - docm: - host: http://docm.info/api/ - docmVersion: - host: http://docm.info + + pharmGKB: + host: https://api.pharmgkb.org/v1/download/file/data/ + version: v1 + files: + GENES: genes.zip + CHEMICALS: chemicals.zip + VARIANTS: variants.zip + GUIDELINE_ANNOTATIONS: guidelineAnnotations.json.zip + VARIANT_ANNOTATIONS: variantAnnotations.zip + CLINICAL_ANNOTATIONS: clinicalAnnotations.zip + 
CLINICAL_VARIANTS: clinicalVariants.zip + DRUG_LABELS: drugLabels.zip + RELATIONSHIPS: relationships.zip + dgv: host: http://dgv.tcag.ca/v106/docs simpleRepeats: - host: http://hgdownload.cse.ucsc.edu/goldenPath + host: http://hgdownload.cse.ucsc.edu/ + files: + SIMPLE_REPEATS: goldenPath/hg38/database/simpleRepeat.txt.gz + MMUSCULUS_SIMPLE_REPEATS: goldenPath/mm39/database/simpleRepeat.txt.gz windowMasker: - host: http://hgdownload.cse.ucsc.edu/goldenPath + host: http://hgdownload.cse.ucsc.edu/ + files: + WINDOW_MASKER: goldenPath/hg38/database/windowmaskerSdust.txt.gz + MMUSCULUS_WINDOW_MASKER: goldenPath/mm39/database/windowmaskerSdust.txt.gz genomicSuperDups: - host: http://hgdownload.cse.ucsc.edu/goldenPath - gwasCatalog: -# host: http://resources.opencb.org/opencb/cellbase/data/gwas/gwas_catalog_v1.0.2-associations_e106_r2022-05-17.tsv -# version: "1.0.2 associations_e106_r2022-05-17" - host: ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/2023/12/21/gwas-catalog-associations.tsv - version: "23-12-21" - hpo: - ## Downlaod manually from here now: https://hpo.jax.org/app/data/annotations - host: https://ci.monarchinitiative.org/view/hpo/job/hpo.annotations/lastSuccessfulBuild/artifact/rare-diseases/util/annotation/phenotype_to_genes.txt - disgenet: - host: https://www.disgenet.org/static/disgenet_ap1/files/downloads + host: http://hgdownload.cse.ucsc.edu/ files: - - all_gene_disease_associations.tsv.gz - - readme.txt - dgidb: - host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv - version: "2022-02-01" + GENOMIC_SUPER_DUPS: goldenPath/hg38/database/genomicSuperDups.txt.gz + + ## Variant Pathogenic Prediction + revel: + host: https://zenodo.org/ + version: "1.3" + files: + REVEL: record/7072866/files/revel-v1.3_all_chromosomes.zip cadd: - ## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP! 
-# host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz - host: https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz - version: "1.7-pre" - reactome: - host: http://www.reactome.org/download/current/biopax.zip - gnomadConstraints: - host: https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz - version: "2.1.1" + host: https://krishna.gs.washington.edu/ + version: "1.7" + files: + CADD: download/CADD/v1.7/GRCh38/whole_genome_SNVs.tsv.gz + + ## OBO Ontologies + ## The version is retrieved from the OBO file hpoObo: - host: http://purl.obolibrary.org/obo/hp.obo - version: "2023-12-01" + host: http://purl.obolibrary.org/obo/ + files: + HPO: hp.obo goObo: - host: http://purl.obolibrary.org/obo/go/go-basic.obo - version: "2023-12-01" + host: http://purl.obolibrary.org/obo/ + files: + GO: go/go-basic.obo doidObo: - host: http://purl.obolibrary.org/obo/doid.obo - version: "2023-12-01" + host: http://purl.obolibrary.org/obo/ + files: + DOID: doid.obo mondoObo: - host: http://purl.obolibrary.org/obo/mondo.obo - version: "2023-12-01" - goAnnotation: - host: http://geneontology.org/gene-associations/goa_human.gaf.gz - revel: - host: https://zenodo.org/record/7072866/files/revel-v1.3_all_chromosomes.zip + host: http://purl.obolibrary.org/obo/ + files: + MONDO: mondo.obo + + ## Splice score + mmSplice: + host: http://kipoi.org/models/MMSplice/mtsplice/ + version: 2.0 + spliceAi: + host: https://basespace.illumina.com/s/otSPW8hnhaZR + version: 1.3.1 + + ## Others pubmed: host: https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/ + version: 2024 files: - - pubmed22n[1..1114..4].xml.gz - pharmGKB: - host: https://www.pharmgkb.org/downloads - version: v1 - files: - - https://api.pharmgkb.org/v1/download/file/data/genes.zip - - https://api.pharmgkb.org/v1/download/file/data/chemicals.zip - - 
https://api.pharmgkb.org/v1/download/file/data/variants.zip - - https://api.pharmgkb.org/v1/download/file/data/guidelineAnnotations.json.zip - - https://api.pharmgkb.org/v1/download/file/data/variantAnnotations.zip - - https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip - - https://api.pharmgkb.org/v1/download/file/data/clinicalVariants.zip - - https://api.pharmgkb.org/v1/download/file/data/drugLabels.zip - - https://api.pharmgkb.org/v1/download/file/data/relationships.zip + PUBMED_REGEX: pubmed24n[1..1219..4].xml.gz + reactome: + host: http://www.reactome.org/download/current/biopax.zip + + species: vertebrates: - id: hsapiens scientificName: Homo sapiens assemblies: - - ensemblVersion: '110_38' + - ensemblVersion: '111_38' name: GRCh38 - - ensemblVersion: '82_37' - name: GRCh37 +# - ensemblVersion: '82_37' +# name: GRCh37 data: - - clinical_variants + - genome - conservation + - repeats - gene - - genome - - missense_variation_functional_score - - ontology - protein - - refseq - regulation - - repeats - variation - variation_functional_score + - missense_variation_functional_score + - clinical_variant - splice_score - shards: - - collection: "variation" - key: - - chromosome - - start - - end - rangeKey: "chromosome" - zones: - - name: "zone0" - shardRanges: - - minimum: "1" - maximum: "10" - - minimum: "2" - maximum: "20" - - minimum: "3" - maximum: "9" - - name: "zone1" - shardRanges: - - minimum: "10" - maximum: "2" - - minimum: "20" - maximum: "3" - - minimum: "9" - maximum: "Z" + - ontology + - pubmed + - pharmacogenomics + - polygenic_score - id: mmusculus scientificName: Mus musculus assemblies: - - ensemblVersion: '82_38' - name: GRCm38 + - ensemblVersion: '111_39' + name: GRCm39 data: - genome - - genome_info + - conservation + - repeats - gene + - regulation + - protein - variation + - ontology + - id: rnorvegicus + scientificName: Rattus norvegicus + assemblies: + - ensemblVersion: '111_7.2' + name: mRatBN7.2 + data: + - genome + - 
gene - regulation - protein - - conservation +# - variation - id: drerio scientificName: Danio rerio assemblies: - - ensemblVersion: '82_10' - name: GRCz10 + - ensemblVersion: '111_11' + name: GRCz11 + data: + - genome + - gene + - regulation + - protein +# - variation + - id: btaurus + scientificName: Bos taurus + assemblies: + - ensemblVersion: '111_1.3' + name: ARS-UCD1.3 data: - genome - genome_info - gene +# - refseq + - regulation - variation - protein - - id: rnorvegicus - scientificName: Rattus norvegicus + - id: sscrofa + scientificName: Sus scrofa assemblies: - - ensemblVersion: '82_6' - name: Rnor_6.0 + - ensemblVersion: '111_11.1' + name: Sscrofa11.1 data: - genome - genome_info diff --git a/cellbase-core/src/test/java/org/opencb/cellbase/core/config/CellBaseConfigurationTest.java b/cellbase-core/src/test/java/org/opencb/cellbase/core/config/CellBaseConfigurationTest.java index 75bc8c2104..29546c02ad 100644 --- a/cellbase-core/src/test/java/org/opencb/cellbase/core/config/CellBaseConfigurationTest.java +++ b/cellbase-core/src/test/java/org/opencb/cellbase/core/config/CellBaseConfigurationTest.java @@ -41,7 +41,7 @@ public void defaultOutdir() { @Test public void vertebrates() { - Assertions.assertEquals(9, cellBaseConfiguration.getSpecies().getVertebrates().size()); + Assertions.assertEquals(11, cellBaseConfiguration.getSpecies().getVertebrates().size()); } @Test diff --git a/cellbase-core/src/test/resources/configuration.yml b/cellbase-core/src/test/resources/configuration.yml index 64ce73d692..8edc5d2581 100644 --- a/cellbase-core/src/test/resources/configuration.yml +++ b/cellbase-core/src/test/resources/configuration.yml @@ -1,4 +1,5 @@ -version: ${CELLBASE.VERSION} + +version: "${CELLBASE.VERSION}" apiVersion: "${project.version}" wiki: https://github.com/opencb/cellbase/wiki maintenanceFlagFile: "/tmp/maintenance" @@ -8,38 +9,29 @@ logDir: "./logs" # where to output the logs # can be "console" or "file", defaults to console logOutput: "file" +# 
For testing secretKey: "xPacig89igHSieEnveJEi4KCfdEslhmssC3vui1JJQGgDQ0y8v" databases: mongodb: - host: "${CELLBASE.DB.MONGODB.HOST}" - user: "${CELLBASE.DB.USER}" - password: "${CELLBASE.DB.PASSWORD}" - shards: - - id: "${CELLBASE.DB.MONGODB.REPLICASET.0.NAME}" - nodes: "${CELLBASE.DB.MONGODB.REPLICASET.0}" - - id: "${CELLBASE.DB.MONGODB.REPLICASET.1.NAME}" - nodes: "${CELLBASE.DB.MONGODB.REPLICASET.1}" + host: "${JUNIT.CELLBASE.DB.MONGODB.HOST}" + user: "${JUNIT.CELLBASE.DB.USER}" + password: "${JUNIT.CELLBASE.DB.PASSWORD}" options: - authenticationDatabase: "${CELLBASE.DB.MONGODB.AUTHENTICATIONDATABASE}" - readPreference: "${CELLBASE.DB.MONGODB.READPREFERENCE}" - replicaSet: "${CELLBASE.DB.MONGODB.REPLICASET}" + authenticationDatabase: "${JUNIT.CELLBASE.DB.MONGODB.AUTHENTICATIONDATABASE}" + authenticationMechanism: "${JUNIT.CELLBASE.DB.MONGODB.AUTHENTICATION_MECHANISM}" + readPreference: "${JUNIT.CELLBASE.DB.MONGODB.READPREFERENCE}" + replicaSet: "${JUNIT.CELLBASE.DB.MONGODB.REPLICASET}" connectionsPerHost: 20 sslEnabled: false - enableSharding: true - neo4j: - hsapiens: - host: "${CELLBASE.DB.NEO4J.HOST}" - user: "${CELLBASE.DB.USER}" - password: "${CELLBASE.DB.PASSWORD}" - mmusculus: - host: "${CELLBASE.DB.NEO4J.HOST}" - user: "${CELLBASE.DB.USER}" - password: "${CELLBASE.DB.PASSWORD}" + # sslInvalidCertificatesAllowed: true + # sslInvalidHostnameAllowed: true + enableSharding: false server: rest: - port: 9090 + port: 9090 #"${JUNIT.CELLBASE.SERVER.REST.PORT}" defaultOutdir: "/tmp" download: + ## Genomic and Gene information ensembl: database: host: ensembldb.ensembl.org:3306 @@ -47,7 +39,27 @@ download: password: '' libs: "${CELLBASE.ENSEMBL.LIBS}" url: - host: ftp://ftp.ensembl.org/pub + host: https://ftp.ensembl.org/pub/ + files: + # New Homo sapiens assemblies contain too many ALT regions, so we download 'primary_assembly' file instead + PRIMARY_FA: 
"release-put_release_here/fasta/put_species_here/dna/put_capital_species_here.put_assembly_here.dna.primary_assembly.fa.gz" + GTF: "release-put_release_here/gtf/put_species_here/put_capital_species_here.put_assembly_here.put_release_here.gtf.gz" + PEP_FA: "release-put_release_here/fasta/put_species_here/pep/put_capital_species_here.put_assembly_here.pep.all.fa.gz" + CDNA_FA: "release-put_release_here/fasta/put_species_here/cdna/put_capital_species_here.put_assembly_here.cdna.all.fa.gz" + REGULATORY_BUILD: "release-put_release_here/regulation/put_species_here/put_species_here.put_assembly_here.Regulatory_Build.regulatory_features.20221007.gff.gz" + MOTIF_FEATURES: "release-put_release_here/regulation/put_species_here/MotifFeatures/put_species_here.put_assembly_here.motif_features.gff.gz" + MOTIF_FEATURES_INDEX: "release-put_release_here/regulation/put_species_here/MotifFeatures/put_species_here.put_assembly_here.motif_features.gff.gz.tbi" + # To be generated manually + DESCRIPTION: "manual@description.txt" + # To be generated manually + XREFS: "manual@xrefs.txt" + # To be downloaded manually + HAEM_ONC_TRANSCRIPTS: "manual@EGLH_HaemOnc_transcripts.txt" + # To be downloaded manually + TSO500: "manual@TSO500_transcripts.txt" + # To be downloaded manually + CANONICAL: "manual@ensembl_canonical.txt" + ensemblGenomes: database: host: mysql-eg-publicsql.ebi.ac.uk:4157 @@ -56,165 +68,334 @@ download: libs: "${CELLBASE.ENSEMBL.LIBS}" url: host: ftp://ftp.ensemblgenomes.org/pub + refSeq: + host: https://ftp.ncbi.nih.gov/refseq/ + version: "2023-10-11" + files: + GENOMIC_GTF: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz + GENOMIC_FNA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz + PROTEIN_FAA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_protein.faa.gz + RNA_FNA: H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz + MMUSCULUS_GENOMIC_GTF: 
M_musculus/annotation_releases/GCF_000001635.27-RS_2024_02/GCF_000001635.27_GRCm39_genomic.gtf.gz + MMUSCULUS_GENOMIC_FNA: M_musculus/annotation_releases/GCF_000001635.27-RS_2024_02/GCF_000001635.27_GRCm39_genomic.fna.gz + MMUSCULUS_PROTEIN_FAA: M_musculus/annotation_releases/GCF_000001635.27-RS_2024_02/GCF_000001635.27_GRCm39_protein.faa.gz + MMUSCULUS_RNA_FNA: M_musculus/annotation_releases/GCF_000001635.27-RS_2024_02/GCF_000001635.27_GRCm39_rna.fna.gz + RNORVEGICUS_GENOMIC_GTF: R_norvegicus/annotation_releases/GCF_036323735.1-RS_2024_02/GCF_036323735.1_GRCr8_genomic.gtf.gz + RNORVEGICUS_GENOMIC_FNA: R_norvegicus/annotation_releases/GCF_036323735.1-RS_2024_02/GCF_036323735.1_GRCr8_genomic.fna.gz + RNORVEGICUS_PROTEIN_FAA: R_norvegicus/annotation_releases/GCF_036323735.1-RS_2024_02/GCF_036323735.1_GRCr8_protein.faa.gz + RNORVEGICUS_RNA_FNA: R_norvegicus/annotation_releases/GCF_036323735.1-RS_2024_02/GCF_036323735.1_GRCr8_rna.fna.gz + BTAURUS_GENOMIC_GTF: B_taurus/annotation_releases/GCF_002263795.3-RS_2023_09/GCF_002263795.3_ARS-UCD2.0_genomic.gtf.gz + BTAURUS_GENOMIC_FNA: B_taurus/annotation_releases/GCF_002263795.3-RS_2023_09/GCF_002263795.3_ARS-UCD2.0_genomic.fna.gz + BTAURUS_PROTEIN_FAA: B_taurus/annotation_releases/GCF_002263795.3-RS_2023_09/GCF_002263795.3_ARS-UCD2.0_protein.faa.gz + BTAURUS_RNA_FNA: B_taurus/annotation_releases/GCF_002263795.3-RS_2023_09/GCF_002263795.3_ARS-UCD2.0_rna.fna.gz + maneSelect: + host: https://ftp.ncbi.nlm.nih.gov/refseq/ + version: "1.2" + files: + MANE_SELECT: MANE/MANE_human/release_1.2/MANE.GRCh38.v1.2.summary.txt.gz + lrg: + host: http://ftp.ebi.ac.uk/ + version: "2021-03-30" + files: + LRG: pub/databases/lrgex/list_LRGs_transcripts_xrefs.txt + hgnc: + host: https://ftp.ebi.ac.uk/ + version: "2024-04-01" + files: + HGNC: pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2024-04-01.txt + cancerHotspot: + host: https://www.cancerhotspots.org/ + version: "v2" + files: + CANCER_HOTSPOT: files/hotspots_v2.xls + 
dgidb: + host: https://old.dgidb.org/ + version: "2022-02-01" + files: + DGIDB: data/monthly_tsvs/2022-Feb/interactions.tsv geneUniprotXref: - host: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ + host: http://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/ + version: "2024-03-27" + files: + UNIPROT_XREF: HUMAN_9606_idmapping_selected.tab.gz + MMUSCULUS_UNIPROT_XREF: MOUSE_10090_idmapping_selected.tab.gz + RNORVEGICUS_UNIPROT_XREF: RAT_10116_idmapping_selected.tab.gz + DRERIO_UNIPROT_XREF: DANRE_7955_idmapping_selected.tab.gz + DMELOANOGASTER_UNIPROT_XREF: DROME_7227_idmapping_selected.tab.gz + SCEREVISIAE_UNIPROT_XREF: YEAST_559292_idmapping_selected.tab.gz + CELEGANS_UNIPROT_XREF: CAEEL_6239_idmapping_selected.tab.gz geneExpressionAtlas: - host: ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz + host: https://ftp.ebi.ac.uk/ + version: "2.0.14" + files: + GENE_EXPRESSION_ATLAS: pub/databases/microarray/data/gxa/allgenes_updown_in_organism_part_2.0.14.tab.gz + hpo: + ## NOTE: Download manually from here now + host: https://hpo.jax.org/app/data/annotations/ + version: "2024-04-26" + files: + HPO: "manual@phenotype_to_genes.txt" + gnomadConstraints: + host: https://storage.googleapis.com/ + version: "2.1.1" + files: + GNOMAD_CONSTRAINTS: gcp-public-data--gnomad/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz + goAnnotation: + host: http://geneontology.org/ + files: + GO_ANNOTATION: gene-associations/goa_human.gaf.gz + MMUSCULUS_GO_ANNOTATION: gene-associations/mgi.gaf.gz + cancerGeneCensus: + ## To be downloaded manually + host: https://cancer.sanger.ac.uk/census/ + version: "v99" + files: + CANCER_GENE_CENSUS: "manual@cancer-gene-census.tsv" + + ## Regulation mirbase: - host: ftp://mirbase.org/pub/mirbase/CURRENT/miRNA.xls.gz - mirbaseReadme: - host: ftp://mirbase.org/pub/mirbase/CURRENT/README + 
host: https://www.mirbase.org/ + version: "22.1" + files: + MIRBASE: download/miRNA.dat targetScan: host: http://hgdownload.cse.ucsc.edu/goldenPath/ miRTarBase: - host: http://mirtarbase.cuhk.edu.cn/cache/download/8.0/hsa_MTI.xlsx + host: https://mirtarbase.cuhk.edu.cn/ + version: "9.0" + files: + MIRTARBASE: ~miRTarBase/miRTarBase_2022/cache/download/9.0/hsa_MTI.xlsx + MMUSCULUS_MIRTARBASE: ~miRTarBase/miRTarBase_2022/cache/download/9.0/mmu_MTI.xlsx + RNORVEGICUS_MIRTARBASE: ~miRTarBase/miRTarBase_2022/cache/download/9.0/rno_MTI.xlsx + + ## Protein Data uniprot: - host: ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz - uniprotRelNotes: - host: ftp://ftp.uniprot.org/pub/databases/uniprot/relnotes.txt - intact: - host: ftp://ftp.ebi.ac.uk/pub/databases/intact/current/psimitab/intact.txt + host: https://ftp.uniprot.org/ + version: "2024-03-27" + files: + UNIPROT: pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz interpro: - host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/protein2ipr.dat.gz - interproRelNotes: - host: ftp://ftp.ebi.ac.uk/pub/databases/interpro/current/release_notes.txt - conservation: - host: ftp://hgdownload.cse.ucsc.edu/goldenPath/ + host: https://ftp.ebi.ac.uk/ + version: "2024-03-27" + files: + INTERPRO: pub/databases/interpro/current_release/protein2ipr.dat.gz + intact: + host: https://ftp.ebi.ac.uk/ + version: "2024-02-16" + files: + INTACT: pub/databases/intact/current/psimitab/intact.txt + + ## Conservation Scores + phastCons: + ## The CellBase downloader will change put_assembly_here by the assembly, e.g. 
hg38; and put_chromosome_here by the chromosomes: 1,2,..X,Y,M + host: https://hgdownload.cse.ucsc.edu/ + version: "2022-08-30" + files: + PHASTCONS: goldenPath/hg38/phastCons470way/hg38.470way.phastCons/ + MMUSCULUS_PHASTCONS: goldenPath/mm39/phastCons35way/mm39.35way.phastCons/ + phylop: + ## The CellBase downloader will change put_assembly_here by the assembly, e.g. hg38; and put_chromosome_here by the chromosomes: 1,2,..X,Y,M + host: https://hgdownload.cse.ucsc.edu/ + version: "2022-08-30" + files: + PHYLOP: goldenPath/hg38/phyloP470way/hg38.470way.phyloP/ + MMUSCULUS_PHYLOP: goldenPath/mm39/phyloP35way/mm39.35way.phyloP/ gerp: - host: ftp://ftp.ensembl.org/pub/current_compara/conservation_scores/103_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + host: http://ftp.ensembl.org/ + version: "2023-05-17" + files: + GERP: pub/release-111/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.homo_sapiens.GRCh38.bw + MMUSCULUS_GERP: pub/release-111/compara/conservation_scores/91_mammals.gerp_conservation_score/gerp_conservation_scores.mus_musculus.GRCm39.bw + + ## Clinical Variant clinvar: - host: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_2020-02.xml.gz - clinvarSummary: - host: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variant_summary.txt.gz - clinvarVariationAllele: - host: ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/tab_delimited/variation_allele.txt.gz - clinvarEfoTerms: - host: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv - iarctp53: - host: http://p53.iarc.fr/ajax/Zipper.ashx - docm: - host: http://docm.info/api/ - docmVersion: - host: http://docm.info + host: https://ftp.ncbi.nlm.nih.gov/ + version: "2024-02" + files: + FULL_RELEASE: pub/clinvar/xml/RCV_xml_old_format/ClinVarFullRelease_2024-02.xml.gz + SUMMARY: pub/clinvar/tab_delimited/variant_summary.txt.gz + ALLELE: pub/clinvar/tab_delimited/variation_allele.txt.gz + 
EFO_TERMS: ftp://ftp.ebi.ac.uk/pub/databases/eva/ClinVar/2015/ClinVar_Traits_EFO_Names_260615.csv + cosmic: + ## To be downloaded manually + host: https://cancer.sanger.ac.uk/cosmic/ + version: "v99" + files: + COSMIC: CosmicMutantExport.tsv.gz + hgmd: + ## To be downloaded manually + host: https://www.hgmd.cf.ac.uk/ + version: "2020-03" + files: + HGMD: hgmd.vcf + gwasCatalog: + ## Download file from https://www.ebi.ac.uk/gwas/docs/file-downloads to find the real version, which is 'e111_r2024-04-22' + host: https://ftp.ebi.ac.uk/ + version: "2024-04-22" + files: + GWAS: pub/databases/gwas/releases/2024/04/22/gwas-catalog-associations_ontology-annotated.tsv + DBSNP: All.vcf.gz + + dbSNP: + host: https://ftp.ncbi.nih.gov/snp/latest_release/VCF/GCF_000001405.40.gz + version: "156" + + pharmGKB: + host: https://api.pharmgkb.org/v1/download/file/data/ + version: v1 + files: + GENES: genes.zip + CHEMICALS: chemicals.zip + VARIANTS: variants.zip + GUIDELINE_ANNOTATIONS: guidelineAnnotations.json.zip + VARIANT_ANNOTATIONS: variantAnnotations.zip + CLINICAL_ANNOTATIONS: clinicalAnnotations.zip + CLINICAL_VARIANTS: clinicalVariants.zip + DRUG_LABELS: drugLabels.zip + RELATIONSHIPS: relationships.zip + dgv: host: http://dgv.tcag.ca/v106/docs simpleRepeats: - host: http://hgdownload.cse.ucsc.edu/goldenPath + host: http://hgdownload.cse.ucsc.edu/ + files: + SIMPLE_REPEATS: goldenPath/hg38/database/simpleRepeat.txt.gz + MMUSCULUS_SIMPLE_REPEATS: goldenPath/mm39/database/simpleRepeat.txt.gz windowMasker: - host: http://hgdownload.cse.ucsc.edu/goldenPath + host: http://hgdownload.cse.ucsc.edu/ + files: + WINDOW_MASKER: goldenPath/hg38/database/windowmaskerSdust.txt.gz + MMUSCULUS_WINDOW_MASKER: goldenPath/mm39/database/windowmaskerSdust.txt.gz genomicSuperDups: - host: http://hgdownload.cse.ucsc.edu/goldenPath - gwasCatalog: - host: ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/2016/09/28/gwas-catalog-associations.tsv - hpo: - host: 
http://compbio.charite.de/hudson/job/hpo.annotations/lastStableBuild/artifact/util/annotation/phenotype_to_genes.txt - disgenet: - host: https://www.disgenet.org/static/disgenet_ap1/files/downloads + host: http://hgdownload.cse.ucsc.edu/ files: - - all_gene_disease_associations.tsv.gz - - readme.txt - dgidb: - host: http://dgidb.org/data/interactions.tsv + GENOMIC_SUPER_DUPS: goldenPath/hg38/database/genomicSuperDups.txt.gz + + ## Variant Pathogenic Prediction + revel: + host: https://zenodo.org/ + version: "1.3" + files: + REVEL: record/7072866/files/revel-v1.3_all_chromosomes.zip cadd: - host: http://krishna.gs.washington.edu/download/CADD/v1.3/whole_genome_SNVs.tsv.gz - reactome: - host: http://www.reactome.org/download/current/biopax.zip - gnomadConstraints: - host: https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz - version: 2.1.1 + host: https://krishna.gs.washington.edu/ + version: "1.7" + files: + CADD: download/CADD/v1.7/GRCh38/whole_genome_SNVs.tsv.gz + + ## OBO Ontologies + ## The version is retrieved from the OBO file hpoObo: - host: http://purl.obolibrary.org/obo/hp.obo + host: http://purl.obolibrary.org/obo/ + files: + HPO: hp.obo goObo: - host: http://purl.obolibrary.org/obo/go/go-basic.obo + host: http://purl.obolibrary.org/obo/ + files: + GO: go/go-basic.obo doidObo: - host: http://purl.obolibrary.org/obo/doid.obo - goAnnotation: - host: http://geneontology.org/gene-associations/goa_human.gaf.gz - refSeq: - host: ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz - refSeqFasta: - host: ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.fna.gz - refSeqProteinFasta: - host: ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_protein.faa.gz - refSeqCdna: - host: 
ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_rna.fna.gz - revel: - host: https://rothsj06.u.hpc.mssm.edu/revel_grch38_all_chromosomes.csv.zip + host: http://purl.obolibrary.org/obo/ + files: + DOID: doid.obo + mondoObo: + host: http://purl.obolibrary.org/obo/ + files: + MONDO: mondo.obo + + ## Splice score + mmSplice: + host: http://kipoi.org/models/MMSplice/mtsplice/ + version: 2.0 + spliceAi: + host: https://basespace.illumina.com/s/otSPW8hnhaZR + version: 1.3.1 + + ## Others + pubmed: + host: https://ftp.ncbi.nlm.nih.gov/pubmed/baseline/ + version: 2024 + files: + PUBMED_REGEX: pubmed24n[1..1219..4].xml.gz + reactome: + host: http://www.reactome.org/download/current/biopax.zip + + species: vertebrates: - id: hsapiens scientificName: Homo sapiens assemblies: - # - ensemblVersion: '82_37' - # name: GRCh37 - - ensemblVersion: '99_38' + - ensemblVersion: '111_38' name: GRCh38 + # - ensemblVersion: '82_37' + # name: GRCh37 data: - - clinical_variants + - genome - conservation + - repeats - gene - - genome + - regulation + - protein + - clinical_variant - missense_variation_functional_score - ontology - - protein - - refseq - - regulation - - repeats - variation_functional_score - splice_score - shards: - - collection: "variation" - key: - - chromosome - - start - - end - rangeKey: "chromosome" - zones: - - name: "zone0" - shardRanges: - - minimum: "1" - maximum: "10" - - minimum: "2" - maximum: "20" - - minimum: "3" - maximum: "9" - - name: "zone1" - shardRanges: - - minimum: "10" - maximum: "2" - - minimum: "20" - maximum: "3" - - minimum: "9" - maximum: "Z" + - pharmacogenomics - id: mmusculus scientificName: Mus musculus assemblies: - - ensemblVersion: '82_38' - name: GRCm38 + - ensemblVersion: '111_39' + name: GRCm39 data: - genome - - genome_info + - conservation + - repeats - gene + - regulation + - protein - variation + - ontology + - id: rnorvegicus + scientificName: Rattus norvegicus + assemblies: + - 
ensemblVersion: '111_7.2' + name: mRatBN7.2 + data: + - genome + - gene - regulation - protein - - conservation + # - variation - id: drerio scientificName: Danio rerio assemblies: - - ensemblVersion: '82_10' - name: GRCz10 + - ensemblVersion: '111_11' + name: GRCz11 + data: + - genome + - gene + - regulation + - protein + # - variation + - id: btaurus + scientificName: Bos taurus + assemblies: + - ensemblVersion: '111_1.3' + name: ARS-UCD1.3 data: - genome - genome_info - gene + # - refseq + - regulation - variation - protein - - id: rnorvegicus - scientificName: Rattus norvegicus + - id: sscrofa + scientificName: Sus scrofa assemblies: - - ensemblVersion: '82_6' - name: Rnor_6.0 + - ensemblVersion: '111_11.1' + name: Sscrofa11.1 data: - genome - genome_info diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 9b4dd7fbe0..cff0e8f8e0 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -185,6 +185,11 @@ junit-platform-engine test + + org.apache.commons + commons-csv + 1.0 + diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java index 6330cb71a3..9ad5ac3953 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/EtlCommons.java @@ -16,9 +16,12 @@ package org.opencb.cellbase.lib; -import org.apache.commons.lang.StringUtils; -import org.apache.logging.log4j.Level; -import org.apache.logging.log4j.core.config.Configurator; +import org.apache.commons.lang3.StringUtils; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.DownloadProperties; +import org.opencb.cellbase.core.config.SpeciesConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.lib.download.DownloadFile; import org.opencb.commons.utils.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -26,148 
+29,521 @@ import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Files; import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; +import java.nio.file.Paths; +import java.util.*; +import java.util.stream.Collectors; /** * Created by fjlopez on 03/06/16. */ -public class EtlCommons { +public final class EtlCommons { - public static final String HOMO_SAPIENS_NAME ="Homo sapiens"; + // Commons + public static final String HOMO_SAPIENS = "Homo sapiens"; + public static final String HSAPIENS = "hsapiens"; + public static final String MUS_MUSCULUS = "Mus musculus"; + public static final String RATTUS_NORVEGICUS = "Rattus norvegicus"; + public static final String BOS_TAURUS = "Bos taurus"; + public static final String DANIO_RERIO = "Danio rerio"; + public static final String GRCH38_NAME = "GRCh38"; + public static final String GRCH37_NAME = "GRCh37"; + public static final String HG38_NAME = "hg38"; + public static final String HG19_NAME = "hg19"; + + public static final String MANUAL_PREFIX = "manual@"; + public static final String SCRIPT_PREFIX = "script:"; + + public static final String SUFFIX_VERSION_FILENAME = "Version.json"; + + public static final String XLSX_EXTENSION = ".xlsx"; + public static final String CSV_EXTENSION = ".csv"; + public static final String TBI_EXTENSION = ".tbi"; + public static final String FAI_EXTENSION = ".fai"; + public static final String GZ_EXTENSION = ".gz"; + public static final String TXT_GZ_EXTENSION = ".txt.gz"; + public static final String TAR_GZ_EXTENSION = ".tar.gz"; + public static final String JSON_GZ_EXTENSION = ".json.gz"; + + public static final String OK_MSG = "Ok."; + public static final String DONE_MSG = "Done."; + public static final String DATA_NOT_SUPPORTED_MSG = "Data '{}' not supported for species '{}'"; + + // Ensembl + public static final String ENSEMBL_DATA = "ensembl"; + public static final String 
PUT_RELEASE_HERE_MARK = "put_release_here"; + public static final String PUT_SPECIES_HERE_MARK = "put_species_here"; + public static final String PUT_CAPITAL_SPECIES_HERE_MARK = "put_capital_species_here"; + public static final String PUT_ASSEMBLY_HERE_MARK = "put_assembly_here"; + public static final String PUT_CHROMOSOME_HERE_MARK = "put_chromosome_here"; + // Must match the configuration file + public static final String ENSEMBL_PRIMARY_FA_FILE_ID = "PRIMARY_FA"; + public static final String ENSEMBL_GTF_FILE_ID = "GTF"; + public static final String ENSEMBL_PEP_FA_FILE_ID = "PEP_FA"; + public static final String ENSEMBL_CDNA_FA_FILE_ID = "CDNA_FA"; + public static final String ENSEMBL_REGULATORY_BUILD_FILE_ID = "REGULATORY_BUILD"; + public static final String ENSEMBL_MOTIF_FEATURES_FILE_ID = "MOTIF_FEATURES"; + public static final String ENSEMBL_MOTIF_FEATURES_INDEX_FILE_ID = "MOTIF_FEATURES_INDEX"; + public static final String ENSEMBL_DESCRIPTION_FILE_ID = "DESCRIPTION"; + public static final String ENSEMBL_XREFS_FILE_ID = "XREFS"; + public static final String ENSEMBL_CANONICAL_FILE_ID = "CANONICAL"; + public static final String GENOME_INFO_FILE_ID = "GENOME_INFO"; + public static final String VARIATION_FILE_ID = "VARIATION"; + public static final String STRUCTURAL_VARIATIONS_FILE_ID = "STRUCTURAL_VARIATIONS"; + + // Genome public static final String GENOME_DATA = "genome"; + public static final String GENOME_SEQUENCE_COLLECTION_NAME = "genome_sequence"; + public static final String GENOME_INFO_DATA = "genome_info"; + + // Gene public static final String GENE_DATA = "gene"; + public static final String GENE_ANNOTATION_DATA = "gene_annotation"; + public static final String GENE_DISEASE_ANNOTATION_DATA = "gene_disease_annotation"; + + // RefSeq public static final String REFSEQ_DATA = "refseq"; - public static final String GENE_DISEASE_ASSOCIATION_DATA = "gene_disease_association"; + // Must match the configuration file + public static final String 
REFSEQ_GENOMIC_GTF_FILE_ID = "GENOMIC_GTF"; + public static final String REFSEQ_GENOMIC_FNA_FILE_ID = "GENOMIC_FNA"; + public static final String REFSEQ_PROTEIN_FAA_FILE_ID = "PROTEIN_FAA"; + public static final String REFSEQ_RNA_FNA_FILE_ID = "RNA_FNA"; + + // Gene annotation + public static final String ENSEMBL_CANONICAL_DATA = "ensembl_canonical"; + public static final String GENE_EXTRA_INFO_DATA = "gene_extra_info"; + // - MANE Select + public static final String MANE_SELECT_DATA = "MANE Select"; + // Must match the configuration file + public static final String MANE_SELECT_FILE_ID = "MANE_SELECT"; + // - LRG + public static final String LRG_DATA = "lrg"; + // Must match the configuration file + public static final String LRG_FILE_ID = "LRG"; + // - HGNC + public static final String HGNC_DATA = "hgnc"; + // Must match the configuration file + public static final String HGNC_FILE_ID = "HGNC"; + // - Cancer HotSpot + public static final String CANCER_HOTSPOT_DATA = "cancer_hotspot"; + // Must match the configuration file + public static final String CANCER_HOTSPOT_FILE_ID = "CANCER_HOTSPOT"; + // - DGID (drug) + public static final String DGIDB_DATA = "dgidb"; + // Must match the configuration file + public static final String DGIDB_FILE_ID = "DGIDB"; + // - UniProt Xref + public static final String UNIPROT_XREF_DATA = "uniprot_xref"; + // Must match the configuration file + public static final String UNIPROT_XREF_FILE_ID = "UNIPROT_XREF"; + // - Gene Expression Atlas + public static final String GENE_EXPRESSION_ATLAS_DATA = "gene_expression_atlas"; + // Must match the configuration file + public static final String GENE_EXPRESSION_ATLAS_FILE_ID = "GENE_EXPRESSION_ATLAS"; + // - Gene Disease Annotation + public static final String GENE_DISEASE_ANNOTATION_NAME = "Gene Disease Annotation"; + // - HPO + public static final String HPO_DISEASE_DATA = "hpo_disease"; + // Must match the configuration file + public static final String HPO_FILE_ID = "HPO"; + // - 
DISGENET + public static final String DISGENET_DATA = "disgenet"; + // Must match the configuration file + public static final String DISGENET_FILE_ID = "DISGENET"; + // - gnomAD Constraints + public static final String GNOMAD_CONSTRAINTS_DATA = "gnomad_constraints"; + // Must match the configuration file + public static final String GNOMAD_CONSTRAINTS_FILE_ID = "GNOMAD_CONSTRAINTS"; + // - GO Annotation + public static final String GO_ANNOTATION_DATA = "go_annotation"; + // Must match the configuration file + public static final String GO_ANNOTATION_FILE_ID = "GO_ANNOTATION"; + // - Cancer Gene Census + public static final String CANCER_GENE_CENSUS_DATA = "cancer_gene_census"; + // Must match the configuration file + public static final String CANCER_GENE_CENSUS_FILE_ID = "CANCER_GENE_CENSUS"; + + // Variation public static final String VARIATION_DATA = "variation"; - public static final String VARIATION_FUNCTIONAL_SCORE_DATA = "variation_functional_score"; - public static final String MISSENSE_VARIATION_SCORE_DATA = "missense_variation_functional_score"; - public static final String REGULATION_DATA = "regulation"; - public static final String PROTEIN_DATA = "protein"; - public static final String CONSERVATION_DATA = "conservation"; - public static final String CLINICAL_VARIANTS_DATA = "clinical_variants"; - public static final String SPLICE_SCORE_DATA = "splice_score"; + public static final String DBSNP_DATA = "dbsnp"; + public static final String SNP_DATA = "snp"; + + // PGS (polygenic scores) + public static final String PGS_DATA = "polygenic_score"; + public static final String PGS_COMMON_COLLECTION = "common_polygenic_score"; + public static final String PGS_VARIANT_COLLECTION = "variant_polygenic_score"; + // PGS Catalog + public static final String PGS_CATALOG_DATA = "pgs_catalog"; + // Must match the configuration file + public static final String PGS_CATALOG_FILE_ID = "PGS_CATALOG"; + // Pharmacogenomics public static final String PHARMACOGENOMICS_DATA = 
"pharmacogenomics"; - public static final String PHARMGKB_NAME = "PharmGKB"; + // PharmGKB public static final String PHARMGKB_DATA = "pharmgkb"; - public static final String PHARMGKB_VERSION_FILENAME = "pharmgkbVersion.json"; - - public static final String CLINICAL_VARIANTS_FOLDER = "clinicalVariant"; - public static final String CLINVAR_VERSION = "2022.11"; - public static final String CLINVAR_DATE = "2022-11"; - public static final String CLINVAR_XML_FILE = "ClinVarFullRelease_2022-11.xml.gz"; - public static final String CLINVAR_EFO_FILE = "ClinVar_Traits_EFO_Names.csv"; - public static final String CLINVAR_SUMMARY_FILE = "variant_summary.txt.gz"; - public static final String CLINVAR_VARIATION_ALLELE_FILE = "variation_allele.txt.gz"; - public static final String IARCTP53_FILE = "IARC-TP53.zip"; - public static final String GWAS_FILE = "gwas_catalog.tsv"; - public static final String COSMIC_FILE = "CosmicMutantExport.tsv.gz"; - @Deprecated - public static final String DBSNP_FILE = "GCF_000001405.40.gz"; - public static final String DBSNP_NAME = "dbSNP"; - public static final String DBSNP_VERSION_FILENAME = DBSNP_NAME + "Version.json"; - public static final String SNP_COLLECTION_NAME = "snp"; + // Must match the configuration file + public static final String PHARMGKB_GENES_FILE_ID = "GENES"; + public static final String PHARMGKB_CHEMICALS_FILE_ID = "CHEMICALS"; + public static final String PHARMGKB_VARIANTS_FILE_ID = "VARIANTS"; + public static final String PHARMGKB_GUIDELINE_ANNOTATIONS_FILE_ID = "GUIDELINE_ANNOTATIONS"; + public static final String PHARMGKB_VARIANT_ANNOTATIONS_FILE_ID = "VARIANT_ANNOTATIONS"; + public static final String PHARMGKB_CLINICAL_ANNOTATIONS_FILE_ID = "CLINICAL_ANNOTATIONS"; + public static final String PHARMGKB_CLINICAL_VARIANTS_FILE_ID = "CLINICAL_VARIANTS"; + public static final String PHARMGKB_DRUG_LABELS_FILE_ID = "DRUG_LABELS"; + public static final String PHARMGKB_RELATIONSHIPS_FILE_ID = "RELATIONSHIPS"; - public static final 
String STRUCTURAL_VARIANTS_DATA = "svs"; - public static final String REPEATS_DATA = "repeats"; - public static final String OBO_DATA = "ontology"; - public static final String HPO_FILE = "hp.obo"; - public static final String GO_FILE = "go-basic.obo"; - public static final String DOID_FILE = "doid.obo"; - public static final String MONDO_FILE = "mondo.obo"; - public static final String PFM_DATA = "regulatory_pfm"; + // Missense variantion functional score + public static final String MISSENSE_VARIATION_SCORE_DATA = "missense_variation_functional_score"; + // Revel + public static final String REVEL_DATA = "revel"; + // Must match the configuration file + public static final String REVEL_FILE_ID = "REVEL"; - // Build specific data options - public static final String GENOME_INFO_DATA = "genome_info"; - public static final String DISGENET_DATA = "disgenet"; - public static final String HPO_DATA = "hpo"; - public static final String CADD_DATA = "cadd"; - public static final String PPI_DATA = "ppi"; - public static final String DRUG_DATA = "drug"; + // Clinical variants data + public static final String CLINICAL_VARIANT_DATA = "clinical_variants"; + public static final String CLINICAL_VARIANTS_BASENAME = "clinicalVariants"; + // ClinVar public static final String CLINVAR_DATA = "clinvar"; - public static final String DOCM_DATA = "docm"; + public static final String CLINVAR_CHUNKS_SUBDIRECTORY = "clinvar_chunks"; + // Must match the configuration file + public static final String CLINVAR_FULL_RELEASE_FILE_ID = "FULL_RELEASE"; + public static final String CLINVAR_SUMMARY_FILE_ID = "SUMMARY"; + public static final String CLINVAR_ALLELE_FILE_ID = "ALLELE"; + public static final String CLINVAR_EFO_TERMS_FILE_ID = "EFO_TERMS"; + // COSMIC public static final String COSMIC_DATA = "cosmic"; - public static final String GWAS_DATA = "gwas"; - public static final String IARCTP53_GERMLINE_FILE = "germlineMutationDataIARC TP53 Database, R20.txt"; - public static final String 
IARCTP53_GERMLINE_REFERENCES_FILE = "germlineMutationReferenceIARC TP53 Database, R20.txt"; - public static final String IARCTP53_SOMATIC_FILE = "somaticMutationDataIARC TP53 Database, R20.txt"; - public static final String IARCTP53_SOMATIC_REFERENCES_FILE = "somaticMutationReferenceIARC TP53 Database, R20.txt"; + // Must match the configuration file + public static final String COSMIC_FILE_ID = "COSMIC"; + // HGMD public static final String HGMD_DATA = "hgmd"; + // Must match the configuration file + public static final String HGMD_FILE_ID = "HGMD"; + // GWAS + public static final String GWAS_DATA = "gwas"; + // Must match the configuration file + public static final String GWAS_FILE_ID = "GWAS"; + public static final String GWAS_DBSNP_FILE_ID = "DBSNP"; - public static final String PUBMED_DATA = "pubmed"; + // Repeats + public static final String REPEATS_DATA = "repeats"; + // Simple repeats + public static final String TRF_DATA = "trf"; + // Must match the configuration file + public static final String SIMPLE_REPEATS_FILE_ID = "SIMPLE_REPEATS"; + // Genomic super duplications + public static final String GSD_DATA = "gsd"; + // Must match the configuration file + public static final String GENOMIC_SUPER_DUPS_FILE_ID = "GENOMIC_SUPER_DUPS"; + // Window masker + public static final String WM_DATA = "wm"; + // Must match the configuration file + public static final String WINDOW_MASKER_FILE_ID = "WINDOW_MASKER"; + + // Ontology + public static final String ONTOLOGY_DATA = "ontology"; + // HPO + public static final String HPO_OBO_DATA = "hpo"; + // Must match the configuration file + public static final String HPO_OBO_FILE_ID = "HPO"; + // GO + public static final String GO_OBO_DATA = "go"; + // Must match the configuration file + public static final String GO_OBO_FILE_ID = "GO"; + // DOID + public static final String DOID_OBO_DATA = "doid"; + // Must match the configuration file + public static final String DOID_OBO_FILE_ID = "DOID"; + // MONDO + public static 
final String MONDO_OBO_DATA = "mondo"; + // Must match the configuration file + public static final String MONDO_OBO_FILE_ID = "MONDO"; + + + public static final String PFM_DATA = "regulatory_pfm"; + + // Variation functional score + public static final String VARIATION_FUNCTIONAL_SCORE_DATA = "variation_functional_score"; + // CADD scores + public static final String CADD_DATA = "cadd"; + public static final String CADD_RAW_DATA = "cadd_raw"; + public static final String CADD_SCALED_DATA = "cadd_scaled"; + // Must match the configuration file + public static final String CADD_FILE_ID = "CADD"; + + // Regulation + public static final String REGULATION_DATA = "regulation"; + // Regulatory build and motif features (see Ensembl files: regulatory build and motif features files) + public static final String REGULATORY_BUILD_DATA = "regulatory_build"; + // Motif features (see Ensembl files) + public static final String MOTIF_FEATURES_DATA = "motif_features"; + // miRBase + public static final String MIRBASE_DATA = "mirbase"; + // Must match the configuration file + public static final String MIRBASE_FILE_ID = "MIRBASE"; + // miRTarBase + public static final String MIRTARBASE_DATA = "mirtarbase"; + // Must match the configuration file + public static final String MIRTARBASE_FILE_ID = "MIRTARBASE"; // Load specific data options public static final String PROTEIN_FUNCTIONAL_PREDICTION_DATA = "protein_functional_prediction"; - // Path and file names - public static final String GERP_SUBDIRECTORY = "gerp"; - public static final String MMSPLICE_SUBDIRECTORY = "mmsplice"; - public static final String MMSPLICE_VERSION_FILENAME = "mmspliceVersion.json"; - public static final String SPLICEAI_SUBDIRECTORY = "spliceai"; - public static final String SPLICEAI_VERSION_FILENAME = "spliceaiVersion.json"; + // Protein + public static final String PROTEIN_DATA = "protein"; + // UniProt + public static final String UNIPROT_DATA = "uniprot"; + public static final String 
UNIPROT_CHUNKS_SUBDIRECTORY = "uniprot_chunks"; + // Must match the configuration file + public static final String UNIPROT_FILE_ID = "UNIPROT"; + // InterPro + public static final String INTERPRO_DATA = "interpro"; + // Must match the configuration file + public static final String INTERPRO_FILE_ID = "INTERPRO"; + // IntAct + public static final String INTACT_DATA = "intact"; + // Must match the configuration file + public static final String INTACT_FILE_ID = "INTACT"; + + // Conservation scores + public static final String CONSERVATION_DATA = "conservation"; + // GERP + public static final String GERP_DATA = "gerp"; + // Must match the configuration file + public static final String GERP_FILE_ID = "GERP"; + // PHASTCONS + public static final String PHASTCONS_DATA = "phastCons"; + // Must match the configuration file + public static final String PHASTCONS_FILE_ID = "PHASTCONS"; + // PHYLOP + public static final String PHYLOP_DATA = "phylop"; + // Must match the configuration file + public static final String PHYLOP_FILE_ID = "PHYLOP"; - // binary bigwig file + // Splice scores + public static final String SPLICE_SCORE_DATA = "splice_score"; + // MMSplice + public static final String MMSPLICE_DATA = "mmsplice"; + // SpliceAI + public static final String SPLICEAI_DATA = "spliceai"; + + /** + * @deprecated (when refactoring downloaders, builders and loaders) + */ + @Deprecated public static final String GERP_FILE = "gerp_conservation_scores.homo_sapiens.GRCh38.bw"; - // bigwig file manually transformed to bedGraph file - public static final String GERP_PROCESSED_FILE = "gerp.bedGraph.gz"; //"gerp_conservation_scores.homo_sapiens.GRCh38.bedGraph.gz"; public static final String CLINICAL_VARIANTS_JSON_FILE = "clinical_variants.json.gz"; public static final String CLINICAL_VARIANTS_ANNOTATED_JSON_FILE = "clinical_variants.full.json.gz"; - public static final String DOCM_FILE = "docm.json.gz"; public static final String DOCM_NAME = "DOCM"; - public static final String 
STRUCTURAL_VARIANTS_FOLDER = "structuralVariants"; - public static final String DGV_FILE = "dgv.txt"; - public static final String DGV_VERSION_FILE = "dgvVersion.json"; - public static final String STRUCTURAL_VARIANTS_JSON = "structuralVariants"; - public static final String TRF_FILE = "simpleRepeat.txt.gz"; - public static final String TRF_VERSION_FILE = "simpleRepeat.json"; - public static final String GSD_FILE = "genomicSuperDups.txt.gz"; - public static final String GSD_VERSION_FILE = "genomicSuperDups.json"; - public static final String WM_FILE = "windowMasker.txt.gz"; - public static final String WM_VERSION_FILE = "windowMasker.json"; - public static final String REPEATS_FOLDER = "genome"; - public static final String REPEATS_JSON = "repeats"; - public static final String OBO_JSON = "ontology"; - public static final String HPO_VERSION_FILE = "hpoVersion.json"; - public static final String GO_VERSION_FILE = "goVersion.json"; - public static final String DO_VERSION_FILE = "doVersion.json"; + public static final String HPO_VERSION_FILE = "hpo" + SUFFIX_VERSION_FILENAME; + public static final String GO_VERSION_FILE = "go" + SUFFIX_VERSION_FILENAME; + public static final String DO_VERSION_FILE = "do" + SUFFIX_VERSION_FILENAME; + public static final String MONDO_VERSION_FILE = "mondo" + SUFFIX_VERSION_FILENAME; + public static final String HGMD_FILE = "hgmd.vcf"; - public static final String PUBMED_VERSION_FILENAME = "pubmedVersion.json"; - public static final String REGULATORY_FEATURES_FILE = "Regulatory_Build.regulatory_features.gff.gz"; - public static final String MOTIF_FEATURES_FILE = "motif_features.gff.gz"; + // PubMed + public static final String PUBMED_DATA = "pubmed"; + // Must match the configuration file + public static final String PUBMED_REGEX_FILE_ID = "PUBMED_REGEX"; + + // Utilities maps + private static Map dataNamesMap = new HashMap<>(); + private static Map dataCategoriesMap = new HashMap<>(); + private static Map dataVersionFilenamesMap = new 
HashMap<>(); - public static boolean runCommandLineProcess(File workingDirectory, String binPath, List args, String logFilePath) - throws IOException, InterruptedException { - // This small hack allow to configure the appropriate Logger level from the command line, this is done - // by setting the DEFAULT_LOG_LEVEL_KEY before the logger object is created. -// org.apache.log4j.Logger rootLogger = LogManager.getRootLogger(); -// ConsoleAppender stderr = (ConsoleAppender) rootLogger.getAppender("stdout"); -// stderr.setThreshold(Level.toLevel("debug")); + private static final Logger LOGGER = LoggerFactory.getLogger(EtlCommons.class); - Configurator.setRootLevel(Level.INFO); + static { - Logger logger = LoggerFactory.getLogger("EtlCommons"); + // Populate data names map + dataNamesMap.put(ENSEMBL_DATA, "Ensembl"); + dataNamesMap.put(REFSEQ_DATA, "RefSeq"); + dataNamesMap.put(GENOME_DATA, "Genome"); + dataNamesMap.put(GENOME_INFO_DATA, "Genome Info"); + dataNamesMap.put(GENE_DATA, "Gene"); + dataNamesMap.put(ENSEMBL_CANONICAL_DATA, "Ensembl canonical"); + dataNamesMap.put(GENE_EXTRA_INFO_DATA, "Gene extra info"); + dataNamesMap.put(GENE_ANNOTATION_DATA, "Gene Annotation"); + dataNamesMap.put(MANE_SELECT_DATA, "MANE Select"); + dataNamesMap.put(LRG_DATA, "LRG"); + dataNamesMap.put(HGNC_DATA, "HGNC Gene"); + dataNamesMap.put(CANCER_HOTSPOT_DATA, "Cancer HotSpot"); + dataNamesMap.put(DGIDB_DATA, "DGIdb"); + dataNamesMap.put(UNIPROT_XREF_DATA, "UniProt Xref"); + dataNamesMap.put(GENE_EXPRESSION_ATLAS_DATA, "Gene Expression Atlas"); + dataNamesMap.put(GENE_DISEASE_ANNOTATION_DATA, "Gene Disease Annotation"); + dataNamesMap.put(HPO_DISEASE_DATA, "HPO Disease"); + dataNamesMap.put(DISGENET_DATA, "DisGeNet"); + dataNamesMap.put(GNOMAD_CONSTRAINTS_DATA, "gnomAD Constraint"); + dataNamesMap.put(GO_ANNOTATION_DATA, "EBI Gene Ontology Annotation"); + dataNamesMap.put(CANCER_GENE_CENSUS_DATA, "Cancer Gene Census"); + dataNamesMap.put(PROTEIN_DATA, "Protein"); + 
dataNamesMap.put(UNIPROT_DATA, "UniProt"); + dataNamesMap.put(INTERPRO_DATA, "InterPro"); + dataNamesMap.put(INTACT_DATA, "IntAct"); + dataNamesMap.put(CONSERVATION_DATA, "Conservation"); + dataNamesMap.put(GERP_DATA, "GERP++"); + dataNamesMap.put(PHASTCONS_DATA, "PhastCons"); + dataNamesMap.put(PHYLOP_DATA, "PhyloP"); + dataNamesMap.put(REPEATS_DATA, "Repeats"); + dataNamesMap.put(TRF_DATA, "Tandem Repeats Finder"); + dataNamesMap.put(WM_DATA, "Window Masker"); + dataNamesMap.put(GSD_DATA, "Genomic Super Duplications"); + dataNamesMap.put(REGULATION_DATA, "Regulation"); + dataNamesMap.put(REGULATORY_BUILD_DATA, "Regulatory Build"); + dataNamesMap.put(MOTIF_FEATURES_DATA, "Motif Features"); + dataNamesMap.put(MIRBASE_DATA, "miRBase"); + dataNamesMap.put(MIRTARBASE_DATA, "miRTarBase"); + dataNamesMap.put(ONTOLOGY_DATA, "Ontology"); + dataNamesMap.put(HPO_OBO_DATA, "HPO"); + dataNamesMap.put(GO_OBO_DATA, "GO"); + dataNamesMap.put(DOID_OBO_DATA, "DOID"); + dataNamesMap.put(MONDO_OBO_DATA, "Mondo"); + dataNamesMap.put(PUBMED_DATA, "PubMed"); + dataNamesMap.put(PHARMACOGENOMICS_DATA, "Pharmacogenomics"); + dataNamesMap.put(PHARMGKB_DATA, "PharmGKB"); + dataNamesMap.put(VARIATION_FUNCTIONAL_SCORE_DATA, "Variant Functional Score"); + dataNamesMap.put(CADD_DATA, "CADD"); + dataNamesMap.put(MISSENSE_VARIATION_SCORE_DATA, "Missense Variation Score"); + dataNamesMap.put(REVEL_DATA, "Revel"); + dataNamesMap.put(CLINICAL_VARIANT_DATA, "Clinical Variant"); + dataNamesMap.put(CLINVAR_DATA, "ClinVar"); + dataNamesMap.put(COSMIC_DATA, "Cosmic"); + dataNamesMap.put(HGMD_DATA, "HGMD"); + dataNamesMap.put(GWAS_DATA, "GWAS Catalog"); + dataNamesMap.put(SPLICE_SCORE_DATA, "Splice Score"); + dataNamesMap.put(MMSPLICE_DATA, "MMSplice"); + dataNamesMap.put(SPLICEAI_DATA, "SpliceAI"); + dataNamesMap.put(VARIATION_DATA, "Variation"); + dataNamesMap.put(SNP_DATA, "SNP"); + dataNamesMap.put(DBSNP_DATA, "dbSNP"); + dataNamesMap.put(PGS_DATA, "Polygenic Score"); + 
dataNamesMap.put(PGS_CATALOG_DATA, "PGS Catalog"); - ProcessBuilder builder = getProcessBuilder(workingDirectory, binPath, args, logFilePath); + // Populate data categories map + dataCategoriesMap.put(ENSEMBL_DATA, "Gene"); + dataCategoriesMap.put(REFSEQ_DATA, "Gene"); + dataCategoriesMap.put(GENOME_DATA, dataNamesMap.get(ENSEMBL_DATA)); + dataCategoriesMap.put(MANE_SELECT_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(LRG_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(HGNC_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(CANCER_HOTSPOT_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(DGIDB_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(UNIPROT_XREF_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(GENE_EXPRESSION_ATLAS_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(HPO_DISEASE_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(DISGENET_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(GNOMAD_CONSTRAINTS_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(GO_ANNOTATION_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(CANCER_GENE_CENSUS_DATA, dataNamesMap.get(GENE_ANNOTATION_DATA)); + dataCategoriesMap.put(UNIPROT_DATA, dataNamesMap.get(PROTEIN_DATA)); + dataCategoriesMap.put(INTERPRO_DATA, dataNamesMap.get(PROTEIN_DATA)); + dataCategoriesMap.put(INTACT_DATA, dataNamesMap.get(PROTEIN_DATA)); + dataCategoriesMap.put(GERP_DATA, dataNamesMap.get(CONSERVATION_DATA)); + dataCategoriesMap.put(PHASTCONS_DATA, dataNamesMap.get(CONSERVATION_DATA)); + dataCategoriesMap.put(PHYLOP_DATA, dataNamesMap.get(CONSERVATION_DATA)); + dataCategoriesMap.put(TRF_DATA, dataNamesMap.get(REPEATS_DATA)); + dataCategoriesMap.put(WM_DATA, dataNamesMap.get(REPEATS_DATA)); + dataCategoriesMap.put(GSD_DATA, dataNamesMap.get(REPEATS_DATA)); + 
dataCategoriesMap.put(REGULATORY_BUILD_DATA, dataNamesMap.get(REGULATION_DATA)); + dataCategoriesMap.put(MOTIF_FEATURES_DATA, dataNamesMap.get(REGULATION_DATA)); + dataCategoriesMap.put(MIRBASE_DATA, dataNamesMap.get(REGULATION_DATA)); + dataCategoriesMap.put(MIRTARBASE_DATA, dataNamesMap.get(REGULATION_DATA)); + dataCategoriesMap.put(HPO_OBO_DATA, dataNamesMap.get(ONTOLOGY_DATA)); + dataCategoriesMap.put(GO_OBO_DATA, dataNamesMap.get(ONTOLOGY_DATA)); + dataCategoriesMap.put(DOID_OBO_DATA, dataNamesMap.get(ONTOLOGY_DATA)); + dataCategoriesMap.put(MONDO_OBO_DATA, dataNamesMap.get(ONTOLOGY_DATA)); + dataCategoriesMap.put(PUBMED_DATA, "Publication"); + dataCategoriesMap.put(PHARMGKB_DATA, dataNamesMap.get(PHARMACOGENOMICS_DATA)); + dataCategoriesMap.put(CADD_DATA, dataNamesMap.get(VARIATION_FUNCTIONAL_SCORE_DATA)); + dataCategoriesMap.put(REVEL_DATA, dataNamesMap.get(MISSENSE_VARIATION_SCORE_DATA)); + dataCategoriesMap.put(CLINVAR_DATA, dataNamesMap.get(CLINICAL_VARIANT_DATA)); + dataCategoriesMap.put(COSMIC_DATA, dataNamesMap.get(CLINICAL_VARIANT_DATA)); + dataCategoriesMap.put(HGMD_DATA, dataNamesMap.get(CLINICAL_VARIANT_DATA)); + dataCategoriesMap.put(GWAS_DATA, dataNamesMap.get(CLINICAL_VARIANT_DATA)); + dataCategoriesMap.put(MMSPLICE_DATA, dataNamesMap.get(SPLICE_SCORE_DATA)); + dataCategoriesMap.put(SPLICEAI_DATA, dataNamesMap.get(SPLICE_SCORE_DATA)); + dataCategoriesMap.put(VARIATION_DATA, dataNamesMap.get(VARIATION_DATA)); + dataCategoriesMap.put(SNP_DATA, dataNamesMap.get(VARIATION_DATA)); + dataCategoriesMap.put(DBSNP_DATA, dataNamesMap.get(VARIATION_DATA)); + dataCategoriesMap.put(PGS_CATALOG_DATA, dataNamesMap.get(PGS_DATA)); + + // Populate data version filenames Map + dataVersionFilenamesMap.put(ENSEMBL_DATA, "ensemblCore" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(REFSEQ_DATA, "refSeqCore" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(GENOME_DATA, "genome" + SUFFIX_VERSION_FILENAME); + 
dataVersionFilenamesMap.put(MANE_SELECT_DATA, "maneSelect" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(LRG_DATA, "lrg" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(HGNC_DATA, "hgnc" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(CANCER_HOTSPOT_DATA, "cancerHotSpot" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(DGIDB_DATA, "dgidb" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(UNIPROT_XREF_DATA, "uniProtXref" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(GENE_EXPRESSION_ATLAS_DATA, "geneExpressionAtlas" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(HPO_DISEASE_DATA, "hpoDisease" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(DISGENET_DATA, "disGeNet" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(GNOMAD_CONSTRAINTS_DATA, "gnomadConstraints" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(GO_ANNOTATION_DATA, "goAnnotation" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(CANCER_GENE_CENSUS_DATA, "cancerGeneCensus" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(UNIPROT_DATA, "uniProt" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(INTERPRO_DATA, "interPro" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(INTACT_DATA, "intAct" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(GERP_DATA, "gerp" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(PHASTCONS_DATA, "phastCons" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(PHYLOP_DATA, "phyloP" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(TRF_DATA, "simpleRepeat" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(WM_DATA, "windowMasker" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(GSD_DATA, "genomicSuperDups" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(REGULATORY_BUILD_DATA, "regulatoryBuild" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(MOTIF_FEATURES_DATA, "motifFeatures" + 
SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(MIRBASE_DATA, "mirBase" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(MIRTARBASE_DATA, "mirTarBase" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(HPO_OBO_DATA, "hpoObo" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(GO_OBO_DATA, "goObo" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(DOID_OBO_DATA, "doidObo" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(MONDO_OBO_DATA, "mondoObo" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(PUBMED_DATA, "pubMed" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(PHARMGKB_DATA, "pharmGkb" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(CADD_DATA, "cadd" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(REVEL_DATA, "revel" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(CLINVAR_DATA, "clinVar" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(COSMIC_DATA, "cosmic" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(HGMD_DATA, "hgmd" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(GWAS_DATA, "gwas" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(MMSPLICE_DATA, "mmSplice" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(SPLICEAI_DATA, "spliceAi" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(VARIATION_DATA, "variation" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(DBSNP_DATA, "dbSnp" + SUFFIX_VERSION_FILENAME); + dataVersionFilenamesMap.put(PGS_CATALOG_DATA, "pgsCatalog" + SUFFIX_VERSION_FILENAME); + } - logger.debug("Executing command: " + StringUtils.join(builder.command(), " ")); + private EtlCommons() { + throw new IllegalStateException("Utility class"); + } + + public static boolean runCommandLineProcess(File workingDirectory, String binPath, List args, Path logFile) + throws IOException, InterruptedException, CellBaseException { + + ProcessBuilder builder = getProcessBuilder(workingDirectory, binPath, args, 
logFile); + + LOGGER.info("Executing command: {}", StringUtils.join(builder.command(), " ")); Process process = builder.start(); process.waitFor(); // Check process output - boolean executedWithoutErrors = true; - int genomeInfoExitValue = process.exitValue(); - if (genomeInfoExitValue != 0) { - logger.warn("Error executing {}, error code: {}. More info in log file: {}", binPath, genomeInfoExitValue, logFilePath); - executedWithoutErrors = false; - } - return executedWithoutErrors; +// if (process.exitValue() != 0) { +// String msg = "Error executing command '" + binPath + "'; args = " + args + ", error code = " + process.exitValue() +// + ". More info in log file: " + logFilePath; +// logger.error(msg); +// throw new CellBaseException(msg); +// } + + return true; } - private static ProcessBuilder getProcessBuilder(File workingDirectory, String binPath, List args, String logFilePath) { + private static ProcessBuilder getProcessBuilder(File workingDirectory, String binPath, List args, Path logFile) { List commandArgs = new ArrayList<>(); commandArgs.add(binPath); commandArgs.addAll(args); @@ -178,13 +554,41 @@ private static ProcessBuilder getProcessBuilder(File workingDirectory, String bi builder.directory(workingDirectory); } builder.redirectErrorStream(true); - if (logFilePath != null) { - builder.redirectOutput(ProcessBuilder.Redirect.appendTo(new File(logFilePath))); + if (logFile != null) { + builder.redirectOutput(ProcessBuilder.Redirect.appendTo(logFile.toFile())); } return builder; } + public static Path getFastaPath(Path gzFastaPath) throws CellBaseException { + // Sanity check + if (!Files.exists(gzFastaPath)) { + throw new CellBaseException("Gzipped FASTA file " + gzFastaPath + " does not exist"); + } + + // Check FASTA and unzip if necessary + Path fastaPath = gzFastaPath.getParent().resolve(gzFastaPath.getFileName().toString().replace(GZ_EXTENSION, "")); + if (!fastaPath.toFile().exists()) { + // Gunzip + LOGGER.info("Gunzip file {}", gzFastaPath); + 
try { + List params = Arrays.asList("--keep", gzFastaPath.toString()); + EtlCommons.runCommandLineProcess(null, "gunzip", params, null); + } catch (IOException e) { + throw new CellBaseException("Error executing gunzip in FASTA file " + gzFastaPath, e); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("Error executing gunzip in FASTA file " + gzFastaPath, e); + } + } + if (!fastaPath.toFile().exists()) { + throw new CellBaseException("FASTA file " + fastaPath + " does not exist after executing gunzip"); + } + return fastaPath; + } + public static boolean isMissing(String string) { return !((string != null) && !string.isEmpty() && !string.replace(" ", "") @@ -209,7 +613,184 @@ public static Long countFileLines(Path filePath) throws IOException { } return nLines; } + } + + public static String getEnsemblUrl(DownloadProperties.EnsemblProperties props, String ensemblRelease, String fileId, String species, + String assembly, String chromosome) throws CellBaseException { + if (!props.getUrl().getFiles().containsKey(fileId)) { + throw new CellBaseException(getMissingFileIdMessage(fileId)); + } + String url = props.getUrl().getHost() + props.getUrl().getFiles().get(fileId); + // Change release, species, assembly, chromosome if necessary + if (StringUtils.isNotEmpty(ensemblRelease)) { + url = url.replace(PUT_RELEASE_HERE_MARK, ensemblRelease.split("-")[1]); + } + if (StringUtils.isNotEmpty(species)) { + url = url.replace(PUT_SPECIES_HERE_MARK, species); + url = url.replace(PUT_CAPITAL_SPECIES_HERE_MARK, Character.toUpperCase(species.charAt(0)) + species.substring(1)); + } + if (StringUtils.isNotEmpty(assembly)) { + url = url.replace(PUT_ASSEMBLY_HERE_MARK, assembly); + } + if (StringUtils.isNotEmpty(chromosome)) { + url = url.replace(PUT_CHROMOSOME_HERE_MARK, chromosome); + } + return url; + } + + public static String getUrl(DownloadProperties.URLProperties props, String fileId) throws 
CellBaseException { + return getUrl(props, fileId, null, null, null); + } + + public static String getUrl(DownloadProperties.URLProperties props, String fileId, String species, String assembly, String chromosome) + throws CellBaseException { + if (!props.getFiles().containsKey(fileId)) { + throw new CellBaseException(getMissingFileIdMessage(fileId)); + } + String url; + String filesValue = props.getFiles().get(fileId); + if (filesValue.startsWith("https://") || filesValue.startsWith("http://") || filesValue.startsWith("ftp://")) { + url = filesValue; + } else { + url = props.getHost() + filesValue; + } + if (StringUtils.isNotEmpty(species)) { + url = url.replace(PUT_SPECIES_HERE_MARK, species); + } + if (StringUtils.isNotEmpty(assembly)) { + url = url.replace(PUT_ASSEMBLY_HERE_MARK, assembly); + } + if (StringUtils.isNotEmpty(chromosome)) { + url = url.replace(PUT_CHROMOSOME_HERE_MARK, chromosome); + } + return url; + } + + public static String getFilename(String prefix, String chromosome) { + return prefix + "_" + chromosome; + } + + public static boolean isExecutableAvailable(String executable) throws IOException, InterruptedException { + ProcessBuilder processBuilder = new ProcessBuilder("which", executable); + Process process = processBuilder.start(); + + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + String line; + StringBuilder output = new StringBuilder(); + while ((line = reader.readLine()) != null) { + output.append(line).append("\n"); + } + } + + int exitCode = process.waitFor(); + + // if exitCode is 0 then the executable is installed at + output.toString().trim()), + // otherwise, it's not + return (exitCode == 0); + } + + public static String getFilenameFromProps(DownloadProperties.URLProperties props, String fileId) throws CellBaseException { + if (!props.getFiles().containsKey(fileId)) { + throw new CellBaseException(getMissingFileIdMessage(fileId)); + } + return 
getFilenameFromUrl(props.getFiles().get(fileId)); + } + + public static String getFilenameFromUrl(String url) { + return Paths.get(url).getFileName().toString(); + } + + public static void checkDirectory(Path path, String name) throws CellBaseException { + if (path == null) { + throw new CellBaseException(name + " directory is null"); + } + if (!Files.exists(path)) { + throw new CellBaseException(name + " directory " + path + " does not exist"); + } + if (!Files.isDirectory(path)) { + throw new CellBaseException(name + " directory " + path + " is not a directory"); + } + } + + private static String getMissingFileIdMessage(String fileId) { + return "File ID " + fileId + " is missing in the DownloadProperties.URLProperties within the CellBase configuration file"; + } + + public static String getDataName(String data) throws CellBaseException { + if (!dataNamesMap.containsKey(data)) { + throw new CellBaseException("Name not found for data '" + data + "'"); + } + return dataNamesMap.get(data); + } + + public static String getDataCategory(String data) throws CellBaseException { + if (!dataCategoriesMap.containsKey(data)) { + throw new CellBaseException("Category not found for data '" + data + "'"); + } + return dataCategoriesMap.get(data); + } + + public static String getDataVersionFilename(String data) throws CellBaseException { + if (!dataVersionFilenamesMap.containsKey(data)) { + throw new CellBaseException("Version filename not found for data '" + data + "'"); + } + return dataVersionFilenamesMap.get(data); + } + + public static List getUrls(List downloadFiles) { + return downloadFiles.stream().map(DownloadFile::getUrl).collect(Collectors.toList()); + } + + public static String getManualUrl(DownloadProperties.URLProperties props, String fileId) { + return getManualUrl(props.getHost(), props.getFiles().get(fileId)); + } + + public static String getManualUrl(String host, String file) { + if (file.startsWith(MANUAL_PREFIX)) { + return MANUAL_PREFIX + host + 
file.replace(MANUAL_PREFIX, ""); + } + return null; + } + + public static List getDataList(String data, CellBaseConfiguration configuration, SpeciesConfiguration speciesConfiguration) + throws CellBaseException { + switch (data) { + case REPEATS_DATA: { + return getRepeatsDataList(configuration, speciesConfiguration); + } + default: { + throw new CellBaseException("Unknown data " + data); + } + } + } + + private static List getRepeatsDataList(CellBaseConfiguration configuration, SpeciesConfiguration speciesConfiguration) { + List dataList = new ArrayList<>(); + String speciesId = speciesConfiguration.getId().toUpperCase(Locale.ROOT); + if (speciesId.equalsIgnoreCase(HSAPIENS)) { + return Arrays.asList(TRF_DATA, WM_DATA, GSD_DATA); + } + + if (isDataSupported(configuration.getDownload().getSimpleRepeats(), speciesId)) { + dataList.add(TRF_DATA); + } + if (isDataSupported(configuration.getDownload().getWindowMasker(), speciesId)) { + dataList.add(WM_DATA); + } + if (isDataSupported(configuration.getDownload().getGenomicSuperDups(), speciesId)) { + dataList.add(GSD_DATA); + } + return dataList; + } + + public static boolean isDataSupported(DownloadProperties.URLProperties props, String prefix) { + for (String key : props.getFiles().keySet()) { + if (key.startsWith(prefix)) { + return true; + } + } + return false; } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/AbstractBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/AbstractBuilder.java new file mode 100644 index 0000000000..550197c762 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/AbstractBuilder.java @@ -0,0 +1,212 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.builders; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.lang3.StringUtils; +import org.opencb.cellbase.core.config.DownloadProperties; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; +import org.opencb.cellbase.core.serializer.CellBaseSerializer; +import org.opencb.cellbase.lib.EtlCommons; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.opencb.cellbase.lib.EtlCommons.*; + + +public abstract class AbstractBuilder { + + protected CellBaseSerializer serializer; + protected ObjectReader dataSourceReader = new ObjectMapper().readerFor(DataSource.class); + + protected boolean checked; + + protected Logger logger; + + public static final String CHECKING_BEFORE_BUILDING_LOG_MESSAGE = "Checking files before building {} ..."; + public static final String CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE = "Checking {} done!"; + + public static final String BUILDING_LOG_MESSAGE = "Building {} data ..."; + public static final String BUILDING_DONE_LOG_MESSAGE = "Building done."; + + public static final String CATEGORY_BUILDING_LOG_MESSAGE = "Building {}/{} ..."; + public static final String 
CATEGORY_BUILDING_DONE_LOG_MESSAGE = "Building done."; + + public static final String PARSING_LOG_MESSAGE = "Parsing {} ..."; + public static final String PARSING_DONE_LOG_MESSAGE = "Parsing done."; + + public static final String SKIPPING_INDEX_DATA_LOG_MESSAGE = "Skipping index for data '{}': it is not supported for species '{}'."; + public static final String DATA_ALREADY_BUILT = "'{}' data has already been built."; + + protected AbstractBuilder(CellBaseSerializer serializer) { + logger = LoggerFactory.getLogger(this.getClass()); + + this.serializer = serializer; + this.checked = false; + } + + public abstract void parse() throws Exception; + + public void disconnect() { + if (serializer != null) { + try { + serializer.close(); + } catch (Exception e) { + logger.error("Error closing serializer. Stack trace: {}", e.getStackTrace()); + } + } + } + + protected static String getConfigurationFileIdPrefix(String scientificSpecies) { + String prefix = ""; + if (StringUtils.isNotEmpty(scientificSpecies) && !scientificSpecies.equals("Homo sapiens") && scientificSpecies.contains(" ")) { + char c = scientificSpecies.charAt(0); + prefix = (c + scientificSpecies.split(" ")[1] + "_").toUpperCase(); + } + return prefix; + } + + protected File checkFile(DownloadProperties.URLProperties props, String fileId, Path targetPath, String name) throws CellBaseException { + logger.info("Checking file {} (file ID {} in config.) ...", name, fileId); + String filename = Paths.get(props.getFiles().get(fileId)).getFileName().toString(); + if (filename.contains(MANUAL_PREFIX)) { + filename = filename.replace(MANUAL_PREFIX, ""); + } else if (filename.contains(SCRIPT_PREFIX)) { + filename = filename.split("@")[1]; + } + Path filePath = targetPath.resolve(filename); + if (!Files.exists(filePath)) { + if (filename.contains(PUT_CAPITAL_SPECIES_HERE_MARK)) { + // Check + filename = filename.replace(PUT_CAPITAL_SPECIES_HERE_MARK + "." + PUT_ASSEMBLY_HERE_MARK + "." 
+ PUT_RELEASE_HERE_MARK, "") + .replace(PUT_CAPITAL_SPECIES_HERE_MARK + "." + PUT_ASSEMBLY_HERE_MARK, ""); + boolean found = false; + for (File file : targetPath.toFile().listFiles()) { + if (file.getName().endsWith(filename)) { + filePath = file.toPath(); + found = true; + } + } + if (!found) { + throw new CellBaseException("Expected " + name + " file (configuration file ID = " + fileId + ") does not exist at " + + targetPath); + } + } else { + throw new CellBaseException("Expected " + name + " file: " + filename + " does not exist at " + targetPath); + } + } + logger.info("Ok."); + return filePath.toFile(); + } + + protected File checkFile(String data, DownloadProperties.URLProperties props, String fileId, Path targetPath) throws CellBaseException { + logger.info("Checking file {} (file ID {} in config.) ...", getDataName(data), fileId); + if (!props.getFiles().containsKey(fileId)) { + throw new CellBaseException("File ID " + fileId + " does not exist in the configuration file in the section '" + data + "'"); + } + if (!Files.exists(targetPath)) { + throw new CellBaseException("Folder does not exist " + targetPath); + } + + String filename = Paths.get(props.getFiles().get(fileId)).getFileName().toString(); + Path filePath = targetPath.resolve(filename); + if (!Files.exists(filePath)) { + throw new CellBaseException(getDataName(data) + " file " + filePath + " does not exist"); + } + logger.info("Ok."); + return filePath.toFile(); + } + + protected List checkFiles(String data, Path downloadPath, int expectedFiles) throws CellBaseException, IOException { + return checkFiles(getDataName(data), data, downloadPath, expectedFiles); + } + + protected List checkFiles(String label, String data, Path downloadPath, int expectedFiles) throws CellBaseException, IOException { + List files = checkFiles(dataSourceReader.readValue(downloadPath.resolve(getDataVersionFilename(data)).toFile()), + downloadPath, label); + if (files.size() != expectedFiles) { + throw new 
CellBaseException(expectedFiles + " " + label + " files are expected at " + downloadPath + ", but currently there" + + " are " + files.size() + " files"); + } + return files; + } + + protected List checkFiles(DataSource dataSource, Path targetPath, String name) throws CellBaseException { + logger.info("Checking {} folder and files ...", name); + if (!targetPath.toFile().exists()) { + throw new CellBaseException(name + " folder does not exist " + targetPath); + } + + List files = new ArrayList<>(); + + List filenames = dataSource.getUrls().stream().map(u -> Paths.get(u).getFileName().toString()).collect(Collectors.toList()); + for (String filename : filenames) { + File file = targetPath.resolve(filename).toFile(); + if (!file.exists()) { + throw new CellBaseException("File " + file + " does not exits"); + } else { + files.add(file); + } + } + logger.info("Ok."); + return files; + } + + protected Path getIndexFastaReferenceGenome(Path fastaPath) throws CellBaseException { + Path indexFastaPath = Paths.get(fastaPath + FAI_EXTENSION); + if (!Files.exists(indexFastaPath)) { + // Index FASTA file + logger.info("Indexing FASTA file {} ...", fastaPath); + String errorMsg = "Error executing 'samtools faidx' for FASTA file "; + try { + List params = Arrays.asList("faidx", fastaPath.toString()); + EtlCommons.runCommandLineProcess(null, "samtools", params, null); + } catch (IOException e) { + throw new CellBaseException(errorMsg + fastaPath, e); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException(errorMsg + fastaPath, e); + } + if (!Files.exists(indexFastaPath)) { + throw new CellBaseException("It could not index the FASTA file " + fastaPath + ". 
Please, try to do it manually!"); + } + } + return indexFastaPath; + } + + public static boolean existFiles(List paths) { + for (Path path : paths) { + if (!Files.exists(path)) { + return false; + } + } + return true; + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddAllAnnotationBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddAllAnnotationBuilder.java index b96985c399..7dd8b6a5bd 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddAllAnnotationBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddAllAnnotationBuilder.java @@ -29,7 +29,7 @@ * @since October 08, 2014 */ @Deprecated -public class CaddAllAnnotationBuilder extends CellBaseBuilder { +public class CaddAllAnnotationBuilder extends AbstractBuilder { private final Path caddFilePath; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java index f4c6c861fd..64e4dda059 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CaddScoreBuilder.java @@ -17,32 +17,33 @@ package org.opencb.cellbase.lib.builders; import org.opencb.biodata.models.core.GenomicScoreRegion; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; import org.opencb.commons.utils.FileUtils; -import org.slf4j.LoggerFactory; import java.io.BufferedReader; +import java.io.File; import java.nio.file.Path; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import static org.opencb.cellbase.lib.EtlCommons.*; + /** * Created by imedina on 06/11/15. 
*/ -public class CaddScoreBuilder extends CellBaseBuilder { +public class CaddScoreBuilder extends AbstractBuilder { - private Path caddFilePath; + private Path caddDownloadPath; private static final int CHUNK_SIZE = 1000; private static final int DECIMAL_RESOLUTION = 100; - public CaddScoreBuilder(Path caddFilePath, CellBaseSerializer serializer) { + public CaddScoreBuilder(Path caddDownloadPath, CellBaseSerializer serializer) { super(serializer); - this.caddFilePath = caddFilePath; - - logger = LoggerFactory.getLogger(CaddScoreBuilder.class); + this.caddDownloadPath = caddDownloadPath; } /* Example: @@ -57,14 +58,25 @@ public CaddScoreBuilder(Path caddFilePath, CellBaseSerializer serializer) { */ @Override public void parse() throws Exception { - FileUtils.checkPath(caddFilePath); + String dataName = getDataName(CADD_DATA); + String dataCategory = getDataCategory(CADD_DATA); + + logger.info(CATEGORY_BUILDING_LOG_MESSAGE, dataCategory, dataName); + + // Sanity check + checkDirectory(caddDownloadPath, dataName); + + // Check ontology files + List caddFiles = checkFiles(dataSourceReader.readValue(caddDownloadPath.resolve(getDataVersionFilename(CADD_DATA)).toFile()), + caddDownloadPath, dataName); + if (caddFiles.size() != 1) { + throw new CellBaseException("One " + dataName + " file is expected, but currently there are " + caddFiles.size() + " files"); + } - BufferedReader bufferedReader = FileUtils.newBufferedReader(caddFilePath); List rawValues = new ArrayList<>(CHUNK_SIZE); List scaledValues = new ArrayList<>(CHUNK_SIZE); int start = 1; -// int end = 1999; int end = CHUNK_SIZE - 1; String line; String[] fields = new String[0]; @@ -72,8 +84,8 @@ public void parse() throws Exception { int lineCount = 0; int counter = 1; int serializedChunks = 0; - int previousPosition = 0; - int newPosition = 0; + int prevPos = 0; + int newPos = 0; String chromosome = null; String[] nucleotides = new String[]{"A", "C", "G", "T"}; @@ -81,127 +93,102 @@ public void parse() throws 
Exception { long scaledLongValue = 0; Map rawScoreValuesMap = new HashMap<>(); Map scaledScoreValuesMap = new HashMap<>(); - while ((line = bufferedReader.readLine()) != null) { - if (!line.startsWith("#")) { - fields = line.split("\t"); - newPosition = Integer.parseInt(fields[1]); -// if (fields[0].equals("1") && fields[1].equals("249240621")) { -// if (fields[0].equals("1") && fields[1].equals("69100")) { -// if (fields[0].equals("1") && fields[1].equals("144854598")) { -// logger.debug("line {} reached", line); -// logger.debug("Associated chunk count {}", serializedChunks); -// logger.debug("start {}", start); -// logger.debug("end {}", end); -// logger.debug("chunk size {}", CHUNK_SIZE); -// } - // this only happens the first time, when we start reading the file - if (chromosome == null) { - logger.info("Parsing chr {} ", fields[0]); - chromosome = fields[0]; - - start = newPosition; - previousPosition = newPosition; - end = start + CHUNK_SIZE - 2; - } - if (!chromosome.equals(fields[0])) { - logger.info("Parsing chr {} ", fields[0]); - // both raw and scaled are serialized - GenomicScoreRegion genomicScoreRegion = - new GenomicScoreRegion<>(chromosome, start, previousPosition, "cadd_raw", rawValues); - serializer.serialize(genomicScoreRegion); - - genomicScoreRegion = new GenomicScoreRegion<>(chromosome, start, previousPosition, "cadd_scaled", scaledValues); - serializer.serialize(genomicScoreRegion); - - serializedChunks++; - chromosome = fields[0]; - start = newPosition; -// end = CHUNK_SIZE - 1; - end = start + CHUNK_SIZE - 2; - - counter = 0; - rawValues.clear(); - scaledValues.clear(); -// rawLongValue = 0; -// lineCount = 0; -// rawScoreValuesMap.clear(); -// scaledScoreValuesMap.clear(); - // The series of cadd scores is not continuous through the whole chromosome - } else if (end < newPosition || (newPosition - previousPosition) > 1) { - // both raw and scaled are serialized - GenomicScoreRegion genomicScoreRegion - = new 
GenomicScoreRegion<>(fields[0], start, previousPosition, "cadd_raw", rawValues); - serializer.serialize(genomicScoreRegion); - - genomicScoreRegion - = new GenomicScoreRegion<>(fields[0], start, previousPosition, "cadd_scaled", scaledValues); - serializer.serialize(genomicScoreRegion); - - serializedChunks++; - start = newPosition; -// start = end + 1; -// end += CHUNK_SIZE; - end = (start / CHUNK_SIZE) * CHUNK_SIZE + CHUNK_SIZE - 1; - - counter = 0; - rawValues.clear(); - scaledValues.clear(); - } + logger.info(PARSING_LOG_MESSAGE, caddFiles.get(0)); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(caddFiles.get(0).toPath())) { + while ((line = bufferedReader.readLine()) != null) { + if (!line.startsWith("#")) { + fields = line.split("\t"); + newPos = Integer.parseInt(fields[1]); + String message = "chrom. " + fields[0]; + // This only happens the first time, when we start reading the file + if (chromosome == null) { + logger.info(PARSING_LOG_MESSAGE, message); + chromosome = fields[0]; + + start = newPos; + prevPos = newPos; + end = start + CHUNK_SIZE - 2; + } - rawScoreValuesMap.put(fields[3], Float.valueOf(fields[4])); - scaledScoreValuesMap.put(fields[3], Float.valueOf(fields[5])); - - if (++lineCount == 3) { -// if (fields[0].equals("1") && fields[1].equals("249240621")) { -// if (fields[0].equals("1") && fields[1].equals("69100")) { -// if (fields[0].equals("1") && fields[1].equals("144854598")) { -// logger.info("offset: {}", rawValues.size()); -// } - - for (String nucleotide : nucleotides) { - // raw CADD score values can be negative, we add 10 to make positive - float a = rawScoreValuesMap.getOrDefault(nucleotide, 10f) + 10.0f; - v = (short) (a * DECIMAL_RESOLUTION); - rawLongValue = (rawLongValue << 16) | v; - - // scaled CADD scores are always positive - a = scaledScoreValuesMap.getOrDefault(nucleotide, 0f); - v = (short) (a * DECIMAL_RESOLUTION); - scaledLongValue = (scaledLongValue << 16) | v; + if (!chromosome.equals(fields[0])) { 
+ logger.info(PARSING_LOG_MESSAGE, message); + + // Both raw and scaled are serialized + GenomicScoreRegion genomicScoreRegion = new GenomicScoreRegion<>(chromosome, start, prevPos, CADD_RAW_DATA, + rawValues); + serializer.serialize(genomicScoreRegion); + + genomicScoreRegion = new GenomicScoreRegion<>(chromosome, start, prevPos, CADD_SCALED_DATA, scaledValues); + serializer.serialize(genomicScoreRegion); + + serializedChunks++; + chromosome = fields[0]; + start = newPos; + end = start + CHUNK_SIZE - 2; + + counter = 0; + rawValues.clear(); + scaledValues.clear(); + // The series of cadd scores is not continuous through the whole chromosome + } else if (end < newPos || (newPos - prevPos) > 1) { + // Both raw and scaled are serialized + GenomicScoreRegion genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, prevPos, CADD_RAW_DATA, + rawValues); + serializer.serialize(genomicScoreRegion); + + genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, prevPos, CADD_SCALED_DATA, scaledValues); + serializer.serialize(genomicScoreRegion); + + serializedChunks++; + start = newPos; + end = (start / CHUNK_SIZE) * CHUNK_SIZE + CHUNK_SIZE - 1; + + counter = 0; + rawValues.clear(); + scaledValues.clear(); } -// if (rawLongValue < 0 || scaledLongValue < 0) { -// logger.error("raw/scaled Long Values cannot be 0"); -// logger.error("Last read line {}", line); -// System.exit(1); -// } - rawValues.add(rawLongValue); - scaledValues.add(scaledLongValue); - - counter++; - rawLongValue = 0; - lineCount = 0; - rawScoreValuesMap.clear(); - scaledScoreValuesMap.clear(); + rawScoreValuesMap.put(fields[3], Float.valueOf(fields[4])); + scaledScoreValuesMap.put(fields[3], Float.valueOf(fields[5])); + + if (++lineCount == 3) { + for (String nucleotide : nucleotides) { + // Raw CADD score values can be negative, we add 10 to make positive + float a = rawScoreValuesMap.getOrDefault(nucleotide, 10f) + 10.0f; + v = (short) (a * DECIMAL_RESOLUTION); + rawLongValue = (rawLongValue 
<< 16) | v; + + // Scaled CADD scores are always positive + a = scaledScoreValuesMap.getOrDefault(nucleotide, 0f); + v = (short) (a * DECIMAL_RESOLUTION); + scaledLongValue = (scaledLongValue << 16) | v; + } + + rawValues.add(rawLongValue); + scaledValues.add(scaledLongValue); + + counter++; + rawLongValue = 0; + lineCount = 0; + rawScoreValuesMap.clear(); + scaledScoreValuesMap.clear(); + } + prevPos = newPos; } - previousPosition = newPosition; } - } - // Last chunks can be incomplete for both raw and scaled are serialized -// GenomicScoreRegion genomicScoreRegion = -// new GenomicScoreRegion<>(fields[0], start, start + rawValues.size() - 1, "cadd_raw", rawValues); - GenomicScoreRegion genomicScoreRegion = - new GenomicScoreRegion<>(fields[0], start, newPosition, "cadd_raw", rawValues); - serializer.serialize(genomicScoreRegion); + // Last chunks can be incomplete for both raw and scaled are serialized + GenomicScoreRegion genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, newPos, CADD_RAW_DATA, rawValues); + serializer.serialize(genomicScoreRegion); + + genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, newPos, CADD_SCALED_DATA, scaledValues); + serializer.serialize(genomicScoreRegion); -// genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, start + scaledValues.size() - 1, "cadd_scaled", scaledValues); - genomicScoreRegion = new GenomicScoreRegion<>(fields[0], start, newPosition, "cadd_scaled", scaledValues); - serializer.serialize(genomicScoreRegion); + serializer.close(); + } + logger.info(PARSING_DONE_LOG_MESSAGE, caddFiles.get(0)); - serializer.close(); - bufferedReader.close(); - logger.info("Parsing finished."); + logger.info(CATEGORY_BUILDING_DONE_LOG_MESSAGE, dataCategory, dataName); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CellBaseBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CellBaseBuilder.java deleted file mode 100644 index 79e5b7e58b..0000000000 
--- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/CellBaseBuilder.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2015-2020 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.cellbase.lib.builders; - -import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Created by imedina on 30/08/14. - */ -public abstract class CellBaseBuilder { - - protected CellBaseSerializer serializer; - - protected Logger logger; - - public CellBaseBuilder(CellBaseSerializer serializer) { - logger = LoggerFactory.getLogger(this.getClass()); - - this.serializer = serializer; - //this.serializer.open(); - } - - public abstract void parse() throws Exception; - - public void disconnect() { - try { - serializer.close(); - } catch (Exception e) { - logger.error("Disconnecting serializer: " + e.getMessage()); - } - } - -} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ConservationBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ConservationBuilder.java index 9247b78faa..aadcdb6caf 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ConservationBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ConservationBuilder.java @@ -18,23 +18,24 @@ import org.opencb.biodata.models.core.GenomicScoreRegion; import org.opencb.cellbase.core.exception.CellBaseException; +import 
org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; import org.opencb.cellbase.lib.EtlCommons; import org.opencb.cellbase.lib.MongoDBCollectionConfiguration; import org.opencb.commons.utils.FileUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.BufferedReader; +import java.io.File; import java.io.IOException; -import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.*; -public class ConservationBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.*; + +public class ConservationBuilder extends AbstractBuilder { - private Logger logger; private Path conservedRegionPath; private int chunkSize; @@ -50,326 +51,289 @@ public ConservationBuilder(Path conservedRegionPath, int chunkSize, CellBaseFile fileSerializer = serializer; this.conservedRegionPath = conservedRegionPath; this.chunkSize = chunkSize; - logger = LoggerFactory.getLogger(ConservationBuilder.class); outputFileNames = new HashMap<>(); } @Override public void parse() throws IOException, CellBaseException { - System.out.println("conservedRegionPath = " + conservedRegionPath.toString()); if (conservedRegionPath == null || !Files.exists(conservedRegionPath) || !Files.isDirectory(conservedRegionPath)) { - throw new IOException("Conservation directory does not exist, is not a directory or cannot be read"); + throw new IOException("Conservation directory " + conservedRegionPath + " does not exist or it is not a directory or it cannot" + + " be read"); } - /* - * GERP is downloaded from Ensembl as a bigwig file. The library we have doesn't seem to parse - * this file correctly, so we transform the file into a bedGraph format which is human readable. 
- */ - Path gerpFolderPath = conservedRegionPath.resolve(EtlCommons.GERP_SUBDIRECTORY); - if (gerpFolderPath.toFile().exists()) { - logger.debug("Parsing GERP data ..."); - gerpParser(gerpFolderPath); - } else { - logger.debug("GERP data not found: " + gerpFolderPath.toString()); + // Check GERP folder and files + Path gerpPath = conservedRegionPath.resolve(GERP_DATA); + DataSource dataSource = dataSourceReader.readValue(gerpPath.resolve(getDataVersionFilename(GERP_DATA)).toFile()); + List gerpFiles = checkFiles(dataSource, gerpPath, getDataName(GERP_DATA)); + + // Check PhastCons folder and files + Path phastConsPath = conservedRegionPath.resolve(PHASTCONS_DATA); + dataSource = dataSourceReader.readValue(phastConsPath.resolve(getDataVersionFilename(PHASTCONS_DATA)).toFile()); + List phastConsFiles = checkFiles(dataSource, phastConsPath, getDataName(PHASTCONS_DATA)); + + // Check PhyloP folder and files + Path phylopPath = conservedRegionPath.resolve(PHYLOP_DATA); + dataSource = dataSourceReader.readValue(phylopPath.resolve(getDataVersionFilename(PHYLOP_DATA)).toFile()); + List phylopFiles = checkFiles(dataSource, phylopPath, getDataName(PHYLOP_DATA)); + + // GERP is downloaded from Ensembl as a bigwig file. The library we have doesn't seem to parse + // this file correctly, so we transform the file into a bedGraph format which is human-readable. 
+ if (gerpFiles.size() != 1) { + throw new CellBaseException("Only one " + getDataName(GERP_DATA) + " file is expected, but currently there are " + + gerpFiles.size() + " files"); + } + File bigwigFile = gerpFiles.get(0); + File bedgraphFile = Paths.get(gerpFiles.get(0).getAbsolutePath() + ".bedgraph").toFile(); + String exec = "bigWigToBedGraph"; + if (!bedgraphFile.exists()) { + try { + if (isExecutableAvailable(exec)) { + EtlCommons.runCommandLineProcess(null, exec, Arrays.asList(bigwigFile.toString(), bedgraphFile.toString()), null); + } else { + throw new CellBaseException(exec + " not found in your system, install it to build " + getDataName(GERP_DATA) + + ". It is available at http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/"); + } + } catch (IOException e) { + throw new CellBaseException("Error executing " + exec + " in BIGWIG file " + bigwigFile, e); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("" + e.getMessage(), e); + } + if (!bedgraphFile.exists()) { + throw new CellBaseException("Something happened when executing " + exec + " in BIGWIG file " + bigwigFile + "; the BED" + + " graph file was not generated. Please, check " + exec); + } } + gerpParser(bedgraphFile.toPath()); - /* - * UCSC phastCons and phylop are stored in the same format. They are processed together. - */ + // UCSC phastCons and phylop are stored in the same format. They are processed together. 
Map files = new HashMap<>(); String chromosome; Set chromosomes = new HashSet<>(); - // Reading all files in phastCons folder - DirectoryStream directoryStream = Files.newDirectoryStream(conservedRegionPath.resolve("phastCons"), "*.wigFix.gz"); - for (Path path : directoryStream) { - chromosome = path.getFileName().toString().split("\\.")[0].replace("chr", ""); + // Process PhastCons filenames + for (File file : phastConsFiles) { + chromosome = file.getName().split("\\.")[0].replace("chr", ""); chromosomes.add(chromosome); - files.put(chromosome + "phastCons", path); + files.put(chromosome + PHASTCONS_DATA, file.toPath()); } - // Reading all files in phylop folder - directoryStream = Files.newDirectoryStream(conservedRegionPath.resolve("phylop"), "*.wigFix.gz"); - for (Path path : directoryStream) { - chromosome = path.getFileName().toString().split("\\.")[0].replace("chr", ""); + // Process PhyloP filenames + for (File file : phylopFiles) { + chromosome = file.getName().split("\\.")[0].replace("chr", ""); chromosomes.add(chromosome); - files.put(chromosome + "phylop", path); + files.put(chromosome + PHYLOP_DATA, file.toPath()); } - /* - * Now we can iterate over all the chromosomes found and process the files - */ - logger.debug("Chromosomes found '{}'", chromosomes.toString()); + // Now we can iterate over all the chromosomes found and process the files + logger.debug("Chromosomes found '{}'", chromosomes); for (String chr : chromosomes) { - logger.debug("Processing chromosome '{}', file '{}'", chr, files.get(chr + "phastCons")); - processWigFixFile(files.get(chr + "phastCons"), "phastCons"); + logger.debug("Processing chromosome '{}', file '{}'", chr, files.get(chr + PHASTCONS_DATA)); + processWigFixFile(files.get(chr + PHASTCONS_DATA), PHASTCONS_DATA); - logger.debug("Processing chromosome '{}', file '{}'", chr, files.get(chr + "phylop")); - processWigFixFile(files.get(chr + "phylop"), "phylop"); + logger.debug("Processing chromosome '{}', file '{}'", chr, 
files.get(chr + PHYLOP_DATA)); + processWigFixFile(files.get(chr + PHYLOP_DATA), PHYLOP_DATA); } } - private void gerpParser(Path gerpFolderPath) throws IOException, CellBaseException { - Path gerpProcessFilePath = gerpFolderPath.resolve(EtlCommons.GERP_PROCESSED_FILE); - logger.info("parsing {}", gerpProcessFilePath); - BufferedReader bufferedReader = FileUtils.newBufferedReader(gerpProcessFilePath); - - String line; - int startOfBatch = 0; - int previousEndValue = 0; - String chromosome = null; - String previousChromosomeValue = null; - - List conservationScores = new ArrayList<>(chunkSize); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - - // file is wrong. throw an exception instead? - if (fields.length != 4) { - logger.error("skipping invalid line: " + line.length()); - continue; - } + private void gerpParser(Path gerpProcessFilePath) throws IOException, CellBaseException { + logger.info(PARSING_LOG_MESSAGE, gerpProcessFilePath); - chromosome = fields[0]; + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(gerpProcessFilePath)) { + String line; + int startOfBatch = 0; + int previousEndValue = 0; + String chromosome = null; + String previousChromosomeValue = null; - // new chromosome, store batch - if (previousChromosomeValue != null && !previousChromosomeValue.equals(chromosome)) { - storeScores(startOfBatch, previousChromosomeValue, conservationScores); + List conservationScores = new ArrayList<>(chunkSize); + while ((line = bufferedReader.readLine()) != null) { + String[] fields = line.split("\t"); - // reset values for current batch - startOfBatch = 0; - } + // Checking line + if (fields.length != 4) { + throw new CellBaseException("Invalid " + getDataName(GERP_DATA) + " line (expecting 4 columns): " + fields.length + + " items: " + line); + } - // reset chromosome for next entry - previousChromosomeValue = chromosome; + chromosome = fields[0]; - // file is american! 
starts at zero, add one - int start = Integer.parseInt(fields[1]) + 1; - // inclusive - int end = Integer.parseInt(fields[2]) + 1; + // New chromosome, store batch + if (previousChromosomeValue != null && !previousChromosomeValue.equals(chromosome)) { + storeScores(startOfBatch, previousChromosomeValue, conservationScores); - // start coordinate for this batch of 2,000 - if (startOfBatch == 0) { - startOfBatch = start; - previousEndValue = 0; - } + // Reset values for current batch + startOfBatch = 0; + } - // if there is a gap between the last entry and this one. - if (previousEndValue != 0 && (start - previousEndValue) != 0) { - // gap is too big! store what we already have before processing more - if (start - previousEndValue >= chunkSize) { - // we have a full batch, store - storeScores(startOfBatch, chromosome, conservationScores); + // Reset chromosome for next entry + previousChromosomeValue = chromosome; + + // File is american! starts at zero, add one + int start = Integer.parseInt(fields[1]) + 1; + // Inclusive + int end = Integer.parseInt(fields[2]) + 1; - // reset batch to start at this record + // Start coordinate for this batch of 2,000 + if (startOfBatch == 0) { startOfBatch = start; - } else { - // fill in the gap with zeroes - // don't overfill the batch - while (previousEndValue < start && conservationScores.size() < chunkSize) { - conservationScores.add((float) 0); - previousEndValue++; + previousEndValue = 0; + } + + // If there is a gap between the last entry and this one + if (previousEndValue != 0 && (start - previousEndValue) != 0) { + // Gap is too big!
store what we already have before processing more + if (start - previousEndValue >= chunkSize) { + // We have a full batch, store + storeScores(startOfBatch, chromosome, conservationScores); + + // Reset batch to start at this record + startOfBatch = start; + } else { + // Fill in the gap with zeroes, don't overfill the batch + while (previousEndValue < start && conservationScores.size() < chunkSize) { + conservationScores.add((float) 0); + previousEndValue++; + } + + // We have a full batch, store + if (conservationScores.size() == chunkSize) { + storeScores(startOfBatch, chromosome, conservationScores); + + // Reset: start a new batch + startOfBatch = start; + } } + } + + // Reset value + previousEndValue = end; + + // Score for these coordinates + String score = fields[3]; - // we have a full batch, store + // Add the score for each coordinate included in the range start-end + while (start < end) { + // We have a full batch: store if (conservationScores.size() == chunkSize) { storeScores(startOfBatch, chromosome, conservationScores); - // reset. start a new batch + // Reset: start a new batch startOfBatch = start; } - } - } - // reset value - previousEndValue = end; + // Add score to batch + conservationScores.add(Float.valueOf(score)); - // score for these coordinates - String score = fields[3]; + // Increment coordinate + start++; + } - // add the score for each coordinate included in the range start-end - while (start < end) { - // we have a full batch, store + // We have a full batch: store if (conservationScores.size() == chunkSize) { storeScores(startOfBatch, chromosome, conservationScores); - // reset. 
start a new batch - startOfBatch = start; + // Reset: start a new batch + startOfBatch = 0; } - - // add score to batch - conservationScores.add(Float.valueOf(score)); - - // increment coordinate - start++; } - - // we have a full batch, store - if (conservationScores.size() == chunkSize) { + // We need to serialize the last chunk that might be incomplete + if (!conservationScores.isEmpty()) { storeScores(startOfBatch, chromosome, conservationScores); - - // reset, start a new batch - startOfBatch = 0; } } - // we need to serialize the last chunk that might be incomplete - if (!conservationScores.isEmpty()) { - storeScores(startOfBatch, chromosome, conservationScores); - } - bufferedReader.close(); + + logger.info(PARSING_DONE_LOG_MESSAGE, gerpProcessFilePath); } private void storeScores(int startOfBatch, String chromosome, List conservationScores) throws CellBaseException { - // if this is a small batch, fill in the missing coordinates with 0 + // If this is a small batch, fill in the missing coordinates with 0 while (conservationScores.size() < chunkSize) { conservationScores.add((float) 0); } if (conservationScores.size() != chunkSize) { - throw new CellBaseException("invalid chunk size " + conservationScores.size() + " for " + chromosome + ":" + startOfBatch); + throw new CellBaseException("Invalid chunk size " + conservationScores.size() + " for " + chromosome + ":" + startOfBatch); } - GenomicScoreRegion conservationScoreRegion = new GenomicScoreRegion(chromosome, startOfBatch, - startOfBatch + conservationScores.size() - 1, "gerp", conservationScores); + GenomicScoreRegion conservationScoreRegion = new GenomicScoreRegion<>(chromosome, startOfBatch, + startOfBatch + conservationScores.size() - 1, GERP_DATA, conservationScores); fileSerializer.serialize(conservationScoreRegion, getOutputFileName(chromosome)); - // reset + // Reset conservationScores.clear(); } -// @Deprecated -// private void gerpParser(Path gerpFolderPath) throws IOException, 
InterruptedException { -// logger.info("Uncompressing {}", gerpFolderPath.resolve(EtlCommons.GERP_FILE)); -// List tarArgs = Arrays.asList("-xvzf", gerpFolderPath.resolve(EtlCommons.GERP_FILE).toString(), -// "--overwrite", "-C", gerpFolderPath.toString()); -// EtlCommons.runCommandLineProcess(null, "tar", tarArgs, null); -// -// DirectoryStream pathDirectoryStream = Files.newDirectoryStream(gerpFolderPath, "*.rates"); -// boolean filesFound = false; -// for (Path path : pathDirectoryStream) { -// filesFound = true; -// logger.info("Processing file '{}'", path.getFileName().toString()); -// String[] chromosome = path.getFileName().toString().replaceFirst("chr", "").split("\\."); -// BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(String.valueOf(path)))); -// String line; -// int start = 1; -// int end = 1999; -// int counter = 1; -// String[] fields; -// List val = new ArrayList<>(chunkSize); -// while ((line = bufferedReader.readLine()) != null) { -// fields = line.split("\t"); -// val.add(Float.valueOf(fields[1])); -// counter++; -// if (counter == chunkSize) { -//// ConservationScoreRegion conservationScoreRegion = new ConservationScoreRegion(chromosome[0], start, end, "gerp", -// val); -// GenomicScoreRegion conservationScoreRegion = -// new GenomicScoreRegion<>(chromosome[0], start, end, "gerp", val); -// fileSerializer.serialize(conservationScoreRegion, getOutputFileName(chromosome[0])); -// -// start = end + 1; -// end += chunkSize; -// -// counter = 0; -// val.clear(); -// } -// } -// -// // we need to serialize the last chunk that might be incomplete -//// ConservationScoreRegion conservationScoreRegion = -//// new ConservationScoreRegion(chromosome[0], start, start + val.size() - 1, "gerp", val); -// GenomicScoreRegion conservationScoreRegion = -// new GenomicScoreRegion<>(chromosome[0], start, start + val.size() - 1, "gerp", val); -// fileSerializer.serialize(conservationScoreRegion, 
getOutputFileName(chromosome[0])); -// -// bufferedReader.close(); -// } -// -// if (!filesFound) { -// logger.warn("No GERP++ files were found. Please check that the original file {} is there, that it was" -// + " properly decompressed and that the *.rates files are present", -// gerpFolderPath.resolve(EtlCommons.GERP_FILE)); -// } -// } - - private void processWigFixFile(Path inGzPath, String conservationSource) throws IOException { - BufferedReader bufferedReader = FileUtils.newBufferedReader(inGzPath); - - String line; - String chromosome = ""; -// int start = 0, end = 0; - int start = 0; - float value; - Map attributes = new HashMap<>(); -// ConservedRegion conservedRegion = null; - List values = new ArrayList<>(); -// ConservationScoreRegion conservedRegion = null; - GenomicScoreRegion conservedRegion = null; - - while ((line = bufferedReader.readLine()) != null) { - if (line.startsWith("fixedStep")) { - //new group, save last - if (conservedRegion != null) { -// conservedRegion.setEnd(end); -// conservedRegion = new ConservationScoreRegion(chromosome, start, end, conservationSource, values); - conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, - conservationSource, values); - fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); - } + private void processWigFixFile(Path inGzPath, String conservationSource) { + logger.info(PARSING_LOG_MESSAGE, inGzPath); + String line = null; + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(inGzPath)) { + String chromosome = ""; + int start = 0; + float value; + Map attributes = new HashMap<>(); + List values = new ArrayList<>(); + GenomicScoreRegion conservedRegion = null; + + while ((line = bufferedReader.readLine()) != null) { + if (line.startsWith("fixedStep")) { + // New group, save last + if (conservedRegion != null) { + conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, + conservationSource, values); + 
fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); + } -// offset = 0; - attributes.clear(); - String[] attrFields = line.split(" "); - String[] attrKeyValue; - for (String attrField : attrFields) { - if (!attrField.equalsIgnoreCase("fixedStep")) { - attrKeyValue = attrField.split("="); - attributes.put(attrKeyValue[0].toLowerCase(), attrKeyValue[1]); + attributes.clear(); + String[] attrFields = line.split(" "); + String[] attrKeyValue; + for (String attrField : attrFields) { + if (!attrField.equalsIgnoreCase("fixedStep")) { + attrKeyValue = attrField.split("="); + attributes.put(attrKeyValue[0].toLowerCase(), attrKeyValue[1]); + } } - } - chromosome = formatChromosome(attributes); - start = Integer.parseInt(attributes.get("start")); -// end = Integer.parseInt(attributes.get("start")); - - values = new ArrayList<>(2000); - } else { - int startChunk = start / MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; -// end++; - int endChunk = (start + values.size()) / MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; - // This is the endChunk if current read score is - // appended to the array (otherwise it would be - // start + values.size() - 1). If this endChunk is - // different from the startChunk means that current - // conserved region must be dumped and current - // score must be associated to next chunk. Main - // difference to what there was before is that if - // the fixedStep starts on the last position of a - // chunk e.g. 
1999, the chunk must be created with - // just that score - the chunk was left empty with - // the old code - if (startChunk != endChunk) { -// conservedRegion = new ConservationScoreRegion(chromosome, start, end - 1, conservationSource, values); - conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, - conservationSource, values); - fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); - start = start + values.size(); - values.clear(); - } + chromosome = formatChromosome(attributes); + start = Integer.parseInt(attributes.get("start")); + + values = new ArrayList<>(2000); + } else { + int startChunk = start / MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; + int endChunk = (start + values.size()) / MongoDBCollectionConfiguration.CONSERVATION_CHUNK_SIZE; + // This is the endChunk if current read score is appended to the array (otherwise it would be start + values.size() + // - 1). If this endChunk is different from the startChunk means that current conserved region must be dumped and + // current score must be associated to next chunk. Main difference to what there was before is that if the fixedStep + // starts on the last position of a chunk e.g. 1999, the chunk must be created with just that score - the chunk was + // left empty with the old code + if (startChunk != endChunk) { + conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, conservationSource, + values); + fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); + start = start + values.size(); + values.clear(); + } - value = Float.parseFloat(line.trim()); - values.add(value); + try { + value = Float.parseFloat(line.trim()); + } catch (NumberFormatException e) { + value = 0; + logger.warn("Invalid value: {}. 
Stack trace: {}", line, e.getStackTrace()); + } + values.add(value); + } } + + // Write last + conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, conservationSource, values); + fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); + } catch (Exception e) { + logger.error("ERROR parsing {}. Line: {}. Stack trace: {}", inGzPath, line, e.getStackTrace()); } - //write last -// conservedRegion = new ConservationScoreRegion(chromosome, start, end, conservationSource, values); - conservedRegion = new GenomicScoreRegion<>(chromosome, start, start + values.size() - 1, conservationSource, - values); - fileSerializer.serialize(conservedRegion, getOutputFileName(chromosome)); - bufferedReader.close(); + logger.info(PARSING_DONE_LOG_MESSAGE, inGzPath); } private String getOutputFileName(String chromosome) { @@ -377,15 +341,23 @@ private String getOutputFileName(String chromosome) { if (chromosome.equals("M")) { chromosome = "MT"; } - String outputFileName = outputFileNames.get(chromosome); - if (outputFileName == null) { - outputFileName = "conservation_" + chromosome; + + String outputFileName; + if (outputFileNames.containsKey(chromosome)) { + outputFileName = outputFileNames.get(chromosome); + } else { + outputFileName = getFilename(CONSERVATION_DATA, chromosome); outputFileNames.put(chromosome, outputFileName); } return outputFileName; } - // phylop and phastcons list the chromosome as M instead of the standard MT. replace. + /** + * Remove chr from the chromosome name; and phylop and phastcons list the chromosome as M instead of the standard MT, replace it. 
+ * + * @param attributes Attributes map with the chromosome name + * @return The new chromosome name + */ private String formatChromosome(Map attributes) { String chromosome = attributes.get("chrom").replace("chr", ""); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java index 4f128562e6..488b06e724 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/DbSnpBuilder.java @@ -30,17 +30,20 @@ import java.nio.file.Paths; import java.util.*; -import static org.opencb.cellbase.lib.EtlCommons.DBSNP_NAME; +import static org.opencb.cellbase.lib.EtlCommons.DBSNP_DATA; /** * Created by imedina on 06/11/15. */ -public class DbSnpBuilder extends CellBaseBuilder { +public class DbSnpBuilder extends AbstractBuilder { - private final Path sourceVariationPath; + private final Path downloadPath; private final DownloadProperties.URLProperties dbSnpUrlProperties; private static final Map CHROMOSOME_MAPPING; + public static final String DBSNP_OUTPUT_BASENAME = "dbsnp"; + public static final String DBSNP_OUTPUT_FILENAME = DBSNP_OUTPUT_BASENAME + ".json.gz"; + static { CHROMOSOME_MAPPING = new HashMap<>(); CHROMOSOME_MAPPING.put("NC_000001", "1"); @@ -69,9 +72,9 @@ public class DbSnpBuilder extends CellBaseBuilder { CHROMOSOME_MAPPING.put("NC_000024", "Y"); } - public DbSnpBuilder(Path sourceVariationPath, DownloadProperties.URLProperties dbSnpUrlProperties, CellBaseSerializer serializer) { + public DbSnpBuilder(Path downloadPath, DownloadProperties.URLProperties dbSnpUrlProperties, CellBaseSerializer serializer) { super(serializer); - this.sourceVariationPath = sourceVariationPath; + this.downloadPath = downloadPath; this.dbSnpUrlProperties = dbSnpUrlProperties; logger = LoggerFactory.getLogger(DbSnpBuilder.class); @@ -99,7 +102,7 @@ public DbSnpBuilder(Path 
sourceVariationPath, DownloadProperties.URLProperties d */ @Override public void parse() throws Exception { - Path dbSnpFilePath = sourceVariationPath.resolve(Paths.get(dbSnpUrlProperties.getHost()).getFileName()); + Path dbSnpFilePath = downloadPath.resolve(Paths.get(dbSnpUrlProperties.getHost()).getFileName()); FileUtils.checkPath(dbSnpFilePath); CellBaseFileSerializer fileSerializer = (CellBaseFileSerializer) serializer; @@ -120,6 +123,7 @@ public void parse() throws Exception { SnpAnnotation snpAnnotation; + logger.info(PARSING_LOG_MESSAGE, dbSnpFilePath); try (BufferedReader bufferedReader = FileUtils.newBufferedReader(dbSnpFilePath)) { while ((line = bufferedReader.readLine()) != null) { if (!line.startsWith("#")) { @@ -209,11 +213,11 @@ public void parse() throws Exception { snpAnnotation.setFlags(flags); snpAnnotation.setAdditionalAttributes(additionalAttributes); - Snp snp = new Snp(id, chromosome, position, ref, Arrays.asList(alt), type, DBSNP_NAME, version, snpAnnotation); - fileSerializer.serialize(snp, DBSNP_NAME); + Snp snp = new Snp(id, chromosome, position, ref, Arrays.asList(alt), type, DBSNP_DATA, version, snpAnnotation); + fileSerializer.serialize(snp, DBSNP_DATA); } } } - logger.info("Parsing finished."); + logger.info(PARSING_DONE_LOG_MESSAGE); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilder.java new file mode 100644 index 0000000000..32d779e7ce --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilder.java @@ -0,0 +1,1004 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.opencb.cellbase.lib.builders; + +import htsjdk.tribble.readers.TabixReader; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.feature.gff.Gff2; +import org.opencb.biodata.formats.feature.gtf.Gtf; +import org.opencb.biodata.formats.feature.gtf.io.GtfReader; +import org.opencb.biodata.formats.io.FileFormatException; +import org.opencb.biodata.models.core.*; +import org.opencb.biodata.tools.sequence.FastaIndex; +import org.opencb.cellbase.core.ParamConstants; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.DownloadProperties; +import org.opencb.cellbase.core.config.SpeciesConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; +import org.opencb.cellbase.core.serializer.CellBaseSerializer; +import org.rocksdb.RocksDBException; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; + +import static org.opencb.cellbase.lib.EtlCommons.*; + +public class EnsemblGeneBuilder extends AbstractBuilder { + + private Path downloadPath; + private SpeciesConfiguration speciesConfiguration; + private boolean flexibleGTFParsing; + private CellBaseConfiguration configuration; + + private final Map transcriptDict; + private final Map exonDict; + + private Path gtfFile = null; + private Path proteinFastaFile = null; + private Path cDnaFastaFile = null; + private Path geneDescriptionFile = 
null; + private Path xrefsFile = null; + private Path hgncFile = null; + private Path maneFile = null; + private Path lrgFile = null; + private Path uniprotIdMappingFile = null; + private Path tfbsFile = null; + private Path tabixFile = null; + private Path geneExpressionFile = null; + private Path geneDrugFile = null; + private Path hpoFile = null; + private Path genomeSequenceFilePath = null; + private Path gnomadFile = null; + private Path geneOntologyAnnotationFile = null; + private Path miRBaseFile = null; + private Path miRTarBaseFile = null; + private Path cancerGeneCensusFile = null; + private Path cancerHostpotFile = null; + private Path ensemblCanonicalFile = null; + + // source for genes is either ensembl or refseq + private final String SOURCE = ParamConstants.QueryParams.ENSEMBL.key(); + + private int geneCounter; + private ArrayList geneList; + private String geneName; + private int transcriptCounter; + private ArrayList transcriptList; + private String transcriptName; + private int exonCounter; + private String feature; + private Gtf nextGtfToReturn; + + private boolean isHSapiens = false; + + public static final String ENSEMBL_GENE_BASENAME = "ensemblGene"; + public static final String ENSEMBL_GENE_OUTPUT_FILENAME = ENSEMBL_GENE_BASENAME + ".json.gz"; + + public EnsemblGeneBuilder(Path downloadPath, SpeciesConfiguration speciesConfiguration, boolean flexibleGTFParsing, + CellBaseConfiguration configuration, CellBaseSerializer serializer) { + super(serializer); + + this.downloadPath = downloadPath; + this.speciesConfiguration = speciesConfiguration; + this.flexibleGTFParsing = flexibleGTFParsing; + this.configuration = configuration; + + transcriptDict = new HashMap<>(250000); + exonDict = new HashMap<>(8000000); + + if (speciesConfiguration.getScientificName().equals(HOMO_SAPIENS)) { + isHSapiens = true; + } + } + + public void check() throws Exception { + if (checked) { + return; + } + + String ensemblGeneLabel = getDataName(ENSEMBL_DATA) + " " + 
getDataName(GENE_DATA); + logger.info(CHECKING_BEFORE_BUILDING_LOG_MESSAGE, ensemblGeneLabel); + + // Sanity check + checkDirectory(downloadPath, ensemblGeneLabel); + if (!Files.exists(serializer.getOutdir())) { + try { + Files.createDirectories(serializer.getOutdir()); + } catch (IOException e) { + throw new CellBaseException("Error creating folder " + serializer.getOutdir(), e); + } + } + + // Check Ensembl files + DownloadProperties.URLProperties props = configuration.getDownload().getEnsembl().getUrl(); + gtfFile = checkFile(props, ENSEMBL_GTF_FILE_ID, downloadPath, "Ensembl GTF").toPath(); + proteinFastaFile = checkFile(props, ENSEMBL_PEP_FA_FILE_ID, downloadPath, "Ensembl Protein Fasta").toPath(); + cDnaFastaFile = checkFile(props, ENSEMBL_CDNA_FA_FILE_ID, downloadPath, "Ensembl CDNA Fasta").toPath(); + + // Commons + geneDescriptionFile = checkFile(props, ENSEMBL_DESCRIPTION_FILE_ID, downloadPath.getParent(), "Ensembl Description").toPath(); + xrefsFile = checkFile(props, ENSEMBL_XREFS_FILE_ID, downloadPath.getParent(), "Ensembl Xrefs").toPath(); + ensemblCanonicalFile = checkFile(props, ENSEMBL_CANONICAL_FILE_ID, downloadPath.getParent(), "Ensembl Canonical").toPath(); + + // Check common files + String prefixId = getConfigurationFileIdPrefix(speciesConfiguration.getScientificName()); + if (isHSapiens || isDataSupported(configuration.getDownload().getManeSelect(), prefixId)) { + maneFile = checkFiles(MANE_SELECT_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(MANE_SELECT_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getLrg(), prefixId)) { + lrgFile = checkFiles(LRG_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(LRG_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || 
isDataSupported(configuration.getDownload().getHgnc(), prefixId)) { + hgncFile = checkFiles(HGNC_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(HGNC_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getCancerHotspot(), prefixId)) { + cancerHostpotFile = checkFiles(CANCER_HOTSPOT_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(CANCER_HOTSPOT_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getDgidb(), prefixId)) { + geneDrugFile = checkFiles(DGIDB_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(DGIDB_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getGeneUniprotXref(), prefixId)) { + uniprotIdMappingFile = checkFiles(UNIPROT_XREF_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(UNIPROT_XREF_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getGeneExpressionAtlas(), prefixId)) { + geneExpressionFile = checkFiles(GENE_EXPRESSION_ATLAS_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(UNIPROT_XREF_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getHpo(), prefixId)) { + hpoFile = checkFiles(HPO_DISEASE_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(HPO_DISEASE_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || 
isDataSupported(configuration.getDownload().getGnomadConstraints(), prefixId)) { + gnomadFile = checkFiles(GNOMAD_CONSTRAINTS_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(GNOMAD_CONSTRAINTS_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getGoAnnotation(), prefixId)) { + geneOntologyAnnotationFile = checkFiles(GO_ANNOTATION_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(GO_ANNOTATION_DATA), speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getCancerHotspot(), prefixId)) { + cancerGeneCensusFile = checkFiles(CANCER_GENE_CENSUS_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(CANCER_GENE_CENSUS_DATA), speciesConfiguration.getScientificName()); + } + + // Check regulation files + // Motif features + List files = checkFiles(ensemblGeneLabel, MOTIF_FEATURES_DATA, downloadPath.getParent().getParent().resolve(REGULATION_DATA) + .resolve(MOTIF_FEATURES_DATA), 2); + if (files.get(0).getName().endsWith("tbi")) { + tabixFile = files.get(0).toPath(); + tfbsFile = files.get(1).toPath(); + } else { + tabixFile = files.get(1).toPath(); + tfbsFile = files.get(0).toPath(); + } + + // mirbase + if (isHSapiens || isDataSupported(configuration.getDownload().getMirbase(), prefixId)) { + miRBaseFile = checkFiles(MIRBASE_DATA, downloadPath.getParent().getParent().resolve(REGULATION_DATA) + .resolve(MIRBASE_DATA), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(MIRTARBASE_DATA), speciesConfiguration.getScientificName()); + } + + // mirtarbase + if (isHSapiens || isDataSupported(configuration.getDownload().getMiRTarBase(), prefixId)) { + miRTarBaseFile = checkFiles(MIRTARBASE_DATA, 
downloadPath.getParent().getParent().resolve(REGULATION_DATA) + .resolve(MIRTARBASE_DATA), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, getDataName(MIRTARBASE_DATA), speciesConfiguration.getScientificName()); + } + + // Check genome FASTA file + Path genomeDownloadPath = downloadPath.getParent().getParent().resolve(GENOME_DATA); + String genomeGzFilename = Paths.get(((DataSource) dataSourceReader.readValue(genomeDownloadPath + .resolve(getDataVersionFilename(GENOME_DATA)).toFile())).getUrls().get(0)).getFileName().toString(); + genomeSequenceFilePath = getFastaPath(genomeDownloadPath.resolve(genomeGzFilename)); + + logger.info(CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE, ensemblGeneLabel); + checked = true; + } + + public void parse() throws Exception { + check(); + + Gene gene = null; + Transcript transcript; + Exon exon = null; + int cdna = 1; + int cds = 1; + + EnsemblGeneBuilderIndexer indexer = new EnsemblGeneBuilderIndexer(serializer.getOutdir()); + try { + // process files and put values in rocksdb + indexer.index(geneDescriptionFile, xrefsFile, hgncFile, maneFile, lrgFile, uniprotIdMappingFile, proteinFastaFile, + cDnaFastaFile, speciesConfiguration.getScientificName(), geneExpressionFile, geneDrugFile, hpoFile, gnomadFile, + geneOntologyAnnotationFile, miRBaseFile, miRTarBaseFile, cancerGeneCensusFile, cancerHostpotFile, ensemblCanonicalFile); + + TabixReader tabixReader = null; + if (!Files.exists(tfbsFile) || !Files.exists(tabixFile)) { + logger.error("Tfbs or tabix file not found. 
Download them and try again."); + } else { + tabixReader = new TabixReader(tfbsFile.toAbsolutePath().toString(), tabixFile.toAbsolutePath().toString()); + } + + // Preparing the fasta file for fast accessing + FastaIndex fastaIndex = new FastaIndex(genomeSequenceFilePath); + + // Empty transcript and exon dictionaries + transcriptDict.clear(); + exonDict.clear(); + + logger.info(PARSING_LOG_MESSAGE, gtfFile); + GtfReader gtfReader = new GtfReader(gtfFile); + + // Gene->Transcript->Feature->GTF line + Map>> gtfMap = null; + if (flexibleGTFParsing) { + gtfMap = loadGTFMap(gtfReader); + initializePointers(gtfMap); + } + + Gtf gtf; + while ((gtf = getGTFEntry(gtfReader, gtfMap)) != null) { + + if (gtf.getFeature().equals("gene") || gtf.getFeature().equals("transcript") + || gtf.getFeature().equals("UTR") || gtf.getFeature().equals("Selenocysteine")) { + continue; + } + + String geneId = gtf.getAttributes().get("gene_id"); + String transcriptId = gtf.getAttributes().get("transcript_id"); + String geneName = gtf.getAttributes().get("gene_name"); + if (newGene(gene, geneId)) { + // If new geneId is different from the current then we must serialize before data new gene + if (gene != null) { + serializer.serialize(gene); + } + + GeneAnnotation geneAnnotation = new GeneAnnotation(indexer.getExpression(geneId), indexer.getDiseases(geneName), + indexer.getDrugs(geneName), indexer.getConstraints(geneId), indexer.getMirnaTargets(geneName), + indexer.getCancerGeneCensus(geneName), indexer.getCancerHotspot(geneName)); + + gene = new Gene(geneId, geneName, gtf.getSequenceName().replaceFirst("chr", ""), + gtf.getStart(), gtf.getEnd(), gtf.getStrand(), gtf.getAttributes().get("gene_version"), + gtf.getAttributes().get("gene_biotype"), "KNOWN", SOURCE, indexer.getDescription(geneId), + new ArrayList<>(), indexer.getMirnaGene(transcriptId), geneAnnotation); + } + + // Check if Transcript exist in the Gene Set of transcripts + if (!transcriptDict.containsKey(transcriptId)) { + 
transcript = getTranscript(gene, indexer, tabixReader, gtf, transcriptId); + } else { + transcript = gene.getTranscripts().get(transcriptDict.get(transcriptId)); + } + + // At this point gene and transcript objects are set up + // Update gene and transcript genomic coordinates, start must be the + // lower, and end the higher + updateTranscriptAndGeneCoords(transcript, gene, gtf); + + String transcriptIdWithoutVersion = transcript.getId().split("\\.")[0]; + if (gtf.getFeature().equalsIgnoreCase("exon")) { + // Obtaining the exon sequence + String exonId = gtf.getAttributes().get("exon_id") + "." + gtf.getAttributes().get("exon_version"); + String exonSequence = fastaIndex.query(gtf.getSequenceName(), gtf.getStart(), gtf.getEnd()); + + exon = new Exon(exonId, gtf.getSequenceName().replaceFirst("chr", ""), + gtf.getStart(), gtf.getEnd(), gtf.getStrand(), 0, 0, 0, 0, 0, 0, -1, Integer.parseInt(gtf + .getAttributes().get("exon_number")), exonSequence); + transcript.getExons().add(exon); + + exonDict.put(transcriptIdWithoutVersion + "_" + exon.getExonNumber(), exon); + if (gtf.getAttributes().get("exon_number").equals("1")) { + cdna = 1; + cds = 1; + } else { + // with every exon we update cDNA length with the previous exon length + cdna += exonDict.get(transcriptIdWithoutVersion + "_" + (exon.getExonNumber() - 1)).getEnd() + - exonDict.get(transcriptIdWithoutVersion + "_" + (exon.getExonNumber() - 1)).getStart() + 1; + } + } else { + exon = exonDict.get(transcriptIdWithoutVersion + "_" + exon.getExonNumber()); + if (gtf.getFeature().equalsIgnoreCase("CDS")) { + // Protein ID is only present in CDS lines + String proteinId = gtf.getAttributes().get("protein_id") != null + ? gtf.getAttributes().get("protein_id") + "." 
+ gtf.getAttributes().get("protein_version") + : ""; + transcript.setProteinId(proteinId); + transcript.setProteinSequence(indexer.getProteinFasta(proteinId)); + + if (gtf.getStrand().equals("+") || gtf.getStrand().equals("1")) { + // CDS states the beginning of coding start + exon.setGenomicCodingStart(gtf.getStart()); + exon.setGenomicCodingEnd(gtf.getEnd()); + + // cDNA coordinates + exon.setCdnaCodingStart(gtf.getStart() - exon.getStart() + cdna); + exon.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); + // Set cdnaCodingEnd to prevent those cases without stop_codon + + transcript.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); + exon.setCdsStart(cds); + exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + + // increment in the coding length + cds += gtf.getEnd() - gtf.getStart() + 1; + transcript.setCdsLength(cds - 1); // Set cdnaCodingEnd to prevent those cases without stop_codon + + exon.setPhase(Integer.parseInt(gtf.getFrame())); + + if (transcript.getGenomicCodingStart() == 0 || transcript.getGenomicCodingStart() > gtf.getStart()) { + transcript.setGenomicCodingStart(gtf.getStart()); + } + if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) { + transcript.setGenomicCodingEnd(gtf.getEnd()); + } + // only first time + if (transcript.getCdnaCodingStart() == 0) { + transcript.setCdnaCodingStart(gtf.getStart() - exon.getStart() + cdna); + } + // strand - + } else { + // CDS states the beginning of coding start + exon.setGenomicCodingStart(gtf.getStart()); + exon.setGenomicCodingEnd(gtf.getEnd()); + // cDNA coordinates + // cdnaCodingStart points to the same base position than genomicCodingEnd + exon.setCdnaCodingStart(exon.getEnd() - gtf.getEnd() + cdna); + // cdnaCodingEnd points to the same base position than genomicCodingStart + exon.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); + // Set cdnaCodingEnd to prevent those cases without stop_codon + transcript.setCdnaCodingEnd(exon.getEnd() - 
gtf.getStart() + cdna); + exon.setCdsStart(cds); + exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + + // increment in the coding length + cds += gtf.getEnd() - gtf.getStart() + 1; + transcript.setCdsLength(cds - 1); // Set cdnaCodingEnd to prevent those cases without stop_codon + exon.setPhase(Integer.parseInt(gtf.getFrame())); + + if (transcript.getGenomicCodingStart() == 0 || transcript.getGenomicCodingStart() > gtf.getStart()) { + transcript.setGenomicCodingStart(gtf.getStart()); + } + if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) { + transcript.setGenomicCodingEnd(gtf.getEnd()); + } + // only first time + if (transcript.getCdnaCodingStart() == 0) { + // cdnaCodingStart points to the same base position than genomicCodingEnd + transcript.setCdnaCodingStart(exon.getEnd() - gtf.getEnd() + cdna); + } + } + + } +// if (gtf.getFeature().equalsIgnoreCase("start_codon")) { +// // nothing to do +// System.out.println("Empty block, this should be redesigned"); +// } + if (gtf.getFeature().equalsIgnoreCase("stop_codon")) { + // setCdnaCodingEnd = false; // stop_codon found, cdnaCodingEnd will be set here, + // no need to set it at the beginning of next feature + if (exon.getStrand().equals("+")) { + updateStopCodingDataPositiveExon(exon, cdna, cds, gtf); + + cds += gtf.getEnd() - gtf.getStart(); + // If stop_codon appears, overwrite values + transcript.setGenomicCodingEnd(gtf.getEnd()); + transcript.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); + transcript.setCdsLength(cds - 1); + + } else { + updateNegativeExonCodingData(exon, cdna, cds, gtf); + + cds += gtf.getEnd() - gtf.getStart(); + // If stop_codon appears, overwrite values + transcript.setGenomicCodingStart(gtf.getStart()); + // cdnaCodingEnd points to the same base position than genomicCodingStart + transcript.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); + transcript.setCdsLength(cds - 1); + } + } + } + } + + // last gene must be 
serialized + serializer.serialize(gene); + + // Close + gtfReader.close(); + serializer.close(); + fastaIndex.close(); + indexer.close(); + + logger.info(PARSING_DONE_LOG_MESSAGE, gtfFile); + } catch (Exception e) { + indexer.close(); + throw e; + } + } + + private Transcript getTranscript(Gene gene, EnsemblGeneBuilderIndexer indexer, TabixReader tabixReader, Gtf gtf, String transcriptId) + throws IOException, RocksDBException { + Map gtfAttributes = gtf.getAttributes(); + + // To match Ensembl, we set the ID as transcript+version. This also matches the Ensembl website. + String transcriptIdWithVersion = transcriptId + "." + gtfAttributes.get("transcript_version"); + String biotype = gtfAttributes.get("transcript_biotype") != null ? gtfAttributes.get("transcript_biotype") : ""; + String transcriptChromosome = gtf.getSequenceName().replaceFirst("chr", ""); + List transcriptTfbses = getTranscriptTfbses(gtf, transcriptChromosome, tabixReader); + + List ontologyAnnotations = getOntologyAnnotations(indexer.getXrefs(transcriptId), indexer); + TranscriptAnnotation transcriptAnnotation = new TranscriptAnnotation(ontologyAnnotations, indexer.getConstraints(transcriptId)); + + Transcript transcript = new Transcript(transcriptIdWithVersion, gtfAttributes.get("transcript_name"), transcriptChromosome, + gtf.getStart(), gtf.getEnd(), gtf.getStrand(), biotype, "KNOWN", + 0, 0, 0, 0, 0, + indexer.getCdnaFasta(transcriptIdWithVersion), "", "", "", + gtfAttributes.get("transcript_version"), SOURCE, new ArrayList<>(), indexer.getXrefs(transcriptId), transcriptTfbses, + new HashSet<>(), transcriptAnnotation); + + // Adding Ids appearing in the GTF to the xrefs is required, since for some unknown reason the ENSEMBL + // Perl API often doesn't return all genes resulting in an incomplete xrefs.txt file. 
We must ensure + // that the xrefs array contains all ids present in the GTF file + addGtfXrefs(transcript, gene, gtfAttributes); + + // Add HGNC ID mappings, with this we can know which Ensembl and Refseq transcripts match to HGNC ID + String hgncId = indexer.getHgncId(gene.getName()); + if (StringUtils.isNotEmpty(hgncId)) { + transcript.getXrefs().add(new Xref(hgncId, "hgnc_id", "HGNC ID")); + } + + // Add MANE Select mappings, with this we can know which Ensembl and Refseq transcripts match according to MANE + for (String suffix: Arrays.asList("refseq", "refseq_protein")) { + String maneRefSeq = indexer.getMane(transcriptIdWithVersion, suffix); + if (StringUtils.isNotEmpty(maneRefSeq)) { + transcript.getXrefs().add(new Xref(maneRefSeq, "mane_select_" + suffix, + "MANE Select RefSeq" + (suffix.contains("_") ? " Protein" : ""))); + } + } + + // Add LRG mappings, with this we can know which Ensembl and Refseq transcripts match according to LRG + String lrgRefSeq = indexer.getLrg(transcriptIdWithVersion, "refseq"); + if (StringUtils.isNotEmpty(lrgRefSeq)) { + transcript.getXrefs().add(new Xref(lrgRefSeq, "lrg_refseq", "LRG RefSeq")); + } + + // Add Flags + // 1. GTF tags + String tags = gtf.getAttributes().get("tag"); + if (StringUtils.isNotEmpty(tags)) { + transcript.getFlags().addAll(Arrays.asList(tags.split(","))); + } + + // 2. TSL + String supportLevel = gtfAttributes.get("transcript_support_level"); + if (StringUtils.isNotEmpty(supportLevel)) { + // split on space so "5 (assigned to previous version 3)" and "5" both become "TSL:5" + String truncatedSupportLevel = supportLevel.split(" ")[0]; + transcript.getFlags().add("TSL:" + truncatedSupportLevel); + } + + // 3. MANE Flag + String maneFlag = indexer.getMane(transcriptIdWithVersion, "flag"); + if (StringUtils.isNotEmpty(maneFlag)) { + transcript.getFlags().add(maneFlag); + } + + // 4. 
LRG Flag + String lrg = indexer.getLrg(transcriptIdWithVersion, "ensembl"); + if (StringUtils.isNotEmpty(lrg)) { + transcript.getFlags().add("LRG"); + } else { + for (Xref xref : transcript.getXrefs()) { + if (xref.getId().startsWith("LRG_") && xref.getId().contains("t")) { + transcript.getFlags().add("LRG"); + } + } + } + + // 5. Ensembl Canonical + String canonicalFlag = indexer.getCanonical(transcriptIdWithVersion); + if (StringUtils.isNotEmpty(canonicalFlag)) { + transcript.getFlags().add(canonicalFlag); + } + + // 6. TSO500 and EGLH HaemOnc +// String maneRefSeq = indexer.getMane(transcriptIdWithVersion, "refseq"); +// if (StringUtils.isNotEmpty(maneRefSeq)) { +// String tso500Flag = indexer.getTSO500(maneRefSeq.split("\\.")[0]); +// if (StringUtils.isNotEmpty(tso500Flag)) { +// transcript.getFlags().add(tso500Flag); +// } +// +// String eglhHaemOncFlag = indexer.getEGLHHaemOnc(maneRefSeq.split("\\.")[0]); +// if (StringUtils.isNotEmpty(eglhHaemOncFlag)) { +// transcript.getFlags().add(eglhHaemOncFlag); +// } +// } + + gene.getTranscripts().add(transcript); + + // Do not change order!! size()-1 is the index of the transcript ID + transcriptDict.put(transcriptId, gene.getTranscripts().size() - 1); + return transcript; + } + + private List getOntologyAnnotations(List xrefs, EnsemblGeneBuilderIndexer indexer) + throws IOException, RocksDBException { + if (xrefs == null || indexer == null) { + return null; + } + List annotations = new ArrayList<>(); + for (Xref xref : xrefs) { + if (xref.getDbName().equals("uniprotkb_acc")) { + String key = xref.getId(); + if (key != null && indexer.getOntologyAnnotations(key) != null) { + annotations.addAll(indexer.getOntologyAnnotations(key)); + } + } + } + return annotations; + } + + private void updateNegativeExonCodingData(Exon exon, int cdna, int cds, Gtf gtf) { + // we need to increment 3 nts, the stop_codon length. 
+ exon.setGenomicCodingStart(gtf.getStart()); + // cdnaCodingEnd points to the same base position than genomicCodingStart + exon.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); + exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + + // If the STOP codon corresponds to the first three nts of the exon then no CDS will be defined + // in the gtf -as technically the STOP codon is non-coding- and we must manually set coding + // starts + if (exon.getGenomicCodingEnd() == 0) { + exon.setGenomicCodingEnd(exon.getGenomicCodingStart() + 2); + } + if (exon.getCdnaCodingStart() == 0) { + exon.setCdnaCodingStart(exon.getCdnaCodingEnd() - 2); + } + if (exon.getCdsStart() == 0) { + exon.setCdsStart(exon.getCdsEnd() - 2); + } + } + + private void updateStopCodingDataPositiveExon(Exon exon, int cdna, int cds, Gtf gtf) { + // we need to increment 3 nts, the stop_codon length. + exon.setGenomicCodingEnd(gtf.getEnd()); + exon.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); + exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + + // If the STOP codon corresponds to the first three nts of the exon then no CDS will be defined + // in the gtf -as technically the STOP codon is non-coding- and we must manually set coding + // starts + if (exon.getGenomicCodingStart() == 0) { + exon.setGenomicCodingStart(exon.getGenomicCodingEnd() - 2); + } + if (exon.getCdnaCodingStart() == 0) { + exon.setCdnaCodingStart(exon.getCdnaCodingEnd() - 2); + } + if (exon.getCdsStart() == 0) { + exon.setCdsStart(exon.getCdsEnd() - 2); + } + } + + private void addGtfXrefs(Transcript transcript, Gene gene, Map gtfAttributes) { + if (transcript.getXrefs() == null) { + transcript.setXrefs(new ArrayList<>()); + } + + transcript.getXrefs().add(new Xref(gene.getId(), "ensembl_gene", "Ensembl Gene")); + transcript.getXrefs().add(new Xref(transcript.getId(), "ensembl_transcript", "Ensembl Transcript")); + + // Some non-coding genes do not have Gene names + if (StringUtils.isNotEmpty(gene.getName())) 
{ + transcript.getXrefs().add(new Xref(gene.getName(), "hgnc_symbol", "HGNC Symbol")); + transcript.getXrefs().add(new Xref(transcript.getName(), "ensembl_transcript_name", "Ensembl Transcript Name")); + } + + if (gtfAttributes.get("ccds_id") != null) { + transcript.getXrefs().add(new Xref(gtfAttributes.get("ccds_id"), "ccds_id", "CCDS")); + } + } + + private void initializePointers(Map>> gtfMap) { + geneCounter = 0; + geneList = new ArrayList<>(gtfMap.keySet()); + geneName = geneList.get(geneCounter); + transcriptCounter = 0; + transcriptList = new ArrayList<>(gtfMap.get(geneName).keySet()); + transcriptName = transcriptList.get(transcriptCounter); + exonCounter = 0; + feature = "exon"; + nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); + } + + private Gtf getGTFEntry(GtfReader gtfReader, Map>> gtfMap) throws FileFormatException { + // Flexible parsing is deactivated, return next line + if (gtfMap == null) { + return gtfReader.read(); + // Flexible parsing activated, carefully select next line to return + } else { + // No more genes/features to return + if (nextGtfToReturn == null) { + return null; + } + Gtf gtfToReturn = nextGtfToReturn; + if (feature.equals("exon")) { +// gtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); + if (gtfMap.get(geneName).get(transcriptName).containsKey("cds")) { + nextGtfToReturn = getExonCDSLine(((Gtf) ((List) gtfMap.get(geneName) + .get(transcriptName).get("exon")).get(exonCounter)).getStart(), + ((Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter)).getEnd(), + (List) gtfMap.get(geneName).get(transcriptName).get("cds")); + if (nextGtfToReturn != null) { + feature = "cds"; + return gtfToReturn; + } + } + // if no cds was found for this exon, get next exon + getFeatureFollowsExon(gtfMap); + return gtfToReturn; + } + if (feature.equals("cds") || feature.equals("stop_codon")) { + 
getFeatureFollowsExon(gtfMap); + return gtfToReturn; + } + if (feature.equals("start_codon")) { + feature = "stop_codon"; + nextGtfToReturn = (Gtf) gtfMap.get(geneName).get(transcriptName).get("stop_codon"); + return gtfToReturn; + } + // The only accepted features that should appear in the gtfMap are exon, cds, start_codon and stop_codon + throw new FileFormatException("Execution cannot reach this point"); + } + } + + private Gtf getExonCDSLine(Integer exonStart, Integer exonEnd, List cdsList) { + for (Object cdsObject : cdsList) { + int cdsStart = ((Gtf) cdsObject).getStart(); + int cdsEnd = ((Gtf) cdsObject).getEnd(); + if (cdsStart <= exonEnd && cdsEnd >= exonStart) { + return (Gtf) cdsObject; + } + } + return null; + } + + private void getFeatureFollowsExon(Map>> gtfMap) { + exonCounter++; + if (exonCounter == ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).size() + || feature.equals("stop_codon")) { + // If last returned feature was a stop_codon or no start_codon is provided for this transcript, + // next transcript must be selected + if (!feature.equals("stop_codon") && gtfMap.get(geneName).get(transcriptName).containsKey("start_codon")) { + feature = "start_codon"; + nextGtfToReturn = (Gtf) gtfMap.get(geneName).get(transcriptName).get("start_codon"); + } else { + transcriptCounter++; + // No more transcripts in this gene, check if there are more genes + if (transcriptCounter == gtfMap.get(geneName).size()) { + geneCounter++; + // No more genes available, end parsing + if (geneCounter == gtfMap.size()) { + nextGtfToReturn = null; + feature = null; + // Still more genes to parse, select next one + } else { + geneName = geneList.get(geneCounter); + transcriptCounter = 0; + transcriptList = new ArrayList<>(gtfMap.get(geneName).keySet()); + } + } + // Check if a new gene was selected - null would indicate there're no more genes + if (nextGtfToReturn != null) { + transcriptName = transcriptList.get(transcriptCounter); + exonCounter = 0; + feature = 
"exon"; + nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); + } + } + } else { + feature = "exon"; + nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); + } + } + + private Map>> loadGTFMap(GtfReader gtfReader) throws FileFormatException { + Map>> gtfMap = new HashMap<>(); + Gtf gtf; + while ((gtf = gtfReader.read()) != null) { + if (gtf.getFeature().equals("gene") || gtf.getFeature().equals("transcript") + || gtf.getFeature().equals("UTR") || gtf.getFeature().equals("Selenocysteine")) { + continue; + } + + // Get GTF lines associated with this gene - create a new Map of GTF entries if it's a new gene + String geneId = gtf.getAttributes().get("gene_id"); + // Transcript -> feature -> GTF line + Map> gtfMapGeneEntry; + if (gtfMap.containsKey(geneId)) { + gtfMapGeneEntry = gtfMap.get(geneId); + } else { + gtfMapGeneEntry = new HashMap(); + gtfMap.put(geneId, gtfMapGeneEntry); + } + + // Get GTF lines associated with this transcript - create a new Map of GTF entries if it's a new gene + String transcriptId = gtf.getAttributes().get("transcript_id"); + Map gtfMapTranscriptEntry; + if (gtfMapGeneEntry.containsKey(transcriptId)) { + gtfMapTranscriptEntry = gtfMapGeneEntry.get(transcriptId); + } else { + gtfMapTranscriptEntry = new HashMap(); + gtfMapGeneEntry.put(transcriptId, gtfMapTranscriptEntry); + } + + addGTFLineToGTFMap(gtfMapTranscriptEntry, gtf); + + } + + // Exon number is mandatory for the parser to be able to properly generate the gene data model + if (!exonNumberPresent(gtfMap)) { + setExonNumber(gtfMap); + } + + return gtfMap; + } + + private boolean exonNumberPresent(Map>> gtfMap) { + Map> geneGtfMap = gtfMap.get(gtfMap.keySet().iterator().next()); + return ((Gtf) ((List) geneGtfMap.get(geneGtfMap.keySet().iterator().next()).get("exon")).get(0)) + .getAttributes().containsKey("exon_number"); + } + + private void setExonNumber(Map>> gtfMap) { + for 
(String gene : gtfMap.keySet()) { + for (String transcript : gtfMap.get(gene).keySet()) { + List exonList = (List) gtfMap.get(gene).get(transcript).get("exon"); + Collections.sort(exonList, (e1, e2) -> Integer.valueOf(e1.getStart()).compareTo(e2.getStart())); + if (exonList.get(0).getStrand().equals("+")) { + int exonNumber = 1; + for (Gtf gtf : exonList) { + gtf.getAttributes().put("exon_number", String.valueOf(exonNumber)); + exonNumber++; + } + } else { + int exonNumber = exonList.size(); + for (Gtf gtf : exonList) { + gtf.getAttributes().put("exon_number", String.valueOf(exonNumber)); + exonNumber--; + } + } + } + } + } + + private void addGTFLineToGTFMap(Map gtfMapTranscriptEntry, Gtf gtf) { + // Add exon/cds GTF line to the corresponding gene entry in the map + String featureType = gtf.getFeature().toLowerCase(); + if (featureType.equals("exon") || featureType.equals("cds")) { + List gtfList; + // Check if there were exons already stored + if (gtfMapTranscriptEntry.containsKey(featureType)) { + gtfList = (List) gtfMapTranscriptEntry.get(featureType); + } else { + gtfList = new ArrayList<>(); + gtfMapTranscriptEntry.put(featureType, gtfList); + } + gtfList.add(gtf); + // Only one start/stop codon can be stored per transcript - no need to check if the "start_codon"/"stop_codon" + // keys are already there + } else if (featureType.equals("start_codon") || featureType.equals("stop_codon")) { + gtfMapTranscriptEntry.put(featureType, gtf); + } + } + + private List getTranscriptTfbses(Gtf transcript, String chromosome, TabixReader tabixReader) throws IOException { + if (tabixReader == null) { + return null; + } + List transcriptTfbses = null; + + int transcriptStart = transcript.getStart(); + int transcriptEnd = transcript.getEnd(); + + + String line; + TabixReader.Iterator iter = tabixReader.query(chromosome, transcriptStart, transcriptEnd); + while ((line = iter.next()) != null) { + String[] elements = line.split("\t"); + + String sequenceName = elements[0]; + 
String source = elements[1]; + String feature = elements[2]; + int start = Integer.parseInt(elements[3]); + int end = Integer.parseInt(elements[4]); + String score = elements[5]; + String strand = elements[6]; + String frame = elements[7]; + String attribute = elements[8]; + + if (strand.equals(transcript.getStrand())) { + continue; + } + + if (transcript.getStrand().equals("+")) { + if (start > transcript.getStart() + 500) { + break; + } else if (end > transcript.getStart() - 2500) { + Gff2 tfbs = new Gff2(sequenceName, source, feature, start, end, score, strand, frame, attribute); + transcriptTfbses = addTranscriptTfbstoList(tfbs, transcript, chromosome, transcriptTfbses); + } + } else { + // transcript in negative strand + if (start > transcript.getEnd() + 2500) { + break; + } else if (start > transcript.getEnd() - 500) { + Gff2 tfbs = new Gff2(sequenceName, source, feature, start, end, score, strand, frame, attribute); + transcriptTfbses = addTranscriptTfbstoList(tfbs, transcript, chromosome, transcriptTfbses); + } + } + } + + return transcriptTfbses; + } + + protected List addTranscriptTfbstoList(Gff2 tfbs, Gtf transcript, String chromosome, + List transcriptTfbses) { + if (transcriptTfbses == null) { + transcriptTfbses = new ArrayList<>(); + } + + // binding_matrix_stable_id=ENSPFM0542;epigenomes_with_experimental_evidence=SK-N.%2CMCF-7%2CH1-hESC_3%2CHCT116; + // stable_id=ENSM00208374688;transcription_factor_complex=TEAD4::ESRRB + String[] attributes = tfbs.getAttribute().split(";"); + + String id = null; + String pfmId = null; + List transciptionFactors = null; + + for (String attributePair : attributes) { + String[] attributePairArray = attributePair.split("="); + switch(attributePairArray[0]) { + case "binding_matrix_stable_id": + pfmId = attributePairArray[1]; + break; + case "stable_id": + id = attributePairArray[1]; + break; + case "transcription_factor_complex": + transciptionFactors = Arrays.asList(attributePairArray[1].split("(::)|(%2C)")); + break; 
+ default: + break; + } + } + + transcriptTfbses.add(new TranscriptTfbs(id, pfmId, tfbs.getFeature(), transciptionFactors, chromosome, tfbs.getStart(), + tfbs.getEnd(), getRelativeTranscriptTfbsStart(tfbs, transcript), getRelativeTranscriptTfbsEnd(tfbs, transcript), + Float.parseFloat(tfbs.getScore()))); + return transcriptTfbses; + } + + private Integer getRelativeTranscriptTfbsStart(Gff2 tfbs, Gtf transcript) { + Integer relativeStart; + if (transcript.getStrand().equals("+")) { + if (tfbs.getStart() < transcript.getStart()) { + relativeStart = tfbs.getStart() - transcript.getStart(); + } else { + relativeStart = tfbs.getStart() - transcript.getStart() + 1; + } + } else { + // negative strand transcript + if (tfbs.getEnd() > transcript.getEnd()) { + relativeStart = transcript.getEnd() - tfbs.getEnd(); + } else { + relativeStart = transcript.getEnd() - tfbs.getEnd() + 1; + } + } + return relativeStart; + } + + private Integer getRelativeTranscriptTfbsEnd(Gff2 tfbs, Gtf transcript) { + Integer relativeEnd; + if (transcript.getStrand().equals("+")) { + if (tfbs.getEnd() < transcript.getStart()) { + relativeEnd = tfbs.getEnd() - transcript.getStart(); + } else { + relativeEnd = tfbs.getEnd() - transcript.getStart() + 1; + } + } else { + if (tfbs.getStart() > transcript.getEnd()) { + relativeEnd = transcript.getEnd() - tfbs.getStart(); + } else { + relativeEnd = transcript.getEnd() - tfbs.getStart() + 1; + } + } + return relativeEnd; + } + + + + private boolean newGene(Gene previousGene, String newGeneId) { + return previousGene == null || !newGeneId.equals(previousGene.getId()); + } + + private void updateTranscriptAndGeneCoords(Transcript transcript, Gene gene, Gtf gtf) { + if (transcript.getStart() > gtf.getStart()) { + transcript.setStart(gtf.getStart()); + } + if (transcript.getEnd() < gtf.getEnd()) { + transcript.setEnd(gtf.getEnd()); + } + if (gene.getStart() > gtf.getStart()) { + gene.setStart(gtf.getStart()); + } + if (gene.getEnd() < gtf.getEnd()) { + 
gene.setEnd(gtf.getEnd()); + } + } + + private void getGtfFileFromGeneDirectoryPath(Path geneDirectoryPath) { + for (String fileName : geneDirectoryPath.toFile().list()) { + if (fileName.endsWith(".gtf") || fileName.endsWith(".gtf.gz")) { + gtfFile = geneDirectoryPath.resolve(fileName); + break; + } + } + } + + private void getProteinFastaFileFromGeneDirectoryPath(Path geneDirectoryPath) { + for (String fileName : geneDirectoryPath.toFile().list()) { + if (fileName.endsWith(".pep.all.fa") || fileName.endsWith(".pep.all.fa.gz")) { + proteinFastaFile = geneDirectoryPath.resolve(fileName); + break; + } + } + } + + private void getCDnaFastaFileFromGeneDirectoryPath(Path geneDirectoryPath) { + for (String fileName : geneDirectoryPath.toFile().list()) { + if (fileName.endsWith(".cdna.all.fa") || fileName.endsWith(".cdna.all.fa.gz")) { + cDnaFastaFile = geneDirectoryPath.resolve(fileName); + break; + } + } + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilderIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilderIndexer.java index fb67c19b8b..4841f5ffe2 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilderIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/EnsemblGeneBuilderIndexer.java @@ -16,27 +16,44 @@ package org.opencb.cellbase.lib.builders; +import com.fasterxml.jackson.core.JsonProcessingException; import org.apache.commons.lang3.StringUtils; -import org.apache.poi.hssf.usermodel.HSSFSheet; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.ss.usermodel.*; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.opencb.biodata.formats.feature.mirbase.MirBaseParser; +import org.opencb.biodata.formats.feature.mirbase.MirBaseParserCallback; import org.opencb.biodata.formats.gaf.GafParser; import org.opencb.biodata.formats.io.FileFormatException; +import 
org.opencb.biodata.models.core.FeatureOntologyTermAnnotation; +import org.opencb.biodata.models.core.MiRnaGene; +import org.opencb.biodata.models.core.MirnaTarget; import org.opencb.biodata.models.core.Xref; -import org.opencb.biodata.models.core.*; -import org.opencb.biodata.models.variant.avro.*; +import org.opencb.biodata.models.variant.avro.Constraint; +import org.opencb.biodata.models.variant.avro.Expression; +import org.opencb.biodata.models.variant.avro.ExpressionCall; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.commons.utils.FileUtils; +import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.zip.GZIPInputStream; -public class EnsemblGeneBuilderIndexer extends GeneBuilderIndexer{ +import static org.opencb.cellbase.lib.EtlCommons.ENSEMBL_DATA; +import static org.opencb.cellbase.lib.builders.AbstractBuilder.PARSING_DONE_LOG_MESSAGE; +import static org.opencb.cellbase.lib.builders.AbstractBuilder.PARSING_LOG_MESSAGE; + +public class EnsemblGeneBuilderIndexer extends GeneBuilderIndexer { private static final String DESCRIPTION_SUFFIX = "_description"; private static final String XREF_SUFFIX = "_xref"; @@ -54,32 +71,30 @@ public EnsemblGeneBuilderIndexer(Path geneDirectoryPath) { public void index(Path geneDescriptionFile, Path xrefsFile, Path hgncFile, Path maneFile, Path lrgFile, Path uniprotIdMappingFile, Path proteinFastaFile, Path cDnaFastaFile, String species, Path geneExpressionFile, Path geneDrugFile, Path hpoFile, - Path disgenetFile, Path gnomadFile, Path 
geneOntologyAnnotationFile, Path miRBaseFile, Path miRTarBaseFile, - Path cancerGeneGensusFile, Path cancerHostpotFile, Path canonicalFile, Path tso500File, Path eglhHaemOncFile) - throws IOException, RocksDBException, FileFormatException { + Path gnomadFile, Path geneOntologyAnnotationFile, Path miRBaseFile, Path miRTarBaseFile, Path cancerGeneGensusFile, + Path cancerHostpotFile, Path canonicalFile) + throws IOException, RocksDBException, FileFormatException, CellBaseException { indexDescriptions(geneDescriptionFile); indexXrefs(xrefsFile, uniprotIdMappingFile); indexHgncIdMapping(hgncFile); - indexManeMapping(maneFile, "ensembl"); - indexLrgMapping(lrgFile, "ensembl"); + indexManeMapping(maneFile, ENSEMBL_DATA); + indexLrgMapping(lrgFile, ENSEMBL_DATA); indexProteinSequences(proteinFastaFile); indexCdnaSequences(cDnaFastaFile); indexExpression(species, geneExpressionFile); indexDrugs(geneDrugFile); - indexDiseases(hpoFile, disgenetFile); + indexDiseases(hpoFile); indexConstraints(gnomadFile); indexOntologyAnnotations(geneOntologyAnnotationFile); - indexMiRBase(miRBaseFile); + indexMiRBase(species, miRBaseFile); indexMiRTarBase(miRTarBaseFile); indexCancerGeneCensus(cancerGeneGensusFile); indexCancerHotspot(cancerHostpotFile); indexCanonical(canonicalFile); - indexTSO500(tso500File); - indexEGLHHaemOnc(eglhHaemOncFile); } private void indexDescriptions(Path geneDescriptionFile) throws IOException, RocksDBException { - logger.info("Loading gene description data..."); + logger.info(PARSING_LOG_MESSAGE, geneDescriptionFile); String[] fields; if (geneDescriptionFile != null && Files.exists(geneDescriptionFile) && Files.size(geneDescriptionFile) > 0) { List lines = Files.readAllLines(geneDescriptionFile, StandardCharsets.ISO_8859_1); @@ -91,6 +106,7 @@ private void indexDescriptions(Path geneDescriptionFile) throws IOException, Roc logger.warn("Gene description file " + geneDescriptionFile + " not found"); logger.warn("Gene description data not loaded"); } + 
logger.info(PARSING_DONE_LOG_MESSAGE); } public String getDescription(String id) throws RocksDBException { @@ -103,7 +119,7 @@ public String getDescription(String id) throws RocksDBException { } private void indexXrefs(Path xrefsFile, Path uniprotIdMappingFile) throws IOException, RocksDBException { - logger.info("Loading xref data..."); + logger.info(PARSING_LOG_MESSAGE, xrefsFile); String[] fields; if (xrefsFile != null && Files.exists(xrefsFile) && Files.size(xrefsFile) > 0) { List lines = Files.readAllLines(xrefsFile, StandardCharsets.ISO_8859_1); @@ -165,6 +181,7 @@ private void indexXrefs(Path xrefsFile, Path uniprotIdMappingFile) throws IOExce logger.warn("Uniprot if mapping file " + uniprotIdMappingFile + " not found"); logger.warn("Protein mapping into xref data not loaded"); } + logger.info(PARSING_DONE_LOG_MESSAGE); } public List getXrefs(String id) throws RocksDBException, IOException { @@ -183,6 +200,10 @@ public List getXrefs(String id) throws RocksDBException, IOException { } private void indexExpression(String species, Path geneExpressionFile) throws IOException, RocksDBException { + if (geneExpressionFile == null) { + return; + } + Map> geneExpressionMap = new HashMap<>(); if (geneExpressionFile != null && Files.exists(geneExpressionFile) && Files.size(geneExpressionFile) > 0 && species != null) { @@ -233,131 +254,12 @@ public List getExpression(String id) throws RocksDBException, IOExce return rocksDbManager.getExpression(rocksdb, key); } - private void indexDrugs(Path geneDrugFile) throws IOException, RocksDBException { - if (geneDrugFile != null && Files.exists(geneDrugFile) && Files.size(geneDrugFile) > 0) { - logger.info("Loading gene-drug interaction data from '{}'", geneDrugFile); - BufferedReader br = FileUtils.newBufferedReader(geneDrugFile); - - // Skip header - br.readLine(); - - int lineCounter = 1; - String line; - String currentGene = ""; - List drugs = new ArrayList<>(); - while ((line = br.readLine()) != null) { - String[] parts = 
line.split("\t"); - String geneName = parts[0]; - if (currentGene.equals("")) { - currentGene = geneName; - } else if (!currentGene.equals(geneName)) { - rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); - drugs = new ArrayList<>(); - currentGene = geneName; - } - - String source = null; - if (parts.length >= 4) { - source = parts[3]; - } - - String interactionType = null; - if (parts.length >= 5) { - interactionType = parts[4]; - } - - String drugName = null; - if (parts.length >= 8) { - // if drug name column is empty, use drug claim name instead - drugName = StringUtils.isEmpty(parts[7]) ? parts[6] : parts[7]; - } - if (StringUtils.isEmpty(drugName)) { - // no drug name - continue; - } - - String chemblId = null; - if (parts.length >= 9) { - chemblId = parts[8]; - } - - List publications = new ArrayList<>(); - if (parts.length >= 10 && parts[9] != null) { - publications = Arrays.asList(parts[9].split(",")); - } - - GeneDrugInteraction drug = new GeneDrugInteraction( - geneName, drugName, source, null, null, interactionType, chemblId, publications); - drugs.add(drug); - lineCounter++; - } - br.close(); - // update last gene - rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); - } else { - logger.warn("Gene drug file " + geneDrugFile + " not found"); - logger.warn("Ignoring " + geneDrugFile); - } - } - - public List getDrugs(String id) throws RocksDBException, IOException { - String key = id + DRUGS_SUFFIX; - return rocksDbManager.getDrugs(rocksdb, key); - } - - private void indexDiseases(Path hpoFilePath, Path disgenetFilePath) throws IOException, RocksDBException { - Map> geneDiseaseAssociationMap = new HashMap<>(50000); - String line; - - if (hpoFilePath != null && hpoFilePath.toFile().exists() && Files.size(hpoFilePath) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFilePath)) { - // skip first header line - bufferedReader.readLine(); - while ((line = bufferedReader.readLine()) != null) { - 
String[] fields = line.split("\t"); - String omimId = fields[6]; - String geneSymbol = fields[3]; - String hpoId = fields[0]; - String diseaseName = fields[1]; - GeneTraitAssociation disease = - new GeneTraitAssociation(omimId, diseaseName, hpoId, 0f, 0, new ArrayList<>(), new ArrayList<>(), "hpo"); - addValueToMapElement(geneDiseaseAssociationMap, geneSymbol, disease); - } - } - } - - if (disgenetFilePath != null && disgenetFilePath.toFile().exists() && Files.size(disgenetFilePath) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(disgenetFilePath)) { - // skip first header line - bufferedReader.readLine(); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - String diseaseId = fields[4]; - String diseaseName = fields[5]; - String score = fields[9]; - String numberOfPubmeds = fields[13].trim(); - String numberOfSNPs = fields[14]; - String source = fields[15]; - GeneTraitAssociation disease = new GeneTraitAssociation(diseaseId, diseaseName, "", Float.parseFloat(score), - Integer.parseInt(numberOfPubmeds), Collections.singletonList(numberOfSNPs), Collections.singletonList(source), - "disgenet"); - addValueToMapElement(geneDiseaseAssociationMap, fields[1], disease); - } - } - } - - for (Map.Entry> entry : geneDiseaseAssociationMap.entrySet()) { - rocksDbManager.update(rocksdb, entry.getKey() + DISEASE_SUFFIX, entry.getValue()); + private void indexConstraints(Path gnomadFile) throws IOException, RocksDBException { + if (gnomadFile == null) { + return; } - } - - public List getDiseases(String id) throws RocksDBException, IOException { - String key = id + DISEASE_SUFFIX; - return rocksDbManager.getDiseases(rocksdb, key); - } - private void indexConstraints(Path gnomadFile) throws IOException, RocksDBException { - if (gnomadFile != null && Files.exists(gnomadFile) && Files.size(gnomadFile) > 0) { + if (Files.exists(gnomadFile) && Files.size(gnomadFile) > 0) { logger.info("Loading OE scores from '{}'", 
gnomadFile); InputStream inputStream = Files.newInputStream(gnomadFile); BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(inputStream))); @@ -384,7 +286,7 @@ private void indexConstraints(Path gnomadFile) throws IOException, RocksDBExcept rocksDbManager.update(rocksdb, transcriptIdentifier + CONSTRAINT_SUFFIX, constraints); if ("TRUE".equalsIgnoreCase(canonical)) { - rocksDbManager.update(rocksdb, geneIdentifier + CONSTRAINT_SUFFIX, constraints); + rocksDbManager.update(rocksdb, geneIdentifier + CONSTRAINT_SUFFIX, constraints); } } br.close(); @@ -413,6 +315,10 @@ private void addConstraint(List constraints, String name, String val } private void indexOntologyAnnotations(Path goaFile) throws IOException, RocksDBException { + if (goaFile == null) { + return; + } + Map> annotations = new HashMap<>(); if (goaFile != null && Files.exists(goaFile) && Files.size(goaFile) > 0) { logger.info("Loading GO annotation from '{}'", goaFile); @@ -432,66 +338,17 @@ public List getOntologyAnnotations(String id) thr return rocksDbManager.getOntologyAnnotations(rocksdb, key); } - private void indexMiRBase(Path miRBaseFile) throws IOException, RocksDBException { - if (miRBaseFile != null && Files.exists(miRBaseFile) && Files.size(miRBaseFile) > 0) { - logger.info("Loading mirna from '{}'", miRBaseFile); - FileInputStream fileInputStream = new FileInputStream(miRBaseFile.toFile()); - HSSFWorkbook workbook = new HSSFWorkbook(fileInputStream); - HSSFSheet sheet = workbook.getSheetAt(0); - Iterator iterator = sheet.iterator(); - while (iterator.hasNext()) { - Row currentRow = iterator.next(); - Iterator cellIterator = currentRow.iterator(); - - org.apache.poi.ss.usermodel.Cell cell = cellIterator.next(); - String miRBaseAccession = cell.getStringCellValue(); - - cell = cellIterator.next(); - String miRBaseID = cell.getStringCellValue(); - - cell = cellIterator.next(); - String status = cell.getStringCellValue(); - - cell = cellIterator.next(); - String 
sequence = cell.getStringCellValue(); - - cell = cellIterator.next(); - String mature1Accession = cell.getStringCellValue(); - - cell = cellIterator.next(); - String mature1Id = cell.getStringCellValue(); - - cell = cellIterator.next(); - String mature1Sequence = cell.getStringCellValue(); - - String mature2Accession = ""; - String mature2Id = ""; - String mature2Sequence = ""; - if (cellIterator.hasNext()) { - cell = cellIterator.next(); - mature2Accession = cell.getStringCellValue(); - - cell = cellIterator.next(); - mature2Id = cell.getStringCellValue(); - - cell = cellIterator.next(); - mature2Sequence = cell.getStringCellValue(); - } + private void indexMiRBase(String species, Path miRBaseFile) throws IOException { + if (miRBaseFile == null) { + return; + } - MiRnaGene miRNAGene = new MiRnaGene(miRBaseAccession, miRBaseID, status, sequence, new ArrayList<>()); - int cdnaStart = sequence.indexOf(mature1Sequence); - int cdnaEnd = cdnaStart + mature1Sequence.length(); - miRNAGene.addMiRNAMature(mature1Accession, mature1Id, mature1Sequence, cdnaStart, cdnaEnd); + logger.info(PARSING_LOG_MESSAGE, miRBaseFile); - cdnaStart = sequence.indexOf(mature2Sequence); - cdnaEnd = cdnaStart + mature2Sequence.length(); - miRNAGene.addMiRNAMature(mature2Accession, mature2Id, mature2Sequence, cdnaStart, cdnaEnd); + MirBaseCallback callback = new MirBaseCallback(rocksdb, rocksDbManager); + MirBaseParser.parse(miRBaseFile, species, callback); - rocksDbManager.update(rocksdb, miRBaseID + MIRBASE_SUFFIX, miRNAGene); - } - } else { - logger.error("mirna file not found"); - } + logger.info(PARSING_DONE_LOG_MESSAGE, miRBaseFile); } public MiRnaGene getMirnaGene(String transcriptId) throws RocksDBException, IOException { @@ -509,117 +366,11 @@ public MiRnaGene getMirnaGene(String transcriptId) throws RocksDBException, IOEx return null; } - private void indexMiRTarBase(Path miRTarBaseFile) throws IOException, RocksDBException { - if (miRTarBaseFile != null && Files.exists(miRTarBaseFile) 
&& Files.size(miRTarBaseFile) > 0) { - logger.info("Loading mirna targets from '{}'", miRTarBaseFile); - FileInputStream file = new FileInputStream(miRTarBaseFile.toFile()); - Workbook workbook = new XSSFWorkbook(file); - Sheet sheet = workbook.getSheetAt(0); - Iterator iterator = sheet.iterator(); - String currentMiRTarBaseId = null; - String currentMiRNA = null; - String currentGene = null; - List targetGenes = new ArrayList<>(); - Map> geneToMirna = new HashMap<>(); - while (iterator.hasNext()) { - Row currentRow = iterator.next(); - - Iterator cellIterator = currentRow.iterator(); - Cell cell = cellIterator.next(); - - // Iterate columns - String miRTarBaseId = cell.getStringCellValue(); - - // skip header - if (miRTarBaseId.startsWith("miRTarBase")) { - continue; - } - - if (currentMiRTarBaseId == null) { - currentMiRTarBaseId = miRTarBaseId; - } - - cell = cellIterator.next(); - String miRNA = cell.getStringCellValue(); - if (currentMiRNA == null) { - currentMiRNA = miRNA; - } - - // Skip species - cellIterator.next(); - - // Read target gene - cell = cellIterator.next(); - String geneName = cell.getStringCellValue(); - if (currentGene == null) { - currentGene = geneName; - } - - // Skip entrez gene - cellIterator.next(); - // Skip species - cellIterator.next(); - - if (!miRTarBaseId.equals(currentMiRTarBaseId) || !geneName.equals(currentGene)) { - // new entry, store current one - MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, targetGenes); - addValueToMapElement(geneToMirna, currentGene, miRnaTarget); - targetGenes = new ArrayList<>(); - currentGene = geneName; - currentMiRTarBaseId = miRTarBaseId; - currentMiRNA = miRNA; - } - - // experiment - cell = cellIterator.next(); - String experiment = cell.getStringCellValue(); - - // support type - cell = cellIterator.next(); - String supportType = cell.getStringCellValue(); - - // pubmed - cell = cellIterator.next(); - String pubmed; - // seems to vary, so check both - 
if (cell.getCellType().equals(CellType.NUMERIC)) { -// pubmed = String.valueOf(cell.getNumericCellValue()); - pubmed = Integer.toString(Double.valueOf(cell.getNumericCellValue()).intValue()); - } else { - pubmed = cell.getStringCellValue(); - } - - targetGenes.add(new TargetGene(experiment, supportType, pubmed)); - } - - // parse last entry - MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, - targetGenes); - addValueToMapElement(geneToMirna, currentGene, miRnaTarget); - - for (Map.Entry> entry : geneToMirna.entrySet()) { - rocksDbManager.update(rocksdb, entry.getKey() + MIRTARBASE_SUFFIX, entry.getValue()); - } - } else { - logger.error("mirtarbase file not found"); - } - } - public List getMirnaTargets(String geneName) throws RocksDBException, IOException { String key = geneName + MIRTARBASE_SUFFIX; return rocksDbManager.getMirnaTargets(rocksdb, key); } - private static void addValueToMapElement(Map> map, String key, T value) { - if (map.containsKey(key)) { - map.get(key).add(value); - } else { - List valueList = new ArrayList<>(); - valueList.add(value); - map.put(key, valueList); - } - } - protected void indexCanonical(Path canonocalFile) throws IOException, RocksDBException { // Gene Transcript Canonical // ENSG00000210049.1 ENST00000387314.1 1 @@ -652,4 +403,30 @@ public String getCanonical(String transcriptId) throws RocksDBException, IOExcep } return new String(bytes); } + + // Implementation of the MirBaseParserCallback function + public class MirBaseCallback implements MirBaseParserCallback { + + private RocksDB rocksDB; + private RocksDbManager rocksDbManager; + private Logger logger; + + public MirBaseCallback(RocksDB rocksDB, RocksDbManager rocksDbManager) { + this.rocksDB = rocksDB; + this.rocksDbManager = rocksDbManager; + this.logger = LoggerFactory.getLogger(this.getClass()); + } + + @Override + public boolean processMiRnaGene(MiRnaGene miRnaGene) { + try { + rocksDbManager.update(rocksdb, miRnaGene.getId() 
+ MIRBASE_SUFFIX, miRnaGene); + } catch (JsonProcessingException | RocksDBException e) { + logger.warn("Something wrong happened when processing miRNA gene {}: {}", miRnaGene.getId(), + StringUtils.join(e.getStackTrace(), "\t")); + return false; + } + return true; + } + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java index cd0863a259..785b296982 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilder.java @@ -16,904 +16,105 @@ package org.opencb.cellbase.lib.builders; -import htsjdk.tribble.readers.TabixReader; -import org.apache.commons.lang3.StringUtils; -import org.opencb.biodata.formats.feature.gff.Gff2; -import org.opencb.biodata.formats.feature.gtf.Gtf; -import org.opencb.biodata.formats.feature.gtf.io.GtfReader; -import org.opencb.biodata.formats.io.FileFormatException; -import org.opencb.biodata.models.core.*; -import org.opencb.biodata.tools.sequence.FastaIndex; -import org.opencb.cellbase.core.ParamConstants; +import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.SpeciesConfiguration; -import org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.rocksdb.RocksDBException; +import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer; -import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; +import java.util.List; -public class GeneBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.*; +import static org.opencb.cellbase.lib.builders.EnsemblGeneBuilder.ENSEMBL_GENE_BASENAME; +import static org.opencb.cellbase.lib.builders.RefSeqGeneBuilder.REFSEQ_GENE_BASENAME; +import static 
org.opencb.cellbase.lib.builders.RefSeqGeneBuilder.REFSEQ_GENE_OUTPUT_FILENAME; - private Map transcriptDict; - private Map exonDict; +public class GeneBuilder extends AbstractBuilder { - private Path gtfFile; - private Path proteinFastaFile; - private Path cDnaFastaFile; - private Path geneDescriptionFile; - private Path xrefsFile; - private Path hgncFile; - private Path maneFile; - private Path lrgFile; - private Path uniprotIdMappingFile; - private Path tfbsFile; - private Path tabixFile; - private Path geneExpressionFile; - private Path geneDrugFile; - private Path hpoFile; - private Path disgenetFile; - private Path genomeSequenceFilePath; - private Path gnomadFile; - private Path geneOntologyAnnotationFile; - private Path miRBaseFile; - private Path miRTarBaseFile; - private Path cancerGeneCensusFile; - private Path cancerHostpotFile; - private Path ensemblCanonicalFile; - private Path tso500File; - private Path eglhHaemOncFile; - private boolean flexibleGTFParsing; + private Path downloadPath; + private EnsemblGeneBuilder ensemblGeneBuilder; + private RefSeqGeneBuilder refSeqGeneBuilder; - // source for genes is either ensembl or refseq - private final String SOURCE = ParamConstants.QueryParams.ENSEMBL.key(); - private SpeciesConfiguration speciesConfiguration; + public GeneBuilder(Path downloadPath, Path buildPath, SpeciesConfiguration speciesConfiguration, boolean flexibleGTFParsing, + CellBaseConfiguration configuration) { + super(null); - private int geneCounter; - private ArrayList geneList; - private String geneName; - private int transcriptCounter; - private ArrayList transcriptList; - private String transcriptName; - private int exonCounter; - private String feature; - private Gtf nextGtfToReturn; + this.downloadPath = downloadPath; - public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, SpeciesConfiguration speciesConfiguration, - CellBaseSerializer serializer) throws CellBaseException { - this(geneDirectoryPath, 
genomeSequenceFastaFile, speciesConfiguration, false, serializer); - } - - public GeneBuilder(Path geneDirectoryPath, Path genomeSequenceFastaFile, SpeciesConfiguration speciesConfiguration, - boolean flexibleGTFParsing, CellBaseSerializer serializer) throws CellBaseException { - this(null, geneDirectoryPath.resolve("description.txt"), - geneDirectoryPath.resolve("xrefs.txt"), - geneDirectoryPath.resolve("hgnc_complete_set_2023-11-01.txt"), - geneDirectoryPath.resolve("MANE.GRCh38.v1.1.summary.txt.gz"), - geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"), - geneDirectoryPath.resolve("idmapping_selected.tab.gz"), - geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz"), - geneDirectoryPath.getParent().resolve("regulation/motif_features.gff.gz.tbi"), - geneDirectoryPath.resolve("allgenes_updown_in_organism_part.tab.gz"), - geneDirectoryPath.resolve("dgidb.tsv"), - geneDirectoryPath.resolve("phenotype_to_genes.txt"), - geneDirectoryPath.resolve("all_gene_disease_associations.tsv.gz"), - geneDirectoryPath.resolve("gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz"), - geneDirectoryPath.resolve("goa_human.gaf.gz"), - geneDirectoryPath.getParent().resolve("regulation/miRNA.xls"), - geneDirectoryPath.getParent().resolve("regulation/hsa_MTI.xlsx"), - geneDirectoryPath.resolve("cancer-gene-census.tsv"), - geneDirectoryPath.resolve("hotspots_v2.xls"), - geneDirectoryPath.resolve("ensembl_canonical.txt"), - geneDirectoryPath.resolve("TSO500_transcripts.txt"), - geneDirectoryPath.resolve("EGLH_HaemOnc_transcripts.txt"), - genomeSequenceFastaFile, - speciesConfiguration, flexibleGTFParsing, serializer); + // Create Ensembl gene builder + CellBaseJsonFileSerializer ensemblGeneSerializer = new CellBaseJsonFileSerializer(buildPath, ENSEMBL_GENE_BASENAME); + this.ensemblGeneBuilder = new EnsemblGeneBuilder(downloadPath.resolve(ENSEMBL_DATA), speciesConfiguration, flexibleGTFParsing, + configuration, ensemblGeneSerializer); - 
getGtfFileFromGeneDirectoryPath(geneDirectoryPath); - getProteinFastaFileFromGeneDirectoryPath(geneDirectoryPath); - getCDnaFastaFileFromGeneDirectoryPath(geneDirectoryPath); + // Create RefSeq gene builder + CellBaseJsonFileSerializer refSeqGeneSerializer = new CellBaseJsonFileSerializer(buildPath, REFSEQ_GENE_BASENAME); + this.refSeqGeneBuilder = new RefSeqGeneBuilder(downloadPath.resolve(REFSEQ_DATA), speciesConfiguration, configuration, + refSeqGeneSerializer); } - public GeneBuilder(Path gtfFile, Path geneDescriptionFile, Path xrefsFile, Path hgncFile, Path maneFile, - Path lrgFile, Path uniprotIdMappingFile, Path tfbsFile, Path tabixFile, Path geneExpressionFile, - Path geneDrugFile, Path hpoFile, Path disgenetFile, Path gnomadFile, - Path geneOntologyAnnotationFile, Path miRBaseFile, Path miRTarBaseFile, Path cancerGeneCensusFile, - Path cancerHostpotFile, Path ensemblCanonicalFile, Path tso500File, Path eglhHaemOncFile, - Path genomeSequenceFilePath, SpeciesConfiguration speciesConfiguration, boolean flexibleGTFParsing, - CellBaseSerializer serializer) { - super(serializer); - - this.gtfFile = gtfFile; - this.geneDescriptionFile = geneDescriptionFile; - this.xrefsFile = xrefsFile; - this.hgncFile = hgncFile; - this.maneFile = maneFile; - this.lrgFile = lrgFile; - this.uniprotIdMappingFile = uniprotIdMappingFile; - this.tfbsFile = tfbsFile; - this.tabixFile = tabixFile; - this.geneExpressionFile = geneExpressionFile; - this.geneDrugFile = geneDrugFile; - this.hpoFile = hpoFile; - this.disgenetFile = disgenetFile; - this.gnomadFile = gnomadFile; - this.geneOntologyAnnotationFile = geneOntologyAnnotationFile; - this.miRBaseFile = miRBaseFile; - this.miRTarBaseFile = miRTarBaseFile; - this.cancerGeneCensusFile = cancerGeneCensusFile; - this.cancerHostpotFile = cancerHostpotFile; - this.ensemblCanonicalFile = ensemblCanonicalFile; - this.tso500File = tso500File; - this.eglhHaemOncFile = eglhHaemOncFile; - this.genomeSequenceFilePath = genomeSequenceFilePath; - 
this.speciesConfiguration = speciesConfiguration; - this.flexibleGTFParsing = flexibleGTFParsing; + public void check() throws Exception { + // Check Ensembl requirements + ensemblGeneBuilder.check(); - transcriptDict = new HashMap<>(250000); - exonDict = new HashMap<>(8000000); + // Check RefSeq requirements + refSeqGeneBuilder.check(); } + @Override public void parse() throws Exception { - Gene gene = null; - Transcript transcript; - Exon exon = null; - int cdna = 1; - int cds = 1; - EnsemblGeneBuilderIndexer indexer = new EnsemblGeneBuilderIndexer(gtfFile.getParent()); - - try { - // process files and put values in rocksdb - indexer.index(geneDescriptionFile, xrefsFile, hgncFile, maneFile, lrgFile, uniprotIdMappingFile, - proteinFastaFile, cDnaFastaFile, speciesConfiguration.getScientificName(), geneExpressionFile, - geneDrugFile, hpoFile, disgenetFile, gnomadFile, geneOntologyAnnotationFile, miRBaseFile, - miRTarBaseFile, cancerGeneCensusFile, cancerHostpotFile, ensemblCanonicalFile, - tso500File, eglhHaemOncFile); - - TabixReader tabixReader = null; - if (!Files.exists(tfbsFile) || !Files.exists(tabixFile)) { - logger.error("Tfbs or tabix file not found. 
Download them and try again."); - } else { - tabixReader = new TabixReader(tfbsFile.toAbsolutePath().toString(), tabixFile.toAbsolutePath().toString()); - } - - // Preparing the fasta file for fast accessing -// System.out.println("genomeSequenceFilePath.toString() = " + genomeSequenceFilePath.toString()); - FastaIndex fastaIndex = new FastaIndex(genomeSequenceFilePath); - - // Empty transcript and exon dictionaries - transcriptDict.clear(); - exonDict.clear(); - logger.info("Parsing gtf..."); - GtfReader gtfReader = new GtfReader(gtfFile); - - // Gene->Transcript->Feature->GTF line - Map>> gtfMap = null; - if (flexibleGTFParsing) { - gtfMap = loadGTFMap(gtfReader); - initializePointers(gtfMap); - } - - Gtf gtf; - while ((gtf = getGTFEntry(gtfReader, gtfMap)) != null) { - - if (gtf.getFeature().equals("gene") || gtf.getFeature().equals("transcript") - || gtf.getFeature().equals("UTR") || gtf.getFeature().equals("Selenocysteine")) { - continue; - } - - String geneId = gtf.getAttributes().get("gene_id"); - String transcriptId = gtf.getAttributes().get("transcript_id"); - String geneName = gtf.getAttributes().get("gene_name"); - if (newGene(gene, geneId)) { - // If new geneId is different from the current then we must serialize before data new gene - if (gene != null) { - serializer.serialize(gene); - } - - GeneAnnotation geneAnnotation = new GeneAnnotation(indexer.getExpression(geneId), indexer.getDiseases(geneName), - indexer.getDrugs(geneName), indexer.getConstraints(geneId), indexer.getMirnaTargets(geneName), - indexer.getCancerGeneCensus(geneName), indexer.getCancerHotspot(geneName)); - - gene = new Gene(geneId, geneName, gtf.getSequenceName().replaceFirst("chr", ""), - gtf.getStart(), gtf.getEnd(), gtf.getStrand(), gtf.getAttributes().get("gene_version"), - gtf.getAttributes().get("gene_biotype"), "KNOWN", SOURCE, indexer.getDescription(geneId), - new ArrayList<>(), indexer.getMirnaGene(transcriptId), geneAnnotation); - } - - // Check if Transcript exist in the 
Gene Set of transcripts - if (!transcriptDict.containsKey(transcriptId)) { - transcript = getTranscript(gene, indexer, tabixReader, gtf, transcriptId); - } else { - transcript = gene.getTranscripts().get(transcriptDict.get(transcriptId)); - } - - // At this point gene and transcript objects are set up - // Update gene and transcript genomic coordinates, start must be the - // lower, and end the higher - updateTranscriptAndGeneCoords(transcript, gene, gtf); - - String transcriptIdWithoutVersion = transcript.getId().split("\\.")[0]; - if (gtf.getFeature().equalsIgnoreCase("exon")) { - // Obtaining the exon sequence - String exonId = gtf.getAttributes().get("exon_id") + "." + gtf.getAttributes().get("exon_version"); - String exonSequence = fastaIndex.query(gtf.getSequenceName(), gtf.getStart(), gtf.getEnd()); - - exon = new Exon(exonId, gtf.getSequenceName().replaceFirst("chr", ""), - gtf.getStart(), gtf.getEnd(), gtf.getStrand(), 0, 0, 0, 0, 0, 0, -1, Integer.parseInt(gtf - .getAttributes().get("exon_number")), exonSequence); - transcript.getExons().add(exon); - - exonDict.put(transcriptIdWithoutVersion + "_" + exon.getExonNumber(), exon); - if (gtf.getAttributes().get("exon_number").equals("1")) { - cdna = 1; - cds = 1; - } else { - // with every exon we update cDNA length with the previous exon length - cdna += exonDict.get(transcriptIdWithoutVersion + "_" + (exon.getExonNumber() - 1)).getEnd() - - exonDict.get(transcriptIdWithoutVersion + "_" + (exon.getExonNumber() - 1)).getStart() + 1; - } - } else { - exon = exonDict.get(transcriptIdWithoutVersion + "_" + exon.getExonNumber()); - if (gtf.getFeature().equalsIgnoreCase("CDS")) { - // Protein ID is only present in CDS lines - String proteinId = gtf.getAttributes().get("protein_id") != null - ? gtf.getAttributes().get("protein_id") + "." 
+ gtf.getAttributes().get("protein_version") - : ""; - transcript.setProteinId(proteinId); - transcript.setProteinSequence(indexer.getProteinFasta(proteinId)); - - if (gtf.getStrand().equals("+") || gtf.getStrand().equals("1")) { - // CDS states the beginning of coding start - exon.setGenomicCodingStart(gtf.getStart()); - exon.setGenomicCodingEnd(gtf.getEnd()); - - // cDNA coordinates - exon.setCdnaCodingStart(gtf.getStart() - exon.getStart() + cdna); - exon.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); - // Set cdnaCodingEnd to prevent those cases without stop_codon - - transcript.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); - exon.setCdsStart(cds); - exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); - - // increment in the coding length - cds += gtf.getEnd() - gtf.getStart() + 1; - transcript.setCdsLength(cds - 1); // Set cdnaCodingEnd to prevent those cases without stop_codon - - exon.setPhase(Integer.parseInt(gtf.getFrame())); - - if (transcript.getGenomicCodingStart() == 0 || transcript.getGenomicCodingStart() > gtf.getStart()) { - transcript.setGenomicCodingStart(gtf.getStart()); - } - if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) { - transcript.setGenomicCodingEnd(gtf.getEnd()); - } - // only first time - if (transcript.getCdnaCodingStart() == 0) { - transcript.setCdnaCodingStart(gtf.getStart() - exon.getStart() + cdna); - } - // strand - - } else { - // CDS states the beginning of coding start - exon.setGenomicCodingStart(gtf.getStart()); - exon.setGenomicCodingEnd(gtf.getEnd()); - // cDNA coordinates - // cdnaCodingStart points to the same base position than genomicCodingEnd - exon.setCdnaCodingStart(exon.getEnd() - gtf.getEnd() + cdna); - // cdnaCodingEnd points to the same base position than genomicCodingStart - exon.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); - // Set cdnaCodingEnd to prevent those cases without stop_codon - transcript.setCdnaCodingEnd(exon.getEnd() - 
gtf.getStart() + cdna); - exon.setCdsStart(cds); - exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); - - // increment in the coding length - cds += gtf.getEnd() - gtf.getStart() + 1; - transcript.setCdsLength(cds - 1); // Set cdnaCodingEnd to prevent those cases without stop_codon - exon.setPhase(Integer.parseInt(gtf.getFrame())); - - if (transcript.getGenomicCodingStart() == 0 || transcript.getGenomicCodingStart() > gtf.getStart()) { - transcript.setGenomicCodingStart(gtf.getStart()); - } - if (transcript.getGenomicCodingEnd() == 0 || transcript.getGenomicCodingEnd() < gtf.getEnd()) { - transcript.setGenomicCodingEnd(gtf.getEnd()); - } - // only first time - if (transcript.getCdnaCodingStart() == 0) { - // cdnaCodingStart points to the same base position than genomicCodingEnd - transcript.setCdnaCodingStart(exon.getEnd() - gtf.getEnd() + cdna); - } - } - - } -// if (gtf.getFeature().equalsIgnoreCase("start_codon")) { -// // nothing to do -// System.out.println("Empty block, this should be redesigned"); -// } - if (gtf.getFeature().equalsIgnoreCase("stop_codon")) { - // setCdnaCodingEnd = false; // stop_codon found, cdnaCodingEnd will be set here, - // no need to set it at the beginning of next feature - if (exon.getStrand().equals("+")) { - updateStopCodingDataPositiveExon(exon, cdna, cds, gtf); - - cds += gtf.getEnd() - gtf.getStart(); - // If stop_codon appears, overwrite values - transcript.setGenomicCodingEnd(gtf.getEnd()); - transcript.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); - transcript.setCdsLength(cds - 1); - - } else { - updateNegativeExonCodingData(exon, cdna, cds, gtf); - - cds += gtf.getEnd() - gtf.getStart(); - // If stop_codon appears, overwrite values - transcript.setGenomicCodingStart(gtf.getStart()); - // cdnaCodingEnd points to the same base position than genomicCodingStart - transcript.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); - transcript.setCdsLength(cds - 1); - } - } - } - } - - // last gene must be 
serialized - serializer.serialize(gene); - - // cleaning - gtfReader.close(); - serializer.close(); - fastaIndex.close(); - indexer.close(); - } catch (Exception e) { - indexer.close(); - throw e; - } - } - - private Transcript getTranscript(Gene gene, EnsemblGeneBuilderIndexer indexer, TabixReader tabixReader, Gtf gtf, String transcriptId) - throws IOException, RocksDBException { - Map gtfAttributes = gtf.getAttributes(); + // Check folders and files before building + check(); - // To match Ensembl, we set the ID as transcript+version. This also matches the Ensembl website. - String transcriptIdWithVersion = transcriptId + "." + gtfAttributes.get("transcript_version"); - String biotype = gtfAttributes.get("transcript_biotype") != null ? gtfAttributes.get("transcript_biotype") : ""; - String transcriptChromosome = gtf.getSequenceName().replaceFirst("chr", ""); - List transcriptTfbses = getTranscriptTfbses(gtf, transcriptChromosome, tabixReader); + // Build Ensembl genes + ensemblGeneBuilder.parse(); - List ontologyAnnotations = getOntologyAnnotations(indexer.getXrefs(transcriptId), indexer); - TranscriptAnnotation transcriptAnnotation = new TranscriptAnnotation(ontologyAnnotations, indexer.getConstraints(transcriptId)); - - Transcript transcript = new Transcript(transcriptIdWithVersion, gtfAttributes.get("transcript_name"), transcriptChromosome, - gtf.getStart(), gtf.getEnd(), gtf.getStrand(), biotype, "KNOWN", - 0, 0, 0, 0, 0, - indexer.getCdnaFasta(transcriptIdWithVersion), "", "", "", - gtfAttributes.get("transcript_version"), SOURCE, new ArrayList<>(), indexer.getXrefs(transcriptId), transcriptTfbses, - new HashSet<>(), transcriptAnnotation); - - // Adding Ids appearing in the GTF to the xrefs is required, since for some unknown reason the ENSEMBL - // Perl API often doesn't return all genes resulting in an incomplete xrefs.txt file. 
We must ensure - // that the xrefs array contains all ids present in the GTF file - addGtfXrefs(transcript, gene, gtfAttributes); - - // Add HGNC ID mappings, with this we can know which Ensembl and Refseq transcripts match to HGNC ID - String hgncId = indexer.getHgncId(gene.getName()); - if (StringUtils.isNotEmpty(hgncId)) { - transcript.getXrefs().add(new Xref(hgncId, "hgnc_id", "HGNC ID")); - } - - // Add MANE Select mappings, with this we can know which Ensembl and Refseq transcripts match according to MANE - for (String suffix: Arrays.asList("refseq", "refseq_protein")) { - String maneRefSeq = indexer.getMane(transcriptIdWithVersion, suffix); - if (StringUtils.isNotEmpty(maneRefSeq)) { - transcript.getXrefs().add(new Xref(maneRefSeq, "mane_select_" + suffix, - "MANE Select RefSeq" + (suffix.contains("_") ? " Protein" : ""))); - } - } - - // Add LRG mappings, with this we can know which Ensembl and Refseq transcripts match according to LRG - String lrgRefSeq = indexer.getLrg(transcriptIdWithVersion, "refseq"); - if (StringUtils.isNotEmpty(lrgRefSeq)) { - transcript.getXrefs().add(new Xref(lrgRefSeq, "lrg_refseq", "LRG RefSeq")); - } - - // Add Flags - // 1. GTF tags - String tags = gtf.getAttributes().get("tag"); - if (StringUtils.isNotEmpty(tags)) { - transcript.getFlags().addAll(Arrays.asList(tags.split(","))); - } - // 2. TSL - String supportLevel = gtfAttributes.get("transcript_support_level"); - if (StringUtils.isNotEmpty(supportLevel)) { - // split on space so "5 (assigned to previous version 3)" and "5" both become "TSL:5" - String truncatedSupportLevel = supportLevel.split(" ")[0]; - transcript.getFlags().add("TSL:" + truncatedSupportLevel); - } - // 3. MANE Flag - String maneFlag = indexer.getMane(transcriptIdWithVersion, "flag"); - if (StringUtils.isNotEmpty(maneFlag)) { - transcript.getFlags().add(maneFlag); - } - // 4. 
LRG Flag - String lrg = indexer.getLrg(transcriptIdWithVersion, "ensembl"); - if (StringUtils.isNotEmpty(lrg)) { - transcript.getFlags().add("LRG"); + // Build RefSeq genes + if (!Files.exists(downloadPath.resolve(REFSEQ_DATA).resolve(REFSEQ_GENE_OUTPUT_FILENAME))) { + refSeqGeneBuilder.parse(); } else { - for (Xref xref : transcript.getXrefs()) { - if (xref.getId().startsWith("LRG_") && xref.getId().contains("t")) { - transcript.getFlags().add("LRG"); - } - } - } - // 5. Ensembl Canonical - String canonicalFlag = indexer.getCanonical(transcriptIdWithVersion); - if (StringUtils.isNotEmpty(canonicalFlag)) { - transcript.getFlags().add(canonicalFlag); + logger.info(DATA_ALREADY_BUILT, getDataName(REFSEQ_DATA) + " gene"); } - // 6. TSO500 and EGLH HaemOnc - String maneRefSeq = indexer.getMane(transcriptIdWithVersion, "refseq"); - if (StringUtils.isNotEmpty(maneRefSeq)) { - String tso500Flag = indexer.getTSO500(maneRefSeq.split("\\.")[0]); - if (StringUtils.isNotEmpty(tso500Flag)) { - transcript.getFlags().add(tso500Flag); - } - - String eglhHaemOncFlag = indexer.getEGLHHaemOnc(maneRefSeq.split("\\.")[0]); - if (StringUtils.isNotEmpty(eglhHaemOncFlag)) { - transcript.getFlags().add(eglhHaemOncFlag); - } - } - - gene.getTranscripts().add(transcript); - - // Do not change order!! 
size()-1 is the index of the transcript ID - transcriptDict.put(transcriptId, gene.getTranscripts().size() - 1); - return transcript; - } - private List getOntologyAnnotations(List xrefs, EnsemblGeneBuilderIndexer indexer) - throws IOException, RocksDBException { - if (xrefs == null || indexer == null) { - return null; - } - List annotations = new ArrayList<>(); - for (Xref xref : xrefs) { - if (xref.getDbName().equals("uniprotkb_acc")) { - String key = xref.getId(); - if (key != null && indexer.getOntologyAnnotations(key) != null) { - annotations.addAll(indexer.getOntologyAnnotations(key)); - } - } - } - return annotations; + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(GENE_DATA)); } - private void updateNegativeExonCodingData(Exon exon, int cdna, int cds, Gtf gtf) { - // we need to increment 3 nts, the stop_codon length. - exon.setGenomicCodingStart(gtf.getStart()); - // cdnaCodingEnd points to the same base position than genomicCodingStart - exon.setCdnaCodingEnd(exon.getEnd() - gtf.getStart() + cdna); - exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + public static List getCommonDataSources(SpeciesConfiguration speciesConfiguration, CellBaseConfiguration configuration) { + List dataList = new ArrayList<>(); - // If the STOP codon corresponds to the first three nts of the exon then no CDS will be defined - // in the gtf -as technically the STOP codon is non-coding- and we must manually set coding - // starts - if (exon.getGenomicCodingEnd() == 0) { - exon.setGenomicCodingEnd(exon.getGenomicCodingStart() + 2); - } - if (exon.getCdnaCodingStart() == 0) { - exon.setCdnaCodingStart(exon.getCdnaCodingEnd() - 2); - } - if (exon.getCdsStart() == 0) { - exon.setCdsStart(exon.getCdsEnd() - 2); + boolean isHSapiens = false; + if (speciesConfiguration.getScientificName().equals(HOMO_SAPIENS)) { + isHSapiens = true; } - } - private void updateStopCodingDataPositiveExon(Exon exon, int cdna, int cds, Gtf gtf) { - // we need to increment 3 nts, the stop_codon 
length. - exon.setGenomicCodingEnd(gtf.getEnd()); - exon.setCdnaCodingEnd(gtf.getEnd() - exon.getStart() + cdna); - exon.setCdsEnd(gtf.getEnd() - gtf.getStart() + cds); + String prefixId = getConfigurationFileIdPrefix(speciesConfiguration.getScientificName()); - // If the STOP codon corresponds to the first three nts of the exon then no CDS will be defined - // in the gtf -as technically the STOP codon is non-coding- and we must manually set coding - // starts - if (exon.getGenomicCodingStart() == 0) { - exon.setGenomicCodingStart(exon.getGenomicCodingEnd() - 2); + if (isHSapiens || isDataSupported(configuration.getDownload().getManeSelect(), prefixId)) { + dataList.add(MANE_SELECT_DATA); } - if (exon.getCdnaCodingStart() == 0) { - exon.setCdnaCodingStart(exon.getCdnaCodingEnd() - 2); + if (isHSapiens || isDataSupported(configuration.getDownload().getLrg(), prefixId)) { + dataList.add(LRG_DATA); } - if (exon.getCdsStart() == 0) { - exon.setCdsStart(exon.getCdsEnd() - 2); + if (isHSapiens || isDataSupported(configuration.getDownload().getCancerHotspot(), prefixId)) { + dataList.add(CANCER_HOTSPOT_DATA); } - } - - private void addGtfXrefs(Transcript transcript, Gene gene, Map gtfAttributes) { - if (transcript.getXrefs() == null) { - transcript.setXrefs(new ArrayList<>()); + if (isHSapiens || isDataSupported(configuration.getDownload().getDgidb(), prefixId)) { + dataList.add(DGIDB_DATA); } - - transcript.getXrefs().add(new Xref(gene.getId(), "ensembl_gene", "Ensembl Gene")); - transcript.getXrefs().add(new Xref(transcript.getId(), "ensembl_transcript", "Ensembl Transcript")); - - // Some non-coding genes do not have Gene names - if (StringUtils.isNotEmpty(gene.getName())) { - transcript.getXrefs().add(new Xref(gene.getName(), "hgnc_symbol", "HGNC Symbol")); - transcript.getXrefs().add(new Xref(transcript.getName(), "ensembl_transcript_name", "Ensembl Transcript Name")); + if (isHSapiens || isDataSupported(configuration.getDownload().getHpo(), prefixId)) { + 
dataList.add(HPO_DISEASE_DATA); } - - if (gtfAttributes.get("ccds_id") != null) { - transcript.getXrefs().add(new Xref(gtfAttributes.get("ccds_id"), "ccds_id", "CCDS")); + if (isHSapiens || isDataSupported(configuration.getDownload().getCancerHotspot(), prefixId)) { + dataList.add(CANCER_GENE_CENSUS_DATA); } - } - - private void initializePointers(Map>> gtfMap) { - geneCounter = 0; - geneList = new ArrayList<>(gtfMap.keySet()); - geneName = geneList.get(geneCounter); - transcriptCounter = 0; - transcriptList = new ArrayList<>(gtfMap.get(geneName).keySet()); - transcriptName = transcriptList.get(transcriptCounter); - exonCounter = 0; - feature = "exon"; - nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); - } - - private Gtf getGTFEntry(GtfReader gtfReader, Map>> gtfMap) throws FileFormatException { - // Flexible parsing is deactivated, return next line - if (gtfMap == null) { - return gtfReader.read(); - // Flexible parsing activated, carefully select next line to return - } else { - // No more genes/features to return - if (nextGtfToReturn == null) { - return null; - } - Gtf gtfToReturn = nextGtfToReturn; - if (feature.equals("exon")) { -// gtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); - if (gtfMap.get(geneName).get(transcriptName).containsKey("cds")) { - nextGtfToReturn = getExonCDSLine(((Gtf) ((List) gtfMap.get(geneName) - .get(transcriptName).get("exon")).get(exonCounter)).getStart(), - ((Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter)).getEnd(), - (List) gtfMap.get(geneName).get(transcriptName).get("cds")); - if (nextGtfToReturn != null) { - feature = "cds"; - return gtfToReturn; - } - } - // if no cds was found for this exon, get next exon - getFeatureFollowsExon(gtfMap); - return gtfToReturn; - } - if (feature.equals("cds") || feature.equals("stop_codon")) { - getFeatureFollowsExon(gtfMap); - return gtfToReturn; - } - 
if (feature.equals("start_codon")) { - feature = "stop_codon"; - nextGtfToReturn = (Gtf) gtfMap.get(geneName).get(transcriptName).get("stop_codon"); - return gtfToReturn; - } - // The only accepted features that should appear in the gtfMap are exon, cds, start_codon and stop_codon - throw new FileFormatException("Execution cannot reach this point"); + if (isHSapiens || isDataSupported(configuration.getDownload().getMiRTarBase(), prefixId)) { + dataList.add(MIRTARBASE_DATA); } - } - - private Gtf getExonCDSLine(Integer exonStart, Integer exonEnd, List cdsList) { - for (Object cdsObject : cdsList) { - int cdsStart = ((Gtf) cdsObject).getStart(); - int cdsEnd = ((Gtf) cdsObject).getEnd(); - if (cdsStart <= exonEnd && cdsEnd >= exonStart) { - return (Gtf) cdsObject; - } - } - return null; - } - - private void getFeatureFollowsExon(Map>> gtfMap) { - exonCounter++; - if (exonCounter == ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).size() - || feature.equals("stop_codon")) { - // If last returned feature was a stop_codon or no start_codon is provided for this transcript, - // next transcript must be selected - if (!feature.equals("stop_codon") && gtfMap.get(geneName).get(transcriptName).containsKey("start_codon")) { - feature = "start_codon"; - nextGtfToReturn = (Gtf) gtfMap.get(geneName).get(transcriptName).get("start_codon"); - } else { - transcriptCounter++; - // No more transcripts in this gene, check if there are more genes - if (transcriptCounter == gtfMap.get(geneName).size()) { - geneCounter++; - // No more genes available, end parsing - if (geneCounter == gtfMap.size()) { - nextGtfToReturn = null; - feature = null; - // Still more genes to parse, select next one - } else { - geneName = geneList.get(geneCounter); - transcriptCounter = 0; - transcriptList = new ArrayList<>(gtfMap.get(geneName).keySet()); - } - } - // Check if a new gene was selected - null would indicate there're no more genes - if (nextGtfToReturn != null) { - transcriptName = 
transcriptList.get(transcriptCounter); - exonCounter = 0; - feature = "exon"; - nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); - } - } - } else { - feature = "exon"; - nextGtfToReturn = (Gtf) ((List) gtfMap.get(geneName).get(transcriptName).get("exon")).get(exonCounter); - } - } - - private Map>> loadGTFMap(GtfReader gtfReader) throws FileFormatException { - Map>> gtfMap = new HashMap<>(); - Gtf gtf; - while ((gtf = gtfReader.read()) != null) { - if (gtf.getFeature().equals("gene") || gtf.getFeature().equals("transcript") - || gtf.getFeature().equals("UTR") || gtf.getFeature().equals("Selenocysteine")) { - continue; - } - - // Get GTF lines associated with this gene - create a new Map of GTF entries if it's a new gene - String geneId = gtf.getAttributes().get("gene_id"); - // Transcript -> feature -> GTF line - Map> gtfMapGeneEntry; - if (gtfMap.containsKey(geneId)) { - gtfMapGeneEntry = gtfMap.get(geneId); - } else { - gtfMapGeneEntry = new HashMap(); - gtfMap.put(geneId, gtfMapGeneEntry); - } - - // Get GTF lines associated with this transcript - create a new Map of GTF entries if it's a new gene - String transcriptId = gtf.getAttributes().get("transcript_id"); - Map gtfMapTranscriptEntry; - if (gtfMapGeneEntry.containsKey(transcriptId)) { - gtfMapTranscriptEntry = gtfMapGeneEntry.get(transcriptId); - } else { - gtfMapTranscriptEntry = new HashMap(); - gtfMapGeneEntry.put(transcriptId, gtfMapTranscriptEntry); - } - - addGTFLineToGTFMap(gtfMapTranscriptEntry, gtf); - - } - - // Exon number is mandatory for the parser to be able to properly generate the gene data model - if (!exonNumberPresent(gtfMap)) { - setExonNumber(gtfMap); - } - - return gtfMap; - } - - private boolean exonNumberPresent(Map>> gtfMap) { - Map> geneGtfMap = gtfMap.get(gtfMap.keySet().iterator().next()); - return ((Gtf) ((List) geneGtfMap.get(geneGtfMap.keySet().iterator().next()).get("exon")).get(0)) - 
.getAttributes().containsKey("exon_number"); - } - - private void setExonNumber(Map>> gtfMap) { - for (String gene : gtfMap.keySet()) { - for (String transcript : gtfMap.get(gene).keySet()) { - List exonList = (List) gtfMap.get(gene).get(transcript).get("exon"); - Collections.sort(exonList, (e1, e2) -> Integer.valueOf(e1.getStart()).compareTo(e2.getStart())); - if (exonList.get(0).getStrand().equals("+")) { - int exonNumber = 1; - for (Gtf gtf : exonList) { - gtf.getAttributes().put("exon_number", String.valueOf(exonNumber)); - exonNumber++; - } - } else { - int exonNumber = exonList.size(); - for (Gtf gtf : exonList) { - gtf.getAttributes().put("exon_number", String.valueOf(exonNumber)); - exonNumber--; - } - } - } - } - } - - private void addGTFLineToGTFMap(Map gtfMapTranscriptEntry, Gtf gtf) { - // Add exon/cds GTF line to the corresponding gene entry in the map - String featureType = gtf.getFeature().toLowerCase(); - if (featureType.equals("exon") || featureType.equals("cds")) { - List gtfList; - // Check if there were exons already stored - if (gtfMapTranscriptEntry.containsKey(featureType)) { - gtfList = (List) gtfMapTranscriptEntry.get(featureType); - } else { - gtfList = new ArrayList<>(); - gtfMapTranscriptEntry.put(featureType, gtfList); - } - gtfList.add(gtf); - // Only one start/stop codon can be stored per transcript - no need to check if the "start_codon"/"stop_codon" - // keys are already there - } else if (featureType.equals("start_codon") || featureType.equals("stop_codon")) { - gtfMapTranscriptEntry.put(featureType, gtf); - } - } - - private List getTranscriptTfbses(Gtf transcript, String chromosome, TabixReader tabixReader) throws IOException { - if (tabixReader == null) { - return null; - } - List transcriptTfbses = null; - - int transcriptStart = transcript.getStart(); - int transcriptEnd = transcript.getEnd(); - - - String line; - TabixReader.Iterator iter = tabixReader.query(chromosome, transcriptStart, transcriptEnd); - while ((line = 
iter.next()) != null) { - String[] elements = line.split("\t"); - - String sequenceName = elements[0]; - String source = elements[1]; - String feature = elements[2]; - int start = Integer.parseInt(elements[3]); - int end = Integer.parseInt(elements[4]); - String score = elements[5]; - String strand = elements[6]; - String frame = elements[7]; - String attribute = elements[8]; - - if (strand.equals(transcript.getStrand())) { - continue; - } - - if (transcript.getStrand().equals("+")) { - if (start > transcript.getStart() + 500) { - break; - } else if (end > transcript.getStart() - 2500) { - Gff2 tfbs = new Gff2(sequenceName, source, feature, start, end, score, strand, frame, attribute); - transcriptTfbses = addTranscriptTfbstoList(tfbs, transcript, chromosome, transcriptTfbses); - } - } else { - // transcript in negative strand - if (start > transcript.getEnd() + 2500) { - break; - } else if (start > transcript.getEnd() - 500) { - Gff2 tfbs = new Gff2(sequenceName, source, feature, start, end, score, strand, frame, attribute); - transcriptTfbses = addTranscriptTfbstoList(tfbs, transcript, chromosome, transcriptTfbses); - } - } - } - - return transcriptTfbses; - } - - protected List addTranscriptTfbstoList(Gff2 tfbs, Gtf transcript, String chromosome, - List transcriptTfbses) { - if (transcriptTfbses == null) { - transcriptTfbses = new ArrayList<>(); - } - - // binding_matrix_stable_id=ENSPFM0542;epigenomes_with_experimental_evidence=SK-N.%2CMCF-7%2CH1-hESC_3%2CHCT116; - // stable_id=ENSM00208374688;transcription_factor_complex=TEAD4::ESRRB - String[] attributes = tfbs.getAttribute().split(";"); - - String id = null; - String pfmId = null; - List transciptionFactors = null; - - for (String attributePair : attributes) { - String[] attributePairArray = attributePair.split("="); - switch(attributePairArray[0]) { - case "binding_matrix_stable_id": - pfmId = attributePairArray[1]; - break; - case "stable_id": - id = attributePairArray[1]; - break; - case 
"transcription_factor_complex": - transciptionFactors = Arrays.asList(attributePairArray[1].split("(::)|(%2C)")); - break; - default: - break; - } - } - - transcriptTfbses.add(new TranscriptTfbs(id, pfmId, tfbs.getFeature(), transciptionFactors, chromosome, tfbs.getStart(), - tfbs.getEnd(), getRelativeTranscriptTfbsStart(tfbs, transcript), getRelativeTranscriptTfbsEnd(tfbs, transcript), - Float.parseFloat(tfbs.getScore()))); - return transcriptTfbses; - } - - private Integer getRelativeTranscriptTfbsStart(Gff2 tfbs, Gtf transcript) { - Integer relativeStart; - if (transcript.getStrand().equals("+")) { - if (tfbs.getStart() < transcript.getStart()) { - relativeStart = tfbs.getStart() - transcript.getStart(); - } else { - relativeStart = tfbs.getStart() - transcript.getStart() + 1; - } - } else { - // negative strand transcript - if (tfbs.getEnd() > transcript.getEnd()) { - relativeStart = transcript.getEnd() - tfbs.getEnd(); - } else { - relativeStart = transcript.getEnd() - tfbs.getEnd() + 1; - } - } - return relativeStart; - } - - private Integer getRelativeTranscriptTfbsEnd(Gff2 tfbs, Gtf transcript) { - Integer relativeEnd; - if (transcript.getStrand().equals("+")) { - if (tfbs.getEnd() < transcript.getStart()) { - relativeEnd = tfbs.getEnd() - transcript.getStart(); - } else { - relativeEnd = tfbs.getEnd() - transcript.getStart() + 1; - } - } else { - if (tfbs.getStart() > transcript.getEnd()) { - relativeEnd = transcript.getEnd() - tfbs.getStart(); - } else { - relativeEnd = transcript.getEnd() - tfbs.getStart() + 1; - } - } - return relativeEnd; - } - - - - private boolean newGene(Gene previousGene, String newGeneId) { - return previousGene == null || !newGeneId.equals(previousGene.getId()); - } - - private void updateTranscriptAndGeneCoords(Transcript transcript, Gene gene, Gtf gtf) { - if (transcript.getStart() > gtf.getStart()) { - transcript.setStart(gtf.getStart()); - } - if (transcript.getEnd() < gtf.getEnd()) { - transcript.setEnd(gtf.getEnd()); - } - 
if (gene.getStart() > gtf.getStart()) { - gene.setStart(gtf.getStart()); - } - if (gene.getEnd() < gtf.getEnd()) { - gene.setEnd(gtf.getEnd()); + if (isHSapiens || isDataSupported(configuration.getDownload().getMirbase(), prefixId)) { + dataList.add(MIRBASE_DATA); } - } - - private void getGtfFileFromGeneDirectoryPath(Path geneDirectoryPath) { - for (String fileName : geneDirectoryPath.toFile().list()) { - if (fileName.endsWith(".gtf") || fileName.endsWith(".gtf.gz")) { - gtfFile = geneDirectoryPath.resolve(fileName); - break; - } - } - } - private void getProteinFastaFileFromGeneDirectoryPath(Path geneDirectoryPath) { - for (String fileName : geneDirectoryPath.toFile().list()) { - if (fileName.endsWith(".pep.all.fa") || fileName.endsWith(".pep.all.fa.gz")) { - proteinFastaFile = geneDirectoryPath.resolve(fileName); - break; - } - } - } - - private void getCDnaFastaFileFromGeneDirectoryPath(Path geneDirectoryPath) { - for (String fileName : geneDirectoryPath.toFile().list()) { - if (fileName.endsWith(".cdna.all.fa") || fileName.endsWith(".cdna.all.fa.gz")) { - cDnaFastaFile = geneDirectoryPath.resolve(fileName); - break; - } - } + return dataList; } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderIndexer.java index 285236ba60..8db1ab315f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderIndexer.java @@ -27,6 +27,9 @@ import org.opencb.biodata.models.core.CancerHotspot; import org.opencb.biodata.models.core.CancerHotspotVariant; import org.opencb.biodata.models.core.GeneCancerAssociation; +import org.opencb.biodata.models.core.MirnaTarget; +import org.opencb.biodata.models.variant.avro.GeneDrugInteraction; +import org.opencb.biodata.models.variant.avro.GeneTraitAssociation; import org.opencb.commons.utils.FileUtils; import 
org.rocksdb.Options; import org.rocksdb.RocksDB; @@ -37,31 +40,35 @@ import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.util.*; import java.util.stream.Collectors; +import static org.opencb.cellbase.lib.EtlCommons.ENSEMBL_DATA; +import static org.opencb.cellbase.lib.EtlCommons.HPO_DISEASE_DATA; +import static org.opencb.cellbase.lib.builders.AbstractBuilder.PARSING_DONE_LOG_MESSAGE; +import static org.opencb.cellbase.lib.builders.AbstractBuilder.PARSING_LOG_MESSAGE; + public class GeneBuilderIndexer { + public static final String ROCKSDB_FOLDER = "rocksdb.idx"; + protected RocksDB rocksdb; protected RocksDbManager rocksDbManager; protected Logger logger; protected String dbLocation; protected Options dbOption; - protected final String HGNC_ID_SUFFIX = "_hgncid"; - protected final String MANE_SUFFIX = "_mane"; - protected final String LRG_SUFFIX = "_lrg"; - protected final String CANCER_GENE_CENSUS_SUFFIX = "_cgc"; - protected final String CANCER_HOTSPOT_SUFFIX = "_chs"; - protected final String PROTEIN_SEQUENCE_SUFFIX = "_protein_fasta"; - protected final String CDNA_SEQUENCE_SUFFIX = "_cdna_fasta"; - protected final String DRUGS_SUFFIX = "_drug"; - protected final String DISEASE_SUFFIX = "_disease"; - protected final String MIRTARBASE_SUFFIX = "_mirtarbase"; - protected final String TSO500_SUFFIX = "_tso500"; - protected final String EGLH_HAEMONC_SUFFIX = "_eglh_haemonc"; + protected static final String HGNC_ID_SUFFIX = "_hgncid"; + protected static final String MANE_SUFFIX = "_mane"; + protected static final String LRG_SUFFIX = "_lrg"; + protected static final String CANCER_GENE_CENSUS_SUFFIX = "_cgc"; + protected static final String CANCER_HOTSPOT_SUFFIX = "_chs"; + protected static final String PROTEIN_SEQUENCE_SUFFIX = "_protein_fasta"; + protected static final String CDNA_SEQUENCE_SUFFIX = "_cdna_fasta"; + protected static final String DRUGS_SUFFIX = 
"_drug"; + protected static final String DISEASE_SUFFIX = "_disease"; + protected static final String MIRTARBASE_SUFFIX = "_mirtarbase"; public GeneBuilderIndexer(Path genePath) { this.init(genePath); @@ -69,7 +76,7 @@ public GeneBuilderIndexer(Path genePath) { private void init(Path genePath) { rocksDbManager = new RocksDbManager(); - dbLocation = genePath.resolve("integration.idx").toString(); + dbLocation = genePath.resolve(ROCKSDB_FOLDER).toString(); rocksdb = rocksDbManager.getDBConnection(dbLocation); dbOption = new Options().setCreateIfMissing(true); @@ -77,18 +84,14 @@ private void init(Path genePath) { } protected void indexCdnaSequences(Path cDnaFastaFile) throws IOException, FileFormatException, RocksDBException { - logger.info("Loading RefSeq's cDNA sequences..."); - FileUtils.checkPath(cDnaFastaFile); - if (Files.size(cDnaFastaFile) > 0) { - FastaReader fastaReader = new FastaReader(cDnaFastaFile); + logger.info(PARSING_LOG_MESSAGE, cDnaFastaFile); + try (FastaReader fastaReader = new FastaReader(cDnaFastaFile)) { Fasta fasta; while ((fasta = fastaReader.read()) != null) { rocksDbManager.update(rocksdb, fasta.getId() + CDNA_SEQUENCE_SUFFIX, fasta.getSeq()); } - fastaReader.close(); - } else { - logger.warn("RefSeq's cDNA sequences not loaded"); } + logger.info(PARSING_DONE_LOG_MESSAGE, cDnaFastaFile); } public String getCdnaFasta(String id) throws RocksDBException { @@ -96,18 +99,18 @@ public String getCdnaFasta(String id) throws RocksDBException { } protected void indexProteinSequences(Path proteinFastaFile) throws IOException, FileFormatException, RocksDBException { - logger.info("Loading ENSEMBL's protein sequences..."); - FileUtils.checkPath(proteinFastaFile); - if (Files.size(proteinFastaFile) > 0) { - FastaReader fastaReader = new FastaReader(proteinFastaFile); + if (proteinFastaFile == null) { + return; + } + + logger.info(PARSING_LOG_MESSAGE, proteinFastaFile); + try (FastaReader fastaReader = new FastaReader(proteinFastaFile)) { Fasta fasta; 
while ((fasta = fastaReader.read()) != null) { rocksDbManager.update(rocksdb, fasta.getId() + PROTEIN_SEQUENCE_SUFFIX, fasta.getSeq()); } - fastaReader.close(); - } else { - logger.warn("ENSEMBL's protein sequences not loaded"); } + logger.info(PARSING_DONE_LOG_MESSAGE, proteinFastaFile); } protected String getProteinFasta(String id) throws RocksDBException { @@ -115,22 +118,22 @@ protected String getProteinFasta(String id) throws RocksDBException { } protected void indexHgncIdMapping(Path hgncMappingFile) throws IOException, RocksDBException { - // #hgnc_id symbol name locus_group locus_type status location location_sortable ... - logger.info("Indexing HGNC ID mapping data ..."); - - // We only need the first two columns: hgnc_id -> symbol - if (hgncMappingFile != null && Files.exists(hgncMappingFile) && Files.size(hgncMappingFile) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(hgncMappingFile)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - String[] fields = line.split("\t", -1); - rocksDbManager.update(rocksdb, fields[1] + HGNC_ID_SUFFIX, fields[0]); - line = bufferedReader.readLine(); - } + if (hgncMappingFile == null) { + return; + } + + logger.info(PARSING_LOG_MESSAGE, hgncMappingFile); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(hgncMappingFile)) { + String line = bufferedReader.readLine(); + // We only need the first two columns: hgnc_id -> symbol + // #hgnc_id symbol name locus_group locus_type status location location_sortable ... 
+ while (StringUtils.isNotEmpty(line)) { + String[] fields = line.split("\t", -1); + rocksDbManager.update(rocksdb, fields[1] + HGNC_ID_SUFFIX, fields[0]); + line = bufferedReader.readLine(); } - } else { - logger.warn("HGNC ID mapping file " + hgncMappingFile + " not found"); } + logger.info(PARSING_DONE_LOG_MESSAGE, hgncMappingFile); } public String getHgncId(String id) throws RocksDBException { @@ -138,29 +141,29 @@ public String getHgncId(String id) throws RocksDBException { } protected void indexManeMapping(Path maneMappingFile, String referenceId) throws IOException, RocksDBException { + if (maneMappingFile == null) { + return; + } + + logger.info(PARSING_LOG_MESSAGE, maneMappingFile); + int idColumn = referenceId.equalsIgnoreCase(ENSEMBL_DATA) ? 7 : 5; + // #NCBI_GeneID Ensembl_Gene HGNC_ID symbol name RefSeq_nuc RefSeq_prot Ensembl_nuc Ensembl_prot // MANE_status GRCh38_chr chr_start chr_end chr_strand - logger.info("Indexing MANE mapping data ..."); - - if (maneMappingFile != null && Files.exists(maneMappingFile) && Files.size(maneMappingFile) > 0) { - int idColumn = referenceId.equalsIgnoreCase("ensembl") ? 
7 : 5; -// BufferedReader bufferedReader = FileUtils.newBufferedReader(maneMappingFile); - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(maneMappingFile)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - String[] fields = line.split("\t", -1); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_refseq", fields[5]); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_refseq_protein", fields[6]); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_ensembl", fields[7]); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_ensembl_protein", fields[8]); - rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_flag", fields[9]); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(maneMappingFile)) { + String line = bufferedReader.readLine(); + while (StringUtils.isNotEmpty(line)) { + String[] fields = line.split("\t", -1); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_refseq", fields[5]); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_refseq_protein", fields[6]); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_ensembl", fields[7]); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_ensembl_protein", fields[8]); + rocksDbManager.update(rocksdb, fields[idColumn] + MANE_SUFFIX + "_flag", fields[9]); - line = bufferedReader.readLine(); - } + line = bufferedReader.readLine(); } - } else { - logger.warn("MANE mapping file " + maneMappingFile + " not found"); } + logger.info(PARSING_DONE_LOG_MESSAGE, maneMappingFile); } public String getMane(String id, String field) throws RocksDBException { @@ -168,30 +171,31 @@ public String getMane(String id, String field) throws RocksDBException { } protected void indexLrgMapping(Path lrgMappingFile, String referenceId) throws IOException, RocksDBException { + if (lrgMappingFile == null) { + return; + } + + 
logger.info(PARSING_LOG_MESSAGE, lrgMappingFile); + // # Last modified: 30-03-2021@22:00:06 // # LRG HGNC_SYMBOL REFSEQ_GENOMIC LRG_TRANSCRIPT REFSEQ_TRANSCRIPT ENSEMBL_TRANSCRIPT CCDS // LRG_1 COL1A1 NG_007400.1 t1 NM_000088.3 ENST00000225964.10 CCDS11561.1 - logger.info("Indexing LRG mapping data ..."); - - if (lrgMappingFile != null && Files.exists(lrgMappingFile) && Files.size(lrgMappingFile) > 0) { - int idColumn = referenceId.equalsIgnoreCase("ensembl") ? 5 : 4; - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(lrgMappingFile)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - if (!line.startsWith("#")) { - String[] fields = line.split("\t", -1); - String id = fields[idColumn]; - if (StringUtils.isNotEmpty(id) && !id.equals("-")) { - rocksDbManager.update(rocksdb, id + LRG_SUFFIX + "_refseq", fields[4]); - rocksDbManager.update(rocksdb, id + LRG_SUFFIX + "_ensembl", fields[5]); - } + int idColumn = referenceId.equalsIgnoreCase("ensembl") ? 
5 : 4; + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(lrgMappingFile)) { + String line = bufferedReader.readLine(); + while (StringUtils.isNotEmpty(line)) { + if (!line.startsWith("#")) { + String[] fields = line.split("\t", -1); + String id = fields[idColumn]; + if (StringUtils.isNotEmpty(id) && !id.equals("-")) { + rocksDbManager.update(rocksdb, id + LRG_SUFFIX + "_refseq", fields[4]); + rocksDbManager.update(rocksdb, id + LRG_SUFFIX + "_ensembl", fields[5]); } - line = bufferedReader.readLine(); } + line = bufferedReader.readLine(); } - } else { - logger.warn("LRG mapping file " + lrgMappingFile + " not found"); } + logger.info(PARSING_DONE_LOG_MESSAGE, lrgMappingFile); } public String getLrg(String id, String field) throws RocksDBException { @@ -199,6 +203,12 @@ public String getLrg(String id, String field) throws RocksDBException { } protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBException { + if (cgcFile == null) { + return; + } + + logger.info(PARSING_LOG_MESSAGE, cgcFile); + Map tissuesMap = new HashMap<>(); tissuesMap.put("E", "epithelial"); tissuesMap.put("L", "leukaemia/lymphoma"); @@ -224,22 +234,20 @@ protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBEx mutationTypesMap.put("Mis", "missense"); mutationTypesMap.put("PromoterMis", "missense"); - logger.info("Indexing CANCER GENE CENSUS data ..."); - if (cgcFile != null && Files.exists(cgcFile) && Files.size(cgcFile) > 0) { + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(cgcFile)) { // Skip the first header line - BufferedReader bufferedReader = FileUtils.newBufferedReader(cgcFile); - bufferedReader.readLine(); + String line = bufferedReader.readLine(); GeneCancerAssociation cancerGeneAssociation; - String line; + while ((line = bufferedReader.readLine()) != null) { String[] fields = line.split("\t", -1); // Find Ensembl Gene Id in the last comma-separated column List synonyms = 
StringUtils.isNotEmpty(fields[19]) ? Arrays.stream(fields[19] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replace("\"", "") + .replace(" ", "") + .split(",")) .collect(Collectors.toList()) : Collections.emptyList(); @@ -254,54 +262,55 @@ protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBEx boolean somatic = StringUtils.isNotEmpty(fields[7]) && fields[7].equalsIgnoreCase("yes"); boolean germline = StringUtils.isNotEmpty(fields[8]) && fields[8].equalsIgnoreCase("yes"); List somaticTumourTypes = StringUtils.isNotEmpty(fields[9]) - ? Arrays.asList(fields[9].replaceAll("\"", "").split(", ")) + ? Arrays.asList(fields[9].replace("\"", "").split(", ")) : new ArrayList<>(); List germlineTumourTypes = StringUtils.isNotEmpty(fields[10]) - ? Arrays.asList(fields[10].replaceAll("\"", "").split(", ")) + ? Arrays.asList(fields[10].replace("\"", "").split(", ")) : Collections.emptyList(); List syndromes = StringUtils.isNotEmpty(fields[11]) - ? Arrays.asList(fields[11].replaceAll("\"", "").split("; ")) + ? Arrays.asList(fields[11].replace("\"", "").split("; ")) : Collections.emptyList(); List tissues = StringUtils.isNotEmpty(fields[12]) ? Arrays.stream(fields[12] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replace("\"", "") + .replace(" ", "") + .split(",")) .map(tissuesMap::get) .collect(Collectors.toList()) : Collections.emptyList(); - List modeOfInheritance = StringUtils.isNotEmpty(fields[13]) - ? fields[13].equalsIgnoreCase("Dom/Rec") + List modeOfInheritance = Collections.emptyList(); + if (StringUtils.isNotEmpty(fields[13])) { + modeOfInheritance = fields[13].equalsIgnoreCase("Dom/Rec") ? Arrays.asList(moiMap.get("Dom"), moiMap.get("Rec")) - : Collections.singletonList(moiMap.get(fields[13])) - : Collections.emptyList(); + : Collections.singletonList(moiMap.get(fields[13])); + } List roleInCancer = StringUtils.isNotEmpty(fields[14]) ? 
Arrays.stream(fields[14] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replace("\"", "") + .replace(" ", "") + .split(",")) .map(roleInCancerMap::get) .collect(Collectors.toList()) : Collections.emptyList(); List mutationTypes = StringUtils.isNotEmpty(fields[15]) ? Arrays.stream(fields[15] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replace("\"", "") + .replace(" ", "") + .split(",")) .map(mutationTypesMap::get) .collect(Collectors.toList()) : Collections.emptyList(); List translocationPartners = StringUtils.isNotEmpty(fields[16]) ? Arrays.stream(fields[16] - .replaceAll("\"", "") - .replaceAll(" ", "") - .split(",")) + .replace("\"", "") + .replace(" ", "") + .split(",")) .collect(Collectors.toList()) : Collections.emptyList(); List otherSyndromes = StringUtils.isNotEmpty(fields[18]) ? Arrays.stream(fields[18] - .replaceAll("\"", "") - .split("; ")) + .replace("\"", "") + .split("; ")) .collect(Collectors.toList()) : Collections.emptyList(); @@ -312,10 +321,9 @@ protected void indexCancerGeneCensus(Path cgcFile) throws IOException, RocksDBEx rocksDbManager.update(rocksdb, fields[0] + CANCER_GENE_CENSUS_SUFFIX, cancerGeneAssociation); } } - bufferedReader.close(); - } else { - logger.warn("CANCER GENE CENSUS file " + cgcFile + " not found"); } + + logger.info(PARSING_DONE_LOG_MESSAGE, cgcFile); } public List getCancerGeneCensus(String geneName) throws RocksDBException, IOException { @@ -324,97 +332,106 @@ public List getCancerGeneCensus(String geneName) throws R } public void indexCancerHotspot(Path cancerHotspot) throws IOException, RocksDBException { + if (cancerHotspot == null) { + return; + } + + logger.info(PARSING_LOG_MESSAGE, cancerHotspot); + // Store all cancer hotspot (different gene and aminoacid position) for each gene in the same key Map> visited = new HashMap<>(); - FileInputStream fileInputStream = new FileInputStream(cancerHotspot.toFile()); - HSSFWorkbook workbook = new HSSFWorkbook(fileInputStream); - 
HSSFSheet sheet = workbook.getSheetAt(0); - Iterator iterator = sheet.iterator(); - iterator.next(); - while (iterator.hasNext()) { - Row currentRow = iterator.next(); - String geneName = currentRow.getCell(0).toString(); - - if (currentRow.getCell(1).toString().contains("splice")) { - continue; - } - int aminoAcidPosition = Integer.parseInt(currentRow.getCell(1).toString()); - - CancerHotspot ch = null; - // Check if ch object already exist - if (visited.containsKey(geneName)) { - for (CancerHotspot hotspot : visited.get(geneName)) { - if (hotspot.getAminoacidPosition() == aminoAcidPosition) { - ch = hotspot; - break; - } - } - } - // If not exist we create new ch - if (ch == null) { - ch = new CancerHotspot(); - ch.setScores(new HashMap<>()); - ch.setCancerTypeCount(new HashMap<>()); - ch.setOrganCount(new HashMap<>()); - ch.setVariants(new ArrayList<>()); - - // Parse new row - ch.setGeneName(geneName); - ch.setAminoacidPosition(aminoAcidPosition); - ch.getScores().put("log10Pvalue", Double.parseDouble(currentRow.getCell(2).toString())); - ch.setNumMutations(Integer.parseInt(currentRow.getCell(3).toString())); - - String[] cancerCountSplit = currentRow.getCell(11).toString().split("\\|"); - for (String cancerCount : cancerCountSplit) { - String[] split = cancerCount.split(":"); - ch.getCancerTypeCount().put(split[0], Integer.parseInt(split[2])); + try (FileInputStream fileInputStream = new FileInputStream(cancerHotspot.toFile())) { + HSSFWorkbook workbook = new HSSFWorkbook(fileInputStream); + HSSFSheet sheet = workbook.getSheetAt(0); + Iterator iterator = sheet.iterator(); + iterator.next(); + while (iterator.hasNext()) { + Row currentRow = iterator.next(); + String geneName = currentRow.getCell(0).toString(); + + if (currentRow.getCell(1).toString().contains("splice")) { + continue; } + int aminoAcidPosition = Integer.parseInt(currentRow.getCell(1).toString()); - String[] organCountSplit = currentRow.getCell(12).toString().split("\\|"); - for (String 
organCount : organCountSplit) { - String[] split = organCount.split(":"); - ch.getOrganCount().put(split[0], Integer.parseInt(split[2])); + CancerHotspot ch = null; + // Check if ch object already exist + if (visited.containsKey(geneName)) { + for (CancerHotspot hotspot : visited.get(geneName)) { + if (hotspot.getAminoacidPosition() == aminoAcidPosition) { + ch = hotspot; + break; + } + } } - ch.getScores().put("mutability", Double.parseDouble(currentRow.getCell(14).toString())); - ch.getScores().put("muProtein", Double.parseDouble(currentRow.getCell(15).toString())); - ch.setAnalysis(Arrays.asList(currentRow.getCell(17).toString().split(","))); - ch.getScores().put("qvalue", Double.parseDouble(currentRow.getCell(18).toString())); - ch.getScores().put("qvaluePancan", Double.parseDouble(currentRow.getCell(20).toString())); - ch.setAminoacidReference(currentRow.getCell(35).toString()); - ch.getScores().put("qvalueCancerType", Double.parseDouble(currentRow.getCell(36).toString())); - ch.setCancerType(currentRow.getCell(37).toString()); + // If not exist we create new ch + if (ch == null) { + ch = new CancerHotspot(); + ch.setScores(new HashMap<>()); + ch.setCancerTypeCount(new HashMap<>()); + ch.setOrganCount(new HashMap<>()); + ch.setVariants(new ArrayList<>()); + + // Parse new row + ch.setGeneName(geneName); + ch.setAminoacidPosition(aminoAcidPosition); + ch.getScores().put("log10Pvalue", Double.parseDouble(currentRow.getCell(2).toString())); + ch.setNumMutations(Integer.parseInt(currentRow.getCell(3).toString())); + + String[] cancerCountSplit = currentRow.getCell(11).toString().split("\\|"); + for (String cancerCount : cancerCountSplit) { + String[] split = cancerCount.split(":"); + ch.getCancerTypeCount().put(split[0], Integer.parseInt(split[2])); + } + + String[] organCountSplit = currentRow.getCell(12).toString().split("\\|"); + for (String organCount : organCountSplit) { + String[] split = organCount.split(":"); + ch.getOrganCount().put(split[0], 
Integer.parseInt(split[2])); + } - if (visited.containsKey(geneName)) { - // Gene exists but no this aminoacid position - visited.get(geneName).add(ch); - } else { - // New gene found - visited.put(geneName, new ArrayList<>(Collections.singletonList(ch))); + ch.getScores().put("mutability", Double.parseDouble(currentRow.getCell(14).toString())); + ch.getScores().put("muProtein", Double.parseDouble(currentRow.getCell(15).toString())); + ch.setAnalysis(Arrays.asList(currentRow.getCell(17).toString().split(","))); + ch.getScores().put("qvalue", Double.parseDouble(currentRow.getCell(18).toString())); + ch.getScores().put("qvaluePancan", Double.parseDouble(currentRow.getCell(20).toString())); + ch.setAminoacidReference(currentRow.getCell(35).toString()); + ch.getScores().put("qvalueCancerType", Double.parseDouble(currentRow.getCell(36).toString())); + ch.setCancerType(currentRow.getCell(37).toString()); + + if (visited.containsKey(geneName)) { + // Gene exists but no this aminoacid position + visited.get(geneName).add(ch); + } else { + // New gene found + visited.put(geneName, new ArrayList<>(Collections.singletonList(ch))); + } } - } - // Add cancer hotspot variant information - CancerHotspotVariant cancerHotspotVariant = new CancerHotspotVariant(); - cancerHotspotVariant.setSampleCount(new HashMap<>()); + // Add cancer hotspot variant information + CancerHotspotVariant cancerHotspotVariant = new CancerHotspotVariant(); + cancerHotspotVariant.setSampleCount(new HashMap<>()); - String[] alternateCountSplit = currentRow.getCell(8).toString().split(":"); - cancerHotspotVariant.setAminoacidAlternate(alternateCountSplit[0]); - cancerHotspotVariant.setCount(Integer.parseInt(alternateCountSplit[1])); + String[] alternateCountSplit = currentRow.getCell(8).toString().split(":"); + cancerHotspotVariant.setAminoacidAlternate(alternateCountSplit[0]); + cancerHotspotVariant.setCount(Integer.parseInt(alternateCountSplit[1])); - String[] sampleSplit = 
currentRow.getCell(38).toString().split("\\|"); - for (String sampleCount : sampleSplit) { - String[] sampleCountSplit = sampleCount.split(":"); - cancerHotspotVariant.getSampleCount().put(sampleCountSplit[0], Integer.parseInt(sampleCountSplit[1])); + String[] sampleSplit = currentRow.getCell(38).toString().split("\\|"); + for (String sampleCount : sampleSplit) { + String[] sampleCountSplit = sampleCount.split(":"); + cancerHotspotVariant.getSampleCount().put(sampleCountSplit[0], Integer.parseInt(sampleCountSplit[1])); + } + ch.getVariants().add(cancerHotspotVariant); } - ch.getVariants().add(cancerHotspotVariant); } - fileInputStream.close(); - for (String geneName : visited.keySet()) { - rocksDbManager.update(rocksdb, geneName + CANCER_HOTSPOT_SUFFIX, visited.get(geneName)); + for (Map.Entry> entry : visited.entrySet()) { + rocksDbManager.update(rocksdb, entry.getKey() + CANCER_HOTSPOT_SUFFIX, entry.getValue()); } + + logger.info(PARSING_DONE_LOG_MESSAGE, cancerHotspot); } public List getCancerHotspot(String geneName) throws RocksDBException, IOException { @@ -422,92 +439,158 @@ public List getCancerHotspot(String geneName) throws RocksDBExcep return rocksDbManager.getCancerHotspot(rocksdb, key); } + private String getIndexEntry(String id, String suffix) throws RocksDBException { + return getIndexEntry(id, suffix, ""); + } - protected void indexTSO500(Path tso500Path) throws IOException, RocksDBException { - // Gene Ref Seq - // FAS NM_000043 - // AR NM_000044 - logger.info("Indexing TSO500 data ..."); - - if (tso500Path != null && Files.exists(tso500Path) && Files.size(tso500Path) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(tso500Path)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - if (!line.startsWith("#")) { - String[] fields = line.split("\t", -1); - if (fields.length == 2) { - rocksDbManager.update(rocksdb, fields[1] + TSO500_SUFFIX, "TSO500"); - } - } - line = bufferedReader.readLine(); 
- } - } - } else { - logger.warn("Ensembl TSO500 mapping file " + tso500Path + " not found"); + private String getIndexEntry(String id, String suffix, String field) throws RocksDBException { + String key = id + suffix; + if (StringUtils.isNotEmpty(field)) { + key += "_" + field; + } + byte[] value = rocksdb.get(key.getBytes()); + if (value != null) { + return new String(value); } + return null; } - public String getTSO500(String transcriptId) throws RocksDBException { - String key = transcriptId + TSO500_SUFFIX; - byte[] bytes = rocksdb.get(key.getBytes()); - if (bytes == null) { - return null; - } - return new String(bytes); + protected void close() throws IOException { + rocksDbManager.closeIndex(rocksdb, dbOption, dbLocation); } + protected void indexDrugs(Path geneDrugFile) throws IOException, RocksDBException { + if (geneDrugFile == null) { + return; + } - protected void indexEGLHHaemOnc(Path eglhHaemOncPath) throws IOException, RocksDBException { - // Gene Ref Seq - // GNB1 NM_002074.4 - // CSF3R NM_000760.3 - logger.info("Indexing EGLH HaemOnc data ..."); - - if (eglhHaemOncPath != null && Files.exists(eglhHaemOncPath) && Files.size(eglhHaemOncPath) > 0) { - try (BufferedReader bufferedReader = FileUtils.newBufferedReader(eglhHaemOncPath)) { - String line = bufferedReader.readLine(); - while (StringUtils.isNotEmpty(line)) { - if (!line.startsWith("#")) { - String[] fields = line.split("\t", -1); - if (fields.length == 2) { - rocksDbManager.update(rocksdb, fields[1].split("\\.")[0] + EGLH_HAEMONC_SUFFIX, "EGLH_HaemOnc"); - } - } - line = bufferedReader.readLine(); + logger.info(PARSING_LOG_MESSAGE, geneDrugFile); + + String currentGene = ""; + List drugs = new ArrayList<>(); + + try (BufferedReader br = FileUtils.newBufferedReader(geneDrugFile)) { + // Skip header + String line = br.readLine(); + + while ((line = br.readLine()) != null) { + String[] parts = line.split("\t"); + String geneName = parts[0]; + if (currentGene.equals("")) { + currentGene = 
geneName; + } else if (!currentGene.equals(geneName)) { + rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); + drugs = new ArrayList<>(); + currentGene = geneName; + } + + String source = null; + if (parts.length >= 4) { + source = parts[3]; + } + + String interactionType = null; + if (parts.length >= 5) { + interactionType = parts[4]; } + + String drugName = null; + if (parts.length >= 8) { + // if drug name column is empty, use drug claim name instead + drugName = StringUtils.isEmpty(parts[7]) ? parts[6] : parts[7]; + } + if (StringUtils.isEmpty(drugName)) { + // no drug name + continue; + } + + String chemblId = null; + if (parts.length >= 9) { + chemblId = parts[8]; + } + + List publications = new ArrayList<>(); + if (parts.length >= 10 && parts[9] != null) { + publications = Arrays.asList(parts[9].split(",")); + } + + GeneDrugInteraction drug = new GeneDrugInteraction( + geneName, drugName, source, null, null, interactionType, chemblId, publications); + drugs.add(drug); } - } else { - logger.warn("Ensembl EGLH HaemOnc mapping file " + eglhHaemOncPath + " not found"); } + // update last gene + rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); + + logger.info(PARSING_DONE_LOG_MESSAGE, geneDrugFile); } - public String getEGLHHaemOnc(String transcriptId) throws RocksDBException { - String key = transcriptId + EGLH_HAEMONC_SUFFIX; - byte[] bytes = rocksdb.get(key.getBytes()); - if (bytes == null) { - return null; + protected void indexDiseases(Path hpoFilePath) throws IOException, RocksDBException { + if (hpoFilePath == null) { + return; } - return new String(bytes); - } - private String getIndexEntry(String id, String suffix) throws RocksDBException { - return getIndexEntry(id, suffix, ""); + Map> geneDiseaseAssociationMap = new HashMap<>(50000); + + String line; + + // HPO + logger.info(PARSING_LOG_MESSAGE, hpoFilePath); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFilePath)) { + // Skip first header line 
+ line = bufferedReader.readLine(); + while ((line = bufferedReader.readLine()) != null) { + String[] fields = line.split("\t"); + String omimId = fields[6]; + String geneSymbol = fields[3]; + String hpoId = fields[0]; + String diseaseName = fields[1]; + GeneTraitAssociation disease = + new GeneTraitAssociation(omimId, diseaseName, hpoId, 0f, 0, new ArrayList<>(), new ArrayList<>(), HPO_DISEASE_DATA); + addValueToMapElement(geneDiseaseAssociationMap, geneSymbol, disease); + } + } + logger.info(PARSING_DONE_LOG_MESSAGE); + + for (Map.Entry> entry : geneDiseaseAssociationMap.entrySet()) { + rocksDbManager.update(rocksdb, entry.getKey() + DISEASE_SUFFIX, entry.getValue()); + } } - private String getIndexEntry(String id, String suffix, String field) throws RocksDBException { - String key = id + suffix; - if (StringUtils.isNotEmpty(field)) { - key += "_" + field; + protected void indexMiRTarBase(Path miRTarBaseFile) throws IOException, RocksDBException { + if (miRTarBaseFile == null) { + return; } - byte[] value = rocksdb.get(key.getBytes()); - if (value != null) { - return new String(value); + + MiRTarBaseIndexer miRTarBaseIndexer = new MiRTarBaseIndexer(); + Map> result = miRTarBaseIndexer.index(miRTarBaseFile); + for (Map.Entry> entry : result.entrySet()) { + rocksDbManager.update(rocksdb, entry.getKey() + MIRTARBASE_SUFFIX, entry.getValue()); } - return null; } - protected void close() throws IOException { - rocksDbManager.closeIndex(rocksdb, dbOption, dbLocation); + public static void addValueToMapElement(Map> map, String key, T value) { + if (map.containsKey(key)) { + map.get(key).add(value); + } else { + List valueList = new ArrayList<>(); + valueList.add(value); + map.put(key, valueList); + } + } + + protected List getDrugs(String id) throws RocksDBException, IOException { + String key = id + DRUGS_SUFFIX; + return rocksDbManager.getDrugs(rocksdb, key); } + protected List getDiseases(String id) throws RocksDBException, IOException { + String key = id + 
DISEASE_SUFFIX; + return rocksDbManager.getDiseases(rocksdb, key); + } + + protected List getMirnaTargets(String geneName) throws RocksDBException, IOException { + String key = geneName + MIRTARBASE_SUFFIX; + return rocksDbManager.getMirnaTargets(rocksdb, key); + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderUtils.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderUtils.java deleted file mode 100644 index 16dbbc9a3c..0000000000 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneBuilderUtils.java +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Copyright 2015-2020 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.cellbase.lib.builders; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Created by imedina on 12/11/15. - */ -@Deprecated -public class GeneBuilderUtils { - - private static Logger logger = LoggerFactory.getLogger(GeneBuilderUtils.class); - -// @Deprecated -// public static Map> getTfbsMap(Path tfbsFile) throws IOException, NoSuchMethodException, FileFormatException { -// Map> tfbsMap = new HashMap<>(); -// if (tfbsFile != null && Files.exists(tfbsFile) && !Files.isDirectory(tfbsFile) && Files.size(tfbsFile) > 0) { -// Gff2Reader motifsFeatureReader = new Gff2Reader(tfbsFile); -// Gff2 tfbsMotifFeature; -// while ((tfbsMotifFeature = motifsFeatureReader.read()) != null) { -// // we only want high quality data. 
See issue 466 -// if (!tfbsMotifFeature.getAttribute().contains("experimental_evidence")) { -// continue; -// } -// String chromosome = tfbsMotifFeature.getSequenceName().replaceFirst("chr", ""); -// SortedSet chromosomeTfbsSet = tfbsMap.get(chromosome); -// if (chromosomeTfbsSet == null) { -// chromosomeTfbsSet = new TreeSet<>((Comparator) (feature1, feature2) -> { -// // TODO: maybe this should be in TranscriptTfbs class, and equals method should be overriden too -// if (feature1.getStart() != feature2.getStart()) { -// return feature1.getStart() - feature2.getStart(); -// } else { -// return feature1.getAttribute().compareTo(feature2.getAttribute()); -// } -// }); -// tfbsMap.put(chromosome, chromosomeTfbsSet); -// } -// chromosomeTfbsSet.add(tfbsMotifFeature); -// } -// motifsFeatureReader.close(); -// } -// return tfbsMap; -// } - -// public static Map> getXrefMap(Path xrefsFile, Path uniprotIdMappingFile) throws IOException { -// Map> xrefMap = new HashMap<>(); -// logger.info("Loading xref data..."); -// String[] fields; -// if (xrefsFile != null && Files.exists(xrefsFile) && Files.size(xrefsFile) > 0) { -// List lines = Files.readAllLines(xrefsFile, Charset.forName("ISO-8859-1")); -// for (String line : lines) { -// fields = line.split("\t", -1); -// if (fields.length >= 4) { -// if (!xrefMap.containsKey(fields[0])) { -// xrefMap.put(fields[0], new ArrayList<>()); -// } -// xrefMap.get(fields[0]).add(new Xref(fields[1], fields[2], fields[3])); -// } -// } -// } else { -// logger.warn("Xrefs file " + xrefsFile + " not found"); -// logger.warn("Xref data not loaded"); -// } -// -// logger.info("Loading protein mapping into xref data..."); -// if (uniprotIdMappingFile != null && Files.exists(uniprotIdMappingFile) && Files.size(uniprotIdMappingFile) > 0) { -// BufferedReader br = FileUtils.newBufferedReader(uniprotIdMappingFile); -// String line; -// while ((line = br.readLine()) != null) { -// fields = line.split("\t", -1); -// if (fields.length >= 19 && 
fields[19].startsWith("ENST")) { -// String[] transcripts = fields[19].split("; "); -// for (String transcript : transcripts) { -// if (!xrefMap.containsKey(transcript)) { -// xrefMap.put(transcript, new ArrayList()); -// } -// xrefMap.get(transcript).add(new Xref(fields[0], "uniprotkb_acc", "UniProtKB ACC")); -// xrefMap.get(transcript).add(new Xref(fields[1], "uniprotkb_id", "UniProtKB ID")); -// } -// } -// } -// br.close(); -// } else { -// logger.warn("Uniprot if mapping file " + uniprotIdMappingFile + " not found"); -// logger.warn("Protein mapping into xref data not loaded"); -// } -// -// return xrefMap; -// } - -// public static Map> getGeneDrugMap(Path geneDrugFile) throws IOException { -// Map> geneDrugMap = new HashMap<>(); -// if (geneDrugFile != null && Files.exists(geneDrugFile) && Files.size(geneDrugFile) > 0) { -// logger.info("Loading gene-drug interaction data from '{}'", geneDrugFile); -// BufferedReader br = FileUtils.newBufferedReader(geneDrugFile); -// -// // Skip header -// br.readLine(); -// -// int lineCounter = 1; -// String line; -// while ((line = br.readLine()) != null) { -// String[] parts = line.split("\t"); -// String geneName = parts[0]; -// -// String source = null; -// if (parts.length >= 4) { -// source = parts[3]; -// } -// -// String interactionType = null; -// if (parts.length >= 5) { -// interactionType = parts[4]; -// } -// -// String drugName = null; -// if (parts.length >= 8) { -// // if drug name column is empty, use drug claim name instead -// drugName = StringUtils.isEmpty(parts[7]) ? 
parts[6] : parts[7]; -// } -// if (StringUtils.isEmpty(drugName)) { -// // no drug name -// continue; -// } -// -// String chemblId = null; -// if (parts.length >= 9) { -// chemblId = parts[8]; -// } -// -// List publications = new ArrayList<>(); -// if (parts.length >= 10 && parts[9] != null) { -// publications = Arrays.asList(parts[9].split(",")); -// } -// -// //addValueToMapElement(geneDrugMap, geneName, new GeneDrugInteraction(geneName, drugName, source, null, interactionType)); -// // TODO update model to add new attributes -// addValueToMapElement(geneDrugMap, geneName, new GeneDrugInteraction(geneName, drugName, source, null, null, -// interactionType, chemblId, publications)); -// lineCounter++; -// } -// -// br.close(); -// } else { -// logger.warn("Gene drug file " + geneDrugFile + " not found"); -// logger.warn("Ignoring " + geneDrugFile); -// } -// -// return geneDrugMap; -// } - - -// -// public static Map> getGeneDiseaseAssociationMap(Path hpoFilePath, Path disgenetFilePath) -// throws IOException { -// Map> geneDiseaseAssociationMap = new HashMap<>(50000); -// -// String line; -// if (hpoFilePath != null && hpoFilePath.toFile().exists() && Files.size(hpoFilePath) > 0) { -// BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFilePath); -// // skip first header line -// bufferedReader.readLine(); -// while ((line = bufferedReader.readLine()) != null) { -// String[] fields = line.split("\t"); -// String omimId = fields[6]; -// String geneSymbol = fields[3]; -// String hpoId = fields[0]; -// String diseaseName = fields[1]; -// GeneTraitAssociation disease = -// new GeneTraitAssociation(omimId, diseaseName, hpoId, 0f, 0, new ArrayList<>(), new ArrayList<>(), "hpo"); -// addValueToMapElement(geneDiseaseAssociationMap, geneSymbol, disease); -// } -// bufferedReader.close(); -// } -// -// if (disgenetFilePath != null && disgenetFilePath.toFile().exists() && Files.size(disgenetFilePath) > 0) { -// BufferedReader bufferedReader = 
FileUtils.newBufferedReader(disgenetFilePath); -// // skip first header line -// bufferedReader.readLine(); -// while ((line = bufferedReader.readLine()) != null) { -// String[] fields = line.split("\t"); -// String diseaseId = fields[4]; -// String diseaseName = fields[5]; -// String score = fields[9]; -// String numberOfPubmeds = fields[13].trim(); -// String numberOfSNPs = fields[14]; -// String source = fields[15]; -// GeneTraitAssociation disease = new GeneTraitAssociation(diseaseId, diseaseName, "", Float.parseFloat(score), -// Integer.parseInt(numberOfPubmeds), Arrays.asList(numberOfSNPs), Arrays.asList(source), "disgenet"); -// addValueToMapElement(geneDiseaseAssociationMap, fields[1], disease); -// } -// bufferedReader.close(); -// } -// -// return geneDiseaseAssociationMap; -// } -// -// /** -// * For a gnomad file, parse and return a map of transcript to constraints. -// * -// * @param gnomadFile gene annotation file path -// * @return map of transcript to constraints -// * @throws IOException if goa file can't be read -// */ -// public static Map> getConstraints(Path gnomadFile) throws IOException { -// Map> transcriptConstraints = new HashMap<>(); -// -// if (gnomadFile != null && Files.exists(gnomadFile) && Files.size(gnomadFile) > 0) { -// logger.info("Loading OE scores from '{}'", gnomadFile); -//// BufferedReader br = FileUtils.newBufferedReader(gnomadFile); -// InputStream inputStream = Files.newInputStream(gnomadFile); -// BufferedReader br = new BufferedReader(new InputStreamReader(new GZIPInputStream(inputStream))); -// // Skip header. 
-// br.readLine(); -// String line; -// while ((line = br.readLine()) != null) { -// String[] parts = line.split("\t"); -// String transcriptIdentifier = parts[1]; -// String canonical = parts[2]; -// String oeMis = parts[5]; -// String oeSyn = parts[14]; -// String oeLof = parts[24]; -// String exacPLI = parts[70]; -// String exacLof = parts[73]; -// String geneIdentifier = parts[64]; -// -// List constraints = new ArrayList<>(); -// addConstraint(constraints, "oe_mis", oeMis); -// addConstraint(constraints, "oe_syn", oeSyn); -// addConstraint(constraints, "oe_lof", oeLof); -// addConstraint(constraints, "exac_pLI", exacPLI); -// addConstraint(constraints, "exac_oe_lof", exacLof); -// transcriptConstraints.put(transcriptIdentifier, constraints); -// -// if ("TRUE".equalsIgnoreCase(canonical)) { -// transcriptConstraints.put(geneIdentifier, constraints); -// } -// } -// br.close(); -// } -// return transcriptConstraints; -// } -// -// private static void addConstraint(List constraints, String name, String value) { -// Constraint constraint = new Constraint(); -// constraint.setMethod("pLoF"); -// constraint.setSource("gnomAD"); -// constraint.setName(name); -// try { -// constraint.setValue(Double.parseDouble(value)); -// } catch (NumberFormatException e) { -// // invalid number (e.g. NA), discard. -// return; -// } -// constraints.add(constraint); -// } -// -// /** -// * For a gene annotation file, parse and return a map of proteins to ontology annotation objects. -// * -// * @param goaFile gene annotation file path -// * @return map of proteins to ontology annotation objects. 
-// * @throws IOException if goa file can't be read -// */ -// public static Map> getOntologyAnnotations(Path goaFile) throws IOException { -// Map> annotations = new HashMap<>(); -// if (goaFile != null && Files.exists(goaFile) && Files.size(goaFile) > 0) { -// logger.info("Loading GO annotation from '{}'", goaFile); -// BufferedReader br = FileUtils.newBufferedReader(goaFile); -// GafParser parser = new GafParser(); -// annotations = parser.parseGaf(br); -// } -// return annotations; -// } -} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneExpressionAtlasBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneExpressionAtlasBuilder.java index 7428cd5fbf..e5f6449051 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneExpressionAtlasBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GeneExpressionAtlasBuilder.java @@ -31,7 +31,7 @@ /** * Created by antonior on 10/16/14. */ -public class GeneExpressionAtlasBuilder extends CellBaseBuilder { +public class GeneExpressionAtlasBuilder extends AbstractBuilder { private Path geneAtlasDirectoryPath; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GenomeSequenceFastaBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GenomeSequenceFastaBuilder.java index 5bb232f5d2..521c5f3a71 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GenomeSequenceFastaBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/GenomeSequenceFastaBuilder.java @@ -16,7 +16,9 @@ package org.opencb.cellbase.lib.builders; +import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.models.core.GenomeSequenceChunk; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; import org.opencb.commons.utils.FileUtils; @@ -24,11 +26,13 @@ import java.io.IOException; import 
java.nio.file.Path; -public class GenomeSequenceFastaBuilder extends CellBaseBuilder { +public class GenomeSequenceFastaBuilder extends AbstractBuilder { private Path genomeReferenceFastaFile; private static final int CHUNK_SIZE = 2000; + public static final String GENOME_JSON_BASENAME = "genome"; + public static final String GENOME_JSON_FILENAME = GENOME_JSON_BASENAME + ".json.gz"; public GenomeSequenceFastaBuilder(Path genomeReferenceFastaFile, CellBaseSerializer serializer) { super(serializer); @@ -36,9 +40,10 @@ public GenomeSequenceFastaBuilder(Path genomeReferenceFastaFile, CellBaseSeriali } @Override - public void parse() { + public void parse() throws CellBaseException { + logger.info(PARSING_LOG_MESSAGE, genomeReferenceFastaFile); - try { + try (BufferedReader br = FileUtils.newBufferedReader(genomeReferenceFastaFile)) { String sequenceName = null; String sequenceType = ""; String sequenceAssembly = null; @@ -46,8 +51,7 @@ public void parse() { StringBuilder sequenceStringBuilder = new StringBuilder(); // Preparing input and output files - BufferedReader br; - br = FileUtils.newBufferedReader(genomeReferenceFastaFile); + while ((line = br.readLine()) != null) { @@ -55,11 +59,9 @@ public void parse() { sequenceStringBuilder.append(line); } else { // new chromosome, save data - if (sequenceStringBuilder.length() > 0) { - if (!sequenceName.contains("PATCH") && !sequenceName.contains("HSCHR") && !sequenceName.contains("contig")) { - System.out.println(sequenceName); - serializeGenomeSequence(sequenceName, sequenceType, sequenceAssembly, sequenceStringBuilder.toString()); - } + if (sequenceStringBuilder.length() > 0 && StringUtils.isNotEmpty(sequenceName) && !sequenceName.contains("PATCH") + && !sequenceName.contains("HSCHR") && !sequenceName.contains("contig")) { + serializeGenomeSequence(sequenceName, sequenceType, sequenceAssembly, sequenceStringBuilder.toString()); } // initialize data structures @@ -75,18 +77,17 @@ public void parse() { } } // Last 
chromosome must be processed - if (!sequenceName.contains("PATCH") && !sequenceName.contains("HSCHR") && !sequenceName.contains("contig")) { + if (StringUtils.isNotEmpty(sequenceName) && !sequenceName.contains("PATCH") && !sequenceName.contains("HSCHR") + && !sequenceName.contains("contig")) { serializeGenomeSequence(sequenceName, sequenceType, sequenceAssembly, sequenceStringBuilder.toString()); } - - br.close(); } catch (IOException e) { e.printStackTrace(); } + logger.info(PARSING_DONE_LOG_MESSAGE); } - private void serializeGenomeSequence(String chromosome, String sequenceType, String sequenceAssembly, String sequence) - throws IOException { + private void serializeGenomeSequence(String chromosome, String sequenceType, String sequenceAssembly, String sequence) { int chunk = 0; int start = 1; int end = CHUNK_SIZE - 1; @@ -100,11 +101,10 @@ private void serializeGenomeSequence(String chromosome, String sequenceType, Str genomeSequenceChunk = new GenomeSequenceChunk(chromosome, chromosome + "_" + 0 + "_" + chunkIdSuffix, start, sequence.length() - 1, sequenceType, sequenceAssembly, chunkSequence); serializer.serialize(genomeSequenceChunk); - start += CHUNK_SIZE - 1; } else { while (start < sequence.length()) { if (chunk % 10000 == 0) { - System.out.println("Chr:" + chromosome + " chunkId:" + chunk); + logger.info("Chr: {}, chunkId: {}", chromosome, chunk); } // First chunk of the chromosome if (start == 1) { diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/InteractionBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/InteractionBuilder.java index 5fcc68c206..5bc18dba17 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/InteractionBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/InteractionBuilder.java @@ -33,7 +33,7 @@ * Time: 4:43 PM * To change this template use File | Settings | File Templates. 
*/ -public class InteractionBuilder extends CellBaseBuilder { +public class InteractionBuilder extends AbstractBuilder { private final String species; private final Path psimiTabFile; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/MiRTarBaseIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/MiRTarBaseIndexer.java new file mode 100644 index 0000000000..01701362eb --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/MiRTarBaseIndexer.java @@ -0,0 +1,148 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.lib.builders; + +import org.apache.commons.lang3.StringUtils; +import org.apache.poi.ss.usermodel.*; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.opencb.biodata.models.core.MirnaTarget; +import org.opencb.biodata.models.core.TargetGene; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.math.BigDecimal; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.opencb.cellbase.lib.EtlCommons.MIRTARBASE_DATA; +import static org.opencb.cellbase.lib.builders.AbstractBuilder.PARSING_DONE_LOG_MESSAGE; +import static org.opencb.cellbase.lib.builders.AbstractBuilder.PARSING_LOG_MESSAGE; + +public class MiRTarBaseIndexer { + + protected Logger logger; + + public MiRTarBaseIndexer() { + logger = LoggerFactory.getLogger(this.getClass()); + } + + public Map> index(Path miRTarBaseFile) throws IOException { + FileUtils.checkFile(miRTarBaseFile); + + logger.info(PARSING_LOG_MESSAGE, miRTarBaseFile); + + Map> geneToMirna = new HashMap<>(); + + try (InputStream fis = new FileInputStream(miRTarBaseFile.toFile()); + Workbook workbook = new XSSFWorkbook(fis)) { + + // Get the first sheet + Sheet sheet = workbook.getSheetAt(0); + + String currentMiRTarBaseId = null; + String currentMiRNA = null; + String currentGene = null; + List targetGenes = new ArrayList<>(); + + for (int rowNum = sheet.getFirstRowNum() + 1; rowNum <= sheet.getLastRowNum(); rowNum++) { + Row row = sheet.getRow(rowNum); + + // Sanity check + if (row.getPhysicalNumberOfCells() != 9) { + logger.warn("Error parsing line {}: invalid number of columns {} (expected 9 columns). 
", + rowNum + 1, row.getPhysicalNumberOfCells()); + continue; + } + + if (row.getCell(0).getCellType() != CellType.STRING || row.getCell(0).getStringCellValue() == null + || row.getCell(1).getCellType() != CellType.STRING || row.getCell(1).getStringCellValue() == null + || row.getCell(3).getCellType() != CellType.STRING || row.getCell(3).getStringCellValue() == null) { + logger.warn("Error parsing line {}: mandatory fields(miRTarBase ID, miRNA, Target Gene) are empty or wrong cell type.", + rowNum + 1); + continue; + } + + // #0: miRTarBase ID + Cell cell = row.getCell(0); + String miRTarBaseId = cell.getStringCellValue(); + if (currentMiRTarBaseId == null) { + currentMiRTarBaseId = miRTarBaseId; + } + + // #1: miRNA + cell = row.getCell(1); + String miRNA = cell.getStringCellValue(); + if (currentMiRNA == null) { + currentMiRNA = miRNA; + } + + // #2: Species (miRNA) + + // #3: Target Gene + cell = row.getCell(3); + String geneName = cell.getStringCellValue(); + if (currentGene == null) { + currentGene = geneName; + } + + // #4: Target Gene (Entrez ID) + // #5: Species (Target Gene) + + if (!miRTarBaseId.equals(currentMiRTarBaseId) || !geneName.equals(currentGene)) { + // new entry, store current one + MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, MIRTARBASE_DATA, currentMiRNA, targetGenes); + GeneBuilderIndexer.addValueToMapElement(geneToMirna, currentGene, miRnaTarget); + targetGenes = new ArrayList<>(); + currentGene = geneName; + currentMiRTarBaseId = miRTarBaseId; + currentMiRNA = miRNA; + } + + // #6: Experiments + cell = row.getCell(6); + String experiment = (cell.getCellType() == CellType.STRING ? cell.getStringCellValue() : null); + + // #7: Support Type + cell = row.getCell(7); + String supportType = (cell.getCellType() == CellType.STRING ?
cell.getStringCellValue() : null); + + // #8: pubmed + cell = row.getCell(8); + String pubmed = new BigDecimal(cell.getNumericCellValue()).toString(); + + if (StringUtils.isNotEmpty(experiment) || StringUtils.isNotEmpty(supportType) || StringUtils.isNotEmpty(pubmed)) { + targetGenes.add(new TargetGene(experiment, supportType, pubmed)); + } + } + + // parse last entry + MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, MIRTARBASE_DATA, currentMiRNA, targetGenes); + GeneBuilderIndexer.addValueToMapElement(geneToMirna, currentGene, miRnaTarget); + + } + logger.info(PARSING_DONE_LOG_MESSAGE, miRTarBaseFile); + + return geneToMirna; + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java index 1eabf8975a..9273c451f5 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/OntologyBuilder.java @@ -19,60 +19,84 @@ import org.opencb.biodata.formats.obo.OboParser; import org.opencb.biodata.models.core.OntologyTerm; +import org.opencb.cellbase.core.config.SpeciesConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.EtlCommons; import org.opencb.commons.utils.FileUtils; import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; import java.nio.file.Path; +import java.util.Collections; import java.util.List; -public class OntologyBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.*; - private Path hpoFile; - private Path goFile; - private Path doidFile; - private Path mondoFile; +public class OntologyBuilder extends AbstractBuilder { - public OntologyBuilder(Path oboDirectoryPath, CellBaseSerializer serializer) { + private Path oboDownloadPath; + private 
SpeciesConfiguration speciesConfiguration; + + public static final String OBO_OUTPUT_BASENAME = "ontology"; + public static final String OBO_OUTPUT_FILENAME = OBO_OUTPUT_BASENAME + ".json.gz"; + + public OntologyBuilder(Path oboDownloadPath, SpeciesConfiguration speciesConfiguration, CellBaseSerializer serializer) { super(serializer); - hpoFile = oboDirectoryPath.resolve(EtlCommons.HPO_FILE); - goFile = oboDirectoryPath.resolve(EtlCommons.GO_FILE); - doidFile = oboDirectoryPath.resolve(EtlCommons.DOID_FILE); - mondoFile = oboDirectoryPath.resolve(EtlCommons.MONDO_FILE); + + this.oboDownloadPath = oboDownloadPath; + this.speciesConfiguration = speciesConfiguration; } @Override public void parse() throws Exception { - BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFile); - OboParser parser = new OboParser(); - List terms = parser.parseOBO(bufferedReader, "Human Phenotype Ontology"); - for (OntologyTerm term : terms) { - term.setSource("HP"); - serializer.serialize(term); - } + // Sanity check + checkDirectory(oboDownloadPath, getDataName(ONTOLOGY_DATA)); - bufferedReader = FileUtils.newBufferedReader(goFile); - terms = parser.parseOBO(bufferedReader, "Gene Ontology"); - for (OntologyTerm term : terms) { - term.setSource("GO"); - serializer.serialize(term); + // Check ontology files + List hpoFiles = Collections.emptyList(); + List doidFiles = Collections.emptyList(); + List mondoFiles = Collections.emptyList(); + if (speciesConfiguration.getScientificName().equalsIgnoreCase(HOMO_SAPIENS)) { + hpoFiles = checkOboFiles(HPO_OBO_DATA); + doidFiles = checkOboFiles(DOID_OBO_DATA); + mondoFiles = checkOboFiles(MONDO_OBO_DATA); } + List goFiles = checkOboFiles(GO_OBO_DATA); - bufferedReader = FileUtils.newBufferedReader(doidFile); - terms = parser.parseOBO(bufferedReader, "Human Disease Ontology"); - for (OntologyTerm term : terms) { - term.setSource("DOID"); - serializer.serialize(term); + // Parse OBO files and build + if 
(speciesConfiguration.getScientificName().equalsIgnoreCase(HOMO_SAPIENS)) { + parseOboFile(hpoFiles.get(0), HPO_OBO_DATA); + parseOboFile(doidFiles.get(0), DOID_OBO_DATA); + parseOboFile(mondoFiles.get(0), MONDO_OBO_DATA); } + parseOboFile(goFiles.get(0), GO_OBO_DATA); - bufferedReader = FileUtils.newBufferedReader(mondoFile); - terms = parser.parseOBO(bufferedReader, "Mondo Ontology"); - for (OntologyTerm term : terms) { - term.setSource("MONDO"); - serializer.serialize(term); + // Close serializer + serializer.close(); + } + + private void parseOboFile(File oboFile, String data) throws IOException { + logger.info(PARSING_LOG_MESSAGE, oboFile); + try (BufferedReader bufferedReader = FileUtils.newBufferedReader(oboFile.toPath())) { + OboParser parser = new OboParser(); + List terms = parser.parseOBO(bufferedReader, data); + for (OntologyTerm term : terms) { + serializer.serialize(term); + } } + logger.info(PARSING_DONE_LOG_MESSAGE, oboFile); + } - serializer.close(); + private List checkOboFiles(String data) throws IOException, CellBaseException { + Path versionFilePath = oboDownloadPath.resolve(data).resolve(getDataVersionFilename(data)); + String name = getDataName(data); + + List files = checkFiles(dataSourceReader.readValue(versionFilePath.toFile()), oboDownloadPath.resolve(data), + getDataName(ONTOLOGY_DATA) + "/" + name); + if (files.size() != 1) { + throw new CellBaseException("One " + name + " file is expected, but currently there are " + files.size() + " files"); + } + return files; } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java index 1f7a4836ca..1cfd85ae07 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PharmGKBBuilder.java @@ -23,22 +23,25 @@ import org.opencb.biodata.models.core.Xref; import 
org.opencb.biodata.models.pharma.*; import org.opencb.biodata.models.pharma.guideline.BasicObject; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.cellbase.lib.EtlCommons; import org.opencb.commons.utils.FileUtils; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.*; import java.util.stream.Collectors; import static org.opencb.cellbase.lib.EtlCommons.*; -public class PharmGKBBuilder extends CellBaseBuilder { +public class PharmGKBBuilder extends AbstractBuilder { - private final Path inputDir; - private final Path pharmGKBDir; + private final Path pharmGkbDownloadPath; private static final String CHEMICALS_BASENAME = "chemicals"; private static final String CHEMICALS_TSV_FILENAME = "chemicals.tsv"; @@ -88,21 +91,25 @@ public class PharmGKBBuilder extends CellBaseBuilder { private static final String PHARMGKB_LAST_UPDATE_DATE_KEY = "PHARMGKB_LAST_UPDATE_DATE"; private static final String PHARMGKB_IS_VIP_KEY = "PHARMGKB_IS_VIP"; - public PharmGKBBuilder(Path inputDir, CellBaseFileSerializer serializer) { + public PharmGKBBuilder(Path parmGkbDownloadPath, CellBaseFileSerializer serializer) { super(serializer); - - this.inputDir = inputDir; - this.pharmGKBDir = inputDir.resolve(PHARMGKB_DATA); + this.pharmGkbDownloadPath = parmGkbDownloadPath; } @Override public void parse() throws Exception { - // Check input folder - FileUtils.checkDirectory(inputDir); + logger.info(BUILDING_LOG_MESSAGE, getDataName(PHARMGKB_DATA)); + + // Sanity check + checkDirectory(pharmGkbDownloadPath, getDataName(PHARMGKB_DATA)); - // PharmGKB - FileUtils.checkDirectory(pharmGKBDir); - logger.info("Parsing {} files and building the data models...", PHARMGKB_NAME); + // Check PharmGKB files + DataSource dataSource = 
dataSourceReader.readValue(pharmGkbDownloadPath.resolve(getDataVersionFilename(PHARMGKB_DATA)).toFile()); + List pharmGkbFiles = checkFiles(dataSource, pharmGkbDownloadPath, getDataCategory(PHARMGKB_DATA) + "/" + + getDataName(PHARMGKB_DATA)); + + // Unzip downloaded file + unzipDownloadedFiles(pharmGkbFiles); // Parse chemical file Map chemicalsMap = parseChemicalFile(); @@ -113,8 +120,6 @@ public void parse() throws Exception { // Parse gene file parseGeneFile(chemicalsMap); - logger.info("Parsing {} files finished.", PHARMGKB_NAME); - // Generation the pharmacogenomics JSON file logger.info("Writing {} JSON file to {} ...", PHARMACOGENOMICS_DATA, serializer.getOutdir()); int counter = 0; @@ -125,11 +130,14 @@ public void parse() throws Exception { } } serializer.close(); - logger.info("Writing {} JSON file done!", PHARMACOGENOMICS_DATA); + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(PHARMGKB_DATA)); } private Map parseChemicalFile() throws IOException { - Path chemicalsFile = pharmGKBDir.resolve(CHEMICALS_BASENAME).resolve(CHEMICALS_TSV_FILENAME); + Path chemicalsFile = serializer.getOutdir().resolve(CHEMICALS_BASENAME).resolve(CHEMICALS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, chemicalsFile); + Map chemicalsMap = new HashMap<>(); try (BufferedReader br = FileUtils.newBufferedReader(chemicalsFile)) { // Skip first line, i.e. 
the header line @@ -146,7 +154,7 @@ private Map parseChemicalFile() throws IOException { // Label Has Dosing Info Has Rx Annotation RxNorm Identifiers ATC Identifiers PubChem Compound Identifiers PharmaChemical pharmaChemical = new PharmaChemical() .setId(fields[0]) - .setSource(PHARMGKB_NAME) + .setSource(PHARMGKB_DATA) .setName(fields[1]) .setSmiles(fields[7]) .setInChI(fields[8]); @@ -177,6 +185,7 @@ private Map parseChemicalFile() throws IOException { } logger.info("Number of Chemical items read {}", chemicalsMap.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, chemicalsFile); return chemicalsMap; } @@ -192,8 +201,9 @@ private void parseClinicalAnnotationFiles(Map chemicalsM Map> variantMap = parseVariantFile(); // clinical_annotations.tsv - try (BufferedReader br = FileUtils.newBufferedReader(pharmGKBDir.resolve(CLINICAL_ANNOTATIONS_BASENAME) - .resolve(CLINICAL_ANNOTATIONS_TSV_FILENAME))) { + Path clinAnnotPath = serializer.getOutdir().resolve(CLINICAL_ANNOTATIONS_BASENAME).resolve(CLINICAL_ANNOTATIONS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, clinAnnotPath); + try (BufferedReader br = FileUtils.newBufferedReader(clinAnnotPath)) { // Skip first line, i.e. 
the header line String line = br.readLine(); while ((line = br.readLine()) != null) { @@ -278,6 +288,7 @@ private void parseClinicalAnnotationFiles(Map chemicalsM } } } + logger.info(PARSING_DONE_LOG_MESSAGE, clinAnnotPath); // Update the clinical annotation map by parsing the clinical annotation evidences parseClinicalAnnotationEvidenceFile(variantAnnotationMap); @@ -300,7 +311,9 @@ private void parseClinicalAnnotationFiles(Map chemicalsM private Map> parseVariantFile() throws IOException { Map> variantMap = new HashMap<>(); // Parse the variant file (i.e., variants.tsv) - Path varPath = pharmGKBDir.resolve(VARIANTS_BASENAME).resolve(VARIANTS_TSV_FILENAME); + Path varPath = serializer.getOutdir().resolve(VARIANTS_BASENAME).resolve(VARIANTS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, varPath); + try (BufferedReader br = FileUtils.newBufferedReader(varPath)) { // Skip first line, i.e. the header line String line = br.readLine(); @@ -367,6 +380,7 @@ private Map> parseVariantFile() throws IOException { } logger.info("Number of variants = {}", variantMap.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, varPath); return variantMap; } @@ -385,7 +399,8 @@ private void parseClinicalAnnotationEvidenceFile(Map variantAnnotationMap) throws IOException { // Parse the clinical annotation alleles file (i.e., clinical_ann_alleles.tsv) - Path allelesPath = pharmGKBDir.resolve(CLINICAL_ANNOTATIONS_BASENAME).resolve(CLINICAL_ANN_ALLELES_TSV_FILENAME); + Path allelesPath = serializer.getOutdir().resolve(CLINICAL_ANNOTATIONS_BASENAME).resolve(CLINICAL_ANN_ALLELES_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, allelesPath); try (BufferedReader br = FileUtils.newBufferedReader(allelesPath)) { // Skip first line, i.e. 
the header line String line = br.readLine(); @@ -502,12 +522,14 @@ private void parseClinicalAnnotationAlleleFile(Map variantAssociationMap) throws IOException { // For CellBase, variant association corresponds to PharmGKB variant annotation // Parse the variant annotation file (i.e., var_drug_ann.tsv) - Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(VARIANT_ANNOTATIONS_TSV_FILENAME); + Path varDrugPath = serializer.getOutdir().resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(VARIANT_ANNOTATIONS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, varDrugPath); int counter = 0; try (BufferedReader br = FileUtils.newBufferedReader(varDrugPath)) { // Skip first line, i.e. the header line @@ -562,6 +584,7 @@ private void parseVariantAnnotationFile(Map va } } logger.info("Number of variant annotations = {}", counter); + logger.info(PARSING_DONE_LOG_MESSAGE, varDrugPath); } private Map parseGuidelineAnnotationFiles() throws IOException { @@ -571,7 +594,7 @@ private Map parseGuidelineAnnotationFiles() t ObjectReader objectReader = mapper.readerFor(PharmaGuidelineAnnotation.class); // Parse the guideline annotations JSON files - Path guidelinesPath = pharmGKBDir.resolve(GUIDELINE_ANNOTATIONS_BASENAME); + Path guidelinesPath = serializer.getOutdir().resolve(GUIDELINE_ANNOTATIONS_BASENAME); FileUtils.checkDirectory(guidelinesPath); for (File file : Objects.requireNonNull(guidelinesPath.toFile().listFiles())) { if (file.getName().endsWith("json")) { @@ -593,7 +616,8 @@ private Map parseGuidelineAnnotationFiles() t private Map parseDrugLabelAnnotationFile() throws IOException { Map drugLabelAnnotationMap = new HashMap<>(); // Parse the drug labels annotations file (i.e., drugLabels.tsv) - Path drugLabelPath = pharmGKBDir.resolve(DRUG_LABELS_BASENAME).resolve(DRUG_LABELS_TSV_FILENAME); + Path drugLabelPath = serializer.getOutdir().resolve(DRUG_LABELS_BASENAME).resolve(DRUG_LABELS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, drugLabelPath); try 
(BufferedReader br = FileUtils.newBufferedReader(drugLabelPath)) { // Skip first line, i.e. the header line String line = br.readLine(); @@ -631,12 +655,15 @@ private Map parseDrugLabelAnnotationFile() th } logger.info("Number of drug label annotations = {}", drugLabelAnnotationMap.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, drugLabelPath); return drugLabelAnnotationMap; } private void parsePhenotypeAnnotationFile(Map variantAssociationMap) throws IOException { // Parse the variant annotation file (i.e., var_pheno_ann.tsv) - Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(PHENOTYPE_ANNOTATIONS_TSV_FILENAME); + Path varDrugPath = serializer.getOutdir().resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(PHENOTYPE_ANNOTATIONS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, varDrugPath); + int counter = 0; try (BufferedReader br = FileUtils.newBufferedReader(varDrugPath)) { // Skip first line, i.e. the header line @@ -691,11 +718,13 @@ private void parsePhenotypeAnnotationFile(Map } } logger.info("Number of phenotype annotations = {}", counter); + logger.info(PARSING_DONE_LOG_MESSAGE, varDrugPath); } private void parseFunctionalAnnotationFile(Map variantAssociationMap) throws IOException { // Parse the variant annotation file (i.e., var_fa_ann.tsv) - Path varDrugPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(FUNCTIONAL_ANNOTATIONS_TSV_FILENAME); + Path varDrugPath = serializer.getOutdir().resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(FUNCTIONAL_ANNOTATIONS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, varDrugPath); int counter = 0; try (BufferedReader br = FileUtils.newBufferedReader(varDrugPath)) { // Skip first line, i.e. 
the header line @@ -751,12 +780,14 @@ private void parseFunctionalAnnotationFile(Map } } logger.info("Number of variant annotations = {}", counter); + logger.info(PARSING_DONE_LOG_MESSAGE, varDrugPath); } private void parseStudyParameterFile(Map variantAssociationMap) throws IOException { Map> studyParametersMap = new HashMap<>(); // Parse the study parameters file (i.e., study_parameters.tsv) - Path studyParamsPath = pharmGKBDir.resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(STUDY_PARAMETERS_TSV_FILENAME); + Path studyParamsPath = serializer.getOutdir().resolve(VARIANT_ANNOTATIONS_BASENAME).resolve(STUDY_PARAMETERS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, studyParamsPath); try (BufferedReader br = FileUtils.newBufferedReader(studyParamsPath)) { // Skip first line, i.e. the header line String line = br.readLine(); @@ -807,6 +838,7 @@ private void parseStudyParameterFile(Map varia } } logger.info("Number of study parameters lines = {}", studyParametersMap.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, studyParamsPath); for (Map.Entry> entry : studyParametersMap.entrySet()) { if (variantAssociationMap.containsKey(entry.getKey())) { @@ -861,7 +893,8 @@ private void parseGeneFile(Map chemicalsMap) throws IOEx // Parse the genes file (i.e., genes.tsv) Map geneAnnotationMapByPgkbGeneId = new HashMap<>(); - Path genesPath = pharmGKBDir.resolve(GENES_BASENAME).resolve(GENES_TSV_FILENAME); + Path genesPath = serializer.getOutdir().resolve(GENES_BASENAME).resolve(GENES_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, genesPath); try (BufferedReader br = FileUtils.newBufferedReader(genesPath)) { // Skip first line, i.e. 
the header line String line = br.readLine(); @@ -940,13 +973,15 @@ private void parseGeneFile(Map chemicalsMap) throws IOEx } logger.info("Number of parsed genes = {}", geneAnnotationMapByPgkbGeneId.size()); + logger.info(PARSING_DONE_LOG_MESSAGE, genesPath); } private void parseChemicalGeneRelationships(Map> pgkbGeneIdMapByChemicalName, Map geneAnnotationMapByPgkbGeneId) throws IOException { int counter = 0; // Parse the genes file (i.e., relationships.tsv) - Path relationshipsPath = pharmGKBDir.resolve(RELATIONSHIPS_BASENAME).resolve(RELATIONSHIPS_TSV_FILENAME); + Path relationshipsPath = serializer.getOutdir().resolve(RELATIONSHIPS_BASENAME).resolve(RELATIONSHIPS_TSV_FILENAME); + logger.info(PARSING_LOG_MESSAGE, relationshipsPath); try (BufferedReader br = FileUtils.newBufferedReader(relationshipsPath)) { // Skip first line, i.e. the header line String line = br.readLine(); @@ -986,6 +1021,7 @@ private void parseChemicalGeneRelationships(Map> pgkbGeneIdM } } logger.info("Number of parsed {}-{} relationships = {}", GENE_ENTITY, CHEMICAL_ENTITY, counter); + logger.info(PARSING_DONE_LOG_MESSAGE, relationshipsPath); } private List stringFieldToList(String field) { @@ -1011,6 +1047,29 @@ private boolean isHaplotype(String value) { } private List getHaplotypeList(String value) { - return Arrays.stream(value.split(",")).map(s -> s.trim()).collect(Collectors.toList()); + return Arrays.stream(value.split(",")).map(String::trim).collect(Collectors.toList()); + } + + private void unzipDownloadedFiles(List pharmGkbFiles) throws CellBaseException { + // Unzip + for (File pharmGgkFile : pharmGkbFiles) { + logger.info("Unzip file: {}", pharmGgkFile); + try { + String outPath = serializer.getOutdir().resolve(pharmGgkFile.getName().split("\\.")[0]).toString(); + List params = Arrays.asList("-d", outPath, "-o", pharmGgkFile.toString()); + EtlCommons.runCommandLineProcess(null, "unzip", params, Paths.get(outPath + ".log")); + } catch (CellBaseException e) { + if 
(pharmGgkFile.getName().contains(GUIDELINE_ANNOTATIONS_BASENAME)) { + // It fails because of long filenames, so it does not raise any exception + logger.warn(e.getMessage()); + } + } catch (IOException e) { + throw new CellBaseException("Error executing unzip in file " + pharmGgkFile, e); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("Error executing unzip in file " + pharmGgkFile, e); + } + } } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PolygenicScoreBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PolygenicScoreBuilder.java new file mode 100644 index 0000000000..1e47d58725 --- /dev/null +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PolygenicScoreBuilder.java @@ -0,0 +1,721 @@ +/* + * Copyright 2015-2020 OpenCB + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.opencb.cellbase.lib.builders; + +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectWriter; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVParser; +import org.apache.commons.csv.CSVRecord; +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.models.core.pgs.CommonPolygenicScore; +import org.opencb.biodata.models.core.pgs.PgsCohort; +import org.opencb.biodata.models.core.pgs.PolygenicScore; +import org.opencb.biodata.models.core.pgs.VariantPolygenicScore; +import org.opencb.biodata.models.variant.avro.OntologyTermAnnotation; +import org.opencb.biodata.models.variant.avro.PubmedReference; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; +import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.commons.utils.FileUtils; +import org.rocksdb.*; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.*; + +import static org.opencb.cellbase.lib.EtlCommons.*; + +public class PolygenicScoreBuilder extends AbstractBuilder { + + private Path downloadPath; + private Path integrationPath; + private DataSource dataSource; + + private Set pgsIdSet; + private Object[] varRDBConn; + private Object[] varPgsRDBConn; + private int varBatchCounter = 0; + private int varPgsBatchCounter = 0; + private WriteBatch varBatch; + private WriteBatch varPgsBatch; + + private long duplicatedKeys = 0; + + private static ObjectMapper mapper; + private static ObjectReader varPgsReader; + private static ObjectWriter jsonObjectWriter; + + private static final int MAX_BATCH_SIZE = 100; + + private static final String RSID_COL = "rsID"; + private static final String CHR_NAME_COL = "chr_name"; + 
private static final String EFFECT_ALLELE_COL = "effect_allele"; + private static final String OTHER_ALLELE_COL = "other_allele"; + private static final String EFFECT_WEIGHT_COL = "effect_weight"; + private static final String ALLELEFREQUENCY_EFFECT_COL = "allelefrequency_effect"; + private static final String ODDS_RATIO_COL = "OR"; + private static final String HAZARD_RATIO_COL = "HR"; + private static final String LOCUS_NAME_COL = "locus_name"; + private static final String IS_HAPLOTYPE_COL = "is_haplotype"; + private static final String IS_DIPLOTYPE_COL = "is_diplotype"; + private static final String IMPUTATION_METHOD_COL = "imputation_method"; + private static final String VARIANT_DESCRIPTION_COL = "variant_description"; + private static final String INCLUSION_CRITERIA_COL = "inclusion_criteria"; + private static final String IS_INTERACTION_COL = "is_interaction"; + private static final String IS_DOMINANT_COL = "is_dominant"; + private static final String IS_RECESSIVE_COL = "is_recessive"; + private static final String DOSAGE_0_WEIGHT_COL = "dosage_0_weight"; + private static final String DOSAGE_1_WEIGHT_COL = "dosage_1_weight"; + private static final String DOSAGE_2_WEIGHT_COL = "dosage_2_weight"; + private static final String HM_RSID_COL = "hm_rsID"; + private static final String HM_CHR_COL = "hm_chr"; + private static final String HM_POS_COL = "hm_pos"; + private static final String HM_INFEROTHERALLELE_COL = "hm_inferOtherAllele"; + + public static final String SAMPLE_SET_KEY = "Sample Set"; + public static final String ODDS_RATIO_KEY = "Odds ratio"; + public static final String HAZARD_RATIO_KEY = "Hazard ratio"; + public static final String BETA_KEY = "Beta"; + public static final String AUROC_KEY = "AUROC"; // Area Under the Receiver-Operating Characteristic Curve (AUROC) + public static final String CINDEX_KEY = "C-index"; // Concordance Statistic (C-index) + public static final String OTHER_KEY = "Other metric"; + private static final String 
EFFECT_WEIGHT_KEY = "Effect weight"; + private static final String ALLELE_FREQUENCY_EFFECT_KEY = "Allele frequency effect"; + private static final String LOCUS_NAME_KEY = "Locus name"; + private static final String IS_HAPLOTYPE_KEY = "Haplotype"; + private static final String IS_DIPLOTYPE_KEY = "Diplotype"; + private static final String IMPUTATION_METHOD_KEY = "Imputation method"; + private static final String VARIANT_DESCRIPTION_KEY = "Variant description"; + private static final String INCLUSION_CRITERIA_KEY = "Score inclusion criteria"; + private static final String IS_INTERACTION_KEY = "Interaction"; + private static final String IS_DOMINANT_KEY = "Dominant inheritance model"; + private static final String IS_RECESSIVE_KEY = "Recessive inheritance model"; + private static final String DOSAGE_0_WEIGHT_KEY = "Effect weight with 0 copy of the effect allele"; + private static final String DOSAGE_1_WEIGHT_KEY = "Effect weight with 1 copy of the effect allele"; + private static final String DOSAGE_2_WEIGHT_KEY = "Effect weight with 2 copies of the effect allele"; + + private static final String PARSING_FILE = "Parsing file "; + + public static final String PGS_COMMON_OUTPUT_FILENAME = PGS_COMMON_COLLECTION + JSON_GZ_EXTENSION; + public static final String PGS_VARIANT_OUTPUT_FILENAME = PGS_VARIANT_COLLECTION + JSON_GZ_EXTENSION; + + private static final Set VALID_CHROMOSOMES = new HashSet<>(Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", + "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "X", "Y", "MT", "M")); + + private static final byte[] ONE = "1".getBytes(); + + static { + mapper = new ObjectMapper(); + mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); + varPgsReader = mapper.readerFor(VariantPolygenicScore.class); + jsonObjectWriter = mapper.writer(); + } + + public PolygenicScoreBuilder(Path downloadPath, CellBaseFileSerializer serializer) { + super(serializer); + + this.downloadPath = downloadPath; + 
logger = LoggerFactory.getLogger(PolygenicScoreBuilder.class); + } + + public void check() throws CellBaseException, IOException { + if (checked) { + return; + } + + logger.info(CHECKING_BEFORE_BUILDING_LOG_MESSAGE, getDataName(PGS_DATA)); + + // Sanity check + checkDirectory(downloadPath, getDataName(PGS_DATA)); + integrationPath = serializer.getOutdir().resolve("integration"); + Files.createDirectories(integrationPath); + if (!Files.exists(integrationPath)) { + throw new CellBaseException("Could not create the folder " + integrationPath); + } + // Prepare RocksDB for variant IDs + this.varRDBConn = getDBConnection(integrationPath.resolve("rdb-var.idx").toString(), true); + this.varBatch = new WriteBatch(); + // Prepare RocksDB for PGS/variants + this.varPgsRDBConn = getDBConnection(integrationPath.resolve("rdb-var-pgs.idx").toString(), true); + this.varPgsBatch = new WriteBatch(); + // PGS set + this.pgsIdSet = new HashSet<>(); + + // Check downloaded files + this.dataSource = dataSourceReader.readValue(downloadPath.resolve(getDataVersionFilename(PGS_CATALOG_DATA)).toFile()); + checkFiles(dataSource, downloadPath, getDataName(PGS_CATALOG_DATA)); + + logger.info(CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE, getDataName(PGS_DATA)); + checked = true; + } + + @Override + public void parse() throws Exception { + check(); + + logger.info(BUILDING_LOG_MESSAGE, getDataName(PGS_DATA)); + + int numFiles; + int counter; + String endsWith; + File[] files = downloadPath.toFile().listFiles(); + + // First, process metadata files + try (BufferedWriter bw = FileUtils.newBufferedWriter(serializer.getOutdir().resolve(PGS_COMMON_COLLECTION + JSON_GZ_EXTENSION))) { + counter = 0; + endsWith = "_metadata" + TAR_GZ_EXTENSION; + numFiles = getNumFiles(files, endsWith); + for (File file : files) { + if (file.isFile() && file.getName().endsWith(endsWith)) { + // E.g.: PGS004905_metadata.tar.gz: it contains a set of files about metadata + logger.info(PARSING_LOG_MESSAGE, file.getName()); + 
processPgsMetadataFile(file, bw); + logger.info(PARSING_DONE_LOG_MESSAGE, file.getName()); + logger.info("Progress: {} of {} meta files", ++counter, numFiles); + } + } + } + + // Second, process variant files + counter = 0; + endsWith = TXT_GZ_EXTENSION; + numFiles = getNumFiles(files, endsWith); + for (File file : files) { + if (file.isFile() && file.getName().endsWith(endsWith)) { + // E.g.: PGS004905_hmPOS_GRCh38.txt.gz: it contains the variants + logger.info(PARSING_LOG_MESSAGE, file.getName()); + + String pgsId = null; + Map columnPos = new HashMap<>(); + + try (BufferedReader br = FileUtils.newBufferedReader(file.toPath())) { + String line; + while ((line = br.readLine()) != null) { + if (line.startsWith("#")) { + if (line.startsWith("#pgs_id=")) { + pgsId = line.split("=")[1].trim(); + // Sanity check + if (!file.getName().startsWith(pgsId)) { + throw new CellBaseException(PARSING_FILE + file.getName() + ": pgs_id mismatch"); + } + // Add PGS ID to the set + pgsIdSet.add(pgsId); + } + } else if (line.startsWith(RSID_COL) || line.startsWith(CHR_NAME_COL)) { + String[] fields = line.split("\t"); + for (int i = 0; i < fields.length; i++) { + columnPos.put(fields[i], i); + } + } else { + // Sanity check + if (pgsId == null) { + throw new CellBaseException(PARSING_FILE + file.getName() + ": pgs_id is null"); + } + saveVariantPolygenicScore(line, columnPos, pgsId); + } + } + } + logger.info(PARSING_DONE_LOG_MESSAGE, file.getName()); + logger.info("Progress: {} of {} variant files", ++counter, numFiles); + } + } + + RocksDB rdb; + // Write remaining variant ID batch + if (varBatchCounter > 0) { + rdb = (RocksDB) varRDBConn[0]; + rdb.write(new WriteOptions(), varBatch); + varBatch.clear(); + } + // Write remaining PGS/variant batch + if (varPgsBatchCounter > 0) { + rdb = (RocksDB) varPgsRDBConn[0]; + rdb.write(new WriteOptions(), varPgsBatch); + varPgsBatch.clear(); + } + + // Serialize/write the saved variant polygenic scores in the RocksDB + serializeRDB(); + 
serializer.close(); + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(PGS_DATA)); + } + + private void processPgsMetadataFile(File metadataFile, BufferedWriter bw) throws CellBaseException { + String suffix; + String pgsId = metadataFile.getName().split("_")[0]; + + Path tmp = serializer.getOutdir().resolve("tmp"); + if (!tmp.toFile().exists()) { + tmp.toFile().mkdirs(); + } + + String command = "tar -xzf " + metadataFile.getAbsolutePath() + " -C " + tmp.toAbsolutePath(); + try { + logger.info("Executing: {}", command); + Process process = Runtime.getRuntime().exec(command); + process.waitFor(); + } catch (IOException e) { + throw new CellBaseException("Exception raised when executing: " + command, e); + } catch (InterruptedException e) { + // Restore interrupted state... + Thread.currentThread().interrupt(); + throw new CellBaseException("Exception raised when executing: " + command, e); + } + + // Create PGS object, with the common fields + String filename; + CommonPolygenicScore pgs = new CommonPolygenicScore(); + pgs.setId(pgsId); + pgs.setSource(PGS_CATALOG_DATA); + pgs.setVersion(dataSource.getVersion()); + + String line; + + // PGSxxxxx_metadata_publications.csv + suffix = "_metadata_publications.csv"; + filename = pgsId + suffix; + try (BufferedReader br = FileUtils.newBufferedReader(tmp.resolve(filename))) { + // Skip first line + br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 3 4 5 6 + // PGS Publication/Study (PGP) ID First Author Title Journal Name Publication Date Release Date Authors + // 7 8 + // digital object identifier (doi) PubMed ID (PMID) + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + pgs.getPubmedRefs().add(new PubmedReference(strings.get(8), strings.get(2), strings.get(3), strings.get(4), null)); + } + } catch (IOException e) { + throw new CellBaseException(PARSING_FILE + filename, e); + } + + 
// PGSxxxxx_metadata_efo_traits.csv + suffix = "_metadata_efo_traits.csv"; + filename = pgsId + suffix; + try (BufferedReader br = FileUtils.newBufferedReader(tmp.resolve(filename))) { + // Skip first line + br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 3 + // Ontology Trait ID Ontology Trait Label Ontology Trait Description Ontology URL + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + pgs.getTraits().add(new OntologyTermAnnotation(strings.get(0), strings.get(1), strings.get(2), "EFO", strings.get(3), + new HashMap<>())); + } + } catch (IOException e) { + throw new CellBaseException(PARSING_FILE + filename, e); + } + + // PGSxxxxx_metadata_scores.csv + suffix = "_metadata_scores.csv"; + filename = pgsId + suffix; + try (BufferedReader br = FileUtils.newBufferedReader(tmp.resolve(filename))) { + // Skip first line + br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 3 4 + // Polygenic Score (PGS) ID PGS Name Reported Trait Mapped Trait(s) (EFO label) Mapped Trait(s) (EFO ID) + // 5 6 7 8 + // PGS Development Method PGS Development Details/Relevant Parameters Original Genome Build Number of Variants + // 9 10 11 12 13 + // Number of Interaction Terms Type of Variant Weight PGS Publication (PGP) ID Publication (PMID) Publication (doi) + // 14 15 + // Score and results match the original publication Ancestry Distribution (%) - Source of Variant Associations (GWAS) + // 16 17 18 + // Ancestry Distribution (%) - Score Development/Training Ancestry Distribution (%) - PGS Evaluation FTP link + // 19 20 + // Release Date License/Terms of Use + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + // Sanity check + if (!pgsId.equals(strings.get(0))) { + throw new CellBaseException(PARSING_FILE + 
filename + ": mismatch PGS ID"); + } + if (StringUtils.isNotEmpty(pgs.getName())) { + throw new CellBaseException("More than one PGS in file " + filename); + } + pgs.setName(strings.get(1)); + } + } catch (IOException e) { + throw new CellBaseException(PARSING_FILE + filename, e); + } + + // TODO: PGSxxxxx_metadata_score_development_samples.csv + // 0 1 2 3 4 + // Polygenic Score (PGS) ID Stage of PGS Development Number of Individuals Number of Cases Number of Controls + // 5 6 7 8 + // Percent of Participants Who are Male Sample Age Broad Ancestry Category "Ancestry (e.g. French, Chinese)" + // 9 10 11 12 + // Country of Recruitment Additional Ancestry Description Phenotype Definitions and Methods Followup Time + // 13 14 15 16 17 + // GWAS Catalog Study ID (GCST...) Source PubMed ID (PMID) Source DOI Cohort(s) Additional Sample/Cohort Information + + // PGSxxxxx_metadata_performance_metrics.csv + suffix = "_metadata_performance_metrics.csv"; + filename = pgsId + suffix; + try (BufferedReader br = FileUtils.newBufferedReader(tmp.resolve(filename))) { + // Skip first line + br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 3 4 + // PGS Performance Metric (PPM) ID Evaluated Score PGS Sample Set (PSS) PGS Publication (PGP) ID Reported Trait + // 5 6 7 8 + // Covariates Included in the Model PGS Performance: Other Relevant Information Publication (PMID) Publication (doi) + // 9 10 11 12 + // Hazard Ratio (HR) Odds Ratio (OR) Beta Area Under the Receiver-Operating Characteristic Curve (AUROC) + // 13 14 + // Concordance Statistic (C-index) Other Metric(s) + + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + + // Sanity check + if (!pgsId.equals(strings.get(1))) { + continue; + } + + Map values = new HashMap<>(); + if (StringUtils.isNotEmpty(strings.get(2))) { + values.put(SAMPLE_SET_KEY, strings.get(2)); + } + if 
(StringUtils.isNotEmpty(strings.get(9))) { + values.put(HAZARD_RATIO_KEY, strings.get(9)); + } + if (StringUtils.isNotEmpty(strings.get(10))) { + values.put(ODDS_RATIO_KEY, strings.get(10)); + } + if (StringUtils.isNotEmpty(strings.get(11))) { + values.put(BETA_KEY, strings.get(11)); + } + if (StringUtils.isNotEmpty(strings.get(12))) { + values.put(AUROC_KEY, strings.get(12)); + } + if (StringUtils.isNotEmpty(strings.get(13))) { + values.put(CINDEX_KEY, strings.get(13)); + } + if (StringUtils.isNotEmpty(strings.get(14))) { + values.put(OTHER_KEY, strings.get(14)); + } + pgs.getValues().add(values); + } + } catch (IOException e) { + throw new CellBaseException(PARSING_FILE + filename, e); + } + + // TODO: PGSxxxxx_metadata_evaluation_sample_sets.csv + // 0 1 2 3 4 + // PGS Sample Set (PSS) Polygenic Score (PGS) ID Number of Individuals Number of Cases Number of Controls + // 5 6 7 + // Percent of Participants Who are Male Sample Age,Broad Ancestry Category "Ancestry (e.g.French, Chinese)" + // 8 9 10 11 + // Country of Recruitment Additional Ancestry Description Phenotype Definitions and Methods Followup Time + // 12 13 14 15 16 + // GWAS Catalog Study ID (GCST...) 
Source PubMed ID (PMID) Source DOI Cohort(s) Additional Sample/Cohort Information + + // PGSxxxxx_metadata_cohorts.csv + suffix = "_metadata_cohorts.csv"; + filename = pgsId + suffix; + try (BufferedReader br = FileUtils.newBufferedReader(tmp.resolve(filename))) { + // Skip first line + line = br.readLine(); + while ((line = br.readLine()) != null) { + // 0 1 2 + // Cohort ID Cohort Name Previous/other/additional names + StringReader stringReader = new StringReader(line); + CSVParser csvParser = CSVFormat.DEFAULT.parse(stringReader); + CSVRecord strings = csvParser.getRecords().get(0); + pgs.getCohorts().add(new PgsCohort(strings.get(0), strings.get(1), strings.get(2))); + } + } catch (IOException e) { + throw new CellBaseException(PARSING_FILE + filename, e); + } + + // Create PGS object, with the common fields + try { + bw.write(jsonObjectWriter.writeValueAsString(pgs)); + bw.write("\n"); + } catch (IOException e) { + throw new CellBaseException("Writing CommonPolygenicScore data model", e); + } + + // Clean tmp folder + for (File tmpFile : tmp.toFile().listFiles()) { + try { + Files.delete(tmpFile.toPath()); + } catch (IOException e) { + logger.warn("Return false when deleting file: " + tmpFile, e); + } + } + } + + private void saveVariantPolygenicScore(String line, Map columnPos, String pgsId) throws RocksDBException, IOException { + String chrom; + int position; + String effectAllele; + String otherAllele; + + String[] field = line.split("\t", -1); + + if (columnPos.containsKey(HM_CHR_COL)) { + chrom = field[columnPos.get(HM_CHR_COL)]; + if (!VALID_CHROMOSOMES.contains(chrom)) { + // Only chromosomes are processed; no contigs, e.g.: 8_KI270821v1_alt, 11_KI270927v1_alt, 12_GL877875v1_alt,... 
+ return; + } + } else { +// logger.warn("Missing field '{}', skipping line: {}", HM_CHR_COL, line); + return; + } + if (columnPos.containsKey(HM_POS_COL)) { + try { + position = Integer.parseInt(field[columnPos.get(HM_POS_COL)]); + } catch (NumberFormatException e) { +// logger.warn("Invalid field '{}' (value = {}), skipping line: {}", HM_POS_COL, field[columnPos.get(HM_POS_COL)], line); + return; + } + } else { +// logger.warn("Missing field '{}', skipping line: {}", HM_POS_COL, line); + return; + } + if (columnPos.containsKey(EFFECT_ALLELE_COL)) { + effectAllele = field[columnPos.get(EFFECT_ALLELE_COL)]; + } else { +// logger.warn("Missing field '{}', skipping line: {}", EFFECT_ALLELE_COL, line); + return; + } + if (columnPos.containsKey(HM_INFEROTHERALLELE_COL) && StringUtils.isNotEmpty(field[columnPos.get(HM_INFEROTHERALLELE_COL)])) { + otherAllele = field[columnPos.get(HM_INFEROTHERALLELE_COL)]; + } else if (columnPos.containsKey(OTHER_ALLELE_COL)) { + otherAllele = field[columnPos.get(OTHER_ALLELE_COL)]; + } else { +// logger.warn("Missing fields '{}' and '{}' (at least one is mandatory), skipping line: {}", HM_INFEROTHERALLELE_COL, +// OTHER_ALLELE_COL, line); + return; + } + + // Create polygenic score + Map values = new HashMap<>(); + if (columnPos.containsKey(EFFECT_WEIGHT_COL)) { + values.put(EFFECT_WEIGHT_KEY, field[columnPos.get(EFFECT_WEIGHT_COL)]); + } + if (columnPos.containsKey(ALLELEFREQUENCY_EFFECT_COL)) { + values.put(ALLELE_FREQUENCY_EFFECT_KEY, field[columnPos.get(ALLELEFREQUENCY_EFFECT_COL)]); + } + if (columnPos.containsKey(ODDS_RATIO_COL)) { + values.put(ODDS_RATIO_KEY, field[columnPos.get(ODDS_RATIO_COL)]); + } + if (columnPos.containsKey(HAZARD_RATIO_COL)) { + values.put(HAZARD_RATIO_KEY, field[columnPos.get(HAZARD_RATIO_COL)]); + } + if (columnPos.containsKey(LOCUS_NAME_COL)) { + values.put(LOCUS_NAME_KEY, field[columnPos.get(LOCUS_NAME_COL)]); + } + if (columnPos.containsKey(IS_HAPLOTYPE_COL)) { + values.put(IS_HAPLOTYPE_KEY, 
field[columnPos.get(IS_HAPLOTYPE_COL)]); + } + if (columnPos.containsKey(IS_DIPLOTYPE_COL)) { + values.put(IS_DIPLOTYPE_KEY, field[columnPos.get(IS_DIPLOTYPE_COL)]); + } + if (columnPos.containsKey(IMPUTATION_METHOD_COL)) { + values.put(IMPUTATION_METHOD_KEY, field[columnPos.get(IMPUTATION_METHOD_COL)]); + } + if (columnPos.containsKey(VARIANT_DESCRIPTION_COL)) { + values.put(VARIANT_DESCRIPTION_KEY, field[columnPos.get(VARIANT_DESCRIPTION_COL)]); + } + if (columnPos.containsKey(INCLUSION_CRITERIA_COL)) { + values.put(INCLUSION_CRITERIA_KEY, field[columnPos.get(INCLUSION_CRITERIA_COL)]); + } + if (columnPos.containsKey(IS_INTERACTION_COL)) { + values.put(IS_INTERACTION_KEY, field[columnPos.get(IS_INTERACTION_COL)]); + } + if (columnPos.containsKey(IS_DOMINANT_COL)) { + values.put(IS_DOMINANT_KEY, field[columnPos.get(IS_DOMINANT_COL)]); + } + if (columnPos.containsKey(IS_RECESSIVE_COL)) { + values.put(IS_RECESSIVE_KEY, field[columnPos.get(IS_RECESSIVE_COL)]); + } + if (columnPos.containsKey(DOSAGE_0_WEIGHT_COL)) { + values.put(DOSAGE_0_WEIGHT_KEY, field[columnPos.get(DOSAGE_0_WEIGHT_COL)]); + } + if (columnPos.containsKey(DOSAGE_1_WEIGHT_COL)) { + values.put(DOSAGE_1_WEIGHT_KEY, field[columnPos.get(DOSAGE_1_WEIGHT_COL)]); + } + if (columnPos.containsKey(DOSAGE_2_WEIGHT_COL)) { + values.put(DOSAGE_2_WEIGHT_KEY, field[columnPos.get(DOSAGE_2_WEIGHT_COL)]); + } + + // Creating and/or updating variant polygenic score + + // First, we store the variant + RocksDB rdb = (RocksDB) varRDBConn[0]; + String key = chrom + ":" + position + ":" + otherAllele + ":" + effectAllele; + byte[] dbContent = rdb.get(key.getBytes()); + if (dbContent == null) { + // Add data to batch + varBatch.put(key.getBytes(), ONE); + varBatchCounter++; + if (varBatchCounter >= MAX_BATCH_SIZE) { + // Write the batch to the database + rdb.write(new WriteOptions(), varBatch); + // Reset batch + varBatch.clear(); + varBatchCounter = 0; + } + } + + // Second, we store the polygenic scores + rdb = (RocksDB) 
varPgsRDBConn[0]; + key = chrom + ":" + position + ":" + otherAllele + ":" + effectAllele + ":" + pgsId; + dbContent = rdb.get(key.getBytes()); + if (dbContent != null) { + duplicatedKeys++; + logger.warn("Warning: the indexing PGS key {}: it should be unique", key); + } else { + VariantPolygenicScore varPgs = new VariantPolygenicScore(chrom, position, otherAllele, effectAllele, + Collections.singletonList(new PolygenicScore(pgsId, values))); + // Add data to batch + byte[] rdbKey = key.getBytes(); + byte[] rdbValue = jsonObjectWriter.writeValueAsBytes(varPgs); + varPgsBatch.put(rdbKey, rdbValue); + varPgsBatchCounter++; + if (varPgsBatchCounter >= MAX_BATCH_SIZE) { + // Write the batch to the database + rdb.write(new WriteOptions(), varPgsBatch); + // Reset batch + varPgsBatch.clear(); + varPgsBatchCounter = 0; + } + } + } + + private void serializeRDB() throws IOException, RocksDBException { + long counter = 0; + + RocksDB varRDB = (RocksDB) varRDBConn[0]; + RocksDB varPgsRDB = (RocksDB) varPgsRDBConn[0]; + + // DO NOT change the name of the rocksIterator variable - for some unexplainable reason Java VM crashes if it's + // named "iterator" + logger.info("Writing variants ..."); + RocksIterator rocksIterator = varRDB.newIterator(); + for (rocksIterator.seekToFirst(); rocksIterator.isValid(); rocksIterator.next()) { + String varKey = new String(rocksIterator.key()); + VariantPolygenicScore varPgs = null; + for (String pgsId : pgsIdSet) { + String varPgsKey = varKey + ":" + pgsId; + byte[] dbContent = varPgsRDB.get(varPgsKey.getBytes()); + if (dbContent != null) { + VariantPolygenicScore newVarPgs = varPgsReader.readValue(dbContent); + if (varPgs == null) { + varPgs = newVarPgs; + } else { + varPgs.getPolygenicScores().addAll(newVarPgs.getPolygenicScores()); + } + } + } + if (varPgs != null) { + serializer.serialize(varPgs); + } + if (++counter % 500000 == 0) { + logger.info("Writing {} variants...", counter); + } + } + logger.info("Writing done."); + 
logger.info("Num. duplicated keys (PGS/Variant) = {}", duplicatedKeys); + + // Close RocksDB + closeIndex((RocksDB) varRDBConn[0], (Options) varRDBConn[1], (String) varRDBConn[2]); + closeIndex((RocksDB) varPgsRDBConn[0], (Options) varPgsRDBConn[1], (String) varPgsRDBConn[2]); + } + + private void closeIndex(RocksDB rdb, Options dbOption, String dbLocation) throws IOException { + if (rdb != null) { + rdb.close(); + } + if (dbOption != null) { + dbOption.dispose(); + } + if (dbLocation != null && Files.exists(Paths.get(dbLocation))) { + org.apache.commons.io.FileUtils.deleteDirectory(new File(dbLocation)); + } + } + + private Object[] getDBConnection(String dbLocation, boolean forceCreate) { + boolean indexingNeeded = forceCreate || !Files.exists(Paths.get(dbLocation)); + // a static method that loads the RocksDB C++ library. + RocksDB.loadLibrary(); + // the Options class contains a set of configurable DB options + // that determines the behavior of a database. + BlockBasedTableConfig tableConfig = new BlockBasedTableConfig(); + tableConfig.setBlockCacheSize(4 * 1024 * 1024 * 1024L); // 4 GB block cache + + Options options = new Options() + .setCreateIfMissing(true) + .setWriteBufferSize(1L * 256 * 1024 * 1024) // 256 MB + .setMaxWriteBufferNumber(4) + .setMinWriteBufferNumberToMerge(2) + .setIncreaseParallelism(4) + .setMaxBackgroundCompactions(4) + .setMaxBackgroundFlushes(2) + .setLevelCompactionDynamicLevelBytes(true) + .setTargetFileSizeBase(1L * 64 * 1024 * 1024) // 64 MB + .setMaxBytesForLevelBase(1L * 512 * 1024 * 1024) // 512 MB + .setTableFormatConfig(tableConfig) + .setCompressionType(CompressionType.LZ4_COMPRESSION); + + RocksDB db = null; + try { + // a factory method that returns a RocksDB instance + if (indexingNeeded) { + db = RocksDB.open(options, dbLocation); + } else { + db = RocksDB.openReadOnly(options, dbLocation); + } + // do something + } catch (RocksDBException e) { + // do some error handling + e.printStackTrace(); + System.exit(1); + } + 
+ return new Object[]{db, options, dbLocation, indexingNeeded}; + } + + private int getNumFiles(File[] files, String endsWith) { + int numFiles = 0; + for (File file : files) { + if (file.isFile() && file.getName().endsWith(endsWith)) { + ++numFiles; + } + } + return numFiles; + } +} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ProteinBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ProteinBuilder.java index 0369a0e6aa..186a0218b2 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ProteinBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/ProteinBuilder.java @@ -21,67 +21,85 @@ import com.fasterxml.jackson.databind.ObjectWriter; import org.opencb.biodata.formats.protein.uniprot.UniProtParser; import org.opencb.biodata.formats.protein.uniprot.v202003jaxb.*; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.serializer.CellBaseSerializer; import org.opencb.commons.utils.FileUtils; import org.rocksdb.Options; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.rocksdb.RocksIterator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import javax.xml.bind.JAXBException; import java.io.BufferedReader; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; import java.math.BigInteger; import java.nio.file.Files; import java.nio.file.Path; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; +import java.util.*; +import java.util.stream.Collectors; -public class ProteinBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.*; - private Path uniprotFilesDir; - private Path interproFilePath; - private String species; +public class ProteinBuilder extends AbstractBuilder { - private Map proteinMap; + private Path proteinPath; + private String species; - 
protected Logger logger = LoggerFactory.getLogger(this.getClass()); + public static final String PROTEIN_OUTPUT_FILENAME = PROTEIN_DATA + ".json.gz"; - public ProteinBuilder(Path uniprotFilesDir, String species, CellBaseSerializer serializer) { - this(uniprotFilesDir, null, species, serializer); - } - - public ProteinBuilder(Path uniprotFilesDir, Path interproFilePath, String species, CellBaseSerializer serializer) { + public ProteinBuilder(Path proteinPath, String species, CellBaseSerializer serializer) { super(serializer); - this.uniprotFilesDir = uniprotFilesDir; - this.interproFilePath = interproFilePath; + this.proteinPath = proteinPath; this.species = species; } @Override - public void parse() throws IOException { + public void parse() throws CellBaseException, IOException { + // Sanity check + checkDirectory(proteinPath, getDataName(PROTEIN_DATA)); - if (uniprotFilesDir == null || !Files.exists(uniprotFilesDir)) { - throw new IOException("File '" + uniprotFilesDir + "' not valid"); + // Check UniProt file + DataSource dataSource = dataSourceReader.readValue(proteinPath.resolve(UNIPROT_DATA).resolve(getDataVersionFilename(UNIPROT_DATA)) + .toFile()); + List uniProtFiles = checkFiles(dataSource, proteinPath.resolve(UNIPROT_DATA), getDataCategory(UNIPROT_DATA) + "/" + + getDataName(UNIPROT_DATA)); + if (uniProtFiles.size() != 1) { + throw new CellBaseException(getMismatchNumFilesErrorMessage(getDataName(UNIPROT_DATA), uniProtFiles.size())); } - RocksDB rocksDb = getDBConnection(); + // Check InterPro file + dataSource = dataSourceReader.readValue(proteinPath.resolve(INTERPRO_DATA).resolve(getDataVersionFilename(INTERPRO_DATA)).toFile()); + List interProFiles = checkFiles(dataSource, proteinPath.resolve(INTERPRO_DATA), getDataCategory(INTERPRO_DATA) + "/" + + getDataName(INTERPRO_DATA)); + if (interProFiles.size() != 1) { + throw new CellBaseException(getMismatchNumFilesErrorMessage(getDataName(INTERPRO_DATA), interProFiles.size())); + } + + // Prepare UniProt 
data by splitting data in chunks + Path uniProtChunksPath = serializer.getOutdir().resolve(UNIPROT_CHUNKS_SUBDIRECTORY); + logger.info("Split {} file {} into chunks at {}", getDataName(UNIPROT_DATA), uniProtFiles.get(0).getName(), uniProtChunksPath); + Files.createDirectories(uniProtChunksPath); + splitUniprot(proteinPath.resolve(UNIPROT_DATA).resolve(uniProtFiles.get(0).getName()), uniProtChunksPath); + + // Prepare RocksDB + RocksDB rocksDb = getDBConnection(uniProtChunksPath); ObjectMapper mapper = new ObjectMapper(); mapper.configure(MapperFeature.REQUIRE_SETTERS_FOR_GETTERS, true); ObjectWriter jsonObjectWriter = mapper.writerFor(Entry.class); - proteinMap = new HashMap<>(30000); -// UniProtParser up = new UniProtParser(); + Map proteinMap = new HashMap<>(30000); + + // Parsing files try { - File[] files = uniprotFilesDir.toFile().listFiles((dir, name) -> name.endsWith(".xml") || name.endsWith(".xml.gz")); + File[] files = uniProtChunksPath.toFile().listFiles((dir, name) -> name.endsWith(".xml") || name.endsWith(".xml.gz")); + for (File file : files) { + logger.info(PARSING_LOG_MESSAGE, file); Uniprot uniprot = (Uniprot) UniProtParser.loadXMLInfo(file.toString(), UniProtParser.UNIPROT_CONTEXT); for (Entry entry : uniprot.getEntry()) { @@ -89,32 +107,41 @@ public void parse() throws IOException { for (OrganismNameType organismNameType : entry.getOrganism().getName()) { entryOrganism = organismNameType.getValue(); if (entryOrganism.equals(species)) { -// proteinMap.put(entry.getAccession().get(0), entry); + proteinMap.put(entry.getAccession().get(0), entry); + + // Update RocksDB rocksDb.put(entry.getAccession().get(0).getBytes(), jsonObjectWriter.writeValueAsBytes(entry)); } } } + logger.info(PARSING_DONE_LOG_MESSAGE); + } + logger.info("Number of proteins stored in map: '{}'", proteinMap.size()); + if (proteinMap.size() > 10) { + logger.info("First 10 protein IDs in map: {}", proteinMap.keySet().stream().collect(Collectors.toList()).subList(0, 10)); } - 
logger.debug("Number of proteins stored in map: '{}'", proteinMap.size()); - if (interproFilePath != null && Files.exists(interproFilePath)) { - BufferedReader interproBuffereReader = FileUtils.newBufferedReader(interproFilePath); - Set hashSet = new HashSet<>(proteinMap.keySet()); - Set visited = new HashSet<>(30000); + logger.info(PARSING_LOG_MESSAGE, interProFiles.get(0)); + String interproName = getDataName(INTERPRO_DATA); + int numLine = 0; + int numInterProLinesProcessed = 0; + int numUniqueProteinsProcessed = 0; + try (BufferedReader interproBuffereReader = FileUtils.newBufferedReader(interProFiles.get(0).toPath())) { + + Set hashSet = proteinMap.keySet(); + Set visited = new HashSet<>(proteinMap.size()); - int numInterProLinesProcessed = 0; - int numUniqueProteinsProcessed = 0; String[] fields; String line; boolean iprAdded; while ((line = interproBuffereReader.readLine()) != null) { + numLine++; fields = line.split("\t"); if (hashSet.contains(fields[0])) { iprAdded = false; BigInteger start = BigInteger.valueOf(Integer.parseInt(fields[4])); BigInteger end = BigInteger.valueOf(Integer.parseInt(fields[5])); -// for (FeatureType featureType : proteinMap.get(fields[0]).getFeature()) { byte[] bytes = rocksDb.get(fields[0].getBytes()); Entry entry = mapper.readValue(bytes, Entry.class); for (FeatureType featureType : entry.getFeature()) { @@ -123,8 +150,6 @@ public void parse() throws IOException { && featureType.getLocation().getEnd().getPosition() != null && featureType.getLocation().getBegin().getPosition().equals(start) && featureType.getLocation().getEnd().getPosition().equals(end)) { - featureType.setId(fields[1]); - featureType.setRef(fields[3]); iprAdded = true; break; } @@ -145,10 +170,17 @@ public void parse() throws IOException { locationType.setEnd(positionType2); featureType.setLocation(locationType); -// proteinMap.get(fields[0]).getFeature().add(featureType); bytes = rocksDb.get(fields[0].getBytes()); entry = mapper.readValue(bytes, Entry.class); 
entry.getFeature().add(featureType); + + if (fields[0].equalsIgnoreCase(entry.getAccession().get(0))) { + // Update RocksDB + rocksDb.put(fields[0].getBytes(), jsonObjectWriter.writeValueAsBytes(entry)); + } else { + logger.info("Something wrong happen: interpro fields[0] = {} vs entry.getAccession().get(0) = {}", + fields[0], entry.getAccession().get(0)); + } } if (!visited.contains(fields[0])) { @@ -158,11 +190,16 @@ public void parse() throws IOException { } if (++numInterProLinesProcessed % 10000000 == 0) { - logger.debug("{} InterPro lines processed. {} unique proteins processed", - numInterProLinesProcessed, numUniqueProteinsProcessed); + printInfoLogs(numInterProLinesProcessed, numUniqueProteinsProcessed, interproName); } } - interproBuffereReader.close(); + printInfoLogs(numInterProLinesProcessed, numUniqueProteinsProcessed, interproName); + + logger.info(PARSING_DONE_LOG_MESSAGE); + } catch (IOException e) { + logger.error("Error parsing {} file: {}. Num. line = {}. Error stack trace = {}", interproName, interProFiles.get(0), + numLine, Arrays.toString(e.getStackTrace())); + printInfoLogs(numInterProLinesProcessed, numUniqueProteinsProcessed, interproName); } // Serialize and save results @@ -173,24 +210,80 @@ public void parse() throws IOException { } rocksDb.close(); - } catch (JAXBException | RocksDBException e) { - e.printStackTrace(); + } catch (JAXBException | RocksDBException | IOException e) { + throw new CellBaseException("Error parsing " + getDataName(PROTEIN_DATA) + " files", e); } + + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(PROTEIN_DATA)); } - private RocksDB getDBConnection() { - // a static method that loads the RocksDB C++ library. + private RocksDB getDBConnection(Path uniProtChunksPath) throws CellBaseException { + // A static method that loads the RocksDB C++ library RocksDB.loadLibrary(); - // the Options class contains a set of configurable DB options - // that determines the behavior of a database. 
+ // The Options class contains a set of configurable DB options that determines the behavior of a database Options options = new Options().setCreateIfMissing(true); try { - return RocksDB.open(options, uniprotFilesDir.resolve("integration.idx").toString()); + return RocksDB.open(options, uniProtChunksPath.resolve("integration.idx").toString()); } catch (RocksDBException e) { - // do some error handling - e.printStackTrace(); - System.exit(1); + throw new CellBaseException("Error preparing RocksDB", e); + } + } + + private void splitUniprot(Path uniprotFilePath, Path splitOutdirPath) throws IOException { + PrintWriter pw = null; + try (BufferedReader br = FileUtils.newBufferedReader(uniprotFilePath)) { + StringBuilder header = new StringBuilder(); + boolean beforeEntry = true; + boolean inEntry = false; + int count = 0; + int chunk = 0; + String line; + while ((line = br.readLine()) != null) { + if (line.trim().startsWith("")) { + inEntry = false; + if (count % 10000 == 0) { + if (pw != null) { + pw.print(""); + pw.close(); + } + chunk++; + } + } + } + pw.print(""); + pw.close(); + } finally { + if (pw != null) { + pw.close(); + } } - return null; } + + private String getMismatchNumFilesErrorMessage(String dataName, int numFiles) { + return "Only one " + dataName + " file is expected, but currently there are " + numFiles + " files"; + } + + private void printInfoLogs(int numInterProLinesProcessed, int numUniqueProteinsProcessed, String dataName) { + logger.info("{}: {} lines processed", dataName, numInterProLinesProcessed); + logger.info("{}: {} unique proteins processed", dataName, numUniqueProteinsProcessed); + } + } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PubMedBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PubMedBuilder.java index 8aba7c9dda..5443b9aea9 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PubMedBuilder.java +++ 
b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/PubMedBuilder.java @@ -16,63 +16,71 @@ package org.opencb.cellbase.lib.builders; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectWriter; import org.opencb.biodata.formats.pubmed.PubMedParser; import org.opencb.biodata.formats.pubmed.v233jaxb.PubmedArticle; import org.opencb.biodata.formats.pubmed.v233jaxb.PubmedArticleSet; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.cellbase.lib.download.PubMedDownloadManager; import org.opencb.commons.utils.FileUtils; -import org.slf4j.LoggerFactory; -import java.io.File; +import java.nio.file.Files; import java.nio.file.Path; import java.util.List; -public class PubMedBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.PUBMED_DATA; +import static org.opencb.cellbase.lib.EtlCommons.getDataName; - private Path pubmedDir; - private CellBaseFileSerializer fileSerializer; +public class PubMedBuilder extends AbstractBuilder { - public PubMedBuilder(Path pubmedDir, CellBaseFileSerializer serializer) { - super(serializer); - - this.fileSerializer = serializer; - this.pubmedDir = pubmedDir; + private Path pubMedDownloadPath; + private CellBaseConfiguration configuration; - logger = LoggerFactory.getLogger(PubMedBuilder.class); + public PubMedBuilder(Path pubMedDownloadPath, CellBaseFileSerializer serializer, CellBaseConfiguration configuration) { + super(serializer); + this.pubMedDownloadPath = pubMedDownloadPath; + this.configuration = configuration; } @Override public void parse() throws Exception { - // Check input folder - FileUtils.checkPath(pubmedDir); + logger.info(BUILDING_LOG_MESSAGE, getDataName(PUBMED_DATA)); - logger.info("Parsing PubMed files..."); + // Check input folder + FileUtils.checkPath(pubMedDownloadPath); - for 
(File file : pubmedDir.toFile().listFiles()) { - if (file.isFile() && (file.getName().endsWith("gz") || file.getName().endsWith("xml"))) { - String name = file.getName().split("\\.")[0]; + // Check PubMed files before parsing them + List pubMedFilenames = PubMedDownloadManager.getPubMedFilenames(configuration.getDownload().getPubmed()); + for (String pubMedFilename : pubMedFilenames) { + Path pubMedPath = pubMedDownloadPath.resolve(pubMedFilename); + if (!Files.exists(pubMedPath)) { + throw new CellBaseException("Expected PubMed file " + pubMedFilename + ", but it was not found at " + pubMedDownloadPath); + } + } + for (String pubMedFilename : pubMedFilenames) { + Path pubMedPath = pubMedDownloadPath.resolve(pubMedFilename); + String basename = pubMedFilename.split("\\.")[0]; - ObjectWriter objectWriter = new ObjectMapper().writerFor(PubmedArticle.class); - PubmedArticleSet pubmedArticleSet = (PubmedArticleSet) PubMedParser.loadXMLInfo(file.getAbsolutePath()); + PubmedArticleSet pubmedArticleSet = (PubmedArticleSet) PubMedParser.loadXMLInfo(pubMedPath.toAbsolutePath().toString()); - List objects = pubmedArticleSet.getPubmedArticleOrPubmedBookArticle(); - logger.info("Parsing PubMed file {} of {} articles ...", file.getName(), objects.size()); - int counter = 0; - for (Object object : objects) { - PubmedArticle pubmedArticle = (PubmedArticle) object; - fileSerializer.serialize(pubmedArticle, name); - if (++counter % 2000 == 0) { - logger.info("\t\t" + counter + " articles"); - } + List objects = pubmedArticleSet.getPubmedArticleOrPubmedBookArticle(); + logger.info(PARSING_LOG_MESSAGE, pubMedPath); + int counter = 0; + for (Object object : objects) { + PubmedArticle pubmedArticle = (PubmedArticle) object; + ((CellBaseFileSerializer) serializer).serialize(pubmedArticle, basename); + if (++counter % 2000 == 0) { + logger.info("{} articles", counter); } - fileSerializer.close(); - logger.info("\t\tDone: " + counter + " articles."); } + serializer.close(); + + String 
logMsg = pubMedPath + " (" + counter + " articles)"; + logger.info(PARSING_DONE_LOG_MESSAGE, logMsg); } - logger.info("Parsing PubMed files finished."); + logger.info(BUILDING_DONE_LOG_MESSAGE, getDataName(PUBMED_DATA)); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java index 48b0cd1d0d..9ddb4e6a7c 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilder.java @@ -22,28 +22,43 @@ import org.opencb.biodata.models.core.*; import org.opencb.biodata.tools.sequence.FastaIndex; import org.opencb.cellbase.core.ParamConstants; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.DownloadProperties; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; +import org.opencb.cellbase.lib.EtlCommons; import org.rocksdb.RocksDBException; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.*; -public class RefSeqGeneBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.*; + +public class RefSeqGeneBuilder extends AbstractBuilder { + + private Path downloadPath; + private CellBaseConfiguration configuration; private Map transcriptDict; private Map exonDict; - private Path gtfFile; - private Path fastaFile; - private Path proteinFastaFile, cdnaFastaFile; - private Path maneFile, lrgFile, disgenetFile, hpoFile, geneDrugFile, miRTarBaseFile; - private Path cancerGeneCensus, cancerHotspot; - private Path tso500File, eglhHaemOncFile; + private Path gtfFile = null; + private Path fastaFile = null; + private Path proteinFastaFile = null; + private Path 
cdnaFastaFile = null; + private Path maneFile = null; + private Path lrgFile = null; + private Path hpoFile = null; + private Path geneDrugFile = null; + private Path miRTarBaseFile = null; + private Path cancerGeneCensusFile = null; + private Path cancerHotspot = null; private SpeciesConfiguration speciesConfiguration; private static final Map REFSEQ_CHROMOSOMES = new HashMap<>(); - private final String status = "KNOWN"; + private static final String KNOWN_STATUS = "KNOWN"; private static final String SOURCE = ParamConstants.QueryParams.REFSEQ.key(); private Gene gene = null; private Transcript transcript = null; @@ -52,134 +67,173 @@ public class RefSeqGeneBuilder extends CellBaseBuilder { // sometimes there are two stop codons (eg NM_018159.4). Only parse the first one, skip the second private boolean seenStopCodon = false; + private boolean isHSapiens = false; + + private static final String ENSEMBL = "ensembl"; + private static final String TRANSCRIPT_ID = "transcript_id"; + private static final String EXON_NUMBER = "exon_number"; - public RefSeqGeneBuilder(Path refSeqDirectoryPath, SpeciesConfiguration speciesConfiguration, CellBaseSerializer serializer) { + public static final String REFSEQ_GENE_BASENAME = "refSeqGene"; + public static final String REFSEQ_GENE_OUTPUT_FILENAME = REFSEQ_GENE_BASENAME + ".json.gz"; + + public RefSeqGeneBuilder(Path downloadPath, SpeciesConfiguration speciesConfiguration, CellBaseConfiguration configuration, + CellBaseSerializer serializer) { super(serializer); + this.downloadPath = downloadPath; this.speciesConfiguration = speciesConfiguration; - - getGtfFileFromDirectoryPath(refSeqDirectoryPath); - getFastaFileFromDirectoryPath(refSeqDirectoryPath); - getProteinFastaFileFromDirectoryPath(refSeqDirectoryPath); - getCdnaFastaFileFromDirectoryPath(refSeqDirectoryPath); - setAnnotationFiles(refSeqDirectoryPath); + this.configuration = configuration; transcriptDict = new HashMap<>(250000); exonDict = new HashMap<>(8000000); - } - 
private void setAnnotationFiles(Path refSeqDirectoryPath) { - Path geneDirectoryPath = refSeqDirectoryPath.getParent().resolve("gene"); - maneFile = geneDirectoryPath.resolve("MANE.GRCh38.v1.0.summary.txt.gz"); - lrgFile = geneDirectoryPath.resolve("list_LRGs_transcripts_xrefs.txt"); - geneDrugFile = geneDirectoryPath.resolve("dgidb.tsv"); - disgenetFile = geneDirectoryPath.resolve("all_gene_disease_associations.tsv.gz"); - hpoFile = geneDirectoryPath.resolve("phenotype_to_genes.txt"); - cancerGeneCensus = geneDirectoryPath.resolve("cancer-gene-census.tsv"); - cancerHotspot = geneDirectoryPath.resolve("hotspots_v2.xls"); - tso500File = geneDirectoryPath.resolve("TSO500_transcripts.txt"); - eglhHaemOncFile = geneDirectoryPath.resolve("EGLH_HaemOnc_transcripts.txt"); - miRTarBaseFile = refSeqDirectoryPath.getParent().resolve("regulation/hsa_MTI.xlsx"); + if (speciesConfiguration.getScientificName().equals(HOMO_SAPIENS)) { + isHSapiens = true; + } } - private void getGtfFileFromDirectoryPath(Path refSeqDirectoryPath) { - for (String fileName : refSeqDirectoryPath.toFile().list()) { - if (fileName.endsWith(".gtf") || fileName.endsWith(".gtf.gz")) { - gtfFile = refSeqDirectoryPath.resolve(fileName); - break; - } + public void check() throws Exception { + if (checked) { + return; } - } - private void getFastaFileFromDirectoryPath(Path refSeqDirectoryPath) { - for (String fileName : refSeqDirectoryPath.toFile().list()) { - if (fileName.endsWith("genomic.fna") || fileName.endsWith("genomic.fna.gz")) { - fastaFile = refSeqDirectoryPath.resolve(fileName); - break; + String refSeqGeneLabel = getDataName(REFSEQ_DATA) + " " + getDataName(GENE_DATA); + logger.info(CHECKING_BEFORE_BUILDING_LOG_MESSAGE, refSeqGeneLabel); + + // Sanity check + checkDirectory(downloadPath, refSeqGeneLabel); + if (!Files.exists(serializer.getOutdir())) { + try { + Files.createDirectories(serializer.getOutdir()); + } catch (IOException e) { + throw new CellBaseException("Error creating folder " + 
serializer.getOutdir(), e); } } - } - private void getProteinFastaFileFromDirectoryPath(Path refSeqDirectoryPath) { - for (String fileName : refSeqDirectoryPath.toFile().list()) { - if (fileName.endsWith(".faa") || fileName.endsWith(".faa.gz")) { - proteinFastaFile = refSeqDirectoryPath.resolve(fileName); - break; - } + // Check RefSeq files + String prefixId = getConfigurationFileIdPrefix(speciesConfiguration.getScientificName()); + DownloadProperties.URLProperties props = configuration.getDownload().getRefSeq(); + gtfFile = checkFile(props, prefixId + REFSEQ_GENOMIC_GTF_FILE_ID, downloadPath, "RefSeq GTF").toPath(); + proteinFastaFile = checkFile(props, prefixId + REFSEQ_PROTEIN_FAA_FILE_ID, downloadPath, "RefSeq Protein FAA").toPath(); + cdnaFastaFile = checkFile(props, prefixId + REFSEQ_RNA_FNA_FILE_ID, downloadPath, "RefSeq RNA FNA").toPath(); + + // Check genome FASTA file + String genomeGzFilename = Paths.get(props.getFiles().get(prefixId + REFSEQ_GENOMIC_FNA_FILE_ID)).getFileName().toString(); + Path fastaGzFile = downloadPath.resolve(genomeGzFilename); + fastaFile = EtlCommons.getFastaPath(fastaGzFile); + + // Check common files + if (isHSapiens || isDataSupported(configuration.getDownload().getManeSelect(), prefixId)) { + maneFile = checkFiles(MANE_SELECT_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, MANE_SELECT_DATA, speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getLrg(), prefixId)) { + lrgFile = checkFiles(LRG_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, LRG_DATA, speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getCancerHotspot(), prefixId)) { + cancerHotspot = checkFiles(CANCER_HOTSPOT_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, 
CANCER_HOTSPOT_DATA, speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getDgidb(), prefixId)) { + geneDrugFile = checkFiles(DGIDB_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, DGIDB_DATA, speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getHpo(), prefixId)) { + hpoFile = checkFiles(HPO_DISEASE_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, HPO_DISEASE_DATA, speciesConfiguration.getScientificName()); + } + if (isHSapiens || isDataSupported(configuration.getDownload().getCancerHotspot(), prefixId)) { + cancerGeneCensusFile = checkFiles(CANCER_GENE_CENSUS_DATA, downloadPath.getParent(), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, CANCER_GENE_CENSUS_DATA, speciesConfiguration.getScientificName()); } - } - private void getCdnaFastaFileFromDirectoryPath(Path refSeqDirectoryPath) { - for (String fileName : refSeqDirectoryPath.toFile().list()) { - if (fileName.endsWith("cdna.fna") || fileName.endsWith("cdna.fna.gz")) { - cdnaFastaFile = refSeqDirectoryPath.resolve(fileName); - break; - } + // Check regulation files + // mirtarbase + if (isHSapiens || isDataSupported(configuration.getDownload().getMiRTarBase(), prefixId)) { + miRTarBaseFile = checkFiles(MIRTARBASE_DATA, downloadPath.getParent().getParent().resolve(REGULATION_DATA) + .resolve(MIRTARBASE_DATA), 1).get(0).toPath(); + } else { + logger.info(SKIPPING_INDEX_DATA_LOG_MESSAGE, MIRTARBASE_DATA, speciesConfiguration.getScientificName()); } + + logger.info(CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE, refSeqGeneLabel); + checked = true; } public void parse() throws Exception { + check(); + // Preparing the fasta file for fast accessing FastaIndex fastaIndex = null; if (fastaFile != null) { fastaIndex = new FastaIndex(fastaFile); } - // index 
protein sequences for later + // Index protein sequences for later + logger.info("Indexing gene annotation for {} ...", getDataName(REFSEQ_DATA)); RefSeqGeneBuilderIndexer indexer = new RefSeqGeneBuilderIndexer(gtfFile.getParent()); - indexer.index(maneFile, lrgFile, proteinFastaFile, cdnaFastaFile, geneDrugFile, hpoFile, disgenetFile, miRTarBaseFile, - cancerGeneCensus, cancerHotspot, tso500File, eglhHaemOncFile); - - logger.info("Parsing RefSeq gtf..."); - GtfReader gtfReader = new GtfReader(gtfFile); - - Gtf gtf; - while ((gtf = gtfReader.read()) != null) { - String chromosome = getSequenceName(gtf.getSequenceName()); - switch (gtf.getFeature()) { - case "gene": - // we've finished the previous transcript, store xrefs - addXrefs(transcript, geneDbxrefs, exonDbxrefs); - parseGene(gtf, chromosome, indexer); - break; - case "transcript": - break; - case "exon": - parseExon(gtf, chromosome, fastaIndex, indexer); - break; - case "CDS": - parseCDS(gtf, indexer); - break; - case "start_codon": - seenStopCodon = false; - break; - case "stop_codon": - if (!seenStopCodon) { - parseStopCodon(gtf); - seenStopCodon = true; - } - break; - default: - throw new RuntimeException("Unexpected feature type: " + gtf.getFeature()); + indexer.index(maneFile, lrgFile, proteinFastaFile, cdnaFastaFile, geneDrugFile, hpoFile, miRTarBaseFile, cancerGeneCensusFile, + cancerHotspot); + logger.info("Indexing done for {}", getDataName(REFSEQ_DATA)); + + logger.info(PARSING_LOG_MESSAGE, gtfFile); + try (GtfReader gtfReader = new GtfReader(gtfFile)) { + Gtf gtf; + while ((gtf = gtfReader.read()) != null) { + String chromosome = getSequenceName(gtf.getSequenceName()); + switch (gtf.getFeature()) { + case "gene": + // we've finished the previous transcript, store xrefs + addXrefs(transcript, geneDbxrefs, exonDbxrefs); + parseGene(gtf, chromosome, indexer); + break; + case "transcript": + break; + case "exon": + parseExon(gtf, chromosome, fastaIndex, indexer); + break; + case "CDS": + parseCDS(gtf, 
indexer); + break; + case "start_codon": + seenStopCodon = false; + break; + case "stop_codon": + if (!seenStopCodon) { + parseStopCodon(gtf); + seenStopCodon = true; + } + break; + default: + throw new CellBaseException("Error parsing: unexpected feature type: " + gtf.getFeature()); + } } } - // add xrefs to last transcript + // Add xrefs to last transcript addXrefs(transcript, geneDbxrefs, exonDbxrefs); - // last gene must be serialized + // Last gene must be serialized store(); - // cleaning - gtfReader.close(); + // Close serializer.close(); if (fastaIndex != null) { fastaIndex.close(); } indexer.close(); + + logger.info(PARSING_DONE_LOG_MESSAGE, gtfFile); } - // store right before parsing the previous gene, or the very last gene. + // Store right before parsing the previous gene, or the very last gene. private void store() { serializer.serialize(gene); reset(); @@ -200,7 +254,6 @@ private void addXrefs(Transcript transcript, Set geneDbxrefs, Set ex return; } exonDbxrefs.addAll(geneDbxrefs); -// transcript.setXrefs(new ArrayList<>(exonDbxrefs)); transcript.getXrefs().addAll(exonDbxrefs); transcript.getXrefs().add(new Xref(transcript.getName(), "hgnc_symbol", "HGNC Symbol")); @@ -235,12 +288,13 @@ private void parseGene(Gtf gtf, String chromosome, RefSeqGeneBuilderIndexer inde null, indexer.getMirnaTargets(geneName), indexer.getCancerGeneCensus(geneName), indexer.getCancerHotspot(geneName)); gene = new Gene(geneId, geneName, chromosome, gtf.getStart(), gtf.getEnd(), gtf.getStrand(), "1", geneBiotype, - status, SOURCE, geneDescription, new ArrayList<>(), null, geneAnnotation); + KNOWN_STATUS, SOURCE, geneDescription, new ArrayList<>(), null, geneAnnotation); geneDbxrefs = parseXrefs(gtf); } - private void parseExon(Gtf gtf, String chromosome, FastaIndex fastaIndex, RefSeqGeneBuilderIndexer indexer) throws RocksDBException { - String transcriptId = gtf.getAttributes().get("transcript_id"); + private void parseExon(Gtf gtf, String chromosome, FastaIndex fastaIndex, 
RefSeqGeneBuilderIndexer indexer) throws RocksDBException, + CellBaseException { + String transcriptId = gtf.getAttributes().get(TRANSCRIPT_ID); // new transcript if (!transcriptDict.containsKey(transcriptId)) { @@ -264,7 +318,7 @@ private void parseExon(Gtf gtf, String chromosome, FastaIndex fastaIndex, RefSeq if (fastaIndex != null) { exonSequence = fastaIndex.query(gtf.getSequenceName(), gtf.getStart(), gtf.getEnd()); } - String exonNumber = gtf.getAttributes().get("exon_number"); + String exonNumber = gtf.getAttributes().get(EXON_NUMBER); // RefSeq does not provide Exon IDs, we are using transcript ID and exon numbers String exonId = transcriptId + "_" + exonNumber; @@ -286,14 +340,14 @@ private void parseExon(Gtf gtf, String chromosome, FastaIndex fastaIndex, RefSeq } } - private void parseCDS(Gtf gtf, RefSeqGeneBuilderIndexer indexer) throws RocksDBException { - String exonNumber = gtf.getAttributes().get("exon_number"); + private void parseCDS(Gtf gtf, RefSeqGeneBuilderIndexer indexer) throws RocksDBException, CellBaseException { + String exonNumber = gtf.getAttributes().get(EXON_NUMBER); if (StringUtils.isEmpty(exonNumber)) { // this CDS doesn't know which exon it belongs to. 
skip return; } - transcript = transcriptDict.get(gtf.getAttributes().get("transcript_id")); + transcript = transcriptDict.get(gtf.getAttributes().get(TRANSCRIPT_ID)); String exonId = transcript.getId() + "_" + exonNumber; Exon exon = exonDict.get(exonId); @@ -419,12 +473,12 @@ private void parseCDS(Gtf gtf, RefSeqGeneBuilderIndexer indexer) throws RocksDBE } private void parseStopCodon(Gtf gtf) { - String exonNumber = gtf.getAttributes().get("exon_number"); + String exonNumber = gtf.getAttributes().get(EXON_NUMBER); if (StringUtils.isEmpty(exonNumber)) { // some codons don't have an exon number, discard return; } - Transcript transcript = transcriptDict.get(gtf.getAttributes().get("transcript_id")); + transcript = transcriptDict.get(gtf.getAttributes().get(TRANSCRIPT_ID)); String exonId = transcript.getId() + "_" + exonNumber; Exon exon = exonDict.get(exonId); @@ -525,14 +579,14 @@ private void parseStopCodon(Gtf gtf) { } } - private Set parseXrefs(Gtf gtf) { + private Set parseXrefs(Gtf gtf) throws CellBaseException { Set xrefSet = new HashSet<>(); String xrefs = gtf.getAttributes().get("db_xref"); if (StringUtils.isNotEmpty(xrefs)) { for (String xrefString : xrefs.split(",")) { String[] dbxrefParts = xrefString.split(":", 2); if (dbxrefParts.length != 2) { - throw new RuntimeException("Bad xref, expected colon: " + xrefString); + throw new CellBaseException("Error parsing Xrefs: bad xref, expected colon: " + xrefString); } String dbName = dbxrefParts[0].toLowerCase(); String id = dbxrefParts[1]; @@ -541,7 +595,7 @@ private Set parseXrefs(Gtf gtf) { dbName = "hgnc_id"; dbDisplayName = "HGNC ID"; } - if ("ensembl".equalsIgnoreCase(dbName)) { + if (ENSEMBL.equalsIgnoreCase(dbName)) { if (id.startsWith("ENST")) { dbName = "ensembl_transcript"; dbDisplayName = "Ensembl transcript"; @@ -562,18 +616,17 @@ private Transcript getTranscript(Gtf gtf, String chromosome, String transcriptId Map gtfAttributes = gtf.getAttributes(); String name = gene.getName(); -// String 
biotype = gtfAttributes.get("gbkey"); String biotype = gtfAttributes.get("transcript_biotype"); if ("mRNA".equals(biotype)) { biotype = "protein_coding"; } - transcript = new Transcript(transcriptId, name, chromosome, gtf.getStart(), gtf.getEnd(), gtf.getStrand(), biotype, status, + transcript = new Transcript(transcriptId, name, chromosome, gtf.getStart(), gtf.getEnd(), gtf.getStrand(), biotype, KNOWN_STATUS, 0, 0, 0, 0, 0, indexer.getCdnaFasta(transcriptId), "", "", "", version, SOURCE, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), new HashSet<>(), new TranscriptAnnotation()); // Add MANE Select mappings, with this we can know which Ensembl and Refseq transcripts match according to MANE - for (String suffix: Arrays.asList("ensembl", "ensembl_protein")) { + for (String suffix: Arrays.asList(ENSEMBL, "ensembl_protein")) { String maneRefSeq = indexer.getMane(transcriptId, suffix); if (StringUtils.isNotEmpty(maneRefSeq)) { transcript.getXrefs().add(new Xref(maneRefSeq, "mane_select_" + suffix, @@ -582,7 +635,7 @@ private Transcript getTranscript(Gtf gtf, String chromosome, String transcriptId } // Add LRG mappings, with this we can know which Ensembl and Refseq transcripts match according to LRG - String lrgRefSeq = indexer.getLrg(transcriptId, "ensembl"); + String lrgRefSeq = indexer.getLrg(transcriptId, ENSEMBL); if (StringUtils.isNotEmpty(lrgRefSeq)) { transcript.getXrefs().add(new Xref(lrgRefSeq, "lrg_ensembl", "LRG Ensembl")); } @@ -598,15 +651,6 @@ private Transcript getTranscript(Gtf gtf, String chromosome, String transcriptId if (StringUtils.isNotEmpty(lrg)) { transcript.getFlags().add("LRG"); } - // 3. 
TSO500 and EGLH HaemOnc - String tso500Flag = indexer.getTSO500(transcriptId.split("\\.")[0]); - if (StringUtils.isNotEmpty(tso500Flag)) { - transcript.getFlags().add(tso500Flag); - } - String eglhHaemOncFlag = indexer.getEGLHHaemOnc(transcriptId.split("\\.")[0]); - if (StringUtils.isNotEmpty(eglhHaemOncFlag)) { - transcript.getFlags().add(eglhHaemOncFlag); - } gene.getTranscripts().add(transcript); @@ -615,7 +659,7 @@ private Transcript getTranscript(Gtf gtf, String chromosome, String transcriptId } private String getGeneId(Gtf gtf) throws CellBaseException { - // db_xref "GeneID:100287102"; + // Splitting the db_xref, e.g.: "GeneID:100287102" String xrefString = gtf.getAttributes().get("db_xref"); String[] xrefs = xrefString.split(","); for (String xref : xrefs) { @@ -628,11 +672,11 @@ private String getGeneId(Gtf gtf) throws CellBaseException { throw new CellBaseException("Didn't find geneId for db_xref:" + xrefString); } - private String getSequenceName(String fullSequenceName) { + private String getSequenceName(String fullSequenceName) throws CellBaseException { String[] sequenceNameParts = fullSequenceName.split("\\."); if (sequenceNameParts.length != 2) { - throw new RuntimeException("bad chromosome: " + fullSequenceName); + throw new CellBaseException("Invalid sequence name: bad chromosome: " + fullSequenceName); } // just get the first part, e.g. 
NC_000024.11 diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilderIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilderIndexer.java index 45520161f5..6a4fe69fc9 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilderIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RefSeqGeneBuilderIndexer.java @@ -16,275 +16,32 @@ package org.opencb.cellbase.lib.builders; -import org.apache.commons.lang.StringUtils; -import org.apache.poi.ss.usermodel.*; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.opencb.biodata.formats.io.FileFormatException; -import org.opencb.biodata.models.core.MirnaTarget; -import org.opencb.biodata.models.core.TargetGene; -import org.opencb.biodata.models.variant.avro.GeneDrugInteraction; -import org.opencb.biodata.models.variant.avro.GeneTraitAssociation; -import org.opencb.commons.utils.FileUtils; +import org.opencb.cellbase.core.exception.CellBaseException; import org.rocksdb.RocksDBException; -import java.io.BufferedReader; -import java.io.FileInputStream; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; -public class RefSeqGeneBuilderIndexer extends GeneBuilderIndexer{ +import static org.opencb.cellbase.lib.EtlCommons.REFSEQ_DATA; + +public class RefSeqGeneBuilderIndexer extends GeneBuilderIndexer { public RefSeqGeneBuilderIndexer(Path refSeqDirectoryPath) { super(refSeqDirectoryPath); } public void index(Path maneFile, Path lrgFile, Path proteinFastaFile, Path cDnaFastaFile, Path geneDrugFile, Path hpoFilePath, - Path disgenetFile, Path miRTarBaseFile, Path cancerGeneGensus, Path cancerHotspot, Path tso500File, - Path eglhHaemOncFile) throws IOException, RocksDBException, FileFormatException { - indexManeMapping(maneFile, "refseq"); - indexLrgMapping(lrgFile, "refseq"); + Path miRTarBaseFile, Path cancerGeneGensus, Path cancerHotspot) + 
throws IOException, RocksDBException, FileFormatException, CellBaseException { + indexManeMapping(maneFile, REFSEQ_DATA); + indexLrgMapping(lrgFile, REFSEQ_DATA); indexProteinSequences(proteinFastaFile); indexCdnaSequences(cDnaFastaFile); indexDrugs(geneDrugFile); - indexDiseases(hpoFilePath, disgenetFile); + indexDiseases(hpoFilePath); indexMiRTarBase(miRTarBaseFile); indexCancerGeneCensus(cancerGeneGensus); indexCancerHotspot(cancerHotspot); - indexTSO500(tso500File); - indexEGLHHaemOnc(eglhHaemOncFile); - } - - private void indexDrugs(Path geneDrugFile) throws IOException, RocksDBException { - if (geneDrugFile != null && Files.exists(geneDrugFile) && Files.size(geneDrugFile) > 0) { - logger.info("Loading gene-drug interaction data from '{}'", geneDrugFile); - BufferedReader br = FileUtils.newBufferedReader(geneDrugFile); - - // Skip header - br.readLine(); - - int lineCounter = 1; - String line; - String currentGene = ""; - List drugs = new ArrayList<>(); - while ((line = br.readLine()) != null) { - String[] parts = line.split("\t"); - String geneName = parts[0]; - if (currentGene.equals("")) { - currentGene = geneName; - } else if (!currentGene.equals(geneName)) { - rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); - drugs = new ArrayList<>(); - currentGene = geneName; - } - - String source = null; - if (parts.length >= 4) { - source = parts[3]; - } - - String interactionType = null; - if (parts.length >= 5) { - interactionType = parts[4]; - } - - String drugName = null; - if (parts.length >= 8) { - // if drug name column is empty, use drug claim name instead - drugName = StringUtils.isEmpty(parts[7]) ? 
parts[6] : parts[7]; - } - if (StringUtils.isEmpty(drugName)) { - // no drug name - continue; - } - - String chemblId = null; - if (parts.length >= 9) { - chemblId = parts[8]; - } - - List publications = new ArrayList<>(); - if (parts.length >= 10 && parts[9] != null) { - publications = Arrays.asList(parts[9].split(",")); - } - - GeneDrugInteraction drug = new GeneDrugInteraction( - geneName, drugName, source, null, null, interactionType, chemblId, publications); - drugs.add(drug); - lineCounter++; - } - br.close(); - // update last gene - rocksDbManager.update(rocksdb, currentGene + DRUGS_SUFFIX, drugs); - } else { - logger.warn("Gene drug file " + geneDrugFile + " not found"); - logger.warn("Ignoring " + geneDrugFile); - } - } - - public List getDrugs(String id) throws RocksDBException, IOException { - String key = id + DRUGS_SUFFIX; - return rocksDbManager.getDrugs(rocksdb, key); - } - - private void indexDiseases(Path hpoFilePath, Path disgenetFilePath) throws IOException, RocksDBException { - Map> geneDiseaseAssociationMap = new HashMap<>(50000); - - String line; - if (hpoFilePath != null && hpoFilePath.toFile().exists() && Files.size(hpoFilePath) > 0) { - BufferedReader bufferedReader = FileUtils.newBufferedReader(hpoFilePath); - // skip first header line - bufferedReader.readLine(); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - String omimId = fields[6]; - String geneSymbol = fields[3]; - String hpoId = fields[0]; - String diseaseName = fields[1]; - GeneTraitAssociation disease = - new GeneTraitAssociation(omimId, diseaseName, hpoId, 0f, 0, new ArrayList<>(), new ArrayList<>(), "hpo"); - addValueToMapElement(geneDiseaseAssociationMap, geneSymbol, disease); - } - bufferedReader.close(); - } - - if (disgenetFilePath != null && disgenetFilePath.toFile().exists() && Files.size(disgenetFilePath) > 0) { - BufferedReader bufferedReader = FileUtils.newBufferedReader(disgenetFilePath); - // skip first header line - 
bufferedReader.readLine(); - while ((line = bufferedReader.readLine()) != null) { - String[] fields = line.split("\t"); - String diseaseId = fields[4]; - String diseaseName = fields[5]; - String score = fields[9]; - String numberOfPubmeds = fields[13].trim(); - String numberOfSNPs = fields[14]; - String source = fields[15]; - GeneTraitAssociation disease = new GeneTraitAssociation(diseaseId, diseaseName, "", Float.parseFloat(score), - Integer.parseInt(numberOfPubmeds), Arrays.asList(numberOfSNPs), Arrays.asList(source), "disgenet"); - addValueToMapElement(geneDiseaseAssociationMap, fields[1], disease); - } - bufferedReader.close(); - } - - for (Map.Entry> entry : geneDiseaseAssociationMap.entrySet()) { - rocksDbManager.update(rocksdb, entry.getKey() + DISEASE_SUFFIX, entry.getValue()); - } - } - - public List getDiseases(String id) throws RocksDBException, IOException { - String key = id + DISEASE_SUFFIX; - return rocksDbManager.getDiseases(rocksdb, key); - } - - private void indexMiRTarBase(Path miRTarBaseFile) throws IOException, RocksDBException { - if (miRTarBaseFile != null && Files.exists(miRTarBaseFile) && Files.size(miRTarBaseFile) > 0) { - logger.info("Loading mirna targets from '{}'", miRTarBaseFile); - FileInputStream file = new FileInputStream(miRTarBaseFile.toFile()); - Workbook workbook = new XSSFWorkbook(file); - Sheet sheet = workbook.getSheetAt(0); - Iterator iterator = sheet.iterator(); - String currentMiRTarBaseId = null; - String currentMiRNA = null; - String currentGene = null; - List targetGenes = new ArrayList(); - Map> geneToMirna = new HashMap(); - while (iterator.hasNext()) { - - Row currentRow = iterator.next(); - Iterator cellIterator = currentRow.iterator(); - - Cell cell = cellIterator.next(); - String miRTarBaseId = cell.getStringCellValue(); - - // skip header - if (miRTarBaseId.startsWith("miRTarBase")) { - continue; - } - - if (currentMiRTarBaseId == null) { - currentMiRTarBaseId = miRTarBaseId; - } - - cell = cellIterator.next(); 
- String miRNA = cell.getStringCellValue(); - if (currentMiRNA == null) { - currentMiRNA = miRNA; - } - - // species - cellIterator.next(); - - cell = cellIterator.next(); - String geneName = cell.getStringCellValue(); - if (currentGene == null) { - currentGene = geneName; - } - - // entrez - cellIterator.next(); - // species - cellIterator.next(); - - if (!miRTarBaseId.equals(currentMiRTarBaseId) || !geneName.equals(currentGene)) { - // new entry, store current one - MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, - targetGenes); - addValueToMapElement(geneToMirna, currentGene, miRnaTarget); - targetGenes = new ArrayList(); - currentGene = geneName; - currentMiRTarBaseId = miRTarBaseId; - currentMiRNA = miRNA; - } - - // experiment - cell = cellIterator.next(); - String experiment = cell.getStringCellValue(); - - // support type - cell = cellIterator.next(); - String supportType = cell.getStringCellValue(); - - // pubmeds - cell = cellIterator.next(); - String pubmed = null; - // seems to vary, so check both - if (cell.getCellType().equals(CellType.NUMERIC)) { - pubmed = String.valueOf(cell.getNumericCellValue()); - } else { - pubmed = cell.getStringCellValue(); - } - - targetGenes.add(new TargetGene(experiment, supportType, pubmed)); - } - - // parse last entry - MirnaTarget miRnaTarget = new MirnaTarget(currentMiRTarBaseId, "miRTarBase", currentMiRNA, - targetGenes); - addValueToMapElement(geneToMirna, currentGene, miRnaTarget); - - for (Map.Entry> entry : geneToMirna.entrySet()) { - rocksDbManager.update(rocksdb, entry.getKey() + MIRTARBASE_SUFFIX, entry.getValue()); - } - } else { - logger.error("mirtarbase file not found"); - } } - - public List getMirnaTargets(String geneName) throws RocksDBException, IOException { - String key = geneName + MIRTARBASE_SUFFIX; - return rocksDbManager.getMirnaTargets(rocksdb, key); - } - - private static void addValueToMapElement(Map> map, String key, T value) { - if 
(map.containsKey(key)) { - map.get(key).add(value); - } else { - List valueList = new ArrayList<>(); - valueList.add(value); - map.put(key, valueList); - } - } - } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryFeatureBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryFeatureBuilder.java index 03fc3a1cd6..280fc631bb 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryFeatureBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryFeatureBuilder.java @@ -16,60 +16,160 @@ package org.opencb.cellbase.lib.builders; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.lang3.StringUtils; import org.opencb.biodata.formats.feature.gff.Gff2; import org.opencb.biodata.formats.feature.gff.io.Gff2Reader; import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.biodata.models.core.RegulatoryFeature; +import org.opencb.biodata.models.core.RegulatoryPfm; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; +import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer; import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.EtlCommons; +import java.io.File; import java.io.IOException; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; -import java.util.*; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; -public class RegulatoryFeatureBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.*; - private final Path gffFile; - protected Set regulatoryFeatureSet; +public class RegulatoryFeatureBuilder extends AbstractBuilder { - public RegulatoryFeatureBuilder(Path regulatoryDirectoryPath, CellBaseSerializer serializer) { 
+ private Path regulationPath; + private Set regulatoryFeatureSet; + + public static final String REGULATORY_REGION_BASENAME = "regulatory_region"; + public static final String REGULATORY_REGION_OUTPUT_FILENAME = REGULATORY_REGION_BASENAME + ".json.gz"; + public static final String REGULATORY_PFM_BASENAME = "regulatory_pfm"; + public static final String REGULATORY_PFM_OUTPUT_FILENAME = REGULATORY_PFM_BASENAME + ".json.gz"; + + public RegulatoryFeatureBuilder(Path regulationPath, CellBaseSerializer serializer) { super(serializer); - gffFile = regulatoryDirectoryPath.resolve(EtlCommons.REGULATORY_FEATURES_FILE); + this.regulationPath = regulationPath; } @Override public void parse() throws Exception { - logger.info("Parsing regulatory features..."); - if (Files.exists(gffFile)) { - parseGffFile(gffFile); - } else { - logger.warn("No regulatory features GFF file found {}", EtlCommons.REGULATORY_FEATURES_FILE); - logger.warn("Skipping regulatory features GFF file parsing. Regulatory feature data models will not be built."); + // Sanity check + checkDirectory(regulationPath, getDataName(REGULATION_DATA)); + + DataSource dataSource; + List regulatoryFiles; + List motifFeaturesFiles; + + // Check build regulatory files + dataSource = dataSourceReader.readValue(regulationPath.resolve(REGULATORY_BUILD_DATA) + .resolve(getDataVersionFilename(REGULATORY_BUILD_DATA)).toFile()); + regulatoryFiles = checkFiles(dataSource, regulationPath.resolve(REGULATORY_BUILD_DATA), getDataCategory(REGULATORY_BUILD_DATA) + "/" + + getDataName(REGULATORY_BUILD_DATA)); + if (regulatoryFiles.size() != 1) { + throw new CellBaseException("One " + getDataName(REGULATORY_BUILD_DATA) + " file is expected, but currently there are " + + regulatoryFiles.size() + " files"); } + + // Check motif features files + dataSource = dataSourceReader.readValue(regulationPath.resolve(MOTIF_FEATURES_DATA) + .resolve(getDataVersionFilename(MOTIF_FEATURES_DATA)).toFile()); + motifFeaturesFiles = checkFiles(dataSource, 
regulationPath.resolve(MOTIF_FEATURES_DATA), getDataCategory(MOTIF_FEATURES_DATA) + "/" + + getDataName(MOTIF_FEATURES_DATA)); + if (motifFeaturesFiles.size() != 2) { + throw new CellBaseException("Two " + getDataName(MOTIF_FEATURES_DATA) + " files are expected, but currently there are " + + motifFeaturesFiles.size() + " files"); + } + + // Downloading and building pfm matrices + File motifFile = motifFeaturesFiles.get(0).getName().endsWith("tbi") ? motifFeaturesFiles.get(1) : motifFeaturesFiles.get(0); + loadPfmMatrices(motifFile.toPath(), serializer.getOutdir()); + + // Parse regulatory build features + parseGffFile(regulatoryFiles.get(0).toPath()); } protected void parseGffFile(Path regulatoryFeatureFile) throws IOException, NoSuchMethodException, FileFormatException { + logger.info(PARSING_LOG_MESSAGE, regulatoryFeatureFile); + + // Create and populate regulatory feature set regulatoryFeatureSet = new HashSet<>(); - if (regulatoryFeatureFile != null && Files.exists(regulatoryFeatureFile) && !Files.isDirectory(regulatoryFeatureFile) - && Files.size(regulatoryFeatureFile) > 0) { - Gff2Reader regulatoryFeatureReader = new Gff2Reader(regulatoryFeatureFile); + try (Gff2Reader regulatoryFeatureReader = new Gff2Reader(regulatoryFeatureFile)) { Gff2 feature; while ((feature = regulatoryFeatureReader.read()) != null) { regulatoryFeatureSet.add(feature); } - regulatoryFeatureReader.close(); } - int i = 0; // Serialize and save results for (Gff2 feature : regulatoryFeatureSet) { - // ID=TF_binding_site:ENSR00000243312; + // In order to get the ID we split the attribute format: ID=TF_binding_site:ENSR00000243312; .... 
String id = feature.getAttribute().split(";")[0].split(":")[1]; RegulatoryFeature regulatoryFeature = new RegulatoryFeature(id, feature.getSequenceName(), feature.getFeature(), feature.getStart(), feature.getEnd()); serializer.serialize(regulatoryFeature); } serializer.close(); + + logger.info(PARSING_DONE_LOG_MESSAGE); + } + + private void loadPfmMatrices(Path motifGffFile, Path buildFolder) throws IOException, NoSuchMethodException, FileFormatException, + InterruptedException { + Path regulatoryPfmPath = buildFolder.resolve(REGULATORY_PFM_OUTPUT_FILENAME); + logger.info("Downloading and building PFM matrices in {} from {} ...", regulatoryPfmPath, motifGffFile); + if (Files.exists(regulatoryPfmPath)) { + logger.info("{} is already built", regulatoryPfmPath); + return; + } + + Set motifIds = new HashSet<>(); + logger.info(PARSING_LOG_MESSAGE, motifGffFile); + try (Gff2Reader motifsFeatureReader = new Gff2Reader(motifGffFile)) { + Gff2 tfbsMotifFeature; + Pattern filePattern = Pattern.compile("ENSPFM(\\d+)"); + while ((tfbsMotifFeature = motifsFeatureReader.read()) != null) { + String pfmId = getMatrixId(filePattern, tfbsMotifFeature); + if (StringUtils.isNotEmpty(pfmId)) { + motifIds.add(pfmId); + } + } + } + logger.info(PARSING_DONE_LOG_MESSAGE); + + ObjectMapper mapper = new ObjectMapper(); + CellBaseSerializer serializer = new CellBaseJsonFileSerializer(buildFolder, REGULATORY_PFM_BASENAME, true); + if (logger.isInfoEnabled()) { + logger.info("Looking up {} PFMs", motifIds.size()); + } + for (String pfmId : motifIds) { + String urlString = "https://rest.ensembl.org/species/homo_sapiens/binding_matrix/" + pfmId + + "?unit=frequencies;content-type=application/json"; + URL url = new URL(urlString); + RegulatoryPfm regulatoryPfm = mapper.readValue(url, RegulatoryPfm.class); + serializer.serialize(regulatoryPfm); + // https://github.com/Ensembl/ensembl-rest/wiki/Rate-Limits + TimeUnit.MILLISECONDS.sleep(250); + } + serializer.close(); + + logger.info("Downloading and 
building PFM matrices at {} done.", regulatoryPfmPath); + } + + private String getMatrixId(Pattern pattern, Gff2 tfbsMotifFeature) { + Matcher matcher = pattern.matcher(tfbsMotifFeature.getAttribute()); + if (matcher.find()) { + return matcher.group(0); + } + return null; + } + + public Set getRegulatoryFeatureSet() { + return regulatoryFeatureSet; } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryRegionBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryRegionBuilder.java deleted file mode 100644 index 3727ac4a69..0000000000 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RegulatoryRegionBuilder.java +++ /dev/null @@ -1,607 +0,0 @@ -/* - * Copyright 2015-2020 OpenCB - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.opencb.cellbase.lib.builders; - -import org.opencb.biodata.models.core.RegulatoryFeature; -import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.EtlCommons; -import org.opencb.commons.utils.FileUtils; - -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.sql.*; -import java.util.*; - -/** - * User: fsalavert. 
- * Date: 4/10/13 - * Time: 10:14 AM - */ -@Deprecated -public class RegulatoryRegionBuilder extends CellBaseBuilder { - - private static final int CHUNK_SIZE = 2000; - private static final String REGULATORY_FEATURES = "regulatory_features"; - @Deprecated - private static final String DEPRECATED_MOTIF_FEATURES = "deprecated_motif_features"; - private static final String MOTIF_FEATURES = "motif_features"; - private static final String FEATURE_TYPE = "feature_type"; - private static final String ID = "id"; - private static final String BINDING_MATRIX = "binding_matrix"; - private static final String MOTIF_FEATURE_TYPE = "motif_feature_type"; - private Path regulatoryRegionPath; - - public RegulatoryRegionBuilder(Path regulatoryRegionFilesDir, CellBaseSerializer serializer) { - super(serializer); - - this.regulatoryRegionPath = regulatoryRegionFilesDir; - - } - - public void createSQLiteRegulatoryFiles(Path regulatoryRegionPath) - throws SQLException, IOException, ClassNotFoundException, NoSuchMethodException { - List gffColumnNames = Arrays.asList("seqname", "source", "feature", "start", "end", "score", "strand", "frame", "group"); - List gffColumnTypes = Arrays.asList("TEXT", "TEXT", "TEXT", "INT", "INT", "TEXT", "TEXT", "TEXT", "TEXT"); - - // Path regulatoryRegionPath = regulationDir.toPath(); - - Path filePath; - - filePath = regulatoryRegionPath.resolve(EtlCommons.REGULATORY_FEATURES_FILE); - createSQLiteRegulatoryFiles(filePath, REGULATORY_FEATURES, gffColumnNames, gffColumnTypes); - - filePath = regulatoryRegionPath.resolve(EtlCommons.MOTIF_FEATURES_FILE); - createSQLiteRegulatoryFiles(filePath, MOTIF_FEATURES, gffColumnNames, gffColumnTypes); - - // TODO: REMOVE - // >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DEPRECATED - filePath = regulatoryRegionPath.resolve("AnnotatedFeatures.gff.gz"); - createSQLiteRegulatoryFiles(filePath, "annotated_features", gffColumnNames, gffColumnTypes); - - - filePath = regulatoryRegionPath.resolve("MotifFeatures.gff.gz"); 
- createSQLiteRegulatoryFiles(filePath, DEPRECATED_MOTIF_FEATURES, gffColumnNames, gffColumnTypes); - - - filePath = regulatoryRegionPath.resolve("RegulatoryFeatures_MultiCell.gff.gz"); - createSQLiteRegulatoryFiles(filePath, "regulatory_features_multicell", gffColumnNames, gffColumnTypes); - // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< DEPRECATED - - - -// GFFColumnNames = Arrays.asList("seqname", "source", "feature", "start", "end", "score", "strand", "frame"); -// GFFColumnTypes = Arrays.asList("TEXT", "TEXT", "TEXT", "INT", "INT", "TEXT", "TEXT", "TEXT"); - filePath = regulatoryRegionPath.resolve("mirna_uniq.gff.gz"); - if (Files.exists(filePath)) { - createSQLiteRegulatoryFiles(filePath, "mirna_uniq", gffColumnNames, gffColumnTypes); - } - - } - - @Override - public void parse() throws SQLException, IOException, ClassNotFoundException, NoSuchMethodException { - if (regulatoryRegionPath == null || !Files.exists(regulatoryRegionPath) || !Files.isDirectory(regulatoryRegionPath)) { - throw new IOException("Regulation directory whether does not exist, is not a directory or cannot be read"); - } - - // Create the SQLite databases - createSQLiteRegulatoryFiles(regulatoryRegionPath); - - String chunkIdSuffix = CHUNK_SIZE / 1000 + "k"; - - Path regulatoryFilePath = regulatoryRegionPath.resolve(EtlCommons.REGULATORY_FEATURES_FILE + ".db"); - Path motifFilePath = regulatoryRegionPath.resolve(EtlCommons.MOTIF_FEATURES_FILE + ".db"); - Path annotatedFilePath = regulatoryRegionPath.resolve("AnnotatedFeatures.gff.gz.db"); - Path deprecatedMotifFilePath = regulatoryRegionPath.resolve("MotifFeatures.gff.gz.db"); - Path deprecatedRegulatoryFilePath = regulatoryRegionPath.resolve("RegulatoryFeatures_MultiCell.gff.gz.db"); - Path mirnaFilePath = regulatoryRegionPath.resolve("mirna_uniq.gff.gz.db"); - - List filePaths = Arrays.asList(regulatoryFilePath, motifFilePath, annotatedFilePath, - deprecatedMotifFilePath, deprecatedRegulatoryFilePath); - List tableNames = 
Arrays.asList(REGULATORY_FEATURES, MOTIF_FEATURES, "annotated_features", - DEPRECATED_MOTIF_FEATURES, "regulatory_features_multicell"); - - if (Files.exists(mirnaFilePath)) { - filePaths.add(mirnaFilePath); - tableNames.add("mirna_uniq"); - } - - // Fetching and joining all chromosomes found in the different databases - Set setChr = new HashSet<>(); - setChr.addAll(getChromosomesList(regulatoryFilePath, REGULATORY_FEATURES)); - setChr.addAll(getChromosomesList(motifFilePath, MOTIF_FEATURES)); - setChr.addAll(getChromosomesList(annotatedFilePath, "annotated_features")); - setChr.addAll(getChromosomesList(deprecatedMotifFilePath, DEPRECATED_MOTIF_FEATURES)); - setChr.addAll(getChromosomesList(deprecatedRegulatoryFilePath, "regulatory_features_multicell")); - if (Files.exists(mirnaFilePath)) { - setChr.addAll(getChromosomesList(mirnaFilePath, "mirna_uniq")); - } - - List chromosomes = new ArrayList<>(setChr); - List regulatoryFeatures; - HashSet chunksHash; - for (String chromosome : chromosomes) { - for (int i = 0; i < tableNames.size(); i++) { - chunksHash = new HashSet<>(); - regulatoryFeatures = queryChromosomesRegulatoryDB(filePaths.get(i), tableNames.get(i), chromosome); - for (RegulatoryFeature regulatoryFeature : regulatoryFeatures) { - int firstChunkId = getChunkId(regulatoryFeature.getStart(), CHUNK_SIZE); - int lastChunkId = getChunkId(regulatoryFeature.getEnd(), CHUNK_SIZE); - - List chunkIds = new ArrayList<>(); - String chunkId; - for (int j = firstChunkId; j <= lastChunkId; j++) { - chunkId = chromosome + "_" + j + "_" + chunkIdSuffix; - chunkIds.add(chunkId); - //count chunks - if (!chunksHash.contains(j)) { - chunksHash.add(j); - } - } -// regulatoryFeature.setChunkIds(chunkIds); - - // remove 'chr' prefix -// if (genericFeature.getChromosome() != null) { -// genericFeature.setSequenceName(genericFeature.getSequenceName().replace("chr", "")); -// } - serializer.serialize(regulatoryFeature); - } - } - } - } - - - public void 
createSQLiteRegulatoryFiles(Path filePath, String tableName, List columnNames, List columnTypes) - throws ClassNotFoundException, IOException, SQLException { - int limitRows = 100000; - int batchCount = 0; - - if (!Files.exists(filePath) || Files.size(filePath) == 0) { - return; - } - - Path dbPath = Paths.get(filePath.toString() + ".db"); - if (Files.exists(dbPath) && Files.size(dbPath) > 0) { - return; - } - - BufferedReader br = FileUtils.newBufferedReader(filePath); - - Class.forName("org.sqlite.JDBC"); - Connection conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath.toString()); - conn.setAutoCommit(false); //Set false to perform commits manually and increase performance on insertion - - //Create table query - Statement createTables = conn.createStatement(); - - StringBuilder sbQuery = new StringBuilder(); - sbQuery.append("CREATE TABLE if not exists " + tableName + "("); - for (int i = 0; i < columnNames.size(); i++) { //columnNames and columnTypes must have the same size - sbQuery.append("'" + columnNames.get(i) + "' " + columnTypes.get(i) + ","); - } - sbQuery.deleteCharAt(sbQuery.length() - 1); - sbQuery.append(")"); - - System.out.println(sbQuery.toString()); - createTables.executeUpdate(sbQuery.toString()); - - //Prepare insert query - sbQuery = new StringBuilder(); - sbQuery.append("INSERT INTO " + tableName + "("); - for (int i = 0; i < columnNames.size(); i++) { - sbQuery.append("'" + columnNames.get(i) + "',"); - } - sbQuery.deleteCharAt(sbQuery.length() - 1); - sbQuery.append(") values ("); - sbQuery.append(repeat("?,", columnNames.size())); - sbQuery.deleteCharAt(sbQuery.length() - 1); - sbQuery.append(")"); - System.out.println(sbQuery.toString()); - - PreparedStatement ps = conn.prepareStatement(sbQuery.toString()); - - //Read file - String line = null; - while ((line = br.readLine()) != null) { - - insertByType(ps, getFields(line, tableName), columnTypes); - ps.addBatch(); - batchCount++; - - //commit batch - if (batchCount % limitRows == 
0 && batchCount != 0) { - ps.executeBatch(); - conn.commit(); - } - - } - br.close(); - - //Execute last Batch - ps.executeBatch(); - conn.commit(); - - //Create index - System.out.println("creating indices..."); - createTables.executeUpdate("CREATE INDEX " + tableName + "_seqname_idx on " + tableName + "(" + columnNames.get(0) + ")"); - System.out.println("indices created."); - - conn.commit(); - conn.close(); - } - - public List getChromosomesList(Path dbPath, String tableName) throws IOException { - - try { - FileUtils.checkFile(dbPath); - } catch (IOException e) { - logger.warn(e.getMessage()); - return Collections.emptyList(); - } - - List chromosomes = new ArrayList<>(); - try { - Class.forName("org.sqlite.JDBC"); - Connection conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath.toString()); - - Statement query = conn.createStatement(); - ResultSet rs = query.executeQuery("select distinct(seqname) from " + tableName); -// ResultSet rs = query.executeQuery("select distinct(seqname) from " + tableName + " where seqname like 'chr%'"); - - while (rs.next()) { - chromosomes.add(rs.getString(1)); - } - conn.close(); - - } catch (ClassNotFoundException | SQLException e) { - e.printStackTrace(); - } - return chromosomes; - } - - public List queryChromosomesRegulatoryDB(Path dbPath, String tableName, String chromosome) { - - try { - FileUtils.checkFile(dbPath); - } catch (IOException e) { - logger.warn(e.getMessage()); - return Collections.emptyList(); - } - - Connection conn; - List regulatoryFeatures = new ArrayList<>(); - try { - Class.forName("org.sqlite.JDBC"); - conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath.toString()); - - Statement query = conn.createStatement(); - ResultSet rs = query.executeQuery("select * from " + tableName + " where seqname='" + chromosome + "'"); -// ResultSet rs = query.executeQuery("select * from " + tableName + " where seqname='chr" + chromosome + "'"); - while (rs.next()) { - 
regulatoryFeatures.add(getDeprecatedRegulatoryFeature(rs, tableName)); - } - conn.close(); - - } catch (ClassNotFoundException | SQLException e) { - e.printStackTrace(); - } - return regulatoryFeatures; - } - - public static List queryRegulatoryDB(Path dbPath, String tableName, String chrFile, int start, int end) { - Connection conn = null; - List regulatoryFeatures = new ArrayList<>(); - try { - Class.forName("org.sqlite.JDBC"); - conn = DriverManager.getConnection("jdbc:sqlite:" + dbPath.toString()); - - Statement query = conn.createStatement(); - ResultSet rs = query.executeQuery("select * from " + tableName + " where start<=" + end + " AND end>=" + start); - - while (rs.next()) { - regulatoryFeatures.add(getDeprecatedRegulatoryFeature(rs, tableName)); - } - conn.close(); - - } catch (ClassNotFoundException | SQLException e) { - e.printStackTrace(); - } - return regulatoryFeatures; - } - - private static RegulatoryFeature getDeprecatedRegulatoryFeature(ResultSet rs, String tableName) throws SQLException { - RegulatoryFeature regulatoryFeature = null; - switch (tableName.toLowerCase()) { - case REGULATORY_FEATURES: - regulatoryFeature = getRegulatoryFeature(rs); - break; - case MOTIF_FEATURES: - regulatoryFeature = getMotifFeature(rs); - break; - case "annotated_features": - regulatoryFeature = getAnnotatedFeature(rs); - break; - case "regulatory_features_multicell": - regulatoryFeature = getDeprecatedRegulatoryFeature(rs); - break; - case DEPRECATED_MOTIF_FEATURES: - regulatoryFeature = getDeprecatedMotifFeature(rs); - break; - case "mirna_uniq": - regulatoryFeature = getMirnaFeature(rs); - break; - default: - break; - } - return regulatoryFeature; - } - - private static RegulatoryFeature getMotifFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map groupFields = getGroupFields(rs.getString(9)); - - 
regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3)); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - - // Seems weird that the motif_feature_type property is used to fill the Name field. However, this is how the - // it was being done from the previous ENSEMBL files - regulatoryFeature.setName(groupFields.get(MOTIF_FEATURE_TYPE)); - - regulatoryFeature.setMatrix(groupFields.get(BINDING_MATRIX)); - - return regulatoryFeature; - } - - private static RegulatoryFeature getRegulatoryFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setId(groupFields.get(ID)); - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(groupFields.get(FEATURE_TYPE).replace(" ", "_")); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - - return regulatoryFeature; - } - - private static RegulatoryFeature getAnnotatedFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3)); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - 
regulatoryFeature.setFrame(rs.getString(8)); - - regulatoryFeature.setName(groupFields.get("name")); - regulatoryFeature.setAlias(groupFields.get("alias")); - regulatoryFeature.setFeatureClass(groupFields.get("class")); - regulatoryFeature.getCellTypes().add(groupFields.get("cell_type")); - - return regulatoryFeature; - } - - @Deprecated - private static RegulatoryFeature getDeprecatedRegulatoryFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3)); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - regulatoryFeature.setFrame(rs.getString(8)); - regulatoryFeature.setFrame(rs.getString(9)); - - return regulatoryFeature; - } - - @Deprecated - private static RegulatoryFeature getDeprecatedMotifFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3) + "_motif"); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - regulatoryFeature.setFrame(rs.getString(8)); - - String[] split = groupFields.get("name").split(":"); - regulatoryFeature.setName(split[0]); - regulatoryFeature.setMatrix(split[1]); - - return regulatoryFeature; - } - - private static RegulatoryFeature 
getMirnaFeature(ResultSet rs) throws SQLException { - // GFF https://genome.ucsc.edu/FAQ/FAQformat.html#format3 - RegulatoryFeature regulatoryFeature = new RegulatoryFeature(); - Map groupFields = getGroupFields(rs.getString(9)); - - regulatoryFeature.setChromosome(rs.getString(1)); - regulatoryFeature.setSource(rs.getString(2)); - regulatoryFeature.setFeatureType(rs.getString(3)); - regulatoryFeature.setStart(rs.getInt(4)); - regulatoryFeature.setEnd(rs.getInt(5)); - regulatoryFeature.setScore(rs.getString(6)); - regulatoryFeature.setStrand(rs.getString(7)); - regulatoryFeature.setFrame(rs.getString(8)); - - regulatoryFeature.setFeatureClass("microRNA"); - regulatoryFeature.setName(groupFields.get("name")); - - return regulatoryFeature; - } - - private static Map getGroupFields(String group) { - //process group column - Map groupFields = new HashMap<>(); - String[] attributeFields = group.split(";"); - String[] attributeKeyValue; - for (String attributeField : attributeFields) { - attributeKeyValue = attributeField.trim().split("="); - groupFields.put(attributeKeyValue[0].toLowerCase(), attributeKeyValue[1]); - } - return groupFields; - } - - - public static List getFields(String line, String tableName) { - List fields = new ArrayList<>(); - switch (tableName.toLowerCase()) { - case REGULATORY_FEATURES: - fields = getRegulatoryFeaturesFields(line); - break; - case MOTIF_FEATURES: - fields = getMotifFeaturesFields(line); - break; - case "annotated_features": - fields = getAnnotatedFeaturesFields(line); - break; - case "regulatory_features_multicell": - fields = getRegulatoryFeaturesFields(line); - break; - case DEPRECATED_MOTIF_FEATURES: - fields = getMotifFeaturesFields(line); - break; - case "mirna_uniq": - fields = getMirnaFeaturesFields(line); - break; - default: - break; - } - return fields; - } - - @Deprecated - public static List getAnnotatedFeaturesFields(String line) { - String[] fields = line.split("\t"); - fields[0] = fields[0].replace("chr", ""); - 
return Arrays.asList(fields); - } - - public static List getRegulatoryFeaturesFields(String line) { - String[] fields = line.split("\t"); - fields[0] = fields[0].replace("chr", ""); - return Arrays.asList(fields); - } - - public static List getMotifFeaturesFields(String line) { - String[] fields = line.split("\t"); - fields[0] = fields[0].replace("chr", ""); - return Arrays.asList(fields); - } - - public static List getMirnaFeaturesFields(String line) { - String[] fields = line.split("\t"); - fields[0] = fields[0].replace("chr", ""); - return Arrays.asList(fields); - } - - public static void insertByType(PreparedStatement ps, List fields, List types) throws SQLException { - //Datatypes In SQLite Version 3 -> http://www.sqlite.org/datatype3.html - String raw; - String type; - if (types.size() == fields.size()) { - for (int i = 0; i < fields.size(); i++) { //columnNames and columnTypes must have same size - int sqliteIndex = i + 1; - raw = fields.get(i); - type = types.get(i); - - switch (type) { - case "INTEGER": - case "INT": - ps.setInt(sqliteIndex, Integer.parseInt(raw)); - break; - case "REAL": - ps.setFloat(sqliteIndex, Float.parseFloat(raw)); - break; - case "TEXT": - ps.setString(sqliteIndex, raw); - break; - default: - ps.setString(sqliteIndex, raw); - break; - } - } - } - - } - - public String repeat(String s, int n) { - if (s == null) { - return null; - } - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < n; i++) { - sb.append(s); - } - return sb.toString(); - } - - private int getChunkId(int position, int chunksize) { - if (chunksize <= 0) { - return position / CHUNK_SIZE; - } else { - return position / chunksize; - } - } - - private int getChunkStart(int id, int chunksize) { - if (chunksize <= 0) { - return (id == 0) ? 1 : id * CHUNK_SIZE; - } else { - return (id == 0) ? 
1 : id * chunksize; - } - } - - private int getChunkEnd(int id, int chunksize) { - if (chunksize <= 0) { - return (id * CHUNK_SIZE) + CHUNK_SIZE - 1; - } else { - return (id * chunksize) + chunksize - 1; - } - } -} diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RepeatsBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RepeatsBuilder.java index d37765e0b6..5add326db7 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RepeatsBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RepeatsBuilder.java @@ -18,8 +18,10 @@ import org.opencb.biodata.models.core.Region; import org.opencb.biodata.models.variant.avro.Repeat; -import org.opencb.cellbase.lib.EtlCommons; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; +import org.opencb.cellbase.lib.EtlCommons; import org.opencb.commons.ProgressLogger; import org.opencb.commons.utils.FileUtils; @@ -27,62 +29,100 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; + +import static org.opencb.cellbase.lib.EtlCommons.*; /** * Created by fjlopez on 05/05/17. 
*/ -public class RepeatsBuilder extends CellBaseBuilder { - private static final String TRF = "trf"; - private static final String GSD = "genomicSuperDup"; - private static final String WM = "windowMasker"; +public class RepeatsBuilder extends AbstractBuilder { + + private CellBaseConfiguration configuration; + + private List dataList; private final Path filesDir; - public RepeatsBuilder(Path filesDir, CellBaseFileSerializer serializer) { + public static final String REPEATS_OUTPUT_BASENAME = "repeats"; + public static final String REPEATS_OUTPUT_FILENAME = REPEATS_OUTPUT_BASENAME + ".json.gz"; + + public RepeatsBuilder(List dataList, Path filesDir, CellBaseFileSerializer serializer, CellBaseConfiguration configuration) { super(serializer); + this.dataList = dataList; this.filesDir = filesDir; + this.configuration = configuration; } @Override public void parse() throws Exception { + // Sanity check + checkDirectory(filesDir, getDataName(REPEATS_DATA)); + + // Check Simple Repeats (TRF) filename + String trfFilename = null; + if (dataList.contains(TRF_DATA)) { + trfFilename = Paths.get(configuration.getDownload().getSimpleRepeats().getFiles().get(SIMPLE_REPEATS_FILE_ID)).getFileName() + .toString(); + if (!Files.exists(filesDir.resolve(trfFilename))) { + throw new CellBaseException(getMessageMissingFile(TRF_DATA, trfFilename, filesDir)); + } + } - logger.info("Parsing repeats..."); - if (Files.exists(filesDir.resolve(EtlCommons.TRF_FILE))) { - parseTrfFile(filesDir.resolve(EtlCommons.TRF_FILE)); - } else { - logger.warn("No TRF file found {}", EtlCommons.TRF_FILE); - logger.warn("Skipping TRF file parsing. 
TRF data models will not be built."); + // Check Genomic Super Duplications (GSD) file + String gsdFilename = null; + if (dataList.contains(GSD_DATA)) { + gsdFilename = Paths.get(configuration.getDownload().getGenomicSuperDups().getFiles().get(GENOMIC_SUPER_DUPS_FILE_ID)) + .getFileName().toString(); + if (!Files.exists(filesDir.resolve(gsdFilename))) { + throw new CellBaseException(getMessageMissingFile(GSD_DATA, gsdFilename, filesDir)); + } } - if (Files.exists(filesDir.resolve(EtlCommons.GSD_FILE))) { - parseGsdFile(filesDir.resolve(EtlCommons.GSD_FILE)); - } else { - logger.warn("No Genomic Super Duplications file found {}", EtlCommons.GSD_FILE); - logger.warn("Skipping Genomic Super Duplications file parsing. " - + "Genomic Super Duplications data models will not be built."); + // Check Window Masker (WM) file + String wmFilename = null; + if (dataList.contains(WM_DATA)) { + wmFilename = Paths.get(configuration.getDownload().getWindowMasker().getFiles().get(WINDOW_MASKER_FILE_ID)).getFileName() + .toString(); + if (!Files.exists(filesDir.resolve(wmFilename))) { + throw new CellBaseException(getMessageMissingFile(WM_DATA, wmFilename, filesDir)); + } } - if (Files.exists(filesDir.resolve(EtlCommons.WM_FILE))) { - parseWmFile(filesDir.resolve(EtlCommons.WM_FILE)); - } else { - logger.warn("No WindowMasker file found {}", EtlCommons.WM_FILE); - logger.warn("Skipping WindowMasker file parsing. 
WindowMasker data models will not be built."); + // Parse TRF file + if (dataList.contains(TRF_DATA)) { + logger.info(PARSING_LOG_MESSAGE, getDataName(TRF_DATA)); + parseTrfFile(filesDir.resolve(trfFilename)); + } + + // Parse GSD file + if (dataList.contains(GSD_DATA)) { + logger.info(PARSING_LOG_MESSAGE, getDataName(GSD_DATA)); + parseGsdFile(filesDir.resolve(gsdFilename)); + } + + // Parse WM file + if (dataList.contains(WM_DATA)) { + logger.info(PARSING_LOG_MESSAGE, getDataName(WM_DATA)); + parseWmFile(filesDir.resolve(wmFilename)); } - logger.info("Done."); } - private void parseTrfFile(Path filePath) throws IOException { + private void parseTrfFile(Path filePath) throws IOException, CellBaseException { + logger.info(PARSING_LOG_MESSAGE, filePath); try (BufferedReader bufferedReader = FileUtils.newBufferedReader(filePath)) { String line = bufferedReader.readLine(); - ProgressLogger progressLogger = new ProgressLogger("Parsed TRF lines:", - () -> EtlCommons.countFileLines(filePath), 200).setBatchSize(10000); + ProgressLogger progressLogger = new ProgressLogger(getMessageParsedLines(TRF_DATA), () -> EtlCommons.countFileLines(filePath), + 200).setBatchSize(10000); while (line != null) { serializer.serialize(parseTrfLine(line)); line = bufferedReader.readLine(); progressLogger.increment(1); } } + logger.info(PARSING_DONE_LOG_MESSAGE); } private Repeat parseTrfLine(String line) { @@ -90,21 +130,23 @@ private Repeat parseTrfLine(String line) { return new Repeat(null, Region.normalizeChromosome(parts[1]), Integer.valueOf(parts[2]) + 1, Integer.valueOf(parts[3]), Integer.valueOf(parts[5]), Integer.valueOf(parts[7]), - Float.valueOf(parts[6]), Float.valueOf(parts[8]) / 100, Float.valueOf(parts[10]), parts[16], TRF); + Float.valueOf(parts[6]), Float.valueOf(parts[8]) / 100, Float.valueOf(parts[10]), parts[16], TRF_DATA); } - private void parseGsdFile(Path filePath) throws IOException { + private void parseGsdFile(Path filePath) throws IOException, CellBaseException { + 
logger.info(PARSING_LOG_MESSAGE, filePath); try (BufferedReader bufferedReader = FileUtils.newBufferedReader(filePath)) { String line = bufferedReader.readLine(); - ProgressLogger progressLogger = new ProgressLogger("Parsed GSD lines:", - () -> EtlCommons.countFileLines(filePath), 200).setBatchSize(10000); + ProgressLogger progressLogger = new ProgressLogger(getMessageParsedLines(GSD_DATA), () -> EtlCommons.countFileLines(filePath), + 200).setBatchSize(10000); while (line != null) { serializer.serialize(parseGSDLine(line)); line = bufferedReader.readLine(); progressLogger.increment(1); } } + logger.info(PARSING_DONE_LOG_MESSAGE); } private Repeat parseGSDLine(String line) { @@ -112,28 +154,39 @@ private Repeat parseGSDLine(String line) { return new Repeat(parts[11], Region.normalizeChromosome(parts[1]), Integer.valueOf(parts[2]) + 1, Integer.valueOf(parts[3]), null, null, 2f, Float.valueOf(parts[26]), null, - null, GSD); + null, GSD_DATA); } - private void parseWmFile(Path filePath) throws IOException { + private void parseWmFile(Path filePath) throws IOException, CellBaseException { + logger.info(PARSING_LOG_MESSAGE, filePath); try (BufferedReader bufferedReader = FileUtils.newBufferedReader(filePath)) { String line = bufferedReader.readLine(); - ProgressLogger progressLogger = new ProgressLogger("Parsed WM lines:", - () -> EtlCommons.countFileLines(filePath), 200).setBatchSize(10000); + ProgressLogger progressLogger = new ProgressLogger(getMessageParsedLines(WM_DATA), () -> EtlCommons.countFileLines(filePath), + 200).setBatchSize(10000); while (line != null) { serializer.serialize(parseWmLine(line)); line = bufferedReader.readLine(); progressLogger.increment(1); } } + logger.info(PARSING_DONE_LOG_MESSAGE); } private Repeat parseWmLine(String line) { String[] parts = line.split("\t"); return new Repeat(parts[4].replace("\t", ""), Region.normalizeChromosome(parts[1]), - Integer.valueOf(parts[2]) + 1, Integer.valueOf(parts[3]), null, null, null, null, null, null, 
WM); + Integer.valueOf(parts[2]) + 1, Integer.valueOf(parts[3]), null, null, null, null, null, null, WM_DATA); + } + + private String getMessageMissingFile(String data, String filename, Path folder) throws CellBaseException { + return getDataName(data) + " file " + filename + " does not exist at " + folder; + } + + private String getMessageParsedLines(String data) throws CellBaseException { + return "Parsed " + getDataName(data) + " lines:"; } } + diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RevelScoreBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RevelScoreBuilder.java index 2ccf0cb2a1..4f0dac0a81 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RevelScoreBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RevelScoreBuilder.java @@ -19,8 +19,8 @@ import org.opencb.biodata.models.core.MissenseVariantFunctionalScore; import org.opencb.biodata.models.core.TranscriptMissenseVariantFunctionalScore; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.slf4j.LoggerFactory; import java.io.*; import java.nio.file.Path; @@ -30,75 +30,95 @@ import java.util.zip.ZipFile; import java.util.zip.ZipInputStream; -public class RevelScoreBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.*; - private Path revelFilePath = null; - private static final String SOURCE = "revel"; +public class RevelScoreBuilder extends AbstractBuilder { - public RevelScoreBuilder(Path revelDirectoryPath, CellBaseSerializer serializer) { - super(serializer); - this.revelFilePath = revelDirectoryPath.resolve("revel-v1.3_all_chromosomes.zip"); - logger = LoggerFactory.getLogger(ConservationBuilder.class); + private Path revelDownloadPath = null; + public RevelScoreBuilder(Path revelDownloadPath, CellBaseSerializer serializer) { + super(serializer); + this.revelDownloadPath = 
revelDownloadPath; } @Override - public void parse() throws IOException { - logger.error("processing Revel file at " + revelFilePath.toAbsolutePath()); - ZipInputStream zis = new ZipInputStream(new FileInputStream(String.valueOf(revelFilePath))); + public void parse() throws IOException, CellBaseException { + String dataName = getDataName(REVEL_DATA); + String dataCategory = getDataCategory(REVEL_DATA); + + logger.info(CATEGORY_BUILDING_LOG_MESSAGE, dataCategory, dataName); + + // Sanity check + checkDirectory(revelDownloadPath, dataName); + + // Check Revel data files + List revelFiles = checkFiles(dataSourceReader.readValue(revelDownloadPath.resolve(getDataVersionFilename(REVEL_DATA)) + .toFile()), revelDownloadPath, dataName); + if (revelFiles.size() != 1) { + throw new CellBaseException("One " + dataName + " file is expected, but currently there are " + revelFiles.size() + " files"); + } + + logger.info(PARSING_LOG_MESSAGE, revelFiles.get(0)); + + ZipInputStream zis = new ZipInputStream(new FileInputStream(String.valueOf(revelFiles.get(0)))); ZipEntry zipEntry = zis.getNextEntry(); - ZipFile zipFile = new ZipFile(String.valueOf(revelFilePath)); + ZipFile zipFile = new ZipFile(revelFiles.get(0).toString()); InputStream inputStream = zipFile.getInputStream(zipEntry); - BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)); - - // skip header - String line = bufferedReader.readLine(); - String[] fields = null; - String lastEntry = null; - String currentEntry = null; - List scores = new ArrayList<>(); - MissenseVariantFunctionalScore predictions = null; - while ((line = bufferedReader.readLine()) != null) { - fields = line.split(","); - String chromosome = fields[0]; - if (".".equalsIgnoreCase(fields[2])) { - // 1,12855835,.,C,A,A,D,0.175 - // skip if invalid position - continue; - } - int position = Integer.parseInt(fields[2]); - String reference = fields[3]; - String alternate = fields[4]; - String aaReference = fields[5]; - String
aaAlternate = fields[6]; - double score = Double.parseDouble(fields[7]); - - currentEntry = chromosome + position; - - // new chromosome + position, store previous entry - if (lastEntry != null && !currentEntry.equals(lastEntry)) { - serializer.serialize(predictions); - scores = new ArrayList<>(); - predictions = null; + try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream))) { + // Skip header + bufferedReader.readLine(); + String[] fields; + String lastEntry = null; + String currentEntry; + List scores = new ArrayList<>(); + MissenseVariantFunctionalScore predictions = null; + String line; + while ((line = bufferedReader.readLine()) != null) { + fields = line.split(","); + String chromosome = fields[0]; + if (".".equalsIgnoreCase(fields[2])) { + // 1,12855835,.,C,A,A,D,0.175 + // skip if invalid position + continue; + } + int position = Integer.parseInt(fields[2]); + String reference = fields[3]; + String alternate = fields[4]; + String aaReference = fields[5]; + String aaAlternate = fields[6]; + double score = Double.parseDouble(fields[7]); + + currentEntry = chromosome + position; + + // new chromosome + position, store previous entry + if (lastEntry != null && !currentEntry.equals(lastEntry)) { + serializer.serialize(predictions); + scores = new ArrayList<>(); + predictions = null; + } + + if (predictions == null) { + predictions = new MissenseVariantFunctionalScore(chromosome, position, reference, REVEL_DATA, scores); + } + + TranscriptMissenseVariantFunctionalScore predictedScore = new TranscriptMissenseVariantFunctionalScore("", alternate, + aaReference, aaAlternate, score); + scores.add(predictedScore); + lastEntry = chromosome + position; } - if (predictions == null) { - predictions = new MissenseVariantFunctionalScore(chromosome, position, reference, SOURCE, scores); - } - - TranscriptMissenseVariantFunctionalScore predictedScore = new TranscriptMissenseVariantFunctionalScore("", - alternate, aaReference, aaAlternate, 
score); - scores.add(predictedScore); - lastEntry = chromosome + position; + // Serialise last entry + serializer.serialize(predictions); } - // serialise last entry - serializer.serialize(predictions); + logger.info(PARSING_DONE_LOG_MESSAGE, revelFiles.get(0)); + // Close zis.close(); zipFile.close(); inputStream.close(); - bufferedReader.close(); + + logger.info(CATEGORY_BUILDING_DONE_LOG_MESSAGE, dataCategory, dataName); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RocksDbManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RocksDbManager.java index cf8351cc54..3a178b4828 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RocksDbManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/RocksDbManager.java @@ -60,8 +60,11 @@ public RocksDB getDBConnection(String dbLocation) { Options options = new Options().setCreateIfMissing(true); RocksDB db = null; try { + if (!Files.exists(Paths.get(dbLocation))) { + Files.createDirectories(Paths.get(dbLocation)); + } return RocksDB.open(options, dbLocation); - } catch (RocksDBException e) { + } catch (RocksDBException | IOException e) { // do some error handling e.printStackTrace(); System.exit(1); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/SpliceBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/SpliceBuilder.java index ddff52328b..bbd82344e7 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/SpliceBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/SpliceBuilder.java @@ -24,7 +24,6 @@ import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.tools.variant.VariantNormalizer; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; -import org.opencb.cellbase.lib.EtlCommons; import org.opencb.commons.utils.FileUtils; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; @@ -35,9 +34,13 @@ 
import java.io.File; import java.io.IOException; import java.nio.file.Path; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; -public class SpliceBuilder extends CellBaseBuilder { +import static org.opencb.cellbase.lib.EtlCommons.MMSPLICE_DATA; +import static org.opencb.cellbase.lib.EtlCommons.SPLICEAI_DATA; + +public class SpliceBuilder extends AbstractBuilder { private Path spliceDir; private CellBaseFileSerializer fileSerializer; @@ -58,14 +61,14 @@ public void parse() throws Exception { logger.info("Parsing splice files..."); - Path splicePath = spliceDir.resolve(EtlCommons.MMSPLICE_SUBDIRECTORY); + Path splicePath = spliceDir.resolve(MMSPLICE_DATA); if (splicePath.toFile().exists()) { logger.info("Parsing MMSplice data..."); mmspliceParser(splicePath); } else { logger.debug("MMSplice data not found: " + splicePath); } - splicePath = spliceDir.resolve(EtlCommons.SPLICEAI_SUBDIRECTORY); + splicePath = spliceDir.resolve(SPLICEAI_DATA); if (splicePath.toFile().exists()) { logger.info("Parsing SpliceAI data..."); spliceaiParser(splicePath); @@ -85,7 +88,7 @@ public void parse() throws Exception { */ private void mmspliceParser(Path mmsplicePath) throws IOException { // Check output folder: MMSplice - Path mmspliceOutFolder = fileSerializer.getOutdir().resolve(EtlCommons.MMSPLICE_SUBDIRECTORY); + Path mmspliceOutFolder = fileSerializer.getOutdir().resolve(MMSPLICE_DATA); if (!mmspliceOutFolder.toFile().exists()) { mmspliceOutFolder.toFile().mkdirs(); } @@ -177,7 +180,7 @@ private void mmspliceParser(Path mmsplicePath) throws IOException { } // Dump rocksDB to JSON file - dumpRocksDB(EtlCommons.MMSPLICE_SUBDIRECTORY + "/splice_score_mmsplice_chr", rocksDB); + dumpRocksDB(MMSPLICE_DATA + "/splice_score_mmsplice_chr", rocksDB); // Clean up rocksDB.close(); @@ -195,7 +198,7 @@ private void mmspliceParser(Path mmsplicePath) throws IOException { */ private void spliceaiParser(Path spliceaiPath) throws IOException { // Check output folder: 
MMSplice - Path spliceaiOutFolder = fileSerializer.getOutdir().resolve(EtlCommons.SPLICEAI_SUBDIRECTORY); + Path spliceaiOutFolder = fileSerializer.getOutdir().resolve(SPLICEAI_DATA); if (!spliceaiOutFolder.toFile().exists()) { spliceaiOutFolder.toFile().mkdirs(); } @@ -292,7 +295,7 @@ private void spliceaiParser(Path spliceaiPath) throws IOException { } // Dump rocksDB to JSON file - dumpRocksDB(EtlCommons.SPLICEAI_SUBDIRECTORY + "/splice_score_spliceai_chr", rocksDB); + dumpRocksDB(SPLICEAI_DATA + "/splice_score_spliceai_chr", rocksDB); // Clean up rocksDB.close(); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java index 087a4aed36..33ffe2e337 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/VariationBuilder.java @@ -16,33 +16,172 @@ package org.opencb.cellbase.lib.builders; +import org.apache.commons.collections4.MapUtils; +import org.opencb.biodata.formats.variant.io.VariantReader; +import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.VariantFileMetadata; +import org.opencb.biodata.models.variant.avro.AdditionalAttribute; +import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.biodata.models.variant.avro.Xref; +import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata; +import org.opencb.biodata.tools.variant.VariantNormalizer; +import org.opencb.biodata.tools.variant.VariantVcfHtsjdkReader; import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.DownloadProperties; import org.opencb.cellbase.core.serializer.CellBaseFileSerializer; -import org.slf4j.LoggerFactory; +import org.opencb.cellbase.core.serializer.CellBaseJsonFileSerializer; +import java.io.IOException; +import java.nio.file.DirectoryStream; 
+import java.nio.file.Files; import java.nio.file.Path; +import java.util.*; + +import static org.opencb.cellbase.lib.EtlCommons.DBSNP_DATA; +import static org.opencb.cellbase.lib.EtlCommons.HOMO_SAPIENS; /** - * Created by imedina on 06/11/15. + * Created by jtarraga on 01/08/24. */ -public class VariationBuilder extends CellBaseBuilder { +public class VariationBuilder extends AbstractBuilder { + + private Path downloadPath; + private String species; private DbSnpBuilder dbSnpBuilder; - public VariationBuilder(Path downloadVariationPath, CellBaseFileSerializer fileSerializer, CellBaseConfiguration configuration) { + public static final String VARIATION_CHR_PREFIX = "variation_chr"; + public static final String VCF_ID_KEY = "VCF_ID"; + public static final String EVA_PREFIX = "EVA_"; + public static final String RS_PREFIX = "rs"; + + private static final String VARIANTS_PARSED_LOG_MESSAGE = "{} variants parsed"; + + public static final Map SV_VALUES_MAP; + + static { + Map tempMap = new HashMap<>(); + tempMap.put("", ""); + tempMap.put("", ""); + tempMap.put("", ""); + tempMap.put("", ""); + tempMap.put("", ""); + tempMap.put("", ""); + tempMap.put("", ""); + tempMap.put("", ""); + tempMap.put("", ""); + tempMap.put("", ""); + SV_VALUES_MAP = Collections.unmodifiableMap(tempMap); + } + + public VariationBuilder(Path downloadPath, String species, CellBaseFileSerializer fileSerializer, CellBaseConfiguration configuration) { super(fileSerializer); + this.downloadPath = downloadPath; + this.species = species; + // dbSNP DownloadProperties.URLProperties dbSnpUrlProperties = configuration.getDownload().getDbSNP(); - dbSnpBuilder = new DbSnpBuilder(downloadVariationPath, dbSnpUrlProperties, fileSerializer); - - logger = LoggerFactory.getLogger(VariationBuilder.class); + dbSnpBuilder = new DbSnpBuilder(downloadPath.resolve(DBSNP_DATA), dbSnpUrlProperties, fileSerializer); } @Override public void parse() throws Exception { - // Parsing dbSNP data - dbSnpBuilder.parse(); + if 
(species.equalsIgnoreCase(HOMO_SAPIENS)) { + // Parsing dbSNP data + dbSnpBuilder.parse(); + } else { + // Parsing VCF files + parseVcf(); + } + } + + private void parseVcf() throws IOException { + VariantNormalizer.VariantNormalizerConfig normalizerConfig = new VariantNormalizer.VariantNormalizerConfig() + .setReuseVariants(true) + .setNormalizeAlleles(true) + .setDecomposeMNVs(false); + + CellBaseJsonFileSerializer fileSerializer = (CellBaseJsonFileSerializer) this.serializer; + + // Usually we expect two VCF files prefixed by the species scientific name + // e.g., for 'Mus musculus' the VCF files are 'mus_musculus.vcf.gz' and 'mus_musculus_structural_variations.vcf.gz' + String prefix = species.toLowerCase(Locale.ROOT).replace(" ", "_"); + + try (DirectoryStream vcfPaths = Files.newDirectoryStream(downloadPath, + entry -> entry.getFileName().toString().startsWith(prefix))) { + for (Path vcfPath : vcfPaths) { + logger.info(PARSING_LOG_MESSAGE, vcfPath); + + VariantStudyMetadata variantStudyMetadata = new VariantFileMetadata(vcfPath.getFileName().toString(), + vcfPath.toAbsolutePath().toString()).toVariantStudyMetadata(""); + VariantReader variantVcfReader = new VariantVcfHtsjdkReader(vcfPath, variantStudyMetadata, + new VariantNormalizer(normalizerConfig)); + + // Write variant to the JSON files according to the chromosome + int count = 0; + Iterator iterator = variantVcfReader.iterator(); + while (iterator.hasNext()) { + Variant variant = iterator.next(); + // Convert alternate for structural variants + if (SV_VALUES_MAP.containsKey(variant.getAlternate())) { + variant.setAlternate(SV_VALUES_MAP.get(variant.getAlternate())); + } + // Set variant ID (after converting the alternate) + variant.setId(variant.toString()); + // Set variant annotation: chrom, start, end, ref, alt, xrefs and additional attributes + VariantAnnotation variantAnnotation = new VariantAnnotation(); + variantAnnotation.setChromosome(variant.getChromosome()); + 
variantAnnotation.setStart(variant.getStart()); + variantAnnotation.setEnd(variant.getEnd()); + variantAnnotation.setReference(variant.getReference()); + variantAnnotation.setAlternate(variant.getAlternate()); + try { + Xref xref = null; + Map attributes = new HashMap<>(); + Map data = variant.getStudies().get(0).getFiles().get(0).getData(); + for (Map.Entry entry : data.entrySet()) { + if (entry.getKey().startsWith(EVA_PREFIX)) { + if (xref == null && data.containsKey(VCF_ID_KEY) && data.get(VCF_ID_KEY).startsWith(RS_PREFIX)) { + xref = new Xref(data.get(VCF_ID_KEY), entry.getKey()); + } + } else if (!entry.getKey().equals(VCF_ID_KEY)) { + attributes.put(entry.getKey(), entry.getValue()); + } + } + if (xref != null) { + variantAnnotation.setXrefs(Collections.singletonList(xref)); + } + if (MapUtils.isNotEmpty(attributes)) { + AdditionalAttribute additionalAttribute = new AdditionalAttribute(attributes); + Map additionalAttributeMap = new HashMap<>(); + additionalAttributeMap.put(vcfPath.getFileName().toString(), additionalAttribute); + variantAnnotation.setAdditionalAttributes(additionalAttributeMap); + } + } catch (Exception e) { + logger.warn("Error setting annotation for variant {}: {}", variant.getId(), Arrays.toString(e.getStackTrace())); + } + if (variantAnnotation != null) { + variant.setAnnotation(variantAnnotation); + } + variant.setAnnotation(variantAnnotation); + + // Remove study info + variant.setStudies(null); + + // Serialize + fileSerializer.serialize(variant, VARIATION_CHR_PREFIX + variant.getChromosome()); + if (++count % 1000000 == 0) { + logger.info(VARIANTS_PARSED_LOG_MESSAGE, count); + } + } + variantVcfReader.close(); + + logger.info(VARIANTS_PARSED_LOG_MESSAGE, count); + logger.info(PARSING_DONE_LOG_MESSAGE); + } + } + + fileSerializer.close(); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java 
b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java index a31bd8d5e6..951ea5c530 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarIndexer.java @@ -41,11 +41,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import static org.opencb.cellbase.lib.EtlCommons.CLINVAR_DATE; -import static org.opencb.cellbase.lib.EtlCommons.CLINVAR_VERSION; - -//import org.opencb.biodata.formats.variant.clinvar.v24jaxb.*; - /** * Created by fjlopez on 28/09/16. */ @@ -78,11 +73,15 @@ public class ClinVarIndexer extends ClinicalIndexer { private static final String DIPLOTYPE = "Diplotype"; private static final String VARIANT = "Variant"; private static final char CLINICAL_SIGNIFICANCE_SEPARATOR = '/'; + private final Path clinvarXMLFiles; private final Path clinvarSummaryFile; private final Path clinvarVariationAlleleFile; private final Path clinvarEFOFile; + + private final String version; private final String assembly; + private int numberSomaticRecords = 0; private int numberGermlineRecords = 0; private int numberNoDiseaseTrait = 0; @@ -94,15 +93,15 @@ public class ClinVarIndexer extends ClinicalIndexer { private static final Set RECESSIVE_TERM_SET = new HashSet<>(Arrays.asList(ModeOfInheritance.biallelic)); - public ClinVarIndexer(Path clinvarXMLFiles, Path clinvarSummaryFile, Path clinvarVariationAlleleFile, - Path clinvarEFOFile, boolean normalize, Path genomeSequenceFilePath, String assembly, - RocksDB rdb) throws IOException { + public ClinVarIndexer(Path clinvarXMLFiles, Path clinvarSummaryFile, Path clinvarVariationAlleleFile, Path clinvarEFOFile, + String version, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) throws IOException { super(genomeSequenceFilePath); this.rdb = rdb; this.clinvarXMLFiles = clinvarXMLFiles; 
this.clinvarSummaryFile = clinvarSummaryFile; this.clinvarVariationAlleleFile = clinvarVariationAlleleFile; this.clinvarEFOFile = clinvarEFOFile; + this.version = version; this.normalize = normalize; this.genomeSequenceFilePath = genomeSequenceFilePath; this.assembly = assembly; @@ -310,7 +309,7 @@ private void addNewEntries(VariantAnnotation variantAnnotation, String variation String mateVariantString, String clinicalHaplotypeString, Map traitsToEfoTermsMap) { - EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, CLINVAR_VERSION, CLINVAR_DATE); + EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, version, null); // Create a set to avoid situations like germline;germline;germline List alleleOrigin = null; if (!EtlCommons.isMissing(lineFields[VARIANT_SUMMARY_ORIGIN_COLUMN])) { @@ -391,7 +390,7 @@ private void addNewEntries(VariantAnnotation variantAnnotation, PublicSetType pu throws JsonProcessingException { List additionalProperties = new ArrayList<>(3); - EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, CLINVAR_VERSION, CLINVAR_DATE); + EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, version, null); // String accession = publicSet.getReferenceClinVarAssertion().getClinVarAccession().getAcc(); VariantClassification variantClassification = getVariantClassification( diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarParser.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarParser.java index e44ce53e90..4a95b65757 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarParser.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinVarParser.java @@ -20,7 +20,7 @@ import org.opencb.biodata.formats.variant.clinvar.rcv.v64jaxb.*; import org.opencb.cellbase.core.common.clinical.ClinvarPublicSet; import 
org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.builders.CellBaseBuilder; +import org.opencb.cellbase.lib.builders.AbstractBuilder; import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBException; @@ -45,7 +45,7 @@ * Created by imedina on 26/09/14. */ @Deprecated -public class ClinVarParser extends CellBaseBuilder { +public class ClinVarParser extends AbstractBuilder { private static final String ASSEMBLY_PREFIX = "GRCh"; public static final String GRCH37_ASSEMBLY = "37"; diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalIndexer.java index bbe33017fd..3f6e87b89c 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalIndexer.java @@ -83,7 +83,7 @@ public ClinicalIndexer(Path genomeSequenceFilePath) throws IOException { .setDecomposeMNVs(true); if (genomeSequenceFilePath != null) { - logger.info("Enabling left aligning by using sequence at {}", genomeSequenceFilePath.toString()); + logger.info("Enabling left aligning by using sequence at {}", genomeSequenceFilePath); variantNormalizerConfig.enableLeftAlign(genomeSequenceFilePath.toString()); } else { logger.info("Left alignment is NOT enabled."); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalVariantBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalVariantBuilder.java index f574133ad7..9b3457dc78 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalVariantBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/ClinicalVariantBuilder.java @@ -19,165 +19,159 @@ import 
com.fasterxml.jackson.databind.ObjectMapper; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.avro.VariantAnnotation; +import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.serializer.CellBaseSerializer; -import org.opencb.cellbase.lib.EtlCommons; -import org.opencb.cellbase.lib.builders.CellBaseBuilder; +import org.opencb.cellbase.lib.builders.AbstractBuilder; +import org.opencb.commons.utils.FileUtils; import org.rocksdb.Options; import org.rocksdb.RocksDB; import org.rocksdb.RocksDBException; import org.rocksdb.RocksIterator; -import java.io.File; -import java.io.IOException; +import java.io.*; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import static org.opencb.cellbase.lib.EtlCommons.*; + /** * Created by fjlopez on 26/09/16. */ -public class ClinicalVariantBuilder extends CellBaseBuilder { - - private final Path clinvarXMLFile; - private final Path clinvarSummaryFile; - private final Path clinvarVariationAlleleFile; - private final Path clinvarEFOFile; - private final Path cosmicFile; - private final Path gwasFile; - private final Path dbsnpFile; +public class ClinicalVariantBuilder extends AbstractBuilder { + + private final Path clinicalVariantPath; private final String assembly; - private final Path iarctp53GermlineFile; - private final Path iarctp53SomaticFile; - private final Path iarctp53GermlineReferencesFile; - private final Path iarctp53SomaticReferencesFile; private final Path genomeSequenceFilePath; - private final Path docmFile; - private final Path hgmdFile; - private boolean normalize = true; + private boolean normalize; - public ClinicalVariantBuilder(Path clinicalVariantFolder, boolean normalize, Path genomeSequenceFilePath, - String assembly, CellBaseSerializer serializer) { - this(clinicalVariantFolder.resolve(EtlCommons.CLINVAR_XML_FILE), - 
clinicalVariantFolder.resolve(EtlCommons.CLINVAR_SUMMARY_FILE), - clinicalVariantFolder.resolve(EtlCommons.CLINVAR_VARIATION_ALLELE_FILE), - clinicalVariantFolder.resolve(EtlCommons.CLINVAR_EFO_FILE), - clinicalVariantFolder.resolve(EtlCommons.COSMIC_FILE), - clinicalVariantFolder.resolve(EtlCommons.GWAS_FILE), - clinicalVariantFolder.resolve(EtlCommons.DBSNP_FILE), - clinicalVariantFolder.resolve("datasets/" + EtlCommons.IARCTP53_GERMLINE_FILE), - clinicalVariantFolder.resolve("datasets/" + EtlCommons.IARCTP53_GERMLINE_REFERENCES_FILE), - clinicalVariantFolder.resolve("datasets/" + EtlCommons.IARCTP53_SOMATIC_FILE), - clinicalVariantFolder.resolve("datasets/" + EtlCommons.IARCTP53_SOMATIC_REFERENCES_FILE), - clinicalVariantFolder.resolve(EtlCommons.DOCM_FILE), - clinicalVariantFolder.resolve(EtlCommons.HGMD_FILE), - normalize, - genomeSequenceFilePath, assembly, serializer); - } + private Path clinvarFullReleaseFilePath; + private Path clinvarSummaryFilePath; + private Path clinvarVariationAlleleFilePath; + private Path clinvarEFOFilePath; + private Path cosmicFilePath; + private Path hgmdFilePath; + private Path gwasFilePath; + private Path gwasDbSnpFilePath; - public ClinicalVariantBuilder(Path clinvarXMLFile, Path clinvarSummaryFile, Path clinvarVariationAlleleFile, - Path clinvarEFOFile, Path cosmicFile, Path gwasFile, Path dbsnpFile, - Path iarctp53GermlineFile, Path iarctp53GermlineReferencesFile, - Path iarctp53SomaticFile, Path iarctp53SomaticReferencesFile, Path docmFile, Path hgmdFile, - boolean normalize, Path genomeSequenceFilePath, String assembly, - CellBaseSerializer serializer) { + private final CellBaseConfiguration configuration; + + public ClinicalVariantBuilder(Path clinicalVariantFolder, boolean normalize, Path genomeSequenceFilePath, + String assembly, CellBaseConfiguration configuration, CellBaseSerializer serializer) { super(serializer); - this.clinvarXMLFile = clinvarXMLFile; - this.clinvarSummaryFile = clinvarSummaryFile; - 
this.clinvarVariationAlleleFile = clinvarVariationAlleleFile; - this.clinvarEFOFile = clinvarEFOFile; - this.cosmicFile = cosmicFile; - this.gwasFile = gwasFile; - this.dbsnpFile = dbsnpFile; - this.iarctp53GermlineFile = iarctp53GermlineFile; - this.iarctp53GermlineReferencesFile = iarctp53GermlineReferencesFile; - this.iarctp53SomaticFile = iarctp53SomaticFile; - this.iarctp53SomaticReferencesFile = iarctp53SomaticReferencesFile; - this.docmFile = docmFile; - this.hgmdFile = hgmdFile; + this.clinicalVariantPath = clinicalVariantFolder; this.normalize = normalize; this.genomeSequenceFilePath = genomeSequenceFilePath; this.assembly = assembly; + this.configuration = configuration; + } + + public void check() throws CellBaseException, IOException { + if (checked) { + return; + } + + logger.info(CHECKING_BEFORE_BUILDING_LOG_MESSAGE, getDataName(CLINICAL_VARIANT_DATA)); + + // Sanity check + checkDirectory(clinicalVariantPath, getDataName(CLINICAL_VARIANT_DATA)); + if (!Files.exists(serializer.getOutdir())) { + try { + Files.createDirectories(serializer.getOutdir()); + } catch (IOException e) { + throw new CellBaseException("Error creating folder " + serializer.getOutdir(), e); + } + } + + // Check genome file + logger.info("Checking genome FASTA file ..."); + if (!Files.exists(genomeSequenceFilePath)) { + throw new CellBaseException("Genome file path does not exist " + genomeSequenceFilePath); + } + logger.info(OK_MSG); + logger.info("Checking index for genome FASTA file ..."); + getIndexFastaReferenceGenome(genomeSequenceFilePath); + logger.info(OK_MSG); + + // Check ClinVar files + clinvarFullReleaseFilePath = checkFile(CLINVAR_DATA, configuration.getDownload().getClinvar(), CLINVAR_FULL_RELEASE_FILE_ID, + clinicalVariantPath).toPath(); + clinvarSummaryFilePath = checkFile(CLINVAR_DATA, configuration.getDownload().getClinvar(), CLINVAR_SUMMARY_FILE_ID, + clinicalVariantPath).toPath(); + clinvarVariationAlleleFilePath = checkFile(CLINVAR_DATA, 
configuration.getDownload().getClinvar(), CLINVAR_ALLELE_FILE_ID, + clinicalVariantPath).toPath(); + clinvarEFOFilePath = checkFile(CLINVAR_DATA, configuration.getDownload().getClinvar(), CLINVAR_EFO_TERMS_FILE_ID, + clinicalVariantPath).toPath(); + + // Check COSMIC file + cosmicFilePath = checkFiles(COSMIC_DATA, clinicalVariantPath, 1).get(0).toPath(); + + // Check HGMD file + hgmdFilePath = checkFiles(HGMD_DATA, clinicalVariantPath, 1).get(0).toPath(); + + // Check GWAS files + gwasFilePath = checkFiles(GWAS_DATA, clinicalVariantPath, 1).get(0).toPath(); + String dbSnpFilename = Paths.get(configuration.getDownload().getGwasCatalog().getFiles().get(GWAS_DBSNP_FILE_ID)).getFileName() + .toString(); + gwasDbSnpFilePath = clinicalVariantPath.resolve(dbSnpFilename); + if (!Files.exists(gwasDbSnpFilePath)) { + throw new CellBaseException("Could not build clinical variants: the dbSNP file " + dbSnpFilename + " is missing at " + + clinicalVariantPath); + } + if (!Files.exists(clinicalVariantPath.resolve(dbSnpFilename + TBI_EXTENSION))) { + throw new CellBaseException("Could not build clinical variants: the dbSNP tabix file " + dbSnpFilename + TBI_EXTENSION + + " is missing at " + clinicalVariantPath); + } + + logger.info(CHECKING_DONE_BEFORE_BUILDING_LOG_MESSAGE, getDataName(CLINICAL_VARIANT_DATA)); + checked = true; } - public void parse() throws IOException, RocksDBException { + public void parse() throws IOException, RocksDBException, CellBaseException { + check(); + + // Prepare ClinVar chunk files before building (if necessary) + Path chunksPath = serializer.getOutdir().resolve(CLINVAR_CHUNKS_SUBDIRECTORY); + if (Files.notExists(chunksPath)) { + Files.createDirectories(chunksPath); + logger.info("Splitting ClinVar file {} in {} ...", clinvarFullReleaseFilePath, chunksPath); + splitClinvar(clinvarFullReleaseFilePath, chunksPath); + logger.info(OK_MSG); + } RocksDB rdb = null; Options dbOption = null; String dbLocation = null; try { - Object[] dbConnection =
getDBConnection(clinvarXMLFile.getParent().toString() + "/integration.idx", true); + Object[] dbConnection = getDBConnection(clinicalVariantPath.toString() + "/integration.idx", true); rdb = (RocksDB) dbConnection[0]; dbOption = (Options) dbConnection[1]; dbLocation = (String) dbConnection[2]; // COSMIC - // IMPORTANT: COSMIC must be indexed first (before ClinVar, IARC TP53, DOCM, HGMD,...)!!! - if (this.cosmicFile != null && Files.exists(this.cosmicFile)) { - CosmicIndexer cosmicIndexer = new CosmicIndexer(cosmicFile, normalize, genomeSequenceFilePath, assembly, rdb); - cosmicIndexer.index(); - } else { - logger.warn("Cosmic file {} missing. Skipping Cosmic data", cosmicFile); - } + // IMPORTANT: COSMIC must be indexed first (before ClinVar, HGMD,...)!!! + CosmicIndexer cosmicIndexer = new CosmicIndexer(cosmicFilePath, configuration.getDownload().getCosmic().getVersion(), + normalize, genomeSequenceFilePath, assembly, rdb); + cosmicIndexer.index(); // ClinVar - if (this.clinvarXMLFile != null && this.clinvarSummaryFile != null - && this.clinvarVariationAlleleFile != null && Files.exists(clinvarXMLFile) - && Files.exists(clinvarSummaryFile) && Files.exists(clinvarVariationAlleleFile)) { - ClinVarIndexer clinvarIndexer = new ClinVarIndexer(clinvarXMLFile.getParent().resolve("clinvar_chunks"), clinvarSummaryFile, - clinvarVariationAlleleFile, clinvarEFOFile, normalize, genomeSequenceFilePath, assembly, rdb); - clinvarIndexer.index(); - } else { - logger.warn("One or more of required ClinVar files are missing. 
Skipping ClinVar data.\n" - + "Please, ensure that these two files exist:\n" - + "{}\n" - + "{}", this.clinvarXMLFile.toString(), this.clinvarSummaryFile.toString()); - } - - // IARC TP53 - if (this.iarctp53GermlineFile != null && this.iarctp53SomaticFile != null - && Files.exists(iarctp53GermlineFile) && Files.exists(iarctp53SomaticFile)) { - IARCTP53Indexer iarctp53Indexer = new IARCTP53Indexer(iarctp53GermlineFile, - iarctp53GermlineReferencesFile, iarctp53SomaticFile, iarctp53SomaticReferencesFile, - normalize, genomeSequenceFilePath, assembly, rdb); - iarctp53Indexer.index(); - } else { - logger.warn("One or more of required IARCTP53 files are missing. Skipping IARCTP53 data."); - } - - // DOCM - if (this.docmFile != null && Files.exists(docmFile)) { - DOCMIndexer docmIndexer = new DOCMIndexer(docmFile, normalize, genomeSequenceFilePath, assembly, rdb); - docmIndexer.index(); - } else { - logger.warn("The DOCM file {} is missing. Skipping DOCM data.", docmFile); - } + ClinVarIndexer clinvarIndexer = new ClinVarIndexer(serializer.getOutdir().resolve(CLINVAR_CHUNKS_SUBDIRECTORY), + clinvarSummaryFilePath, clinvarVariationAlleleFilePath, clinvarEFOFilePath, configuration.getDownload().getClinvar() + .getVersion(), normalize, genomeSequenceFilePath, assembly, rdb); + clinvarIndexer.index(); // HGMD - if (this.hgmdFile != null && Files.exists(hgmdFile)) { - HGMDIndexer hgmdIndexer = new HGMDIndexer(hgmdFile, normalize, genomeSequenceFilePath, assembly, rdb); - hgmdIndexer.index(); - } else { - logger.warn("The HGMD file {} is missing. 
Skipping HGMD data.", hgmdFile); - } + HGMDIndexer hgmdIndexer = new HGMDIndexer(hgmdFilePath, configuration.getDownload().getHgmd().getVersion(), normalize, + genomeSequenceFilePath, assembly, rdb); + hgmdIndexer.index(); // GWAS catalog - if (gwasFile != null && Files.exists(gwasFile)) { - if (dbsnpFile != null && Files.exists(dbsnpFile)) { - Path tabixFile = Paths.get(dbsnpFile.toAbsolutePath() + ".tbi"); - if (tabixFile != null && Files.exists(tabixFile)) { - GwasIndexer gwasIndexer = new GwasIndexer(gwasFile, dbsnpFile, genomeSequenceFilePath, assembly, rdb); - gwasIndexer.index(); - } else { - logger.warn("The dbSNP tabix file {} is missing. Skipping GWAS catalog data.", tabixFile); - } - } else { - logger.warn("The dbSNP file {} is missing. Skipping GWAS catalog data.", dbsnpFile); - } - } else { - logger.warn("The GWAS catalog file {} is missing. Skipping GWAS catalog data.", gwasFile); - } + GwasIndexer gwasIndexer = new GwasIndexer(gwasFilePath, gwasDbSnpFilePath, genomeSequenceFilePath, assembly, rdb); + gwasIndexer.index(); + // Serialize serializeRDB(rdb); closeIndex(rdb, dbOption, dbLocation); serializer.close(); @@ -186,7 +180,6 @@ public void parse() throws IOException, RocksDBException { serializer.close(); throw e; } - } private void serializeRDB(RocksDB rdb) throws IOException { @@ -223,7 +216,7 @@ private Variant parseVariantFromVariantId(String variantId) { return new Variant(parts[0].trim(), Integer.parseInt(parts[1].trim()), parts[2], parts[3]); } } catch (Exception e) { - logger.warn(e.getMessage() + ". Impossible to create the variant object from the variant ID: " + variantId); + logger.warn("{}. 
Impossible to create the variant object from the variant ID: {}", e.getMessage(), variantId); return null; } } @@ -275,4 +268,53 @@ private Object[] getDBConnection(String dbLocation, boolean forceCreate) { } + private void splitClinvar(Path clinvarXmlFilePath, Path splitOutdirPath) throws IOException { + PrintWriter pw = null; + try (BufferedReader br = FileUtils.newBufferedReader(clinvarXmlFilePath)) { + StringBuilder header = new StringBuilder(); + boolean beforeEntry = true; + boolean inEntry = false; + int count = 0; + int chunk = 0; + String line; + while ((line = br.readLine()) != null) { + if (line.trim().startsWith("")) { + inEntry = false; + if (count % 10000 == 0) { + if (pw != null) { + pw.print(""); + pw.close(); + } + chunk++; + } + } + } + if (pw != null) { + pw.print(""); + pw.close(); + } + } finally { + if (pw != null) { + pw.close(); + } + } + } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicBuilder.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicBuilder.java index 0a8931b536..e103385556 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicBuilder.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicBuilder.java @@ -16,7 +16,7 @@ package org.opencb.cellbase.lib.builders.clinical.variant; -import org.opencb.cellbase.lib.builders.CellBaseBuilder; +import org.opencb.cellbase.lib.builders.AbstractBuilder; import org.opencb.cellbase.core.common.clinical.Cosmic; import org.opencb.cellbase.core.serializer.CellBaseSerializer; import org.opencb.cellbase.lib.variant.VariantAnnotationUtils; @@ -37,7 +37,7 @@ * @since October 08, 2014 */ @Deprecated -public class CosmicBuilder extends CellBaseBuilder { +public class CosmicBuilder extends AbstractBuilder { private final Path cosmicFilePath; private static final String CHROMOSOME = "CHR"; diff --git 
a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java index f8d2f16d15..51be2b6f31 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/CosmicIndexer.java @@ -37,12 +37,12 @@ public class CosmicIndexer extends ClinicalIndexer { private final Path cosmicFile; + private final String version; private final String assembly; + private Pattern mutationGRCh37GenomePositionPattern; private Pattern snvPattern; - private static final String COSMIC_VERSION = "v95"; - private static final int GENE_NAMES_COLUMN = 0; private static final int HGNC_COLUMN = 3; private static final int PRIMARY_SITE_COLUMN = 7; @@ -84,10 +84,12 @@ public class CosmicIndexer extends ClinicalIndexer { private int rocksDBNewVariants = 0; private int rocksDBUpdateVariants = 0; - public CosmicIndexer(Path cosmicFile, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) throws IOException { + public CosmicIndexer(Path cosmicFile, String version, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) + throws IOException { super(genomeSequenceFilePath); this.cosmicFile = cosmicFile; + this.version = version; this.normalize = normalize; this.assembly = assembly; this.rdb = rdb; @@ -469,7 +471,7 @@ private EvidenceEntry buildCosmic(String[] fields) { String id = fields[ID_COLUMN]; String url = "https://cancer.sanger.ac.uk/cosmic/search?q=" + id; - EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.COSMIC_DATA, COSMIC_VERSION, null); + EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.COSMIC_DATA, version, null); SomaticInformation somaticInformation = getSomaticInformation(fields); List genomicFeatureList = getGenomicFeature(fields); diff --git 
a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/DOCMIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/DOCMIndexer.java index b77f238432..a150e042dd 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/DOCMIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/DOCMIndexer.java @@ -178,7 +178,7 @@ private VariantAnnotation parseVariantAnnotation(Map map) { List bibliography = getBibliography(evidenceEntry); bibliography.add(PMID + diseaseMap.get(SOURCE_PUBMED_ID)); } else { - EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.DOCM_DATA, null, null); + EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.DOCM_NAME, null, null); HeritableTrait heritableTrait = new HeritableTrait((String) diseaseMap.get(DISEASE), null); List genomicFeatureList = getGenomicFeature(map); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/HGMDIndexer.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/HGMDIndexer.java index d2ce12dee8..f132f4b9e8 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/HGMDIndexer.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/builders/clinical/variant/HGMDIndexer.java @@ -36,15 +36,17 @@ */ public class HGMDIndexer extends ClinicalIndexer { private final Path hgmdFile; + private final String version; private final String assembly; - public HGMDIndexer(Path hgmdFile, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) + public HGMDIndexer(Path hgmdFile, String version, boolean normalize, Path genomeSequenceFilePath, String assembly, RocksDB rdb) throws IOException { super(genomeSequenceFilePath); - this.rdb = rdb; - this.assembly = assembly; this.hgmdFile = hgmdFile; + this.version = version; this.normalize = normalize; + this.assembly = 
assembly; + this.rdb = rdb; } public void index() throws RocksDBException, IOException { @@ -93,7 +95,7 @@ private void parseHgmdInfo(Variant variant) { } // Source - entry.setSource(new EvidenceSource(EtlCommons.HGMD_DATA, "2020.3", "2020")); + entry.setSource(new EvidenceSource(EtlCommons.HGMD_DATA, version, null)); // Assembly entry.setAssembly(assembly); diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/db/MongoDBManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/db/MongoDBManager.java index d78c0446c8..6c5d4cf679 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/db/MongoDBManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/db/MongoDBManager.java @@ -26,6 +26,7 @@ import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.models.DataRelease; import org.opencb.cellbase.core.result.CellBaseDataResult; +import org.opencb.cellbase.core.utils.DatabaseNameUtils; import org.opencb.cellbase.core.utils.SpeciesUtils; import org.opencb.cellbase.lib.impl.core.ReleaseMongoDBAdaptor; import org.opencb.commons.datastore.core.DataStoreServerAddress; @@ -49,10 +50,8 @@ public class MongoDBManager { - public static final String DBNAME_SEPARATOR = "_"; - private MongoDataStoreManager mongoDataStoreManager; - private CellBaseConfiguration cellBaseConfiguration; + private final CellBaseConfiguration cellBaseConfiguration; private Logger logger; @@ -99,19 +98,19 @@ public MongoDataStore createMongoDBDatastore(String speciesStr, String assemblyS // cellbase_speciesId_assembly_cellbaseVersion // Example: // cellbase_hsapiens_grch37_v3 - String database = getDatabaseName(species.getId(), species.getAssembly(), cellBaseConfiguration.getVersion()); + String database = DatabaseNameUtils.getDatabaseName(species.getId(), species.getAssembly(), cellBaseConfiguration.getVersion()); logger.debug("Database for the species is '{}'", database); return createMongoDBDatastore(database); } catch 
(CellBaseException e) { e.printStackTrace(); logger.error("Species name is not valid: '{}'. Valid species: {}", speciesStr, - String.join(",", cellBaseConfiguration.getAllSpecies().stream().map((tmpSpeciesObject) - -> (tmpSpeciesObject.getCommonName() + "|" + tmpSpeciesObject.getScientificName())) + String.join(",", SpeciesUtils.getAllSpecies(cellBaseConfiguration).stream().map((tmpSpeciesObject) + -> (tmpSpeciesObject.getCommonName() + "|" + tmpSpeciesObject.getScientificName())) .collect(Collectors.toList()))); throw new InvalidParameterException("Species name is not valid: '" + speciesStr + "'. Please provide one" + " of supported species: {" - + String.join(",", cellBaseConfiguration.getAllSpecies().stream().map((tmpSpeciesObject) - -> (tmpSpeciesObject.getCommonName() + "|" + tmpSpeciesObject.getScientificName())) + + String.join(",", SpeciesUtils.getAllSpecies(cellBaseConfiguration).stream().map((tmpSpeciesObject) + -> (tmpSpeciesObject.getCommonName() + "|" + tmpSpeciesObject.getScientificName())) .collect(Collectors.toList())) + "}"); } } @@ -162,29 +161,6 @@ public MongoDataStore createMongoDBDatastore(String database) { return mongoDatastore; } - public static String getDatabaseName(String species, String assembly, String version) { - if (StringUtils.isEmpty(species) || StringUtils.isEmpty(assembly)) { - throw new InvalidParameterException("Species and assembly are required"); - } - - String cleanAssembly = assembly - .replaceAll("\\.", "") - .replaceAll("-", "") - .replaceAll("_", ""); - - // Process version from the configuration file, in order to suffix the database name - // - Production environment, e.g.: if version is "v5", the suffix added wil be "_v5" - // - Test environment, e.g.: if version is "v5.6" or "v5.6.0-SNAPSHOT", the suffix added will be "_v5_6" - String auxVersion = version.replace(".", DBNAME_SEPARATOR).replace("-", DBNAME_SEPARATOR); - String[] split = auxVersion.split(DBNAME_SEPARATOR); - String dbName = "cellbase" + 
DBNAME_SEPARATOR + species.toLowerCase() + DBNAME_SEPARATOR + cleanAssembly.toLowerCase() - + DBNAME_SEPARATOR + split[0]; - if (split.length > 1) { - dbName += (DBNAME_SEPARATOR + split[1]); - } - return dbName; - } - public Map getDatabaseStatus(String species, String assembly) { MongoDataStore mongoDatastore = createMongoDBDatastore(species, assembly); try { diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java index a4ade6603e..d88ef5d389 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/AbstractDownloadManager.java @@ -24,33 +24,38 @@ import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; -import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.config.DownloadProperties; import org.opencb.cellbase.core.config.SpeciesConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.DataSource; import org.opencb.cellbase.core.utils.SpeciesUtils; import org.opencb.cellbase.lib.EtlCommons; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; -import java.io.File; import java.io.FileReader; import java.io.IOException; import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.time.LocalDateTime; import java.util.*; -public class AbstractDownloadManager { +import static org.opencb.cellbase.lib.EtlCommons.*; - private static final String DGV_NAME = "DGV"; +public abstract class AbstractDownloadManager { - private static final 
String GNOMAD_NAME = "gnomAD"; + protected static final String DOWNLOADING_MSG = "Downloading {} ..."; + protected static final String DOWNLOADING_DONE_MSG = "Downloading {} done."; + protected static final String CATEGORY_DOWNLOADING_MSG = "Downloading {}/{} ..."; + protected static final String CATEGORY_DOWNLOADING_DONE_MSG = "Downloading {}/{} done."; + protected static final String DOWNLOADING_FROM_TO_MSG = "Downloading {} to {} ..."; + protected static final String DATA_ALREADY_DOWNLOADED_MSG = "The file {} already exists, indicating that the data {} has already been" + + " downloaded."; protected String species; protected String assembly; @@ -66,15 +71,23 @@ public class AbstractDownloadManager { protected Path downloadFolder; protected Path downloadLogFolder; // /download/log protected Path buildFolder; // /_/generated-json + + protected ObjectReader dataSourceReader; + protected ObjectWriter dataSourceWriter; + protected Logger logger; - public AbstractDownloadManager(String species, String assembly, Path outdir, CellBaseConfiguration configuration) + protected AbstractDownloadManager(String species, String assembly, Path outdir, CellBaseConfiguration configuration) throws IOException, CellBaseException { this.species = species; this.assembly = assembly; this.outdir = outdir; this.configuration = configuration; + ObjectMapper jsonObjectMapper = new ObjectMapper(); + this.dataSourceReader = jsonObjectMapper.readerFor(DataSource.class); + this.dataSourceWriter = jsonObjectMapper.writerFor(DataSource.class); + this.init(); } @@ -104,83 +117,130 @@ private void init() throws CellBaseException, IOException { // Prepare outdir Path speciesFolder = outdir.resolve(speciesShortName + "_" + assemblyConfiguration.getName().toLowerCase()); downloadFolder = outdir.resolve(speciesFolder + "/download"); - logger.info("Creating download dir " + downloadFolder.toString()); + logger.info("Creating download dir: {}", downloadFolder); Files.createDirectories(downloadFolder); 
downloadLogFolder = outdir.resolve(speciesFolder + "/download/log"); - logger.info("Creating download log dir " + downloadLogFolder.toString()); + logger.info("Creating download log dir: {}", downloadLogFolder); Files.createDirectories(downloadLogFolder); // /_/generated_json buildFolder = outdir.resolve(speciesFolder + "/generated_json"); - logger.info("Creating build dir " + buildFolder.toString()); + logger.info("Creating build dir: {}", buildFolder); Files.createDirectories(buildFolder); - logger.info("Processing species " + speciesConfiguration.getScientificName()); + logger.info("Processing species {}", speciesConfiguration.getScientificName()); } - public List download() throws IOException, InterruptedException, NoSuchMethodException, FileFormatException { - return null; - } + public abstract List download() throws IOException, InterruptedException, CellBaseException; -// public DownloadFile downloadStructuralVariants() throws IOException, InterruptedException { -// if (!speciesHasInfoToDownload(speciesConfiguration, "svs")) { -// return null; -// } -// if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { -// logger.info("Downloading DGV data ..."); -// -// Path structuralVariantsFolder = downloadFolder.resolve(EtlCommons.STRUCTURAL_VARIANTS_FOLDER); -// Files.createDirectories(structuralVariantsFolder); -// String sourceFilename = (assemblyConfiguration.getName().equalsIgnoreCase("grch37") ? 
"GRCh37_hg19" : "GRCh38_hg38") -// + "_variants_2016-05-15.txt"; -// String url = configuration.getDownload().getDgv().getHost() + "/" + sourceFilename; -// saveVersionData(EtlCommons.STRUCTURAL_VARIANTS_DATA, DGV_NAME, getDGVVersion(sourceFilename), getTimeStamp(), -// Collections.singletonList(url), structuralVariantsFolder.resolve(EtlCommons.DGV_VERSION_FILE)); -// return downloadFile(url, structuralVariantsFolder.resolve(EtlCommons.DGV_FILE).toString()); -// } -// return null; -// } - -// private String getDGVVersion(String sourceFilename) { -// return sourceFilename.split("\\.")[0].split("_")[3]; -// } - - protected boolean speciesHasInfoToDownload(SpeciesConfiguration sp, String info) { + protected boolean speciesHasInfoToDownload(SpeciesConfiguration sp, String data) { boolean hasInfo = true; - if (sp.getData() == null || !sp.getData().contains(info)) { - logger.warn("Species '{}' has no '{}' information available to download", sp.getScientificName(), info); + if (sp.getData() == null || !sp.getData().contains(data)) { + logger.warn("Species '{}' has no '{}' information available to download", sp.getScientificName(), data); hasInfo = false; } return hasInfo; } - protected String getTimeStamp() { - return new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime()); + protected String getConfigurationFileIdPrefix(String scientificSpecies) { + String prefix = ""; + if (StringUtils.isNotEmpty(scientificSpecies) && !scientificSpecies.equals("Homo sapiens") && scientificSpecies.contains(" ")) { + char c = scientificSpecies.charAt(0); + prefix = (c + scientificSpecies.split(" ")[1] + "_").toUpperCase(); + } + return prefix; } - protected void saveVersionData(String data, String name, String version, String date, List url, Path outputFilePath) - throws IOException { - Map versionDataMap = new HashMap<>(); - versionDataMap.put("data", data); - versionDataMap.put("name", name); - versionDataMap.put("version", version); - versionDataMap.put("date", 
date); - versionDataMap.put("url", url); + protected DownloadFile downloadAndSaveDataSource(DownloadProperties.URLProperties props, String fileId, String data, Path outPath) + throws IOException, InterruptedException, CellBaseException { + return downloadAndSaveDataSource(props, fileId, data, null, outPath); + } - ObjectMapper jsonObjectMapper = new ObjectMapper(); - jsonObjectMapper.writeValue(outputFilePath.toFile(), versionDataMap); + protected DownloadFile downloadAndSaveDataSource(DownloadProperties.URLProperties props, String fileId, String data, String chromosome, + Path outPath) throws IOException, InterruptedException, CellBaseException { + String versionFilename = getDataVersionFilename(data); + + // Download file + DownloadFile downloadFile = downloadDataSource(props, fileId, chromosome, outPath); + + // Save data source + saveDataSource(data, props.getVersion(), getTimeStamp(), Collections.singletonList(downloadFile.getUrl()), + outPath.resolve(versionFilename)); + + return downloadFile; + } + + protected DownloadFile downloadAndSaveEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, String data, + Path outPath) throws IOException, InterruptedException, CellBaseException { + return downloadAndSaveEnsemblDataSource(ensemblProps, fileId, data, null, outPath); + } + + protected DownloadFile downloadAndSaveEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, String data, + String chromosome, Path outPath) + throws IOException, InterruptedException, CellBaseException { + // Download file + DownloadFile downloadFile = downloadEnsemblDataSource(ensemblProps, fileId, chromosome, outPath); + + // Save data source + saveDataSource(data, "(" + getDataName(ENSEMBL_DATA) + " " + ensemblVersion + ")", getTimeStamp(), + Collections.singletonList(downloadFile.getUrl()), outPath.resolve(getDataVersionFilename(data))); + + return downloadFile; + } + + protected DownloadFile 
downloadDataSource(DownloadProperties.URLProperties props, String fileId, Path outPath) + throws IOException, InterruptedException, CellBaseException { + return downloadDataSource(props, fileId, null, outPath); + } + + protected DownloadFile downloadDataSource(DownloadProperties.URLProperties props, String fileId, + String chromosome, Path outPath) + throws IOException, InterruptedException, CellBaseException { + String url = EtlCommons.getUrl(props, fileId, species, assembly, chromosome); + Path outFile = outPath.resolve(getFilenameFromUrl(url)); + return downloadFile(url, outFile); + } + + protected DownloadFile downloadEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, Path outPath) + throws IOException, InterruptedException, CellBaseException { + return downloadEnsemblDataSource(ensemblProps, fileId, null, outPath); + } + + protected DownloadFile downloadEnsemblDataSource(DownloadProperties.EnsemblProperties ensemblProps, String fileId, String chromosome, + Path outPath) throws IOException, InterruptedException, CellBaseException { + String url = EtlCommons.getEnsemblUrl(ensemblProps, ensemblRelease, fileId, speciesShortName, assemblyConfiguration.getName(), + chromosome); + Path outFile = outPath.resolve(getFilenameFromUrl(url)); + return downloadFile(url, outFile); + } + + protected void saveDataSource(String data, String version, String date, List urls, Path versionFilePath) + throws IOException, CellBaseException { + String name = getDataName(data); + String category = getDataCategory(data); + DataSource dataSource = new DataSource(data, name, category, version, date, urls); + + if (StringUtils.isEmpty(version)) { + logger.warn("Version missing for data source {}/{}, using the date as version: {}", category, name, date); + dataSource.setVersion(date); + } + + dataSourceWriter.writeValue(versionFilePath.toFile(), dataSource); + logger.info("Created the {} version file {} at {}", getDataName(data), 
versionFilePath.getFileName(), versionFilePath.getParent()); + } + + protected String getTimeStamp() { + return new SimpleDateFormat("yyyyMMdd_HHmmss").format(Calendar.getInstance().getTime()); } protected String getLine(Path readmePath, int lineNumber) { Files.exists(readmePath); - try { - BufferedReader reader = Files.newBufferedReader(readmePath, Charset.defaultCharset()); + try (BufferedReader reader = Files.newBufferedReader(readmePath, Charset.defaultCharset())) { String line = null; for (int i = 0; i < lineNumber; i++) { line = reader.readLine(); } - reader.close(); return line; } catch (IOException e) { e.printStackTrace(); @@ -216,115 +276,107 @@ protected String getPhylo(SpeciesConfiguration sp) { } } - - - protected DownloadFile downloadFile(String url, String outputFileName) throws IOException, InterruptedException { - return downloadFile(url, outputFileName, null); + protected DownloadFile downloadFile(String url, Path outputFile) throws IOException, InterruptedException, CellBaseException { + return downloadFile(url, outputFile, null); } - protected DownloadFile downloadFile(String url, String outputFileName, List wgetAdditionalArgs) - throws IOException, InterruptedException { - DownloadFile downloadFileInfo = new DownloadFile(url, outputFileName, Timestamp.valueOf(LocalDateTime.now()).toString()); + protected DownloadFile downloadFile(String url, Path outputFile, List wgetAdditionalArgs) + throws IOException, InterruptedException, CellBaseException { + DownloadFile downloadFile = new DownloadFile(url, outputFile.toAbsolutePath().toString(), + Timestamp.valueOf(LocalDateTime.now()).toString()); Long startTime = System.currentTimeMillis(); - if (Paths.get(outputFileName).toFile().exists()) { - logger.warn("File '{}' is already downloaded", outputFileName); - setDownloadStatusAndMessage(outputFileName, downloadFileInfo, "File '" + outputFileName + "' is already downloaded", true); + final Path outputLog = 
downloadLogFolder.resolve(outputFile.getFileName().toString() + ".log"); + if (Files.exists(outputFile)) { + logger.warn("File '{}' is already downloaded", outputFile); + setDownloadStatusAndMessage(outputFile, downloadFile, outputLog, true); + downloadFile.setMessage("File is already downloaded"); } else { - final String outputLog = downloadLogFolder + "/" + Paths.get(outputFileName).toFile().getName() + ".log"; - List wgetArgs = new ArrayList<>(Arrays.asList("--tries=10", url, "-O", outputFileName, "-o", outputLog)); + logger.info(DOWNLOADING_FROM_TO_MSG, url, outputFile); + List wgetArgs = new ArrayList<>(Arrays.asList("--tries=10", url, + "-O", outputFile.toAbsolutePath().toString(), + "-o", outputLog.toAbsolutePath().toString())); if (wgetAdditionalArgs != null && !wgetAdditionalArgs.isEmpty()) { wgetArgs.addAll(wgetAdditionalArgs); } boolean downloaded = EtlCommons.runCommandLineProcess(null, "wget", wgetArgs, outputLog); - setDownloadStatusAndMessage(outputFileName, downloadFileInfo, outputLog, downloaded); + setDownloadStatusAndMessage(outputFile, downloadFile, outputLog, downloaded); + logger.info(OK_MSG); } - downloadFileInfo.setElapsedTime(startTime, System.currentTimeMillis()); - return downloadFileInfo; + downloadFile.setElapsedTime(startTime, System.currentTimeMillis()); + return downloadFile; } - private void setDownloadStatusAndMessage(String outputFileName, DownloadFile downloadFile, String outputLog, boolean downloaded) { + private void setDownloadStatusAndMessage(Path outputFile, DownloadFile downloadFile, Path logFile, boolean downloaded) { if (downloaded) { - boolean validFileSize = validateDownloadFile(downloadFile, outputFileName, outputLog); + boolean validFileSize = validateDownloadFile(downloadFile, outputFile, logFile); if (validFileSize) { downloadFile.setStatus(DownloadFile.Status.OK); downloadFile.setMessage("File downloaded successfully"); } else { downloadFile.setStatus(DownloadFile.Status.ERROR); downloadFile.setMessage("Expected 
downloaded file size " + downloadFile.getExpectedFileSize() - + ", Actual file size " + downloadFile.getActualFileSize()); + + ", actual file size " + downloadFile.getActualFileSize()); } } else { - downloadFile.setMessage("See full error message in " + outputLog); + downloadFile.setMessage("See full error message in " + logFile); downloadFile.setStatus(DownloadFile.Status.ERROR); - // because we use the -O flag, a file will be written, even on error. See #467 -// Files.deleteIfExists((new File(outputFileName)).toPath()); } } - public static void writeDownloadLogFile(Path downloadFolder, List downloadFiles) throws IOException { + public void writeDownloadLogFile(Map params, List downloadFiles) throws IOException { + // Get current date and time + String timeStamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); + Path summaryPath = downloadLogFolder.resolve(timeStamp + "_summary.json"); + + Map summary = new HashMap<>(); + summary.put("params", params); + summary.put("downloadFiles", downloadFiles); + ObjectMapper mapper = new ObjectMapper(); ObjectWriter writer = mapper.writer(new DefaultPrettyPrinter()); - writer.writeValue(new File(downloadFolder + "/download_log.json"), downloadFiles); + writer.writeValue(summaryPath.toFile(), summary); } - private boolean validateDownloadFile(DownloadFile downloadFile, String outputFileName, String outputFileLog) { - long expectedFileSize = getExpectedFileSize(outputFileLog); - long actualFileSize = FileUtils.sizeOf(new File(outputFileName)); + public boolean isAlreadyDownloaded(Path path, String dataName) { + if (Files.exists(path)) { + logger.info(DATA_ALREADY_DOWNLOADED_MSG, path.getFileName(), dataName); + return true; + } + return false; + } + + private boolean validateDownloadFile(DownloadFile downloadFile, Path outputFile, Path logFile) { + long expectedFileSize = getExpectedFileSize(logFile); + long actualFileSize = FileUtils.sizeOf(outputFile.toFile()); downloadFile.setActualFileSize(actualFileSize); 
downloadFile.setExpectedFileSize(expectedFileSize); return expectedFileSize == actualFileSize; } - private long getExpectedFileSize(String outputFileLog) { - try (BufferedReader reader = new BufferedReader(new FileReader(outputFileLog))) { - String line = null; + private long getExpectedFileSize(Path path) { + try (BufferedReader reader = new BufferedReader(new FileReader(path.toFile()))) { + String line; while ((line = reader.readLine()) != null) { // looking for: Length: 13846591 (13M) if (line.startsWith("Length:")) { String[] parts = line.split("\\s"); - return Long.valueOf(parts[1]); + return Long.parseLong(parts[1]); } } } catch (Exception e) { - logger.info("Error getting expected file size " + e.getMessage()); + logger.info("Error getting expected file size: {}. Stack trace: {}", e.getMessage(), Arrays.toString(e.getStackTrace())); } return -1; } - protected String getVersionFromVersionLine(Path path, String tag) { - Files.exists(path); - try { - BufferedReader reader = Files.newBufferedReader(path, Charset.defaultCharset()); - String line = reader.readLine(); - // There shall be a line at the README.txt containing the version. - // e.g. The files in the current directory contain the data corresponding to the latest release - // (version 4.0, April 2016). ... - while (line != null) { - // tag specifies a certain string that must be found within the line supposed to contain the version - // info - if (line.contains(tag)) { - String version = line.split("\\(")[1].split("\\)")[0]; - reader.close(); - return version; - } - line = reader.readLine(); - } - } catch (IOException e) { - e.printStackTrace(); - } - return null; - } - private String getEnsemblURL(SpeciesConfiguration sp) { // We need to find which is the correct Ensembl host URL. // This can different depending on if is a vertebrate species. 
- String ensemblHostUrl; if (configuration.getSpecies().getVertebrates().contains(sp)) { - ensemblHostUrl = configuration.getDownload().getEnsembl().getUrl().getHost(); + return configuration.getDownload().getEnsembl().getUrl().getHost(); } else { - ensemblHostUrl = configuration.getDownload().getEnsemblGenomes().getUrl().getHost(); + return configuration.getDownload().getEnsemblGenomes().getUrl().getHost(); } - return ensemblHostUrl; } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/CaddDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/CaddDownloadManager.java index e0cae1250e..c128b1d67d 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/CaddDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/CaddDownloadManager.java @@ -18,7 +18,7 @@ import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.exception.CellBaseException; -import org.opencb.cellbase.lib.EtlCommons; +import org.opencb.cellbase.core.utils.SpeciesUtils; import java.io.IOException; import java.nio.file.Files; @@ -26,36 +26,35 @@ import java.util.Collections; import java.util.List; +import static org.opencb.cellbase.lib.EtlCommons.*; + public class CaddDownloadManager extends AbstractDownloadManager { - private static final String CADD_NAME = "CADD"; public CaddDownloadManager(String species, String assembly, Path targetDirectory, CellBaseConfiguration configuration) throws IOException, CellBaseException { super(species, assembly, targetDirectory, configuration); } @Override - public List download() throws IOException, InterruptedException { - return Collections.singletonList(downloadCaddScores()); - } - - public DownloadFile downloadCaddScores() throws IOException, InterruptedException { - if (!speciesHasInfoToDownload(speciesConfiguration, "variation_functional_score")) { - return null; + public List download() throws IOException, 
InterruptedException, CellBaseException { + // Check if the species supports this data + if (!SpeciesUtils.hasData(configuration, speciesConfiguration.getScientificName(), VARIATION_FUNCTIONAL_SCORE_DATA)) { + logger.info(DATA_NOT_SUPPORTED_MSG, getDataName(VARIATION_FUNCTIONAL_SCORE_DATA), speciesConfiguration.getScientificName()); + return Collections.emptyList(); } - if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { - logger.info("Downloading CADD scores information ..."); - Path variationFunctionalScoreFolder = downloadFolder.resolve("variation_functional_score"); - Files.createDirectories(variationFunctionalScoreFolder); + logger.info(CATEGORY_DOWNLOADING_MSG, getDataCategory(CADD_DATA), getDataName(CADD_DATA)); - // Downloads CADD scores - String url = configuration.getDownload().getCadd().getHost(); + // Create the CADD download path + Path caddDownloadPath = downloadFolder.resolve(VARIATION_FUNCTIONAL_SCORE_DATA).resolve(CADD_DATA); + Files.createDirectories(caddDownloadPath); - saveVersionData(EtlCommons.VARIATION_FUNCTIONAL_SCORE_DATA, CADD_NAME, url.split("/")[5], getTimeStamp(), - Collections.singletonList(url), variationFunctionalScoreFolder.resolve("caddVersion.json")); - return downloadFile(url, variationFunctionalScoreFolder.resolve("whole_genome_SNVs.tsv.gz").toString()); - } - return null; + // Download CADD and save data source + DownloadFile downloadFile = downloadAndSaveDataSource(configuration.getDownload().getCadd(), CADD_FILE_ID, CADD_DATA, + caddDownloadPath); + + logger.info(CATEGORY_DOWNLOADING_DONE_MSG, getDataCategory(CADD_DATA), getDataName(CADD_DATA)); + + return Collections.singletonList(downloadFile); } } diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java index eb1f28db2d..e70e3d297b 100644 --- 
a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/ClinicalDownloadManager.java @@ -19,27 +19,20 @@ import org.opencb.cellbase.core.config.CellBaseConfiguration; import org.opencb.cellbase.core.config.DownloadProperties; import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.utils.SpeciesUtils; import org.opencb.cellbase.lib.EtlCommons; -import org.opencb.commons.utils.FileUtils; -import javax.ws.rs.client.Client; -import javax.ws.rs.client.ClientBuilder; -import javax.ws.rs.client.WebTarget; -import java.io.*; -import java.net.URI; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.Map; -public class ClinicalDownloadManager extends AbstractDownloadManager { - - private static final String CLINVAR_NAME = "ClinVar"; - private static final String GWAS_NAME = "GWAS catalog"; - private static final String IARCTP53_NAME = "IARC TP53 Database"; +import static org.opencb.cellbase.lib.EtlCommons.*; +public class ClinicalDownloadManager extends AbstractDownloadManager { public ClinicalDownloadManager(String species, String assembly, Path outdir, CellBaseConfiguration configuration) throws IOException, CellBaseException { @@ -47,199 +40,64 @@ public ClinicalDownloadManager(String species, String assembly, Path outdir, Cel } @Override - public List download() throws IOException, InterruptedException { - List downloadFiles = new ArrayList<>(); - downloadFiles.addAll(downloadClinical()); - return downloadFiles; + public List download() throws IOException, InterruptedException, CellBaseException { + return downloadClinical(); } - public List downloadClinical() throws IOException, InterruptedException { - if (speciesConfiguration.getScientificName().equals("Homo sapiens")) { - 
logger.info("Downloading clinical variant information ..."); - - String url; - List downloadFiles = new ArrayList<>(); - - Path clinicalFolder = downloadFolder.resolve(EtlCommons.CLINICAL_VARIANTS_FOLDER); - Files.createDirectories(clinicalFolder); - logger.info("\t\tDownloading ClinVar files ..."); - - List clinvarUrls = new ArrayList<>(3); - url = configuration.getDownload().getClinvar().getHost(); - - downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_XML_FILE).toString())); - clinvarUrls.add(url); - - url = configuration.getDownload().getClinvarEfoTerms().getHost(); - downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_EFO_FILE).toString())); - clinvarUrls.add(url); - - url = configuration.getDownload().getClinvarSummary().getHost(); - downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_SUMMARY_FILE).toString())); - clinvarUrls.add(url); - - url = configuration.getDownload().getClinvarVariationAllele().getHost(); - downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.CLINVAR_VARIATION_ALLELE_FILE).toString())); - clinvarUrls.add(url); - saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, CLINVAR_NAME, getClinVarVersion(), getTimeStamp(), clinvarUrls, - clinicalFolder.resolve("clinvarVersion.json")); - - // Gwas catalog - logger.info("\t\tDownloading GWAS catalog file ..."); - DownloadProperties.URLProperties gwasCatalog = configuration.getDownload().getGwasCatalog(); - url = gwasCatalog.getHost(); - downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.GWAS_FILE).toString())); - saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, GWAS_NAME, gwasCatalog.getVersion(), getTimeStamp(), - Collections.singletonList(url), clinicalFolder.resolve("gwasVersion.json")); - -// List hgvsList = getDocmHgvsList(); -// if (!hgvsList.isEmpty()) { -// downloadDocm(hgvsList, clinicalFolder.resolve(EtlCommons.DOCM_FILE)); -// 
downloadFiles.add(downloadFile(configuration.getDownload().getDocmVersion().getHost(), -// clinicalFolder.resolve("docmIndex.html").toString())); -// saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, EtlCommons.DOCM_NAME, -// getDocmVersion(clinicalFolder.resolve("docmIndex.html")), getTimeStamp(), -// Arrays.asList(configuration.getDownload().getDocm().getHost() + "v1/variants.json", -// configuration.getDownload().getDocm().getHost() + "v1/variants/{hgvs}.json"), -// clinicalFolder.resolve("docmVersion.json")); -// } else { -// logger.warn("No DOCM variants found for assembly {}. Please double-check that this is the correct " -// + "assembly", assemblyConfiguration.getName()); -// } - - // I am only able to download these files manually -// if (assemblyConfiguration.getName().equalsIgnoreCase("grch38")) { -// url = configuration.getDownload().getIarctp53().getHost(); -// downloadFiles.add(downloadFile(url, clinicalFolder.resolve(EtlCommons.IARCTP53_FILE).toString())); -// -// ZipFile zipFile = new ZipFile(clinicalFolder.resolve(EtlCommons.IARCTP53_FILE).toString()); -// Enumeration entries = zipFile.entries(); -// while (entries.hasMoreElements()) { -// ZipEntry entry = entries.nextElement(); -// File entryDestination = new File(clinicalFolder.toFile(), entry.getName()); -// if (entry.isDirectory()) { -// entryDestination.mkdirs(); -// } else { -// entryDestination.getParentFile().mkdirs(); -// InputStream in = zipFile.getInputStream(entry); -// OutputStream out = new FileOutputStream(entryDestination); -// IOUtils.copy(in, out); -// IOUtils.closeQuietly(in); -// out.close(); -// } -// } -// saveVersionData(EtlCommons.CLINICAL_VARIANTS_DATA, IARCTP53_NAME, -// getVersionFromVersionLine(clinicalFolder.resolve("Disclaimer.txt"), -// "The version of the database should be identified"), getTimeStamp(), -// Collections.singletonList(url), clinicalFolder.resolve("iarctp53Version.json")); -// } - - if (Files.notExists(clinicalFolder.resolve("clinvar_chunks"))) { - 
Files.createDirectories(clinicalFolder.resolve("clinvar_chunks")); - splitClinvar(clinicalFolder.resolve(EtlCommons.CLINVAR_XML_FILE), clinicalFolder.resolve("clinvar_chunks")); - } - - return downloadFiles; + public List downloadClinical() throws IOException, InterruptedException, CellBaseException { + // Check if the species supports this data + if (!SpeciesUtils.hasData(configuration, speciesConfiguration.getScientificName(), CLINICAL_VARIANT_DATA)) { + logger.info(DATA_NOT_SUPPORTED_MSG, getDataName(CLINICAL_VARIANT_DATA), speciesConfiguration.getScientificName()); + return Collections.emptyList(); } - return null; - } - private void splitClinvar(Path clinvarXmlFilePath, Path splitOutdirPath) throws IOException { - BufferedReader br = FileUtils.newBufferedReader(clinvarXmlFilePath); - PrintWriter pw = null; - StringBuilder header = new StringBuilder(); - boolean beforeEntry = true; - boolean inEntry = false; - int count = 0; - int chunk = 0; - String line; - while ((line = br.readLine()) != null) { - if (line.trim().startsWith("")) { - inEntry = false; - if (count % 10000 == 0) { - pw.print(""); - pw.close(); - chunk++; - } - } - } - pw.print(""); - pw.close(); - br.close(); - } + DownloadFile downloadFile; + List downloadFiles = new ArrayList<>(); - private String getDocmVersion(Path docmIndexHtml) { - return getVersionFromVersionLine(docmIndexHtml, "