diff --git a/workflow/envs/basic.yml b/workflow/envs/basic.yml
new file mode 100644
index 0000000..ca3968b
--- /dev/null
+++ b/workflow/envs/basic.yml
@@ -0,0 +1,4 @@
+channels:
+  - conda-forge
+dependencies:
+  - gzip>=1.14
diff --git a/workflow/envs/biopython.yml b/workflow/envs/biopython.yml
new file mode 100644
index 0000000..1ba2c96
--- /dev/null
+++ b/workflow/envs/biopython.yml
@@ -0,0 +1,4 @@
+channels:
+  - conda-forge
+dependencies:
+  - biopython>=1.85
\ No newline at end of file
diff --git a/workflow/envs/openbabel.yml b/workflow/envs/openbabel.yml
new file mode 100644
index 0000000..7f4d8a8
--- /dev/null
+++ b/workflow/envs/openbabel.yml
@@ -0,0 +1,4 @@
+channels:
+  - conda-forge
+dependencies:
+  - openbabel>=3.1.1
diff --git a/workflow/envs/plotting.yml b/workflow/envs/plotting.yml
new file mode 100644
index 0000000..4ee8ded
--- /dev/null
+++ b/workflow/envs/plotting.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+dependencies:
+  - matplotlib>=3.10.6
+  - venn>=0.1.3
\ No newline at end of file
diff --git a/workflow/envs/simple_pandas.yml b/workflow/envs/simple_pandas.yml
new file mode 100644
index 0000000..961f3af
--- /dev/null
+++ b/workflow/envs/simple_pandas.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - python>=3.12.4
+  - pandas>=2.2.2
\ No newline at end of file
diff --git a/workflow/envs/vinalc.yml b/workflow/envs/vinalc.yml
new file mode 100644
index 0000000..65f2cb2
--- /dev/null
+++ b/workflow/envs/vinalc.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - vinalc>=1.4.2
diff --git a/workflow/rules/analyse.smk b/workflow/rules/analyse.smk
index eb7b05c..2cb9e3b 100644
--- a/workflow/rules/analyse.smk
+++ b/workflow/rules/analyse.smk
@@ -172,6 +172,8 @@ rule makeHistogram:
         ),
     log:
         "logs/makeHistogram_{receptorID}.log",
+    conda:
+        "../envs/plotting.yml"
     envmodules:
         config["PYPLOT"],
     script:
@@ -214,6 +216,8 @@ rule dockingResultsTxt:
         path.join(OUTPUT_DIR, "results", "{receptorID}_{percentage}.csv"),
     log:
         "logs/dockingResultsTxt_{receptorID}_{percentage}.log",
+    conda:
+        "../envs/simple_pandas.yml"
     wildcard_constraints:
         receptorID="[^/]+",
         percentage="[^/]+",
@@ -277,11 +281,15 @@ rule prepareSecondDocking:
         ),
     log:
         "logs/prepareSecondDocking_{name}_{receptorID}_{percentage}.log",
-    shell:
-        """
-        cp {input.grid} {output.grid}
-        echo {input.receptor} > {output.receptor}
-        """
+    run:
+        import shutil
+        from pathlib import Path
+
+        shutil.copy(input.grid, output.grid)
+        # The replaced shell step ran `echo {input.receptor} > {output.receptor}`,
+        # i.e. it wrote the receptor *path* into the output file rather than
+        # copying the receptor itself; keep that behaviour.
+        Path(output.receptor).write_text(f"{input.receptor}\n")
 
 
 rule docking2:
@@ -314,6 +322,8 @@ rule docking2:
         tasks=config["DOCKING"]["ntasks"],
         slurm_extra=config["DOCKING"]["slurm_extra"],
         runtime=config["DOCKING"]["runtime"],
+    conda:
+        "../envs/vinalc.yml"
     envmodules:
         config["VINALC"],
     shell:
@@ -420,6 +430,8 @@ rule makeVenn:
             ),
             category="Rescreening",
         ),
+    conda:
+        "../envs/plotting.yml"
     log:
         "logs/makeVenn_{receptorID}_{percentage}.log",
     script:
diff --git a/workflow/rules/docking.smk b/workflow/rules/docking.smk
index c1ed1ac..1470c85 100644
--- a/workflow/rules/docking.smk
+++ b/workflow/rules/docking.smk
@@ -27,6 +27,8 @@ rule docking:
             "{dataset}",
             "{receptorID}.txt_{database}_{dataset}_{name}_{i}.txt.pdbqt.gz",
         ),
+    conda:
+        "../envs/vinalc.yml"
     envmodules:
         config["VINALC"],
     params:
diff --git a/workflow/rules/preparation.smk b/workflow/rules/preparation.smk
index 303b2b8..c1da5a3 100644
--- a/workflow/rules/preparation.smk
+++ b/workflow/rules/preparation.smk
@@ -32,6 +32,8 @@ rule convertMol2:
         path.join(INPUT_DIR, "ZINC", "subsets", "{subset}.mol2"),
     output:
         path.join(TMP_DIR, "unzipped", "ZINC", "subsets", "{subset}.pdbqt"),
+    conda:
+        "../envs/openbabel.yml"
     envmodules:
         config["OPENBABEL"],
     shell:
@@ -43,6 +45,8 @@ rule mergeLocalInput:
         in_dir=LOCAL_INPUT,
     output:
         path.join(TMP_DIR, "unzipped", "{database}", "{dataset}", "local.pdbqt"),
+    conda:
+        "../envs/openbabel.yml"
     envmodules:
         config["OPENBABEL"],
     script:
@@ -64,6 +68,8 @@ rule SDFToPDBQT:
         path.join(TMP_DIR, "unzipped", "{database}", "{dataset}", "{name}.sdf"),
     output:
         path.join(TMP_DIR, "unzipped", "{database}", "{dataset}", "{name}.pdbqt"),
+    conda:
+        "../envs/openbabel.yml"
     envmodules:
         config["OPENBABEL"],
     shell:
@@ -75,6 +81,8 @@ rule prepareReceptor:
         path.join(TMP_DIR, "unzipped", "PDB", "receptor", "{name}.pdb"),
     output:
         path.join(TMP_DIR, "PDB", "receptor", "{name}.pdb"),
+    conda:
+        "../envs/biopython.yml"
     envmodules:
         config["BIOPYTHON"],
     script:
@@ -86,6 +94,8 @@ rule makeReceptorPDBQT:
         path.join(TMP_DIR, "PDB", "receptor", "{name}.pdb"),
     output:
         path.join(PREPARED_DIR, "receptor", "{name}.pdbqt"),
+    conda:
+        "../envs/openbabel.yml"
     envmodules:
         config["OPENBABEL"],
     shell:
@@ -97,6 +107,8 @@ rule gunzip:
         path.join(INPUT_DIR, "{database}", "{dataset}", "{name}.{filetype}.gz"),
     output:
         path.join(TMP_DIR, "unzipped", "{database}", "{dataset}", "{name}.{filetype}"),
+    conda:
+        "../envs/basic.yml"
     shell:
         "gunzip < {input} > {output} || touch {output}"
 
@@ -138,6 +150,8 @@ rule energyMin:
         partition=config["ENERGY_MIN"]["partition"],
         runtime=config["ENERGY_MIN"]["runtime"],
         mem_mb=config["ENERGY_MIN"]["mem_mb"],
+    conda:
+        "../envs/openbabel.yml"
     envmodules:
         config["OPENBABEL"],
     shell:
@@ -149,8 +163,24 @@ rule prepareGeometry:
         path.join(config["GRID_DIR"], "{receptorID}.gpf"),
     output:
         path.join(OUTPUT_DIR, "grid", "{receptorID}_grid.txt"),
-    shell:
-        "egrep 'npts|gridcenter' {input} |cut -f2-4 -d' '| tac |tr '\n' ' ' > {output} && sed -i -e '$a\ ' {output}"
+    run:
+        grid_params = []
+
+        with open(input[0], "r") as f:
+            for line in f:
+                # Match lines starting with 'npts' or 'gridcenter'
+                if line.startswith(("npts", "gridcenter")):
+                    # Extract fields 2-4 (space-separated values after the first field)
+                    parts = line.strip().split()
+                    if len(parts) >= 4:
+                        grid_params.append(" ".join(parts[1:4]))
+
+        # Reverse the order (equivalent to 'tac')
+        grid_params.reverse()
+
+        # Write to output file with space separation and trailing newline
+        with open(output[0], "w") as f:
+            f.write(" ".join(grid_params) + " \n")
 
 
 rule prepareLibrary: