Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion phenograph/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,11 +287,12 @@ def cluster(
print("Setting directed=False because prune=True")
directed = False

kernelargs = {}
if n_jobs == 1:
kernel = jaccard_kernel
else:
kernelargs["n_jobs"] = n_jobs
kernel = parallel_jaccard_kernel
kernelargs = {}

# Start timer
tic = time.time()
Expand Down
26 changes: 21 additions & 5 deletions phenograph/core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
from sklearn.neighbors import NearestNeighbors
from multiprocessing import Pool
from multiprocessing import Pool, cpu_count
from contextlib import closing
from itertools import repeat
from scipy import sparse as sp
Expand Down Expand Up @@ -136,14 +136,30 @@ def calc_jaccard(i, idx):
return idx[i], coefficients


def parallel_jaccard_kernel(idx):
def parallel_jaccard_kernel(idx, n_jobs=-1):
"""Compute Jaccard coefficient between nearest-neighbor sets in parallel
:param idx: n-by-k integer matrix of k-nearest neighbors

:return (i, j, s): row indices, column indices, and nonzero values for a sparse adjacency matrix
Parameters
----------
idx
n-by-k integer matrix of k-nearest neighbors
n_jobs
Number of concurrently running workers. If 1 is given, no parallelism is
used. If set to -1, all CPUs are used. For n_jobs below -1, `n_cpus + 1 + n_jobs`
are used.

Returns
-------
i, j, s
row indices, column indices, and nonzero values for a sparse adjacency matrix
"""
if n_jobs == -1:
n_jobs = cpu_count()
Copy link

@jpintar jpintar Sep 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not use multiprocessing.cpu_count() here – in HPC contexts it does not always report how many cores your job has been allocated; it often returns the total core count of the node instead. It's safer to use len(os.sched_getaffinity(0)), which counts only the CPUs the current process is allowed to run on (note that it is only available on some Unix platforms, e.g. Linux). There's also os.process_cpu_count(), introduced in Python 3.13.

Hopefully this pull request gets incorporated... Not sure how actively Phenograph is being maintained...

if n_jobs < -1:
n_jobs = cpu_count() + 1 + n_jobs

n = len(idx)
with closing(Pool()) as pool:
with closing(Pool(n_jobs)) as pool:
jaccard_values = pool.starmap(calc_jaccard, zip(range(n), repeat(idx)))

graph = sp.lil_matrix((n, n), dtype=float)
Expand Down