-
Notifications
You must be signed in to change notification settings - Fork 10
Description
Thank you for providing this code.
I have installed the scrattch.bigcat package and then tried to use your iter_clust_big method to cluster an h5ad file.
Could you please help me debug my code, or give me a sample for handling an h5ad file with your methods?
Here is my code:
'''
library(arrow)
library(hdf5r)
library(scrattch.bigcat)
library(bigstatsr)
library(anndata)
library(Matrix)
library(arrow)
library(data.table)
library(doMC)
library(foreach)
library(dplyr)
# Cluster an h5ad (AnnData) file with scrattch.bigcat:
#   1. read the h5ad file,
#   2. convert it to a parquet-backed big.dat object,
#   3. run iter_clust_big() on that object and print the cluster assignment.

# Path of the input AnnData file.
h5ad_file <- "/data2/STG/data/paper3/singlenuclei_data/Macosko_Mouse_Atlas_Single_Nuclei.Use_Backed1%.h5ad"
a_data <- read_h5ad(h5ad_file)

# Hand-built descriptor wrapping the AnnData object together with its cell
# (obs) and gene (var) names.
# NOTE(review): a.dat is not referenced again in this script.
a.dat <- list()
a.dat$ann <- a_data
a.dat$type <- "h5ad"
a.dat$col_id <- a.dat$ann$obs_names
a.dat$row_id <- a.dat$ann$var_names

# All outputs are written below the current working directory; the same
# core count is used for the conversion and for the clustering.
out_dir <- getwd()
n_cores <- 10

big.dat <- convert_h5ad_big.dat_parquet(
  fn = h5ad_file,
  adata = a_data,
  dir = out_dir,
  parquet.dir = file.path(out_dir, "norm.dat_parquet"),
  col.fn = file.path(out_dir, "samples.parquet"),
  row.fn = file.path(out_dir, "gene.parquet"),
  col.bin.size = 50000,
  row.bin.size = 500,
  # NOTE(review): both do.logNormal and logNormal are passed; confirm against
  # the function's signature which of the two it actually accepts.
  do.logNormal = TRUE,
  logNormal = TRUE,
  mc.cores = n_cores
)

# NOTE(review): the three values below are computed but never used in the
# visible part of this script; they are kept so that behaviour is unchanged.
scaled_data <- rescale_samples(t(a_data$X))
select.cells <- big.dat$col_id
genes.allowed <- big.dat$row_id

# The original call ended with a trailing comma after jaccard.sampleSize,
# which passes an extra empty argument to the function; it is removed here.
# NOTE(review): the failure reported for this script is raised inside this
# call (iter_clust_big -> onestep_clust -> find_vg -> compute_vg_stats);
# presumably the gene names and the per-gene statistics differ in length
# there -- verify that big.dat$row_id matches the genes stored in parquet.dir.
result <- iter_clust_big(
  big.dat = big.dat,
  prefix = NULL,
  split.size = 10,
  result = NULL,
  method = "auto",
  counts = NULL,
  sampleSize = 50000,
  mc.cores = n_cores,
  overwrite = TRUE,
  verbose = FALSE,
  jaccard.sampleSize = 300000
)

print(result$cl)
'''
'''
The error is:
Error in data.frame(gene = rownames(dat), g.means = means, g.vars = vars, :
arguments imply differing number of rows: 1816, 2189
Calls: iter_clust_big ... onestep_clust -> find_vg -> compute_vg_stats -> data.frame
'''