Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion browser/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ const App = () => {
<Suspense fallback={null}>
<TopBarWrapper>
<NavBar />
{BANNER_CONTENT && <Banner>{BANNER_CONTENT}</Banner>}
</TopBarWrapper>
<Notifications />

Expand Down
26 changes: 2 additions & 24 deletions browser/src/NavBar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ const NavBar = () => {
<Wrapper>
<LogoWrapper>
<Link to="/" onClick={closeMenu}>
<Logo>gnomAD browser</Logo>
<Logo>OurDNA Browser</Logo>
</Link>
<ToggleMenuButton onClick={toggleMenu}>☰</ToggleMenuButton>
</LogoWrapper>
Expand All @@ -119,42 +119,20 @@ const NavBar = () => {
Policies
</Link>
</li>
<li>
<Link to="/publications" onClick={closeMenu}>
Publications
</Link>
</li>
{/* two <a> tags instead of <Link>s because the blog is a separate application */}
<li>
<a href="https://gnomad.broadinstitute.org/news/">Blog</a>
</li>
<li>
<a href="https://gnomad.broadinstitute.org/news/changelog/">Changelog</a>
<a href="https://ourdna.populationgenomics.org.au/news/">Blog</a>
</li>
<li>
<Link to="/data" onClick={closeMenu}>
Data
</Link>
</li>
<li>
<a
href="https://discuss.gnomad.broadinstitute.org"
target="_blank"
rel="noopener noreferrer"
>
Forum
</a>
</li>
<li>
<Link to="/contact" onClick={closeMenu}>
Contact
</Link>
</li>
<li>
<Link to="/help" onClick={closeMenu}>
Help/FAQ
</Link>
</li>
</Menu>
</Wrapper>
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ def export_table_to_elasticsearch(
table = table.select_globals(exported_at=export_time.isoformat(timespec="seconds"), table_globals=table.globals)
table = table.key_by()

print('table', table)
print('table.describe', table.describe())
print('table.globals.show()', table.globals.show())
print('table.show()', table.show())

if index_fields:
if id_field and id_field not in [f.split(".")[-1] for f in index_fields]:
raise RuntimeError("id_field must be included in index_fields")
Expand Down
143 changes: 143 additions & 0 deletions data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@

from data_pipeline.pipelines.gnomad_v4_cnv_dup_burden import pipeline as gnomad_v4_cnv_dup_burden

from data_pipeline.data_types.locus import x_position


logger = logging.getLogger("gnomad_data_pipeline")

Expand All @@ -56,11 +58,16 @@ def subset_table(ds):
return ds


def add_xpos(ds):
    """Annotate *ds* with an ``xpos`` field (globally sortable position) derived from its locus."""
    xpos_expr = x_position(ds.locus)
    return ds.annotate(xpos=xpos_expr)

def add_variant_document_id(ds):
    """Annotate *ds* with a ``document_id`` built from its locus and alleles."""
    doc_id = compressed_variant_id(ds.locus, ds.alleles)
    return ds.annotate(document_id=doc_id)


def truncate_clinvar_variant_ids(ds, *, clinvar_release_date="2022-10-31"):
    """Truncate over-long ClinVar variant IDs and attach release metadata.

    The browser expects ClinVar tables to carry a ``clinvar_release_date``
    global; it was previously hard-coded here, so it is now a keyword-only
    parameter with the same default for backward compatibility.

    Variant IDs at or above 32,766 characters are truncated to 32,632
    characters plus a ``"..."`` marker — presumably to stay under Lucene's
    32,766-byte maximum indexed term length (TODO confirm against the
    Elasticsearch mapping).
    """
    ds = ds.annotate_globals(clinvar_release_date=clinvar_release_date)
    return ds.annotate(
        variant_id=hl.if_else(hl.len(ds.variant_id) >= 32_766, ds.variant_id[:32_632] + "...", ds.variant_id)
    )
Expand All @@ -72,6 +79,74 @@ def add_liftover_document_id(ds):
)


"""
"""
# import gzip
# import cloudstorage as gcs
# from collections import OrderedDict
# from zlib import MAX_WBITS, decompress

# import orjson
# from os import path

# def parse_schema(s):
# def parse_type(s: str, end_delimiter: str, element_type: str):
# keys: List[str] = []
# values = []
# i = 0
# while i < len(s):
# if s[i] == end_delimiter:
# if s[:i]:
# values.append(s[:i])
# if element_type in ['Array', 'Set', 'Dict', 'Tuple', 'Interval']:
# return {'type': element_type, 'value': values}, s[i + 1 :]
# return {'type': element_type, 'value': OrderedDict(zip(keys, values))}, s[i + 1 :]

# if s[i] == ':':
# keys.append(s[:i])
# s = s[i + 1 :]
# i = 0
# elif s[i] == '{':
# struct, s = parse_type(s[i + 1 :], '}', s[:i])
# values.append(struct)
# i = 0
# elif s[i] == '[':
# arr, s = parse_type(s[i + 1 :], ']', s[:i] if s[:i] else 'Array')
# values.append(arr)
# i = 0
# elif s[i] == ',':
# if s[:i]:
# values.append(s[:i])
# s = s[i + 1 :]
# i = 0
# else:
# i += 1

# raise ValueError(f'End of {element_type} not found')

# start_schema_index = s.index('{')
# return parse_type(s[start_schema_index + 1 :], "}", s[:start_schema_index])[0]

# def load_schema(path):
# #
# filename = path + "/metadata.json.gz"
# #response = urllib.request.urlopen(filename)
# fs = gcs.open(filename)
# j = orjson.loads(gzip.decompress(fs.read()))

# print(j)

# fs.close()

# # j = orjson.loads(decompress(fs.read(path.join(file, filename)), 16 + MAX_WBITS))

# # Get the file schema
# file_schema = parse_schema(j[next(k for k in j.keys() if k.endswith('type'))])

# print(file_schema)
# return file_schema


DATASETS_CONFIG = {
##############################################################################################################
# Genes
Expand Down Expand Up @@ -484,6 +559,69 @@ def add_liftover_document_id(ds):
"id_field": "element_id",
},
},

##############################################################################################################
# Genomic / Non Coding Constraints
##############################################################################################################
"ourdna_bioheart_variants_v4": {
"get_table": lambda: subset_table(
add_variant_document_id(hl.read_table("gs://cpg-ourdna-browser-dev-test/ourDNA-browser/browser.ht"))
),
"get_schema": lambda: load_schema("gs://cpg-ourdna-browser-dev-test/ourDNA-browser/frequencies.ht"),
"args": {
"index": "gnomad_v4_variants",
"index_fields": [
"document_id",
"variant_id",
"rsids",
#"caid",
"locus",
#"transcript_consequences.gene_id",
#"transcript_consequences.transcript_id",
#"vrs.alt.allele_id",
],
"id_field": "document_id",
"num_shards": 48,
"block_size": 10_000,
},
},

"ourdna_bioheart_genes_grch38": {
"get_table": lambda: hl.read_table("gs://cpg-ourdna-browser-dev-test/genes/gnomad.genes.GRCh38.GENCODEv39.pext.ht"),
"args": {
"index": "genes_grch38",
"index_fields": ["gene_id", "symbol_upper_case", "search_terms", "xstart", "xstop"],
"id_field": "gene_id",
"block_size": 200,
},
},

"ourdna_bioheart_genes_grch38_noext": {
"get_table": lambda: hl.read_table("gs://cpg-ourdna-browser-dev-test/genes/gnomad.genes.GRCh38.GENCODEv39.ht"),
"args": {
"index": "genes_grch38_noext",
"index_fields": ["gene_id", "symbol_upper_case", "search_terms", "xstart", "xstop"],
"id_field": "gene_id",
"block_size": 200,
},
},

"ourdna_bioheart_v3_genome_coverage": {
"get_table": lambda: add_xpos(hl.read_table("gs://cpg-ourdna-browser-dev-test/ourDNA-browser/genome/merged_coverage.ht")),
"args": {"index": "gnomad_v3_genome_coverage", "id_field": "xpos",
"num_shards": 48,
"block_size": 100_000
},
},

"ourdna_bioheart_v4_exome_coverage": {
"get_table": lambda: add_xpos(hl.read_table("gs://cpg-ourdna-browser-dev-test/ourDNA-browser/merged_coverage.ht")),
"args": {"index": "gnomad_v4_exome_coverage", "id_field": "xpos",
"num_shards": 48,
"block_size": 50_000
},
},

}


Expand All @@ -496,6 +634,11 @@ def export_datasets(elasticsearch_host, elasticsearch_auth, datasets):
for dataset in datasets:
logger.info("exporting dataset %s", dataset)
dataset_config = DATASETS_CONFIG[dataset]

# logger.info("getting schema %s", dataset)
# table_schema = dataset_config["get_schema"]()
# logger.info("table_schema: %s", table_schema)

table = dataset_config["get_table"]()
export_table_to_elasticsearch(table, **base_args, **dataset_config.get("args", {}))

Expand Down
22 changes: 11 additions & 11 deletions data-pipeline/src/data_pipeline/pipelines/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
pipeline = Pipeline()

external_sources_subdir = "external_sources"
genes_subdir = "genes"
genes_subdir = "genes_test"
constraint_subdir = "constraint"


Expand Down Expand Up @@ -240,7 +240,7 @@
prepare_gtex_expression_data,
"/gtex/gtex_v10_tissue_expression.ht",
{
"transcript_tpms_path": "gs://gnomad-v4-data-pipeline/output/external_sources/gtex/v10/GTEx_Analysis_v10_RSEMv1.3.3_transcripts_tpm.txt.bgz",
"transcript_tpms_path": "gs://gcp-public-data--gnomad/resources/grch38/gtex/v10/GTEx_Analysis_2022-06-06_v10_RSEMv1.3.3_transcripts_tpm.txt.bgz",
"sample_annotations_path": pipeline.get_task("download_gtex_v10_sample_attributes"),
},
{
Expand All @@ -258,14 +258,14 @@
},
)

pipeline.add_task(
"reshape_pext_v4_data_to_tissue_array",
reshape_pext_data_to_tissue_array,
"/pext/pext_v4_tissue_array",
{
"pext_struct_path": "gs://gnomad-v4-data-pipeline/output/external_sources/pext/gnomad.pext.gtex_v10.browser.ht",
},
)
# pipeline.add_task(
# "reshape_pext_v4_data_to_tissue_array",
# reshape_pext_data_to_tissue_array,
# "/pext/pext_v4_tissue_array",
# {
# "pext_struct_path": "gs://gnomad-v4-data-pipeline/output/external_sources/pext/gnomad.pext.gtex_v10.browser.ht",
# },
# )

###############################################
# Constraint
Expand Down Expand Up @@ -418,7 +418,7 @@ def annotate_with_preferred_transcript(table_path):
"table_path": pipeline.get_task("prepare_grch38_genes"),
"canonical_transcript": pipeline.get_task("get_grch38_canonical_transcripts"),
"mane_select_transcript": pipeline.get_task("import_mane_select_transcripts"),
"pext": pipeline.get_task("reshape_pext_v4_data_to_tissue_array"),
# "pext": pipeline.get_task("reshape_pext_v4_data_to_tissue_array"),
},
)

Expand Down
24 changes: 13 additions & 11 deletions deploy/deployctl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ class Configuration:
environment_tag: str = None
authorized_networks: str = "0.0.0.0/0"
data_pipeline_output: str = None
docker_registry: str = "us-docker.pkg.dev"
cluster_name: str = "gnomad"

def __init__(self, config_path):
self._config_path = config_path
Expand Down Expand Up @@ -50,20 +52,20 @@ def region(self):
@property
def network_name(self):
    """VPC network name: ``<cluster_name>-<environment_tag>``, or ``<cluster_name>`` alone.

    The scraped diff left both the old hard-coded ``gnomad-…`` returns and
    the new ``cluster_name``-based returns in the body; the old lines ran
    first, making the new ones dead code. Only the new behavior is kept.
    """
    # Suffix with the environment tag (e.g. a per-developer tag) when configured.
    if self.environment_tag:
        return f"{self.cluster_name}-{self.environment_tag}"
    return self.cluster_name

@property
def ip_address_name(self):
    """Reserved IP address name: ``<cluster_name>-<environment_tag>``, or ``<cluster_name>-browser``.

    The scraped diff left the old hard-coded ``gnomad-browser…`` returns
    shadowing the new ``cluster_name``-based ones (dead code); only the new
    behavior is kept.
    """
    if self.environment_tag:
        return f"{self.cluster_name}-{self.environment_tag}"
    return f"{self.cluster_name}-browser"

@property
def gke_service_account_name(self):
    """Short name of the GKE node service account: ``<cluster_name>-gke``.

    The scraped diff left the old hard-coded ``"gnomad-gke"`` return before
    the new ``cluster_name``-based one (dead code); only the new behavior
    is kept.
    """
    return f"{self.cluster_name}-gke"

@property
def gke_service_account_full_name(self):
Expand All @@ -72,29 +74,29 @@ def gke_service_account_full_name(self):
@property
def gke_cluster_name(self):
    """GKE cluster name: ``<cluster_name>-<environment_tag>``, or ``<cluster_name>`` alone.

    The scraped diff left the old hard-coded ``gnomad…`` returns shadowing
    the new ``cluster_name``-based ones (dead code); only the new behavior
    is kept.
    """
    if self.environment_tag:
        return f"{self.cluster_name}-{self.environment_tag}"
    return self.cluster_name

@property
def kubectl_context(self):
    """kubectl context name in the ``gke_<project>_<zone>_<cluster>`` form."""
    parts = ("gke", self.project, self.zone, self.gke_cluster_name)
    return "_".join(parts)

@property
def api_image_repository(self):
    """Container image repository for the API server.

    The scraped diff left the old hard-coded ``us-docker.pkg.dev`` return
    before the new ``docker_registry``-based one (dead code); only the new
    behavior is kept.
    """
    return f"{self.docker_registry}/{self.project}/gnomad/gnomad-api"

@property
def browser_image_repository(self):
    """Container image repository for the browser frontend.

    The scraped diff left the old hard-coded ``us-docker.pkg.dev`` return
    before the new ``docker_registry``-based one (dead code); only the new
    behavior is kept.
    """
    return f"{self.docker_registry}/{self.project}/gnomad/gnomad-browser"

@property
def reads_server_image_repository(self):
    """Container image repository for the reads server.

    The scraped diff left the old hard-coded ``us-docker.pkg.dev`` return
    before the new ``docker_registry``-based one (dead code); only the new
    behavior is kept.
    """
    return f"{self.docker_registry}/{self.project}/gnomad/gnomad-reads-server"

@property
def reads_api_image_repository(self):
    """Container image repository for the reads API.

    The scraped diff left the old hard-coded ``us-docker.pkg.dev`` return
    before the new ``docker_registry``-based one (dead code); only the new
    behavior is kept.
    """
    return f"{self.docker_registry}/{self.project}/gnomad/gnomad-reads-api"


config = Configuration(_CONFIG_PATH) # pylint: disable=invalid-name
4 changes: 2 additions & 2 deletions deploy/deployctl/subcommands/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def get_elasticsearch_password(cluster_name: str, namespace: str) -> None:
f"-n={namespace}",
"get",
"secret",
f"{cluster_name}-es-elastic-user",
"gnomad-es-elastic-user",
"-o=go-template={{.data.elastic | base64decode}}",
]
)
Expand All @@ -28,7 +28,7 @@ def load_datasets(cluster_name: str, namespace: str, dataproc_cluster: str, secr
f"-n={namespace}",
"get",
"service",
f"{cluster_name}-elasticsearch-lb",
f"gnomad-elasticsearch-lb",
"--output=jsonpath={.status.loadBalancer.ingress[0].ip}",
]
)
Expand Down
Loading