diff --git a/browser/src/App.tsx b/browser/src/App.tsx index edd9c0958..157e43aa5 100644 --- a/browser/src/App.tsx +++ b/browser/src/App.tsx @@ -121,7 +121,6 @@ const App = () => { - {BANNER_CONTENT && {BANNER_CONTENT}} diff --git a/browser/src/NavBar.tsx b/browser/src/NavBar.tsx index 397ffe7c6..232d5df22 100644 --- a/browser/src/NavBar.tsx +++ b/browser/src/NavBar.tsx @@ -92,7 +92,7 @@ const NavBar = () => { - gnomAD browser + OurDNA Browser @@ -119,42 +119,20 @@ const NavBar = () => { Policies -
  • - - Publications - -
  • {/* two <a> tags instead of <Link>s because the blog is a separate application */}
  • - Blog -
  • -
  • - Changelog + Blog
  • Data
  • -
  • - - Forum - -
  • Contact
  • -
  • - - Help/FAQ - -
  • ) diff --git a/data-pipeline/src/data_pipeline/helpers/elasticsearch_export.py b/data-pipeline/src/data_pipeline/helpers/elasticsearch_export.py index e4fcbb9ed..747157070 100644 --- a/data-pipeline/src/data_pipeline/helpers/elasticsearch_export.py +++ b/data-pipeline/src/data_pipeline/helpers/elasticsearch_export.py @@ -109,6 +109,11 @@ def export_table_to_elasticsearch( table = table.select_globals(exported_at=export_time.isoformat(timespec="seconds"), table_globals=table.globals) table = table.key_by() + print('table', table) + print('table.describe', table.describe()) + print('table.globals.show()', table.globals.show()) + print('table.show()', table.show()) + if index_fields: if id_field and id_field not in [f.split(".")[-1] for f in index_fields]: raise RuntimeError("id_field must be included in index_fields") diff --git a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py index 9ddfa8b7b..8c2046ac4 100644 --- a/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py +++ b/data-pipeline/src/data_pipeline/pipelines/export_to_elasticsearch.py @@ -47,6 +47,8 @@ from data_pipeline.pipelines.gnomad_v4_cnv_dup_burden import pipeline as gnomad_v4_cnv_dup_burden +from data_pipeline.data_types.locus import x_position + logger = logging.getLogger("gnomad_data_pipeline") @@ -56,11 +58,16 @@ def subset_table(ds): return ds +def add_xpos(ds): + return ds.annotate(xpos=x_position(ds.locus)) + def add_variant_document_id(ds): return ds.annotate(document_id=compressed_variant_id(ds.locus, ds.alleles)) def truncate_clinvar_variant_ids(ds): + # clinvar tables needs release meta data + ds = ds.annotate_globals(clinvar_release_date='2022-10-31') return ds.annotate( variant_id=hl.if_else(hl.len(ds.variant_id) >= 32_766, ds.variant_id[:32_632] + "...", ds.variant_id) ) @@ -72,6 +79,74 @@ def add_liftover_document_id(ds): ) +""" +""" +# import gzip +# import cloudstorage as gcs 
+# from collections import OrderedDict +# from zlib import MAX_WBITS, decompress + +# import orjson +# from os import path + +# def parse_schema(s): +# def parse_type(s: str, end_delimiter: str, element_type: str): +# keys: List[str] = [] +# values = [] +# i = 0 +# while i < len(s): +# if s[i] == end_delimiter: +# if s[:i]: +# values.append(s[:i]) +# if element_type in ['Array', 'Set', 'Dict', 'Tuple', 'Interval']: +# return {'type': element_type, 'value': values}, s[i + 1 :] +# return {'type': element_type, 'value': OrderedDict(zip(keys, values))}, s[i + 1 :] + +# if s[i] == ':': +# keys.append(s[:i]) +# s = s[i + 1 :] +# i = 0 +# elif s[i] == '{': +# struct, s = parse_type(s[i + 1 :], '}', s[:i]) +# values.append(struct) +# i = 0 +# elif s[i] == '[': +# arr, s = parse_type(s[i + 1 :], ']', s[:i] if s[:i] else 'Array') +# values.append(arr) +# i = 0 +# elif s[i] == ',': +# if s[:i]: +# values.append(s[:i]) +# s = s[i + 1 :] +# i = 0 +# else: +# i += 1 + +# raise ValueError(f'End of {element_type} not found') + +# start_schema_index = s.index('{') +# return parse_type(s[start_schema_index + 1 :], "}", s[:start_schema_index])[0] + +# def load_schema(path): +# # +# filename = path + "/metadata.json.gz" +# #response = urllib.request.urlopen(filename) +# fs = gcs.open(filename) +# j = orjson.loads(gzip.decompress(fs.read())) + +# print(j) + +# fs.close() + +# # j = orjson.loads(decompress(fs.read(path.join(file, filename)), 16 + MAX_WBITS)) + +# # Get the file schema +# file_schema = parse_schema(j[next(k for k in j.keys() if k.endswith('type'))]) + +# print(file_schema) +# return file_schema + + DATASETS_CONFIG = { ############################################################################################################## # Genes @@ -484,6 +559,69 @@ def add_liftover_document_id(ds): "id_field": "element_id", }, }, + + ############################################################################################################## + # Genomic / Non Coding Constraints + 
############################################################################################################## + "ourdna_bioheart_variants_v4": { + "get_table": lambda: subset_table( + add_variant_document_id(hl.read_table("gs://cpg-ourdna-browser-dev-test/ourDNA-browser/browser.ht")) + ), + "get_schema": lambda: load_schema("gs://cpg-ourdna-browser-dev-test/ourDNA-browser/frequencies.ht"), + "args": { + "index": "gnomad_v4_variants", + "index_fields": [ + "document_id", + "variant_id", + "rsids", + #"caid", + "locus", + #"transcript_consequences.gene_id", + #"transcript_consequences.transcript_id", + #"vrs.alt.allele_id", + ], + "id_field": "document_id", + "num_shards": 48, + "block_size": 10_000, + }, + }, + + "ourdna_bioheart_genes_grch38": { + "get_table": lambda: hl.read_table("gs://cpg-ourdna-browser-dev-test/genes/gnomad.genes.GRCh38.GENCODEv39.pext.ht"), + "args": { + "index": "genes_grch38", + "index_fields": ["gene_id", "symbol_upper_case", "search_terms", "xstart", "xstop"], + "id_field": "gene_id", + "block_size": 200, + }, + }, + + "ourdna_bioheart_genes_grch38_noext": { + "get_table": lambda: hl.read_table("gs://cpg-ourdna-browser-dev-test/genes/gnomad.genes.GRCh38.GENCODEv39.ht"), + "args": { + "index": "genes_grch38_noext", + "index_fields": ["gene_id", "symbol_upper_case", "search_terms", "xstart", "xstop"], + "id_field": "gene_id", + "block_size": 200, + }, + }, + + "ourdna_bioheart_v3_genome_coverage": { + "get_table": lambda: add_xpos(hl.read_table("gs://cpg-ourdna-browser-dev-test/ourDNA-browser/genome/merged_coverage.ht")), + "args": {"index": "gnomad_v3_genome_coverage", "id_field": "xpos", + "num_shards": 48, + "block_size": 100_000 + }, + }, + + "ourdna_bioheart_v4_exome_coverage": { + "get_table": lambda: add_xpos(hl.read_table("gs://cpg-ourdna-browser-dev-test/ourDNA-browser/merged_coverage.ht")), + "args": {"index": "gnomad_v4_exome_coverage", "id_field": "xpos", + "num_shards": 48, + "block_size": 50_000 + }, + }, + } @@ -496,6 +634,11 
@@ def export_datasets(elasticsearch_host, elasticsearch_auth, datasets): for dataset in datasets: logger.info("exporting dataset %s", dataset) dataset_config = DATASETS_CONFIG[dataset] + + # logger.info("getting schema %s", dataset) + # table_schema = dataset_config["get_schema"]() + # logger.info("table_schema: %s", table_schema) + table = dataset_config["get_table"]() export_table_to_elasticsearch(table, **base_args, **dataset_config.get("args", {})) diff --git a/data-pipeline/src/data_pipeline/pipelines/genes.py b/data-pipeline/src/data_pipeline/pipelines/genes.py index 7b7ab14eb..2bd27dedf 100644 --- a/data-pipeline/src/data_pipeline/pipelines/genes.py +++ b/data-pipeline/src/data_pipeline/pipelines/genes.py @@ -42,7 +42,7 @@ pipeline = Pipeline() external_sources_subdir = "external_sources" -genes_subdir = "genes" +genes_subdir = "genes_test" constraint_subdir = "constraint" @@ -240,7 +240,7 @@ prepare_gtex_expression_data, "/gtex/gtex_v10_tissue_expression.ht", { - "transcript_tpms_path": "gs://gnomad-v4-data-pipeline/output/external_sources/gtex/v10/GTEx_Analysis_v10_RSEMv1.3.3_transcripts_tpm.txt.bgz", + "transcript_tpms_path": "gs://gcp-public-data--gnomad/resources/grch38/gtex/v10/GTEx_Analysis_2022-06-06_v10_RSEMv1.3.3_transcripts_tpm.txt.bgz", "sample_annotations_path": pipeline.get_task("download_gtex_v10_sample_attributes"), }, { @@ -258,14 +258,14 @@ }, ) -pipeline.add_task( - "reshape_pext_v4_data_to_tissue_array", - reshape_pext_data_to_tissue_array, - "/pext/pext_v4_tissue_array", - { - "pext_struct_path": "gs://gnomad-v4-data-pipeline/output/external_sources/pext/gnomad.pext.gtex_v10.browser.ht", - }, -) +# pipeline.add_task( +# "reshape_pext_v4_data_to_tissue_array", +# reshape_pext_data_to_tissue_array, +# "/pext/pext_v4_tissue_array", +# { +# "pext_struct_path": "gs://gnomad-v4-data-pipeline/output/external_sources/pext/gnomad.pext.gtex_v10.browser.ht", +# }, +# ) ############################################### # Constraint @@ -418,7 +418,7 
@@ def annotate_with_preferred_transcript(table_path): "table_path": pipeline.get_task("prepare_grch38_genes"), "canonical_transcript": pipeline.get_task("get_grch38_canonical_transcripts"), "mane_select_transcript": pipeline.get_task("import_mane_select_transcripts"), - "pext": pipeline.get_task("reshape_pext_v4_data_to_tissue_array"), + # "pext": pipeline.get_task("reshape_pext_v4_data_to_tissue_array"), }, ) diff --git a/deploy/deployctl/config.py b/deploy/deployctl/config.py index 9292536bb..67f5bda4d 100644 --- a/deploy/deployctl/config.py +++ b/deploy/deployctl/config.py @@ -15,6 +15,8 @@ class Configuration: environment_tag: str = None authorized_networks: str = "0.0.0.0/0" data_pipeline_output: str = None + docker_registry: str = "us-docker.pkg.dev" + cluster_name: str = "gnomad" def __init__(self, config_path): self._config_path = config_path @@ -50,20 +52,20 @@ def region(self): @property def network_name(self): if self.environment_tag: - return f"gnomad-{self.environment_tag}" + return f"{self.cluster_name}-{self.environment_tag}" - return "gnomad" + return self.cluster_name @property def ip_address_name(self): if self.environment_tag: - return f"gnomad-browser-{self.environment_tag}" + return f"{self.cluster_name}-{self.environment_tag}" - return "gnomad-browser" + return f"{self.cluster_name}-browser" @property def gke_service_account_name(self): - return "gnomad-gke" + return f"{self.cluster_name}-gke" @property def gke_service_account_full_name(self): @@ -72,9 +74,9 @@ def gke_service_account_full_name(self): @property def gke_cluster_name(self): if self.environment_tag: - return f"gnomad-{self.environment_tag}" + return f"{self.cluster_name}-{self.environment_tag}" - return "gnomad" + return self.cluster_name @property def kubectl_context(self): @@ -82,19 +84,19 @@ def kubectl_context(self): @property def api_image_repository(self): - return f"us-docker.pkg.dev/{self.project}/gnomad/gnomad-api" + return 
f"{self.docker_registry}/{self.project}/gnomad/gnomad-api" @property def browser_image_repository(self): - return f"us-docker.pkg.dev/{self.project}/gnomad/gnomad-browser" + return f"{self.docker_registry}/{self.project}/gnomad/gnomad-browser" @property def reads_server_image_repository(self): - return f"us-docker.pkg.dev/{self.project}/gnomad/gnomad-reads-server" + return f"{self.docker_registry}/{self.project}/gnomad/gnomad-reads-server" @property def reads_api_image_repository(self): - return f"us-docker.pkg.dev/{self.project}/gnomad/gnomad-reads-api" + return f"{self.docker_registry}/{self.project}/gnomad/gnomad-reads-api" config = Configuration(_CONFIG_PATH) # pylint: disable=invalid-name diff --git a/deploy/deployctl/subcommands/elasticsearch.py b/deploy/deployctl/subcommands/elasticsearch.py index 8fa0b2194..d1d27663a 100644 --- a/deploy/deployctl/subcommands/elasticsearch.py +++ b/deploy/deployctl/subcommands/elasticsearch.py @@ -14,7 +14,7 @@ def get_elasticsearch_password(cluster_name: str, namespace: str) -> None: f"-n={namespace}", "get", "secret", - f"{cluster_name}-es-elastic-user", + "gnomad-es-elastic-user", "-o=go-template={{.data.elastic | base64decode}}", ] ) @@ -28,7 +28,7 @@ def load_datasets(cluster_name: str, namespace: str, dataproc_cluster: str, secr f"-n={namespace}", "get", "service", - f"{cluster_name}-elasticsearch-lb", + f"gnomad-elasticsearch-lb", "--output=jsonpath={.status.loadBalancer.ingress[0].ip}", ] ) diff --git a/deploy/deployctl/subcommands/ingress_demo.py b/deploy/deployctl/subcommands/ingress_demo.py index e30c844a0..dc2c798da 100644 --- a/deploy/deployctl/subcommands/ingress_demo.py +++ b/deploy/deployctl/subcommands/ingress_demo.py @@ -23,6 +23,41 @@ targetPort: 80 """ +# INGRESS_MANIFEST_TEMPLATE = """--- +# --- +# apiVersion: networking.k8s.io/v1 +# kind: Ingress +# metadata: +# name: gnomad-ingress-demo-{name} +# labels: +# tier: demo +# spec: +# rules: +# - http: +# paths: +# - path: /reads +# pathType: 
ImplementationSpecific +# backend: +# service: +# name: {reads_service} +# port: +# number: 80 +# - path: /reads/* +# pathType: ImplementationSpecific +# backend: +# service: +# name: {reads_service} +# port: +# number: 80 +# - path: +# pathType: ImplementationSpecific +# backend: +# service: +# name: gnomad-browser-demo-{name} +# port: +# number: 80 +# """ + INGRESS_MANIFEST_TEMPLATE = """--- --- apiVersion: networking.k8s.io/v1 @@ -35,20 +70,6 @@ rules: - http: paths: - - path: /reads - pathType: ImplementationSpecific - backend: - service: - name: {reads_service} - port: - number: 80 - - path: /reads/* - pathType: ImplementationSpecific - backend: - service: - name: {reads_service} - port: - number: 80 - path: pathType: ImplementationSpecific backend: diff --git a/deploy/manifests/browser/base/api.deployment.yaml b/deploy/manifests/browser/base/api.deployment.yaml index 95866fc67..7f8dd687a 100644 --- a/deploy/manifests/browser/base/api.deployment.yaml +++ b/deploy/manifests/browser/base/api.deployment.yaml @@ -6,7 +6,7 @@ metadata: labels: component: gnomad-api spec: - replicas: 2 + replicas: 1 selector: matchLabels: name: gnomad-api @@ -67,10 +67,12 @@ spec: resources: requests: cpu: '1' - memory: '11Gi' + # memory: '11Gi' + memory: '2Gi' limits: cpu: '2' - memory: '12Gi' + # memory: '12Gi' + memory: '2Gi' readinessProbe: httpGet: path: /health/ready diff --git a/deploy/manifests/ingress/gnomad.ingress.yaml b/deploy/manifests/ingress/gnomad.ingress.yaml index e08aa4e16..c1c31ab7e 100644 --- a/deploy/manifests/ingress/gnomad.ingress.yaml +++ b/deploy/manifests/ingress/gnomad.ingress.yaml @@ -10,7 +10,7 @@ metadata: networking.gke.io/v1beta1.FrontendConfig: 'gnomad-frontend-config' spec: rules: - - host: gnomad.broadinstitute.org + - host: ourdna.populationgenomics.org.au http: paths: - backend: diff --git a/deploy/manifests/ingress/gnomad.managedcertificate.yaml b/deploy/manifests/ingress/gnomad.managedcertificate.yaml index 636ecdd63..a5df8c5ba 100644 --- 
a/deploy/manifests/ingress/gnomad.managedcertificate.yaml +++ b/deploy/manifests/ingress/gnomad.managedcertificate.yaml @@ -6,4 +6,4 @@ metadata: tier: production spec: domains: - - gnomad.broadinstitute.org + - ourdna.populationgenomics.org.au