diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 282bab918a11..a0f15c8faf46 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,20 +1,18 @@ - // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node { "name": "GraphScope", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile "image": "registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:v0.24.2-amd64", - // Features to add to the dev container. More info: https://containers.dev/features. - "features": { - "ghcr.io/devcontainers/features/common-utils:2":{ - "installZsh": "true", + "features": { + "ghcr.io/devcontainers/features/common-utils:2": { + "installZsh": "true", "configureZshAsDefaultShell": "true", "installOhMyZsh": true, - "upgradePackages": "false" - } - }, + "upgradePackages": "false" + } + }, // Configure tool-specific properties. "customizations": { // Configure properties specific to VS Code. @@ -28,20 +26,15 @@ ] } }, - // Set `remoteUser` to `root` to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. "remoteUser": "graphscope", - // Use 'postCreateCommand' to run commands before the container is created. "initializeCommand": "sudo docker pull registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:latest", - // Uncomment this to enable C++ and Rust debugging in containers // "capAdd": ["SYS_PTRACE"], // "securityOpt": ["seccomp=unconfined"], - // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [3000], - // Use 'portsAttributes' to set default properties for specific forwarded ports. 
// More info: https://containers.dev/implementors/json_reference/#port-attributes // "portsAttributes": { @@ -50,23 +43,21 @@ // "onAutoForward": "notify" // } // }, - // Use 'postCreateCommand' to run commands after the container is created. // "postCreateCommand": "yarn install" - // Improve performance - // Uncomment these to mount a folder to a volume // https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-targeted-named-volume // "mounts": [ - // "source=${localWorkspaceFolderBasename}-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume" + // "source=${localWorkspaceFolderBasename}-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume" // ], // "postCreateCommand": "sudo chown graphscope node_modules" - - + "runArgs": [ + "--network=host", + ], // Uncomment these to use a named volume for your entire source tree // https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-named-volume-for-your-entire-source-tree // "workspaceMount": "source=gs,target=/workspaces,type=volume", // "workspaceFolder": "/workspaces" "postCreateCommand": "sudo chown -R graphscope /workspaces && bash pre-commit/install-hook.sh && bash pre-commit/prepare-commit-msg" -} +} \ No newline at end of file diff --git a/.github/workflows/flex-interactive.yml b/.github/workflows/flex-interactive.yml index d45b3fe37b3a..13e6692da2e6 100644 --- a/.github/workflows/flex-interactive.yml +++ b/.github/workflows/flex-interactive.yml @@ -70,7 +70,7 @@ jobs: docker image inspect graphscope/interactive:latest --format='{{.Size}}' # launch service: ${COORDINATOR_PORT} for coordinator http port; ${CYPHER_PORT} for cypher port; gsctl instance deploy --type interactive --image-registry graphscope --image-tag latest --cypher-port ${CYPHER_PORT} \ - --coordinator-port ${COORDINATOR_PORT} --config ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml \ + --coordinator-port ${COORDINATOR_PORT} --config 
${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml \ --set storage.string_default_max_length=1024 sleep 20 # test @@ -97,9 +97,9 @@ jobs: run: | # launch service: ${COORDINATOR_PORT} for coordinator http port; ${CYPHER_PORT} for cypher port; # replace max_content_length to 1MB - sed -i 's/max_content_length: .*/max_content_length: 1MB/g' ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml + sed -i 's/max_content_length: .*/max_content_length: 1MB/g' ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml gsctl instance deploy --type interactive --image-registry graphscope --image-tag latest --cypher-port ${CYPHER_PORT} \ - --coordinator-port ${COORDINATOR_PORT} --config ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml \ + --coordinator-port ${COORDINATOR_PORT} --config ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml \ --admin-port 7778 --storedproc-port 10001 sleep 20 # test @@ -121,7 +121,7 @@ jobs: cd python && pip3 install -r requirements.txt && python3 setup.py build_proto python3 setup.py bdist_wheel pip3 install dist/*.whl - gsctl instance deploy --type interactive --image-registry graphscope --image-tag latest --config ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml + gsctl instance deploy --type interactive --image-registry graphscope --image-tag latest --config ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml sleep 20 # test cd ${GITHUB_WORKSPACE}/flex/interactive/sdk/examples/python diff --git a/.github/workflows/interactive.yml b/.github/workflows/interactive.yml index cdd0e869d206..fa37033d5c64 100644 --- a/.github/workflows/interactive.yml +++ b/.github/workflows/interactive.yml @@ -184,7 +184,23 @@ jobs: cp ${SCHEMA_FILE} ${TMP_INTERACTIVE_WORKSPACE}/data/modern_graph/graph.yaml GLOG_v=10 ./bin/bulk_loader -g ${SCHEMA_FILE} -l ${BULK_LOAD_FILE} -d ${TMP_INTERACTIVE_WORKSPACE}/data/modern_graph/indices/ cd 
${GITHUB_WORKSPACE}/flex/tests/hqps - bash hqps_admin_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml ${GS_TEST_DIR} + bash hqps_admin_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_standalone.yaml ${GS_TEST_DIR} + + # test admin service with etcd metastore + GOOGLE_URL=https://storage.googleapis.com/etcd + GITHUB_URL=https://github.com/etcd-io/etcd/releases/download + DOWNLOAD_URL=${GOOGLE_URL} + rm -rf /tmp/etcd-download-test && mkdir -p /tmp/etcd-download-test + curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz + tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download-test + /tmp/etcd-download-test/etcd & + sleep 3 + + cd ${GITHUB_WORKSPACE}/flex/tests/hqps + python3 -c 'import yaml;f=open("./interactive_config_standalone.yaml");y=yaml.safe_load(f);y["compute_engine"]["metadata_store"]["uri"] = "http://localhost:2379"; f.close();f=open("./interactive_config_test_etcd.yaml","w");yaml.dump(y,f);f.close()' + bash hqps_admin_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_test_etcd.yaml ${GS_TEST_DIR} + rm ./interactive_config_test_etcd.yaml + - name: Test Interactive Python Admin Service run: @@ -216,11 +232,11 @@ jobs: cp ${SCHEMA_FILE} ${TMP_INTERACTIVE_WORKSPACE}/data/modern_graph/graph.yaml GLOG_v=10 ./bin/bulk_loader -g ${SCHEMA_FILE} -l ${BULK_LOAD_FILE} -d ${TMP_INTERACTIVE_WORKSPACE}/data/modern_graph/indices/ cd ${GITHUB_WORKSPACE}/flex/tests/hqps - sed -i 's/interactive_workspace/temp_workspace/g' ./interactive_config_test.yaml - bash hqps_sdk_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml java + sed -i 's/interactive_workspace/temp_workspace/g' ./interactive_config_standalone.yaml + bash hqps_sdk_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_standalone.yaml java export ENGINE_TYPE=interactive - bash hqps_sdk_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml python - sed -i 
's/temp_workspace/interactive_workspace/g' ./interactive_config_test.yaml + bash hqps_sdk_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_standalone.yaml python + sed -i 's/temp_workspace/interactive_workspace/g' ./interactive_config_standalone.yaml - name: Robustness test env: @@ -228,7 +244,7 @@ jobs: GS_TEST_DIR: ${{ github.workspace }}/gstest run: | cd ${GITHUB_WORKSPACE}/flex/tests/hqps - bash hqps_robust_test.sh ${INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml ./interactive_config_test_cbo.yaml + bash hqps_robust_test.sh ${INTERACTIVE_WORKSPACE} ./interactive_config_standalone.yaml ./interactive_config_standalone_cbo.yaml - name: Sample Query test env: @@ -251,7 +267,7 @@ jobs: cd ${GITHUB_WORKSPACE}/flex/tests/hqps pip3 install argparse pip3 install neo4j - bash hqps_compiler_get_meta_test.sh ${INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml + bash hqps_compiler_get_meta_test.sh ${INTERACTIVE_WORKSPACE} ./interactive_config_standalone.yaml - name: Run codegen test. env: @@ -259,7 +275,7 @@ jobs: INTERACTIVE_WORKSPACE: /tmp/interactive_workspace run: | cd ${GITHUB_WORKSPACE}/flex/tests/hqps - bash hqps_codegen_test.sh ${INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml ./interactive_config_test_cbo.yaml + bash hqps_codegen_test.sh ${INTERACTIVE_WORKSPACE} ./interactive_config_standalone.yaml ./interactive_config_standalone_cbo.yaml - name: Test cypher&cpp procedure generation and loading env: @@ -280,18 +296,18 @@ jobs: cd ${GITHUB_WORKSPACE}/flex/bin ./load_plan_and_gen.sh -e=hqps -i=../tests/interactive/plus_one.cc -w=/tmp/codegen \ - --ir_conf=${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml -o=${PLUGIN_DIR} \ + --ir_conf=${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml -o=${PLUGIN_DIR} \ --procedure_name=plus_one \ --graph_schema_path=../interactive/examples/modern_graph/graph.yaml \ --procedure_desc="This is test procedure, and the input is a number, and the output is the number plus one." 
./load_plan_and_gen.sh -e=hqps -i=../interactive/sdk/java/src/test/resources/sample_app.cc -w=/tmp/codegen \ - --ir_conf=${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml -o=${PLUGIN_DIR} \ + --ir_conf=${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml -o=${PLUGIN_DIR} \ --procedure_name=sample_app \ --graph_schema_path=../interactive/examples/modern_graph/graph.yaml ./load_plan_and_gen.sh -e=hqps -i=../interactive/examples/modern_graph/count_vertex_num.cypher -w=/tmp/codegen \ - --ir_conf=${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml -o=${PLUGIN_DIR} \ + --ir_conf=${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml -o=${PLUGIN_DIR} \ --procedure_name=count_vertex_num \ --graph_schema_path=../interactive/examples/modern_graph/graph.yaml @@ -300,12 +316,12 @@ jobs: # plus_one: (num: int64) -> (num: int64), CppEncoder # sample_app: (num: int64) -> (num: int64), kCypherJson - sed -i 's/interactive_workspace/temp_workspace/g' ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml + sed -i 's/interactive_workspace/temp_workspace/g' ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml cd ${GITHUB_WORKSPACE}/flex/tests/interactive/ bash test_plugin_loading.sh ${TMP_INTERACTIVE_WORKSPACE} modern_graph \ - ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml \ + ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml \ ./modern_graph_schema_v0_0.yaml ./modern_graph_schema_v0_1.yaml - sed -i 's/temp_workspace/interactive_workspace/g' ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_test.yaml + sed -i 's/temp_workspace/interactive_workspace/g' ${GITHUB_WORKSPACE}/flex/tests/hqps/interactive_config_standalone.yaml - name: Let compiler use latest interactive java sdk env: @@ -476,7 +492,6 @@ jobs: git clone --single-branch https://github.com/alibaba/libgrape-lite.git /tmp/libgrape-lite cd /tmp/libgrape-lite mkdir -p build && cd build - cmake .. 
make -j$(nproc) make install @@ -641,4 +656,4 @@ jobs: SCHEMA_FILE=${GITHUB_WORKSPACE}/flex/interactive/examples/modern_graph/graph.yaml BULK_LOAD_FILE=${GITHUB_WORKSPACE}/flex/interactive/examples/modern_graph/bulk_load.yaml sed -i 's/|/\\t/g' ${BULK_LOAD_FILE} - GLOG_v=10 ./bin/bulk_loader -g ${SCHEMA_FILE} -l ${BULK_LOAD_FILE} -d /tmp/csr-data-dir/ + GLOG_v=10 ./bin/bulk_loader -g ${SCHEMA_FILE} -l ${BULK_LOAD_FILE} -d /tmp/csr-data-dir/ \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 076e5da42810..7dbe1a033b6e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,6 +13,15 @@ [submodule "flex/third_party/parallel-hashmap"] path = flex/third_party/parallel-hashmap url = https://github.com/greg7mdp/parallel-hashmap.git + +[submodule "flex/third_party/etcd-cpp-apiv3"] + path = flex/third_party/etcd-cpp-apiv3 + url = https://github.com/etcd-cpp-apiv3/etcd-cpp-apiv3.git + +[submodule "flex/third_party/cpprestsdk"] + path = flex/third_party/cpprestsdk + url = https://github.com/microsoft/cpprestsdk.git + [submodule "flex/third_party/aliyun-oss-cpp-sdk"] path = flex/third_party/aliyun-oss-cpp-sdk url = https://github.com/aliyun/aliyun-oss-cpp-sdk.git diff --git a/charts/graphscope-interactive/templates/_helpers.tpl b/charts/graphscope-interactive/templates/_helpers.tpl index a77e42303686..a13185944947 100644 --- a/charts/graphscope-interactive/templates/_helpers.tpl +++ b/charts/graphscope-interactive/templates/_helpers.tpl @@ -156,7 +156,7 @@ Return the configmap with the graphscope configuration Return the engineConfigPath with the graphscope configuration */}} {{- define "graphscope-interactive.engineConfigPath" -}} -/etc/interactive/interactive_config.yaml +/opt/flex/share/interactive_config.yaml {{- end -}} {{/* diff --git a/charts/graphscope-interactive/values.yaml b/charts/graphscope-interactive/values.yaml index ef424b655da5..52ec07161c6b 100644 --- a/charts/graphscope-interactive/values.yaml +++ b/charts/graphscope-interactive/values.yaml 
@@ -18,6 +18,8 @@ commonAnnotations: {} ## commonLabels: {} +imagePullSecrets: {} + ## javaOpts: "" @@ -30,12 +32,10 @@ workspace: "/tmp/interactive_workspace" ## default graph defaultGraph: modern_graph - hiactorWorkerNum: 1 hiactorTimeout: 240000 - # ## need by vineyard in distributed env # etcdEndpoint: "etcd-for-vineyard.default.svc.cluster.local:2379" @@ -73,7 +73,7 @@ persistence: ## storageClass: "" accessModes: - - ReadWriteOnce + - ReadWriteOnce annotations: {} ## @param persistence.labels Labels for the PVC ## @@ -98,7 +98,7 @@ engine: ## pullSecrets: ## - myRegistryKeySecretName ## - pullSecrets: [ ] + pullSecrets: [] replicaCount: 1 @@ -133,6 +133,7 @@ engine: ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity ## Note: podAffinityPreset, podAntiAffinityPreset, and nodeAffinityPreset will be ignored when it's set ## + affinity: {} # affinity: # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: @@ -170,7 +171,8 @@ engine: ## GraphScope Interactive container's resource requests and limits ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ ## - resources: {} + resources: + {} # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. If you do want to specify resources, uncomment the following @@ -319,7 +321,7 @@ frontend: ## pullSecrets: ## - myRegistryKeySecretName ## - pullSecrets: [ ] + pullSecrets: [] replicaCount: 1 @@ -373,7 +375,8 @@ frontend: ## GraphScope Interactive container's resource requests and limits ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ ## - resources: {} + resources: + {} # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. 
If you do want to specify resources, uncomment the following diff --git a/charts/interactive/.helmignore b/charts/interactive/.helmignore new file mode 100644 index 000000000000..0e8a0eb36f4c --- /dev/null +++ b/charts/interactive/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/interactive/Chart.yaml b/charts/interactive/Chart.yaml new file mode 100644 index 000000000000..d64c3d2ea600 --- /dev/null +++ b/charts/interactive/Chart.yaml @@ -0,0 +1,36 @@ +apiVersion: v1 +name: interactive +description: GraphScope Interactive +icon: https://graphscope.io/assets/images/graphscope-logo.svg +home: https://graphscope.io + +sources: + - https://github.com/alibaba/GraphScope/flex/interactive/ + +keywords: + - GraphScope + - Interactive + - Graph Analytics + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. 
+# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.0.2 + +dependencies: + - name: kafka + repository: oci://registry-1.docker.io/bitnamicharts + version: "20.0.6" + - name: etcd + repository: oci://registry-1.docker.io/bitnamicharts + version: "11.1.3" diff --git a/charts/interactive/README.md b/charts/interactive/README.md new file mode 100644 index 000000000000..1b9c5d33448b --- /dev/null +++ b/charts/interactive/README.md @@ -0,0 +1,8 @@ +# GraphScope Interactive Helm Chart + + +## Installation + +```bash +helm dependency build +``` diff --git a/charts/interactive/requirements.lock b/charts/interactive/requirements.lock new file mode 100644 index 000000000000..c94598687cfe --- /dev/null +++ b/charts/interactive/requirements.lock @@ -0,0 +1,9 @@ +dependencies: +- name: kafka + repository: oci://registry-1.docker.io/bitnamicharts + version: 20.0.6 +- name: etcd + repository: oci://registry-1.docker.io/bitnamicharts + version: 11.1.3 +digest: sha256:0ad74c0346724654a5d22bb2c3ae832b9bbeee3c9351eb0ee417d3317b6fdbd0 +generated: "2025-03-18T15:12:46.261383953+08:00" diff --git a/charts/interactive/templates/_helpers.tpl b/charts/interactive/templates/_helpers.tpl new file mode 100644 index 000000000000..b9f67b5caae9 --- /dev/null +++ b/charts/interactive/templates/_helpers.tpl @@ -0,0 +1,339 @@ +{{- define "graphscope-interactive.etcd.endpoint" -}} +{{- printf "http://%s-etcd.%s.svc.cluster.local:2379" .Release.Name .Release.Namespace | quote }} +{{- end -}} + +{{- define "graphscope-interactive.master.workspace" -}} +{{- if .Values.workspace }} +{{- .Values.workspace }} +{{- else }} +{{- "/tmp/interactive_workspace/" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.master.codegenWorkDir" -}} +{{- printf "%s/codegen" (include "graphscope-interactive.master.workspace" .) 
}} +{{- end -}} + +{{- define "graphscope-interactive.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end -}} + +{{- define "graphscope-interactive.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end -}} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interacitve.engine.defaultGraphSchemaPath" -}} +{{- "/opt/flex/share/gs_interactive_default_graph/graph.yaml" }} +{{- end -}} + + +{{- define "graphscope-interactive.ossAccessKeyId" -}} +{{- if .Values.oss.accessKeyId }} +{{- .Values.oss.accessKeyId | quote }} +{{- else }} +{{- "" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.ossAccessKeySecret" -}} +{{- if .Values.oss.accessKeySecret }} +{{- .Values.oss.accessKeySecret | quote }} +{{- else }} +{{- "" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.ossEndpoint" -}} +{{- if .Values.oss.endpoint }} +{{- .Values.oss.endpoint | quote }} +{{- else }} +{{- "" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.ossBucketName" -}} +{{- if .Values.oss.bucketName }} +{{- .Values.oss.bucketName | quote }} +{{- else }} +{{- "" }} +{{- end -}} +{{- end -}} + + +{{- define "graphscope-interactive.engine.fullname" -}} +{{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "engine" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + + +{{- define "graphscope-interactive.master.fullname" -}} +{{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "master" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "graphscope-interactive.master.serviceName" -}} +{{- printf "%s-%s" (include "graphscope-interactive.master.fullname" .) 
"headless" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "graphscope-interactive.master.servicePort" -}} +{{- if .Values.master.service.adminPort }} +{{- .Values.master.service.adminPort }} +{{- else }} +{{- 7776 }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.master.endpoint" -}} +{{- printf "%s.%s.svc.cluster.local:%s" (include "graphscope-interactive.master.serviceName" . ) .Release.Namespace (include "graphscope-interactive.master.servicePort" .) | trimSuffix "-" }} +{{- end -}} + +{{- define "graphscope-interactive.master.entrypointMountPath" -}} +{{- if .Values.master.entrypointMountPath }} +{{- .Values.master.entrypointMountPath }} +{{- else }} +{{- "/etc/interactive/master_entrypoint.sh" }} +{{- end -}} +{{- end -}} + + +{{- define "graphscope-interactive.engine.entrypointMountPath" -}} +{{- if .Values.engine.entrypointMountPath }} +{{- .Values.engine.entrypointMountPath }} +{{- else }} +{{- "/etc/interactive/engine_entrypoint.sh" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.master.command" -}} +{{- if .Values.master.command }} +{{- toYaml .Values.master.command }} +{{- else }} +{{- include "graphscope-interactive.master.entrypointMountPath" . }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.engine.command" -}} +{{- if .Values.engine.command }} +{{- toYaml .Values.engine.command }} +{{- else }} +{{- include "graphscope-interactive.engine.entrypointMountPath" . }} +{{- end -}} +{{- end -}} + + + +{{- define "graphscope-interactive.engine.metadataStoreUri" -}} +{{- if .Values.engine.metadataStoreUri -}} +{{- .Values.engine.metadataStoreUri }} +{{- else }} +{{- include "graphscope-interactive.etcd.endpoint" . 
}} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.engine.compiler.metaReaderSchemaUri" -}} +{{- if .Values.engine.compiler.meta.reader.schema.uri -}} +{{- .Values.engine.compiler.meta.reader.schema.uri }} +{{- else }} +{{- printf "http://%s/v1/graph/%s/schema" (include "graphscope-interactive.master.endpoint" . ) "1" | trimSuffix "-" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.engine.compiler.metaStatisticsUri" -}} +{{- if .Values.engine.compiler.meta.reader.statistics.uri -}} +{{- .Values.engine.compiler.meta.reader.statistics.uri }} +{{- else }} +{{- printf "http://%s/v1/graph/%s/statistics" (include "graphscope-interactive.master.endpoint" . ) "1" | trimSuffix "-" }} +{{- end -}} +{{- end -}} + + +{{- define "graphscope-interactive.engine.walUri" -}} +{{- if .Values.engine.walUri -}} +{{- .Values.engine.walUri }} +{{- else }} +{{- "file://{GRAPH_DATA_DIR}/wal" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.master.serviceRegistry.endpoint" -}} +{{- if .Values.master.serviceRegistry.endpoint }} +{{- .Values.master.serviceRegistry.endpoint }} +{{- else }} +{{- include "graphscope-interactive.etcd.endpoint" . }} +{{- end -}} +{{- end -}} + + +{{- define "graphscope-interactive.master.configFileMountPath" -}} +{{- if .Values.master.configFileMountPath }} +{{- .Values.master.configFileMountPath }} +{{- else }} +{{- "/opt/flex/share/interactive_config.yaml" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.engine.configFileMountPath" -}} +{{- if .Values.engine.configFileMountPath }} +{{- .Values.engine.configFileMountPath }} +{{- else }} +{{- "/opt/flex/share/interactive_config.yaml" }} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.labels" -}} +helm.sh/chart: {{ include "graphscope-interactive.chart" . }} +{{ include "graphscope-interactive.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{- define "graphscope-interactive.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "graphscope-interactive.selectorLabels" -}} +app.kubernetes.io/name: {{ include "graphscope-interactive.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{- define "graphscope-interactive.master.image" -}} +{{- $tag := .Chart.AppVersion | toString -}} +{{- with .Values.master.image -}} +{{- if .tag -}} +{{- $tag = .tag | toString -}} +{{- end -}} +{{- if .registry -}} +{{- printf "%s/%s:%s" .registry .repository $tag -}} +{{- else -}} +{{- printf "%s:%s" .repository $tag -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{- define "graphscope-interactive.engine.image" -}} +{{- $tag := .Chart.AppVersion | toString -}} +{{- with .Values.engine.image -}} +{{- if .tag }} +{{- $tag = .tag | toString -}} +{{- end -}} +{{- if .registry -}} +{{- printf "%s/%s:%s" .registry .repository $tag -}} +{{- else -}} +{{- printf "%s:%s" .repository $tag -}} +{{- end -}} +{{- end -}} +{{- end -}} + + +{{- define "graphscope-interactive.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "graphscope-interactive.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} + + +{{- define "graphscope-interactive.storageClass" -}} +{{/* +Helm 2.11 supports the assignment of a value to a variable defined in a different scope, +but Helm 2.9 and 2.10 does not support it, so we need to implement this if-else logic. 
+*/}} +{{- if .Values.global -}} + {{- if .Values.global.storageClass -}} + {{- if (eq "-" .Values.global.storageClass) -}} + {{- printf "storageClassName: \"\"" -}} + {{- else }} + {{- printf "storageClassName: %s" .Values.global.storageClass -}} + {{- end -}} + {{- else -}} + {{- if .Values.persistence.storageClass -}} + {{- if (eq "-" .Values.persistence.storageClass) -}} + {{- printf "storageClassName: \"\"" -}} + {{- else }} + {{- printf "storageClassName: %s" .Values.persistence.storageClass -}} + {{- end -}} + {{- end -}} + {{- end -}} +{{- else -}} + {{- if .Values.persistence.storageClass -}} + {{- if (eq "-" .Values.persistence.storageClass) -}} + {{- printf "storageClassName: \"\"" -}} + {{- else }} + {{- printf "storageClassName: %s" .Values.persistence.storageClass -}} + {{- end -}} + {{- end -}} +{{- end -}} +{{- end -}} + + +{{- define "graphscope-interactive.configmapName" -}} +{{- if .Values.existingConfigmap -}} + {{- printf "%s" (tpl .Values.existingConfigmap $) -}} +{{- else -}} + {{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "config" | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + + + +{{- define "graphscope-interactive.engineBinaryPath" -}} +/opt/flex/bin/interactive_server +{{- end -}} + +{{- define "graphscope-interactive.bulkLoaderBinaryPath" -}} +/opt/flex/bin/bulk_loader +{{- end -}} + + +{{- define "graphscope-interactive.createConfigmap" -}} +{{- if not .Values.existingConfigmap }} + {{- true -}} +{{- else -}} +{{- end -}} +{{- end -}} + + +{{/* +Renders a value that contains template. 
+Usage: +{{ include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.path.to.the.Value "context" $) }} +*/}} +{{- define "graphscope-interactive.tplvalues.render" -}} + {{- if typeIs "string" .value }} + {{- tpl .value .context }} + {{- else }} + {{- tpl (.value | toYaml) .context }} + {{- end -}} +{{- end -}} + + +{{/* +Return the proper Storage Class +{{ include "graphscope-interactive.storage.class" .Values.path.to.the.persistence }} +*/}} +{{- define "graphscope-interactive.storage.class" -}} + +{{- $storageClass := .storageClass -}} +{{- if $storageClass -}} + {{- if (eq "-" $storageClass) -}} + {{- printf "storageClassName: \"\"" -}} + {{- else }} + {{- printf "storageClassName: %s" $storageClass -}} + {{- end -}} +{{- end -}} + +{{- end -}} + diff --git a/charts/interactive/templates/configmap.yaml b/charts/interactive/templates/configmap.yaml new file mode 100644 index 000000000000..6a3d1a9383c3 --- /dev/null +++ b/charts/interactive/templates/configmap.yaml @@ -0,0 +1,107 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "graphscope-interactive.configmapName" . }} + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . 
| nindent 4 }} + app.kubernetes.io/component: configmap + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + {{- if .Values.commonAnnotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +data: + interactive_config.yaml: |- + log_level: {{ .Values.engine.logLevel }} + verbose_level: {{ .Values.engine.verboseLevel }} + default_graph: {{ .Values.defaultGraph }} + compute_engine: + type: hiactor + workers: + - localhost: {{ .Values.engine.service.queryPort }} + thread_num_per_worker: {{ .Values.engine.threadNumPerWorker }} + memory_per_worker: {{ .Values.engine.resources.limits.memory }} + store: + type: cpp-mcsr + metadata_store: + uri: {{ include "graphscope-interactive.engine.metadataStoreUri" . }} + wal_uri: {{ include "graphscope-interactive.engine.walUri" . }} + compiler: + planner: + is_on: true + opt: RBO + rules: + - FilterIntoJoinRule + - FilterMatchRule + - NotMatchToAntiJoinRule + meta: + reader: + schema: + uri: {{ include "graphscope-interactive.engine.compiler.metaReaderSchemaUri" . }} + interval: {{ printf "%d" ( int .Values.engine.compiler.meta.reader.schema.interval) }} + statistics: + uri: {{ include "graphscope-interactive.engine.compiler.metaStatisticsUri" . 
}} + interval: {{ printf "%d" (int .Values.engine.compiler.meta.reader.statistics.interval) }} + timeout: {{ .Values.engine.compiler.meta.reader.timeout }} + endpoint: + default_listen_address: localhost + bolt_connector: + disabled: false + port: {{ .Values.engine.compiler.endpoint.boltConnector.port }} + gremlin_connector: + disabled: true + port: {{ .Values.engine.compiler.endpoint.gremlinConnector.port }} + query_timeout: {{ .Values.engine.compiler.queryTimeout }} + http_service: + default_listen_address: localhost + admin_port: {{ .Values.engine.service.adminPort }} + query_port: {{ .Values.engine.service.queryPort }} + max_content_length: {{ .Values.engine.service.maxContentLength }} + master: + instance_name: {{ include "graphscope-interactive.master.fullname" . }} + admin_port: {{ .Values.master.service.adminPort }} + service_registry: + type: etcd + endpoint: {{ include "graphscope-interactive.master.serviceRegistry.endpoint" . }} + ttl: {{ .Values.master.serviceRegistry.ttl }} + launcher_type: k8s + k8s_launcher_config: + namespace: {{ .Release.Namespace }} + instance_prefix: {{ .Values.master.k8sLauncherConfig.instancePrefix }} + config_file: {{ .Values.master.k8sLauncherConfig.configFile }} + image_pull_policy: {{ .Values.master.k8sLauncherConfig.imagePullPolicy }} + default_replicas: {{ .Values.master.k8sLauncherConfig.defaultReplicas }} + service_account_create: false + service_account_name: {{ include "graphscope-interactive.serviceAccountName" . }} + + master_entrypoint.sh: |- + #!/bin/bash + # This should be the entrypoint of the master instance + echo "Starting master instance..." + mkdir -p {{ include "graphscope-interactive.master.workspace" . }} + mkdir -p {{ include "graphscope-interactive.master.codegenWorkDir" . }} + echo "using configfile: {{ include "graphscope-interactive.master.configFileMountPath" . }}" + #python3 -m gs_interactive_admin --config-file {{ include "graphscope-interactive.master.configFileMountPath" . 
}} + sleep infinity + + + engine_entrypoint.sh: |- + #!/bin/bash + # This should be the entrypoint of the engine instance + if [ $# -gt 1 ]; then + echo "Expect one optional argument [GraphSchemaPath], bot got $#" + exit + fi + if [ $# -eq 1 ]; then + graph_file=$1 + else + graph_file="{{ include "graphscope-interacitve.engine.defaultGraphSchemaPath" . }}" + fi + echo "Starting engine instance..." + echo "using configfile: {{ include "graphscope-interactive.engine.configFileMountPath" . }}" + echo "Workspace: {{ .Values.workspace }} " + mkdir -p {{ .Values.workspace }}/conf + echo "config file ${graph_file}" + #/opt/flex/bin/entrypoint.sh -w {{ .Values.workspace }} -g $graph_file + sleep infinity \ No newline at end of file diff --git a/charts/interactive/templates/engine/statefulset.yaml b/charts/interactive/templates/engine/statefulset.yaml new file mode 100644 index 000000000000..7280f98923a4 --- /dev/null +++ b/charts/interactive/templates/engine/statefulset.yaml @@ -0,0 +1,151 @@ +{{- $releaseNamespace := .Release.Namespace }} +{{- $clusterDomain := .Values.clusterDomain }} + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "graphscope-interactive.engine.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} + app.kubernetes.io/component: engine + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + {{- if .Values.commonAnnotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.engine.replicaCount }} + selector: + matchLabels: {{ include "graphscope-interactive.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: engine + serviceName: {{ include "graphscope-interactive.engine.fullname" . 
}}-headless + updateStrategy: + type: {{ .Values.engine.updateStrategy }} + {{- if (eq "Recreate" .Values.engine.updateStrategy) }} + rollingUpdate: null + {{- end }} + template: + metadata: + annotations: + {{- if .Values.engine.podAnnotations }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.engine.podAnnotations "context" $) | nindent 8 }} + {{- end }} + labels: {{- include "graphscope-interactive.labels" . | nindent 8 }} + app.kubernetes.io/component: engine + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 8 }} + {{- end }} + spec: + {{- if .Values.imagePullSecrets }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: {{- toYaml . | nindent 8 }} + {{- end }} + {{- end }} + serviceAccountName: {{ include "graphscope-interactive.serviceAccountName" . }} + {{- if .Values.engine.affinity }} + affinity: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.engine.affinity "context" $) | nindent 8 }} + {{- end }} + initContainers: + {{- if .Values.engine.initContainers }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.engine.initContainers "context" $) | nindent 8 }} + {{- end }} + containers: + - name: engine + image: {{ include "graphscope-interactive.engine.image" . }} + imagePullPolicy: {{ .Values.engine.image.pullPolicy | quote }} + command: + - {{ include "graphscope-interactive.engine.command" . | quote }} + env: + - name: INTERACTIVE_WORKSPACE + value: {{ .Values.workspace | quote }} + - name: OSS_ACCESS_KEY_ID + value: {{ include "graphscope-interactive.ossAccessKeyId" . }} + - name: OSS_ACCESS_KEY_SECRET + value: {{ include "graphscope-interactive.ossAccessKeySecret" . }} + - name: OSS_ENDPOINT + value: {{ include "graphscope-interactive.ossEndpoint" . }} + - name: OSS_BUCKET_NAME + value: {{ include "graphscope-interactive.ossBucketName" . 
}} + - name: FLEX_INTERACTIVE_ENGINE_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + ports: + - name: admin-port + containerPort: {{ .Values.engine.service.adminPort }} + - name: query-port + containerPort: {{ .Values.engine.service.queryPort }} + {{- if .Values.engine.resources }} + resources: {{- toYaml .Values.engine.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: workspace + mountPath: {{ .Values.workspace }} + - name: config + mountPath: {{ include "graphscope-interactive.engine.configFileMountPath" . }} + subPath: interactive_config.yaml + - name: engine-entrypoint + mountPath: {{ include "graphscope-interactive.engine.entrypointMountPath" . }} + subPath: engine_entrypoint.sh + {{- if and .Values.engine.livenessProbe .Values.engine.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.engine.livenessProbe.path }} + port: {{ .Values.engine.service.adminPort }} + initialDelaySeconds: {{ .Values.engine.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.engine.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.engine.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.engine.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.engine.livenessProbe.failureThreshold }} + {{- end }} + {{- if and .Values.engine.readinessProbe .Values.engine.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.engine.readinessProbe.path }} + port: {{ .Values.engine.service.queryPort }} + initialDelaySeconds: {{ .Values.engine.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.engine.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.engine.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.engine.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.engine.readinessProbe.failureThreshold }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ include "graphscope-interactive.configmapName" . 
}} + defaultMode: 0755 + - name: engine-entrypoint + configMap: + name: {{ include "graphscope-interactive.configmapName" . }} + defaultMode: 0755 + {{- if and .Values.engine.persistence.enabled .Values.engine.persistence.existingClaim }} + - name: workspace + persistentVolumeClaim: + claimName: {{ tpl .Values.engine.persistence.existingClaim . }} + {{- else if not .Values.engine.persistence.enabled }} + - name: workspace + emptyDir: {} + {{- else if and .Values.engine.persistence.enabled (not .Values.engine.persistence.existingClaim) }} + volumeClaimTemplates: + - metadata: + name: workspace + {{- if .Values.persistence.annotations }} + annotations: {{- include "common.tplvalues.render" (dict "value" .Values.persistence.annotations "context" $) | nindent 10 }} + {{- end }} + {{- if .Values.persistence.labels }} + labels: {{- include "common.tplvalues.render" (dict "value" .Values.persistence.labels "context" $) | nindent 10 }} + {{- end }} + spec: + accessModes: + {{- range .Values.persistence.accessModes }} + - {{ . | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.engine.persistence.size | quote }} + {{ include "graphscope-interactive.storageClass" . | nindent 8 }} + {{- if .Values.engine.persistence.selector }} + selector: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.engine.persistence.selector "context" $) | nindent 10 }} + {{- end -}} + {{- end }} diff --git a/charts/interactive/templates/engine/svc-headless.yaml b/charts/interactive/templates/engine/svc-headless.yaml new file mode 100644 index 000000000000..4f1c452a8030 --- /dev/null +++ b/charts/interactive/templates/engine/svc-headless.yaml @@ -0,0 +1,41 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "graphscope-interactive.engine.fullname" . }}-headless + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . 
| nindent 4 }} + app.kubernetes.io/component: engine + {{- if .Values.commonLabels }} + {{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.commonAnnotations }} + {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.engine.service.type }} + {{- if and (eq .Values.engine.service.type "ClusterIP") .Values.engine.service.clusterIP }} + clusterIP: {{ .Values.engine.service.clusterIP }} + {{- end }} + {{- if and .Values.engine.service.loadBalancerIP (eq .Values.engine.service.type "LoadBalancer") }} + loadBalancerIP: {{ .Values.engine.service.loadBalancerIP }} + externalTrafficPolicy: {{ .Values.engine.service.externalTrafficPolicy | quote }} + {{- end }} + {{- if and (eq .Values.engine.service.type "LoadBalancer") .Values.engine.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: {{- toYaml .Values.engine.service.loadBalancerSourceRanges | nindent 4 }} + {{- end }} + ports: + - name: admin-port + port: {{ .Values.engine.service.adminPort }} + protocol: TCP + targetPort: admin-port + - name: query-port + port: {{ .Values.engine.service.queryPort }} + protocol: TCP + targetPort: query-port + - name: cypher-port + port: {{ .Values.engine.service.cypherPort }} + protocol: TCP + targetPort: cypher-port + selector: {{- include "graphscope-interactive.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: engine diff --git a/charts/interactive/templates/master/statefulset.yaml b/charts/interactive/templates/master/statefulset.yaml new file mode 100644 index 000000000000..05f9382af3bd --- /dev/null +++ b/charts/interactive/templates/master/statefulset.yaml @@ -0,0 +1,114 @@ +{{- $masterFullName := include "graphscope-interactive.master.fullname" . 
}} +{{- $releaseNamespace := .Release.Namespace }} +{{- $clusterDomain := .Values.clusterDomain }} + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "graphscope-interactive.master.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} + app.kubernetes.io/component: master + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + {{- if .Values.commonAnnotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.master.replicaCount }} + selector: + matchLabels: {{ include "graphscope-interactive.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: master + serviceName: {{ include "graphscope-interactive.master.fullname" . }}-headless + updateStrategy: + type: {{ .Values.master.updateStrategy }} + {{- if (eq "Recreate" .Values.master.updateStrategy) }} + rollingUpdate: null + {{- end }} + template: + metadata: + annotations: + {{- if .Values.master.podAnnotations }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.master.podAnnotations "context" $) | nindent 8 }} + {{- end }} + labels: {{- include "graphscope-interactive.labels" . | nindent 8 }} + app.kubernetes.io/component: master + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "graphscope-interactive.serviceAccountName" . 
}} + {{- if .Values.master.affinity }} + affinity: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.master.affinity "context" $) | nindent 8 }} + {{- end }} + initContainers: + {{- if .Values.master.initContainers }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.master.initContainers "context" $) | nindent 8 }} + {{- end }} + containers: + - name: master + image: {{ include "graphscope-interactive.master.image" . }} + imagePullPolicy: {{ .Values.master.image.pullPolicy | quote }} + command: + - {{ include "graphscope-interactive.master.command" . | quote }} + env: + - name: INTERACTIVE_WORKSPACE + value: {{ include "graphscope-interactive.master.workspace" . }} + - name: OSS_ACCESS_KEY_ID + value: {{ include "graphscope-interactive.ossAccessKeyId" . }} + - name: OSS_ACCESS_KEY_SECRET + value: {{ include "graphscope-interactive.ossAccessKeySecret" . }} + - name: OSS_ENDPOINT + value: {{ include "graphscope-interactive.ossEndpoint" . }} + - name: OSS_BUCKET_NAME + value: {{ include "graphscope-interactive.ossBucketName" . }} + - name: INTERACTIVE_CODE_GEN_WORKDIR + value: {{ include "graphscope-interactive.master.codegenWorkDir" . }} + ports: + - name: admin-port + containerPort: {{ .Values.master.service.adminPort }} + {{- if .Values.master.resources }} + resources: {{- toYaml .Values.master.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: config + mountPath: {{ include "graphscope-interactive.master.configFileMountPath" . }} + subPath: interactive_config.yaml + - name: master-entrypoint + mountPath: {{ include "graphscope-interactive.master.entrypointMountPath" . 
}} + subPath: master_entrypoint.sh + {{- if and .Values.master.livenessProbe .Values.master.livenessProbe.enabled }} + livenessProbe: + httpGet: + path: {{ .Values.master.livenessProbe.path }} + port: {{ .Values.master.service.adminPort }} + initialDelaySeconds: {{ .Values.master.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.master.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.master.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.master.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.master.livenessProbe.failureThreshold }} + {{- end }} + {{- if and .Values.master.readinessProbe .Values.master.readinessProbe.enabled }} + readinessProbe: + httpGet: + path: {{ .Values.master.readinessProbe.path }} + port: {{ .Values.master.service.adminPort }} + initialDelaySeconds: {{ .Values.master.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.master.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.master.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.master.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.master.readinessProbe.failureThreshold }} + {{- end }} + volumes: + - name: config + configMap: + name: {{ include "graphscope-interactive.configmapName" . }} + defaultMode: 0755 + - name: master-entrypoint + configMap: + name: {{ include "graphscope-interactive.configmapName" . }} + defaultMode: 0755 diff --git a/charts/interactive/templates/master/svc-headless.yaml b/charts/interactive/templates/master/svc-headless.yaml new file mode 100644 index 000000000000..daa139fd7d20 --- /dev/null +++ b/charts/interactive/templates/master/svc-headless.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "graphscope-interactive.master.serviceName" . }} + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . 
| nindent 4 }} + app.kubernetes.io/component: master + {{- if .Values.commonLabels }} + {{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.commonAnnotations }} + {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.master.service.type }} + {{- if and (eq .Values.master.service.type "ClusterIP") .Values.master.service.clusterIP }} + clusterIP: {{ .Values.master.service.clusterIP }} + {{- end }} + {{- if and .Values.master.service.loadBalancerIP (eq .Values.master.service.type "LoadBalancer") }} + loadBalancerIP: {{ .Values.master.service.loadBalancerIP }} + externalTrafficPolicy: {{ .Values.master.service.externalTrafficPolicy | quote }} + {{- end }} + {{- if and (eq .Values.master.service.type "LoadBalancer") .Values.master.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: {{- toYaml .Values.master.service.loadBalancerSourceRanges | nindent 4 }} + {{- end }} + ports: + - name: admin-port + port: {{ .Values.master.service.adminPort }} + protocol: TCP + targetPort: admin-port + selector: {{- include "graphscope-interactive.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: master diff --git a/charts/interactive/templates/role_and_binding.yaml b/charts/interactive/templates/role_and_binding.yaml new file mode 100644 index 000000000000..8be44afa6688 --- /dev/null +++ b/charts/interactive/templates/role_and_binding.yaml @@ -0,0 +1,26 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ include "graphscope-interactive.fullname" . 
}}-role + namespace: {{ .Release.Namespace }} +rules: +- apiGroups: ["apps", "extensions", ""] + resources: ["configmaps", "statefulsets", "statefulsets/status", "endpoints", "events", "pods", "pods/log", "pods/exec", "pods/status", "services"] + verbs: ["create", "get", "list", "watch", "delete", "update"] +- apiGroups: ["metrics.k8s.io"] + resources: ["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ include "graphscope-interactive.fullname" . }}-role-binding + namespace: {{ .Release.Namespace }} +subjects: +- kind: ServiceAccount + name: {{ include "graphscope-interactive.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ include "graphscope-interactive.fullname" . }}-role + apiGroup: rbac.authorization.k8s.io diff --git a/charts/interactive/templates/serviceaccount.yaml b/charts/interactive/templates/serviceaccount.yaml new file mode 100644 index 000000000000..23aa390cd1cb --- /dev/null +++ b/charts/interactive/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "graphscope-interactive.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "graphscope-interactive.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/charts/interactive/values.yaml b/charts/interactive/values.yaml new file mode 100644 index 000000000000..089e3d3d1094 --- /dev/null +++ b/charts/interactive/values.yaml @@ -0,0 +1,447 @@ +# Default values for graphscope-interactive. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +nameOverride: "" + +fullnameOverride: "" + +## Cluster domain +## +clusterDomain: cluster.local + +## Common annotations to add to all GraphScope resources (sub-charts are not considered). Evaluated as a template +## +commonAnnotations: {} + +## Common labels to add to all GraphScope resources (sub-charts are not considered). Evaluated as a template +## +commonLabels: {} + +## +javaOpts: "" + +## Vineyard or Experimental Storage +# storageType: Vineyard + +## The workspace directory for interactive. +workspace: "/tmp/interactive_workspace" + +## default graph +defaultGraph: modern_graph + +hiactorWorkerNum: 1 + +hiactorTimeout: 240000 + +# ## need by vineyard in distributed env +# etcdEndpoint: "etcd-for-vineyard.default.svc.cluster.local:2379" + +## Configure GraphScope Components with a custom groot.config file +# extraConfig: "" + +## Role Based Access +## ref: https://kubernetes.io/docs/admin/authorization/rbac/ +## +rbac: + ## Specifies whether RBAC rules should be created + ## + create: false + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "interactive-service-account" + +## Name of existing ConfigMap with GraphScope configuration. +## NOTE: When it's set the 'configuration' parameter is ignored +## +# existingConfiguration: + +persistence: + ## @param persistence.storageClass PVC Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. 
+ ## + storageClass: "" + accessModes: + - ReadWriteOnce + annotations: {} + ## @param persistence.labels Labels for the PVC + ## + labels: {} + +master: + image: + registry: registry.cn-hongkong.aliyuncs.com + repository: graphscope/interactive + tag: "master-debug" + pullPolicy: Always + pullSecrets: [] + + instance_name: "" + serviceRegistry: + type: etcd + endpoint: "" + # The endpoint is set to none, should be set to the service endpoint of etcd service + ttl: 5 + launcher_type: k8s + k8sLauncherConfig: + instancePrefix: default + configFile: ~/.kube/config + imagePullPolicy: Always + defaultReplicas: 1 + + replicaCount: 1 + updateStrategy: RollingUpdate + podAnnotations: {} + podAffinityPreset: "" + podAntiAffinityPreset: soft + nodeSelector: {} + tolerations: [] + podSecurityContext: + enabled: false + fsGroup: 1001 + containerSecurityContext: + enabled: false + runAsUser: 1001 + + resources: + limits: + cpu: 1000m + memory: 1Gi + requests: + cpu: 1000m + memory: 1Gi + + # livenessProbe: + # enabled: true + # path: /v1/service/ready + # initialDelaySeconds: 120 + # periodSeconds: 10 + # timeoutSeconds: 1 + # failureThreshold: 3 + # successThreshold: 1 + readinessProbe: + enabled: true + path: /v1/service/ready + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + successThreshold: 1 + + initContainers: [] + + # Master has no persistence configuration. 
It should store all data in etcd + + service: + type: ClusterIP + adminPort: 7776 + + nodePorts: + admin: "" + + clusterIP: "" + + loadBalancerIP: "" + externalTrafficPolicy: Cluster + loadBalancerSourceRanges: [] + annotations: {} + + pdb: + enabled: false + minAvailable: 1 + + podLabels: {} + + entrypointMountPath: /etc/interactive/master_entrypoint.sh + configFileMountPath: /opt/flex/share/interactive_config.yaml + +## GraphScope Interactive parameters +## +engine: + image: + registry: registry.cn-hongkong.aliyuncs.com + repository: graphscope/interactive + # Overrides the image tag whose default is the chart appVersion. + tag: "debug" + ## Specify a imagePullPolicy + ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' + ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images + ## + pullPolicy: "Always" + ## Optionally specify an array of imagePullSecrets (secrets must be manually created in the namespace) + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## Example: + ## pullSecrets: + ## - myRegistryKeySecretName + ## + pullSecrets: [] + + metdadataStoreUri: "" + walUri: "file://{GRAPH_DATA_DIR}/wal" + + replicaCount: 1 + + logLevel: INFO + + verboseLevel: 1 + + # Number of thread each worker will use + threadNumPerWorker: 1 + + ## updateStrategy for GraphScope Interactive statefulset + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#update-strategies + ## + updateStrategy: RollingUpdate + + ## GraphScope Interactive pod annotations + ## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ + ## + podAnnotations: {} + + ## GraphScope Interactive pod affinity preset + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity + ## Allowed values: soft, hard + ## + podAffinityPreset: "" + + ## GraphScope Interactive pod anti-affinity preset + ## ref: 
https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity + ## Allowed values: soft, hard + ## + podAntiAffinityPreset: soft + + ## Affinity for GraphScope Interactive pods assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + ## Note: podAffinityPreset, podAntiAffinityPreset, and nodeAffinityPreset will be ignored when it's set + ## + # affinity: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: app + # operator: In + # values: + # - interactive_single_node + + ## Node labels for GraphScope Interactive pods assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: {} + + ## Tolerations for GraphScope Interactive pods assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + ## + tolerations: [] + + ## GraphScope Interactive Pod security context + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod + ## + podSecurityContext: + enabled: false + fsGroup: 1001 + + ## GraphScope Interactive container security context + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container + ## + containerSecurityContext: + enabled: false + runAsUser: 1001 + + ## GraphScope Interactive container's resource requests and limits + ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ limits: + cpu: 1000m + memory: 1Gi + requests: + cpu: 1000m + memory: 1Gi + + ## GraphScope Interactive container's liveness and readiness probes + ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes + ## + # livenessProbe: + # enabled: true + # path: /v1/service/ready + # initialDelaySeconds: 120 + # periodSeconds: 10 + # timeoutSeconds: 1 + # failureThreshold: 3 + # successThreshold: 1 + readinessProbe: + enabled: true + path: /v1/service/ready + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + successThreshold: 1 + + ## Enable persistence using Persistent Volume Claims + ## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ + ## + persistence: + ## If true, use a Persistent Volume Claim, If false, use emptyDir + ## + enabled: true + ## Name of existing PVC to hold GraphScope Interactive data + ## NOTE: When it's set the rest of persistence parameters are ignored + ## + # existingClaim: "graphscope-interactive-pvc" + existingClaim: "" + + ## Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "manual" + ## Persistent Volume Claim annotations + ## + annotations: {} + ## Persistent Volume Access Mode + ## + accessModes: + - ReadWriteOnce # read and write by a single node. 
+ ## Persistent Volume size + ## + size: 1Gi + ## selector can be used to match an existing PersistentVolume + ## selector: + ## matchLabels: + ## app: my-app + ## + selector: {} + + initContainers: [] + + ## GraphScope interactive Service parameters + ## + service: + ## Service type + ## + type: NodePort + queryPort: 10000 + cypherPort: 7687 + adminPort: 7777 + maxContentLength: 1GB + + ## Specify the nodePort value for the LoadBalancer and NodePort service types. + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + ## + nodePorts: + service: "" + query: "" + admin: "" + ## Service clusterIP + ## + # clusterIP: None + clusterIP: "" + ## Set the LoadBalancer service type to internal only. + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer + ## + loadBalancerIP: "" + ## Enable client source IP preservation + ## ref http://kubernetes.io/docs/tasks/access-application-cluster/create-external-load-balancer/#preserving-the-client-source-ip + ## + externalTrafficPolicy: Cluster + ## Load Balancer sources + ## https://kubernetes.io/docs/tasks/access-application-cluster/configure-cloud-provider-firewall/#restrict-access-for-loadbalancer-service + ## E.g. + ## loadBalancerSourceRanges: + ## - 10.10.10.0/24 + ## + loadBalancerSourceRanges: [] + ## Provide any additional annotations which may be required + ## + annotations: {} + + ## GraphScope Interactive Pod Disruption Budget configuration + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ + ## + pdb: + enabled: false + ## Min number of pods that must still be available after the eviction + ## + minAvailable: 1 + ## Max number of pods that can be unavailable after the eviction + ## + # maxUnavailable: 1 + + ## GraphScope Interactive pod label. If labels are same as commonLabels , this will take precedence. + ## + podLabels: {} + + # In GraphScope Interactive, compiler is a component inside engine. 
+ compiler: + meta: + reader: + schema: + uri: "" + interval: 1000 + statistics: + uri: "" + interval: 86400000 + timeout: 1000 + endpoint: + default_listen_address: localhost + boltConnector: + disabled: false + port: 7687 + gremlinConnector: + disabled: false + port: 8182 + queryTimeout: 40000 + + configFileMountPath: /opt/flex/share/interactive_config.yaml + entrypointMountPath: /etc/interactive/engine_entrypoint.sh + +## Kafka Config +## +## Kafka chart configuration +## +## https://github.com/bitnami/charts/blob/master/bitnami/kafka/values.yaml +## +kafka: + enabled: true + replicaCount: 1 + service: + ## Kafka port for client connections + ## + port: 9092 + socketRequestMaxBytes: _1048576000 + +etcd: + enabled: true + auth: + rbac: + create: false + replicaCount: 1 + +## TODO: etcd config + +global: + storageClass: "" + +oss: + accessKeyId: "" + accessKeySecret: "" + endpoint: "" + bucketName: "" diff --git a/docs/flex/interactive/development/dev_and_test.md b/docs/flex/interactive/development/dev_and_test.md index e831f00e9591..a1b221bfa32b 100644 --- a/docs/flex/interactive/development/dev_and_test.md +++ b/docs/flex/interactive/development/dev_and_test.md @@ -158,10 +158,10 @@ Subsequently, execute the `hqps_admin_test.sh` script to test the of the interac ```bash cd ${GITHUB_WORKSPACE}/flex/tests/hqps # Change the default_graph field to -bash hqps_admin_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_test.yaml ${GS_TEST_DIR} +bash hqps_admin_test.sh ${TMP_INTERACTIVE_WORKSPACE} ./interactive_config_standalone.yaml ${GS_TEST_DIR} ``` -The `interactive_config_test.yaml` specifies the configuration for interactive services. +The `interactive_config_standalone.yaml` specifies the configuration for interactive services. ```yaml directories: @@ -262,3 +262,38 @@ In Interactive's execution engine, transactions such as `ReadTransaction`, `Upda 2. 
If a transaction returns `false` during the `commit()` process, the error occurred prior to applying the WAL to the graph data. This type of failure could arise during the construction of the WAL or during its writing phase. 3. It is important to note that errors can still occur when replaying the WAL to the graph database. Replaying might fail due to limitations in resources or due to unforeseen bugs. **However,** any errors encountered during this stage will be handled via exceptions or may result in process failure. Currently, there is no established mechanism to handle such failures. Future improvements should focus on implementing failover strategies, potentially allowing the GraphDB to continue replaying the WAL until it succeeds. + + +## Enable Service Registry + +```bash +ETCD_VER=v3.4.13 + +# choose either URL +GOOGLE_URL=https://storage.googleapis.com/etcd +GITHUB_URL=https://github.com/etcd-io/etcd/releases/download +DOWNLOAD_URL=${GOOGLE_URL} + +rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz +rm -rf /tmp/etcd-download-test && mkdir -p /tmp/etcd-download-test + +curl -L ${DOWNLOAD_URL}/${ETCD_VER}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz +tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /tmp/etcd-download-test --strip-components=1 +rm -f /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz + +cd /tmp/etcd-download-test +# You may find etcd and etcdctl in the directory +``` + +Start a local etcd server + +```bash +cd /tmp/etcd-download-test +etcd +``` + +Now in another terminal test etcd metadata store. 
+```bash +cd GraphScope/flex/build +./tests/hqps/etcd_meta_test http://localhost:2379 +``` \ No newline at end of file diff --git a/flex/CMakeLists.txt b/flex/CMakeLists.txt index f05e2190205e..c6f2322d055e 100644 --- a/flex/CMakeLists.txt +++ b/flex/CMakeLists.txt @@ -17,7 +17,10 @@ option(USE_PTHASH "Whether to use pthash" OFF) option(OPTIMIZE_FOR_HOST "Whether to optimize on host" ON) # Whether to build optimized code on host option(USE_STATIC_ARROW "Whether to use static arrow" OFF) # Whether to link arrow statically, default is OFF option(BUILD_WITH_OTEL "Whether to build with opentelemetry-cpp" OFF) # Whether to build with opentelemetry-cpp, default is OFF + +option(ENABLE_SERVICE_REGISTER "Whether to enable service register" ON) # Whether to enable service register, default is OFF option(BUILD_WITH_OSS "Whether to build with oss support" OFF) # Whether to build with oss support, default is OFF +option(BUILD_FOR_MASTER "Only to build bulkloader" OFF) #print options message(STATUS "Build test: ${BUILD_TEST}") @@ -31,6 +34,7 @@ message(STATUS "Use pthash indexer : ${USE_PTHASH}") include(CheckLibraryExists) include(GNUInstallDirs) +include(CheckCXXCompilerFlag) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) include_directories(${CMAKE_CURRENT_SOURCE_DIR}) @@ -54,8 +58,8 @@ endif() if (BUILD_WITH_OSS) add_definitions(-DBUILD_WITH_OSS) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/aliyun-oss-cpp-sdk/sdk/include) - set(TARGET_OUTPUT_NAME_PREFIX "alibabacloud-oss-" CACHE STRING "The target's output name prefix") + set(OSS_CPP_SDK_INCLUDE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third_party/aliyun-oss-cpp-sdk/sdk/include) + include_directories(${OSS_CPP_SDK_INCLUDE_DIR}) add_subdirectory(third_party/aliyun-oss-cpp-sdk) endif() @@ -94,6 +98,11 @@ endif() set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") +# if compiler support -Wno-format-truncation, add it 
+CHECK_CXX_COMPILER_FLAG("-Wno-format-truncation" COMPILER_SUPPORTS_WNO_FORMAT_TRUNCATION) +if(COMPILER_SUPPORTS_WNO_FORMAT_TRUNCATION) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format-truncation") +endif() find_package(MPI REQUIRED) include_directories(SYSTEM ${MPI_CXX_INCLUDE_PATH}) @@ -137,7 +146,9 @@ add_definitions("-DBOOST_BIND_GLOBAL_PLACEHOLDERS") include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) #find hiactor---------------------------------------------------------------------- -find_package(Hiactor) +if (NOT BUILD_FOR_MASTER) + find_package(Hiactor) +endif() if (NOT Hiactor_FOUND) message(STATUS "hiactor not found, please install the hiactor library") else () @@ -234,15 +245,31 @@ macro(install_without_export_flex_target target) ) endmacro() +if (ENABLE_SERVICE_REGISTER) + include("cmake/BuildEtcdCpp.cmake") + include_directories(SYSTEM ${CPPREST_INCLUDE_DIR}) + message(STATUS "Include directory: ${CPPREST_INCLUDE_DIR}") + foreach(dir ${ETCD_CPP_INCLUDE_DIR}) + message(STATUS "Include directory: ${dir}") + include_directories(SYSTEM ${dir}) + endforeach() +endif() + +if (ENABLE_SERVICE_REGISTER) + add_definitions(-DENABLE_SERVICE_REGISTER) +endif() + add_subdirectory(utils) -add_subdirectory(codegen) add_subdirectory(storages) -add_subdirectory(engines) +add_subdirectory(codegen) +if (NOT BUILD_FOR_MASTER) + add_subdirectory(engines) +endif() add_subdirectory(bin) if (OPENTELEMETRY_CPP_FOUND) add_subdirectory(otel) endif() -if (BUILD_TEST) +if ((NOT BUILD_FOR_MASTER) AND BUILD_TEST) add_subdirectory(tests) endif() diff --git a/flex/bin/CMakeLists.txt b/flex/bin/CMakeLists.txt index fd08037668f0..c16ea6e07491 100644 --- a/flex/bin/CMakeLists.txt +++ b/flex/bin/CMakeLists.txt @@ -1,41 +1,52 @@ -if(Hiactor_FOUND) - add_executable(rt_server rt_server.cc) - target_link_libraries(rt_server flex_server) - install_without_export_flex_target(rt_server) -endif() +if (NOT BUILD_FOR_MASTER) + if(Hiactor_FOUND) + add_executable(rt_server rt_server.cc) + 
target_link_libraries(rt_server flex_server) + install_without_export_flex_target(rt_server) + endif() -if(Hiactor_FOUND) - include_directories(../engines/http_server) - add_executable(rt_bench rt_bench.cc) - target_link_libraries(rt_bench flex_server) - install_without_export_flex_target(rt_bench) -endif() + if(Hiactor_FOUND) + include_directories(../engines/http_server) + add_executable(rt_bench rt_bench.cc) + target_link_libraries(rt_bench flex_server) + install_without_export_flex_target(rt_bench) + endif() -add_executable(rt_admin rt_admin.cc) -target_link_libraries(rt_admin flex_utils) -install_without_export_flex_target(rt_admin) + add_executable(rt_admin rt_admin.cc) + target_link_libraries(rt_admin flex_utils) + install_without_export_flex_target(rt_admin) -add_executable(adhoc_runner adhoc_runner.cc) -target_link_libraries(adhoc_runner flex_graph_db) -install_without_export_flex_target(adhoc_runner) + add_executable(adhoc_runner adhoc_runner.cc) + target_link_libraries(adhoc_runner flex_graph_db) + install_without_export_flex_target(adhoc_runner) -add_executable(cypher_client cypher_client.cc) -target_link_libraries(cypher_client flex_utils) -install_without_export_flex_target(cypher_client) + add_executable(cypher_client cypher_client.cc) + target_link_libraries(cypher_client flex_utils) + install_without_export_flex_target(cypher_client) + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../engines/http_server) -add_executable(flex_analytical_engine flex_analytical_engine.cc) -target_link_libraries(flex_analytical_engine flex_immutable_graph flex_bsp ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) -install_without_export_flex_target(flex_analytical_engine) + add_executable(flex_analytical_engine flex_analytical_engine.cc) + target_link_libraries(flex_analytical_engine flex_immutable_graph flex_bsp ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + install_without_export_flex_target(flex_analytical_engine) -if(Hiactor_FOUND) - add_executable(interactive_server 
interactive_server.cc) - target_link_libraries(interactive_server flex_server ${GFLAGS_LIBRARIES}) - if (OPENTELEMETRY_CPP_FOUND) - target_link_libraries(interactive_server otel) + if(Hiactor_FOUND) + add_executable(interactive_server interactive_server.cc) + target_link_libraries(interactive_server flex_server ${GFLAGS_LIBRARIES}) + if (OPENTELEMETRY_CPP_FOUND) + target_link_libraries(interactive_server otel) + endif() + if (BUILD_WITH_OSS) + target_link_libraries(interactive_server cpp-sdk flex_utils) + endif() + install_without_export_flex_target(interactive_server) endif() - install_without_export_flex_target(interactive_server) + + + add_executable(stored_procedure_runner stored_procedure_runner.cc) + target_link_libraries(stored_procedure_runner flex_graph_db ${GFLAGS_LIBRARIES}) + install_without_export_flex_target(stored_procedure_runner) endif() # install the script install(PROGRAMS load_plan_and_gen.sh DESTINATION bin) @@ -48,7 +59,3 @@ if (BUILD_WITH_OSS) target_link_libraries(bulk_loader cpp-sdk) endif() install_without_export_flex_target(bulk_loader) - -add_executable(stored_procedure_runner stored_procedure_runner.cc) -target_link_libraries(stored_procedure_runner flex_graph_db ${GFLAGS_LIBRARIES}) -install_without_export_flex_target(stored_procedure_runner) \ No newline at end of file diff --git a/flex/bin/bulk_loader.cc b/flex/bin/bulk_loader.cc index cf1f22d27afe..5b3dc1f1dab0 100644 --- a/flex/bin/bulk_loader.cc +++ b/flex/bin/bulk_loader.cc @@ -52,14 +52,16 @@ void signal_handler(int signal) { #ifdef BUILD_WITH_OSS void check_oss_object_not_exist(std::string& data_path, - std::string& object_path, + std::string& indices_object_path, + std::string& statistic_object_path, gs::OSSConf& oss_conf) { auto pos = data_path.find("/", 6); if (pos == std::string::npos) { LOG(FATAL) << "Invalid data path: " << data_path; } oss_conf.bucket_name_ = data_path.substr(6, pos - 6); - object_path = data_path.substr(pos + 1); + indices_object_path = 
data_path.substr(pos + 1); + statistic_object_path = indices_object_path + "_statistics.json"; oss_conf.load_conf_from_env(); // check whether the object exists auto oss_reader = std::make_shared(oss_conf); @@ -67,9 +69,9 @@ void check_oss_object_not_exist(std::string& data_path, LOG(FATAL) << "Failed to open oss reader"; } std::vector path_list; - auto status = oss_reader->List(object_path, path_list); + auto status = oss_reader->List(indices_object_path, path_list); if (status.ok() && path_list.size() > 0) { - LOG(FATAL) << "Object already exists: " << object_path + LOG(FATAL) << "Object already exists: " << indices_object_path << ", list size: " << path_list.size() << ", please remove the object and try again."; } @@ -78,7 +80,8 @@ void check_oss_object_not_exist(std::string& data_path, } int32_t upload_data_dir_to_oss(const std::filesystem::path& data_dir_path, - const std::string& object_path, + const std::string& indices_object_path, + const std::string& statistic_object_path, const gs::OSSConf& oss_conf) { // zip the data directory std::string zip_file = data_dir_path.string() + ".zip"; @@ -93,22 +96,38 @@ int32_t upload_data_dir_to_oss(const std::filesystem::path& data_dir_path, return -1; } - auto oss_writer = std::make_shared(oss_conf); - if (!oss_writer || !oss_writer->Open().ok()) { - LOG(ERROR) << "Failed to open oss writer"; - return -1; - } - auto status = oss_writer->Put(zip_file, object_path, false); - if (!status.ok()) { - LOG(ERROR) << "Failed to upload data to oss: " << status.ToString(); - return -1; - } - status = oss_writer->Close(); - if (!status.ok()) { - LOG(ERROR) << "Failed to close oss writer: " << status.ToString(); - return -1; + { + auto oss_writer = std::make_shared(oss_conf); + if (!oss_writer || !oss_writer->Open().ok()) { + LOG(ERROR) << "Failed to open oss writer"; + return -1; + } + // upload the zip file to oss + auto status = oss_writer->Put(zip_file, indices_object_path, false); + if (!status.ok()) { + LOG(ERROR) << "Failed 
to upload data to oss: " << status.ToString(); + return -1; + } + // upload the statistic file to oss + auto statistics_file = data_dir_path.string() + "/statistics.json"; + if (std::filesystem::exists(statistics_file)) { + status = oss_writer->Put(statistics_file, statistic_object_path, false); + } else { + LOG(ERROR) << "Statistic file not found: " << statistics_file; + } + if (!status.ok()) { + LOG(ERROR) << "Failed to upload statistics json to oss: " + << status.ToString(); + return -1; + } + status = oss_writer->Close(); + if (!status.ok()) { + LOG(ERROR) << "Failed to close oss writer: " << status.ToString(); + return -1; + } } - LOG(INFO) << "Successfully uploaded data to oss: " << object_path + + LOG(INFO) << "Successfully uploaded data to oss: " << indices_object_path << ", it is in zip format"; std::filesystem::remove(zip_file); std::filesystem::remove_all(data_dir_path); @@ -166,11 +185,14 @@ int main(int argc, char** argv) { * If the data path is an oss path, the data will be uploaded to oss after * loading to a temporary directory. To improve the performance of the * performance, bulk_loader will zip the data directory before uploading. 
- * The data path should be in the format of oss://bucket_name/object_path + * The data path should be in the format of + * oss://bucket_name/indices_object_path, and the statistics file will be + * uploaded to indices_object_path + "_statistics.json" */ #ifdef BUILD_WITH_OSS bool upload_to_oss = false; - std::string object_path = ""; + std::string indices_object_path = ""; + std::string statistics_object_path = ""; auto oss_conf = gs::OSSConf(); #endif std::string bulk_load_config_path = ""; @@ -227,7 +249,8 @@ int main(int argc, char** argv) { if (data_path.find("oss://") == 0) { #ifdef BUILD_WITH_OSS upload_to_oss = true; - check_oss_object_not_exist(data_path, object_path, oss_conf); + check_oss_object_not_exist(data_path, indices_object_path, + statistics_object_path, oss_conf); #else LOG(ERROR) << "OSS is not supported in this build"; return -1; @@ -279,8 +302,13 @@ int main(int argc, char** argv) { LOG(INFO) << "Finished bulk loading in " << t << " seconds."; #ifdef BUILD_WITH_OSS + // If build_with_oss, we open the data_dir, and generate the statistic file + gs::MutablePropertyFragment frag; + frag.Open(data_dir_path.string(), 1); + frag.generateStatistics(data_dir_path.string()); if (upload_to_oss) { - return upload_data_dir_to_oss(data_dir_path, object_path, oss_conf); + return upload_data_dir_to_oss(data_dir_path, indices_object_path, + statistics_object_path, oss_conf); } #endif diff --git a/flex/bin/load_plan_and_gen.sh b/flex/bin/load_plan_and_gen.sh index 5670c83ca079..2cf4a5a62d73 100755 --- a/flex/bin/load_plan_and_gen.sh +++ b/flex/bin/load_plan_and_gen.sh @@ -249,6 +249,8 @@ compile_hqps_so() { query_name="${last_file_name%.pb}" elif [[ $last_file_name == *.cc ]]; then query_name="${last_file_name%.cc}" + elif [[ $last_file_name == *.cpp ]]; then + query_name="${last_file_name%.cpp}" elif [[ $last_file_name == *.cypher ]]; then query_name="${last_file_name%.cypher}" else diff --git a/flex/cmake/BuildEtcdCpp.cmake b/flex/cmake/BuildEtcdCpp.cmake 
new file mode 100644 index 000000000000..c22de02a4d9d --- /dev/null +++ b/flex/cmake/BuildEtcdCpp.cmake @@ -0,0 +1,35 @@ +# Copyright 2020-2023 Alibaba Group Holding Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This File is copied from https://github.com/v6d-io/v6d/blob/main/cmake/BuildEtcdCpp.cmake +# build cpprestsdk +set(WERROR OFF CACHE BOOL "Treat warnings as errors") +set(BUILD_TESTS OFF CACHE BOOL "Build tests.") +set(BUILD_SAMPLES OFF CACHE BOOL "Build sample applications.") +set(CPPREST_EXCLUDE_WEBSOCKETS ON CACHE BOOL "Exclude websockets functionality..") +add_subdirectory(third_party/cpprestsdk) +set(CPPREST_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/third_party/cpprestsdk/Release/include) +set(CPPREST_LIB cpprest) + +# disable a warning message inside cpprestsdk on Mac with llvm/clang +if(W_NO_UNUSED_BUT_SET_PARAMETER) + target_compile_options(cpprest PRIVATE -Wno-unused-but-set-parameter) +endif() + +# build etcd-cpp-apiv3 +add_subdirectory(third_party/etcd-cpp-apiv3) +set(ETCD_CPP_LIBRARIES etcd-cpp-api) +set(ETCD_CPP_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/third_party/etcd-cpp-apiv3/ + ${PROJECT_BINARY_DIR}/third_party/etcd-cpp-apiv3/proto/gen + ${PROJECT_BINARY_DIR}/third_party/etcd-cpp-apiv3/proto/gen/proto) diff --git a/flex/engines/http_server/CMakeLists.txt b/flex/engines/http_server/CMakeLists.txt index c1f0913de10f..eb53b97e0344 100644 --- a/flex/engines/http_server/CMakeLists.txt +++ b/flex/engines/http_server/CMakeLists.txt @@ -1,10 +1,9 @@ 
-find_package (Hiactor) if (Hiactor_FOUND) include (${Hiactor_CODEGEN_CMAKE_FILE}) hiactor_codegen (server_actor_autogen server_actor_autogen_files SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/ - INCLUDE_PATHS ${Hiactor_INCLUDE_DIR},${CMAKE_CURRENT_SOURCE_DIR}/../../../,${CMAKE_CURRENT_BINARY_DIR}/../../utils/) + INCLUDE_PATHS ${Hiactor_INCLUDE_DIR},${CMAKE_CURRENT_SOURCE_DIR}/../../../,${CMAKE_CURRENT_BINARY_DIR}/../../utils/,${CPPREST_INCLUDE_DIR},${ETCD_CPP_INCLUDE_DIR},${OSS_CPP_SDK_INCLUDE_DIR}) # get all .cc files in current directory, except for generated/ @@ -14,6 +13,12 @@ if (Hiactor_FOUND) add_library(flex_server STATIC ${SERVER_FILES} ${server_actor_autogen_files}) add_dependencies(flex_server flex_utils) # Make sure flex_utils is built before flex_server add_dependencies(flex_server server_actor_autogen) + if (BUILD_WITH_OSS) + add_dependencies(server_actor_autogen cpp-sdk) + endif() + if (ENABLE_SERVICE_REGISTER) + add_dependencies(server_actor_autogen ${ETCD_CPP_LIBRARIES} ${CPPREST_LIB}) + endif() target_compile_options (flex_server PUBLIC -Wno-attributes) @@ -38,5 +43,8 @@ if (Hiactor_FOUND) target_link_libraries(flex_server otel) endif() target_link_libraries(flex_server flex_metadata_store) + if (ENABLE_SERVICE_REGISTER) + target_link_libraries(flex_server ${ETCD_CPP_LIBRARIES}) + endif() install_without_export_flex_target(flex_server) endif () diff --git a/flex/engines/http_server/actor/admin_actor.act.cc b/flex/engines/http_server/actor/admin_actor.act.cc index 30f5bbd25029..ead684927035 100644 --- a/flex/engines/http_server/actor/admin_actor.act.cc +++ b/flex/engines/http_server/actor/admin_actor.act.cc @@ -378,7 +378,7 @@ seastar::future admin_actor::run_create_graph( // query_param is the graph name seastar::future admin_actor::run_get_graph_schema( query_param&& query_param) { - LOG(INFO) << "Get Graph schema for graph_id: " << query_param.content; + VLOG(10) << "Get Graph schema for graph_id: " << query_param.content; auto schema_res = 
metadata_store_->GetGraphMeta(query_param.content); if (schema_res.ok()) { @@ -395,15 +395,13 @@ seastar::future admin_actor::run_get_graph_schema( // Get the metadata of a graph. seastar::future admin_actor::run_get_graph_meta( query_param&& query_param) { - LOG(INFO) << "Get Graph meta for graph_id: " << query_param.content; + VLOG(10) << "Get Graph meta for graph_id: " << query_param.content; auto meta_res = metadata_store_->GetGraphMeta(query_param.content); if (meta_res.ok()) { auto get_all_procedure_res = metadata_store_->GetAllPluginMeta(query_param.content); if (get_all_procedure_res.ok()) { - VLOG(10) << "Successfully get all procedures: " - << get_all_procedure_res.value().size(); auto& all_plugin_metas = get_all_procedure_res.value(); for (auto& plugin_meta : all_plugin_metas) { add_runnable_info(plugin_meta); @@ -1148,6 +1146,7 @@ seastar::future admin_actor::service_status( res.AddMember("status", graph_db_service.is_actors_running() ? "Running" : "Stopped", res.GetAllocator()); + res.AddMember("deploy_mode", "standalone", res.GetAllocator()); res.AddMember("hqps_port", query_port, res.GetAllocator()); res.AddMember("bolt_port", graph_db_service.get_service_config().bolt_port, res.GetAllocator()); diff --git a/flex/engines/http_server/graph_db_service.cc b/flex/engines/http_server/graph_db_service.cc index eb27be41b1e4..d3a49dddf71f 100644 --- a/flex/engines/http_server/graph_db_service.cc +++ b/flex/engines/http_server/graph_db_service.cc @@ -127,6 +127,16 @@ void GraphDBService::init(const ServiceConfig& config) { config.admin_port, config.get_exclusive_shard_id(), config.admin_svc_max_content_length); } +#ifdef ENABLE_SERVICE_REGISTER + LOG(INFO) << "Service registry endpoint: " + << config.service_registry_endpoint; + if (!config.service_registry_endpoint.empty()) { + service_register_ = std::make_unique( + config.service_registry_endpoint, config.namespace_, + config.master_instance_name, [this]() { return get_service_info(); }, + 
config.service_registry_ttl); + } +#endif initialized_.store(true); service_config_ = config; @@ -207,6 +217,11 @@ GraphDBService::~GraphDBService() { if (metadata_store_) { metadata_store_->Close(); } +#ifdef ENABLE_SERVICE_REGISTER + if (service_register_) { + service_register_->Stop(); + } +#endif } const ServiceConfig& GraphDBService::get_service_config() const { @@ -264,6 +279,14 @@ void GraphDBService::run_and_wait_for_exit() { if (admin_hdl_) { admin_hdl_->start(); } +#ifdef ENABLE_SERVICE_REGISTER + if (service_register_) { + LOG(INFO) << "Start service register thread"; + service_register_->Start(); + } else { + LOG(INFO) << "Service register is not started!"; + } +#endif if (service_config_.start_compiler) { if (!start_compiler_subprocess()) { LOG(FATAL) << "Failed to start compiler subprocess! exiting..."; @@ -278,6 +301,11 @@ void GraphDBService::run_and_wait_for_exit() { if (admin_hdl_) { admin_hdl_->stop(); } +#ifdef ENABLE_SERVICE_REGISTER + if (service_register_) { + service_register_->Stop(); + } +#endif actor_sys_->terminate(); } @@ -333,6 +361,45 @@ bool GraphDBService::check_compiler_ready() const { return true; } +#ifdef ENABLE_SERVICE_REGISTER +std::pair GraphDBService::get_service_info() { + auto ip = gs::get_local_ip(); + AllServiceRegisterPayload payload; + if (!is_running()) { + LOG(INFO) << "Service is not running, skip service register."; + return std::make_pair(false, payload); + } + + if (metadata_store_) { + auto cur_running_graph = metadata_store_->GetRunningGraph(); + if (!cur_running_graph.ok()) { + LOG(ERROR) << "Failed to get running graph: " + << cur_running_graph.status().error_message(); + return std::make_pair(false, payload); + } + payload.graph_id = cur_running_graph.value(); + } else { + // Try to get from current graph_db + auto& db = gs::GraphDB::get(); + LOG(INFO) << "Get service info from current graph db: " + << db.schema().GetGraphId(); + payload.graph_id = db.schema().GetGraphId(); + } + + auto procedure_endpoint = 
+ ip + ":" + std::to_string(service_config_.query_port); + auto cypher_endpoint = ip + ":" + std::to_string(service_config_.bolt_port); + ServiceMetrics procedure_metrics("0"); // TODO: get snapshot id + payload.services.emplace(std::make_pair( + "procedure", + ServiceRegisterPayload(procedure_endpoint, procedure_metrics))); + payload.services.emplace(std::make_pair( + "cypher", ServiceRegisterPayload(cypher_endpoint, procedure_metrics))); + + return std::make_pair(true, payload); +} +#endif + bool GraphDBService::start_compiler_subprocess( const std::string& graph_schema_path) { if (!service_config_.start_compiler) { diff --git a/flex/engines/http_server/graph_db_service.h b/flex/engines/http_server/graph_db_service.h index 0689cf1003ba..f25817e4d181 100644 --- a/flex/engines/http_server/graph_db_service.h +++ b/flex/engines/http_server/graph_db_service.h @@ -23,6 +23,9 @@ #include "flex/engines/http_server/actor_system.h" #include "flex/engines/http_server/handler/admin_http_handler.h" #include "flex/engines/http_server/handler/graph_db_http_handler.h" +#ifdef ENABLE_SERVICE_REGISTER +#include "flex/engines/http_server/service_register.h" +#endif #include "flex/engines/http_server/workdir_manipulator.h" #include "flex/storages/metadata/graph_meta_store.h" #include "flex/storages/metadata/metadata_store_factory.h" @@ -51,9 +54,14 @@ struct ServiceConfig { 1024 * 1024 * 1024; // 1GB static constexpr const char* DEFAULT_WAL_URI = "{GRAPH_DATA_DIR}/wal"; // By default we will use the wal directory in - // the graph data directory. The {GRAPH_DATA_DIR} - // is a placeholder, which will be replaced by - // the actual graph data directory. + // the graph data directory. The {GRAPH_DATA_DIR} + // is a placeholder, which will be replaced by + // the actual graph data directory. 
+ static constexpr const char* DEFAULT_METADATA_STORE_URI = + "{WORKSPACE}/METADATA"; // By default we will use the local file system + // as + + std::string instance_name, namespace_; // Those has default value uint32_t bolt_port; @@ -90,6 +98,10 @@ struct ServiceConfig { std::string engine_config_path; // used for codegen. size_t admin_svc_max_content_length; // max content length for admin service. std::string wal_uri; // The uri of the wal storage. + std::string master_instance_name; // The name of the master instance. + std::string + service_registry_endpoint; // The address of the service registry. + int32_t service_registry_ttl; // The ttl of the service registry. ServiceConfig(); @@ -175,6 +187,10 @@ class GraphDBService { bool check_compiler_ready() const; +#ifdef ENABLE_SERVICE_REGISTER + std::pair get_service_info(); +#endif + private: GraphDBService() = default; @@ -197,6 +213,10 @@ class GraphDBService { boost::process::child compiler_process_; // handler for metadata store std::shared_ptr metadata_store_; +#ifdef ENABLE_SERVICE_REGISTER + // A thread periodically wakeup and register the service itself to master. 
+ std::unique_ptr service_register_; +#endif }; } // namespace server @@ -360,6 +380,38 @@ struct convert { } else { LOG(WARNING) << "Fail to find default_graph configuration"; } + + // parse service registry + + if (config["master"]) { + auto master_node = config["master"]; + if (master_node["instance_name"]) { + service_config.master_instance_name = + master_node["instance_name"].as(); + } + if (master_node["service_registry"]) { + if (master_node["service_registry"]["endpoint"]) { + service_config.service_registry_endpoint = + master_node["service_registry"]["endpoint"].as(); + VLOG(10) << "service_registry_endpoint: " + << service_config.service_registry_endpoint; + } + if (master_node["service_registry"]["ttl"]) { + service_config.service_registry_ttl = + master_node["service_registry"]["ttl"].as(); + VLOG(10) << "service_registry_ttl: " + << service_config.service_registry_ttl; + } + } + if (master_node["k8s_launcher_config"]) { + auto k8s_config_node = master_node["k8s_launcher_config"]; + if (k8s_config_node["namespace"]) { + service_config.namespace_ = + k8s_config_node["namespace"].as(); + } + } + } + return true; } }; diff --git a/flex/engines/http_server/handler/graph_db_http_handler.cc b/flex/engines/http_server/handler/graph_db_http_handler.cc index 1e4be79b2dbf..0e7debf36395 100644 --- a/flex/engines/http_server/handler/graph_db_http_handler.cc +++ b/flex/engines/http_server/handler/graph_db_http_handler.cc @@ -1118,4 +1118,4 @@ seastar::future<> graph_db_http_handler::set_routes() { }); } -} // namespace server +} // namespace server \ No newline at end of file diff --git a/flex/engines/http_server/service_register.cc b/flex/engines/http_server/service_register.cc new file mode 100644 index 000000000000..e1357f43e8d0 --- /dev/null +++ b/flex/engines/http_server/service_register.cc @@ -0,0 +1,254 @@ +/** Copyright 2020 Alibaba Group Holding Limited. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef ENABLE_SERVICE_REGISTER + +#include "flex/engines/http_server/service_register.h" +#include "flex/utils/service_utils.h" + +namespace std { +std::string to_string(const etcd::Event::EventType event_type) { + switch (event_type) { + case etcd::Event::EventType::PUT: + return "PUT"; + case etcd::Event::EventType::DELETE_: + return "DELETE"; + case etcd::Event::EventType::INVALID: + return "INVALID"; + default: + return "INVALID"; + } +} + +} // namespace std + +namespace server { + +void ServiceRegister::Start() { + if (service_register_thread_) { + LOG(ERROR) << "ServiceRegister is already started"; + return; + } + // Expect the path is like http://ip:port + VLOG(10) << "ETCD base URI: " << etcd_endpoint_; + if (etcd_endpoint_.empty()) { + LOG(FATAL) << "Invalid etcd endpoint: " << etcd_endpoint_; + } + client_ = std::make_unique(etcd_endpoint_); + running_.store(2); + + init_lease(); + init_register_thread(); + init_election_thread(); +} + +void ServiceRegister::Stop() { + if (running_.load() == 0) { + return; + } + running_.store(0); + // use condition variable to wake up the thread + if (service_register_thread_) { + cv_.notify_all(); + service_register_thread_->join(); + service_register_thread_.reset(); + } + if (watcher_) { + watcher_->Cancel(); + watcher_.reset(); + } + if (election_thread_) { + election_thread_->join(); + election_thread_.reset(); + } + if (keep_alive_) { + 
keep_alive_->Cancel(); + keep_alive_.reset(); + } + if (client_) { + client_.reset(); + } + LOG(INFO) << "ServiceRegister stopped"; +} + +void ServiceRegister::init_lease() { + auto _resp = client_->leasegrant(ttl_seconds_); + if (!_resp.is_ok()) { + LOG(ERROR) << "Failed to grant lease: " << _resp.error_message(); + return; + } + lease_id_ = _resp.value().lease(); + handler_ = [](std::exception_ptr eptr) { + try { + if (eptr) { + std::rethrow_exception(eptr); + } + } catch (const std::runtime_error& e) { + LOG(ERROR) << "Keep alive error: " << e.what(); + } catch (const std::out_of_range& e) { + LOG(ERROR) << "Lease expiry \"" << e.what(); + } + }; + keep_alive_ = std::make_unique(client_.get(), handler_, + interval_seconds_, lease_id_); + LOG(INFO) << "ServiceRegister started, lease id: " << lease_id_; +} + +void ServiceRegister::init_register_thread() { + service_register_thread_ = std::make_unique([this]() { + while (running_.load(std::memory_order_relaxed)) { + { + std::unique_lock lock(mutex_); + // TODO: consider cancel keepAlive when service is stopped + cv_.wait_for(lock, std::chrono::seconds(interval_seconds_)); + auto service_info = get_service_info_(); + if (!service_info.first) { + continue; + } + LOG(INFO) << "Start to register service: " + << service_info.second.to_string(); + for (auto& [service_name, service_payload] : + service_info.second.services) { + auto instance_key = get_service_instance_list_key( + service_name, service_payload.endpoint, + service_info.second.graph_id); + auto service_payload_string = service_payload.to_string(); + // For instance key-value, insert or update + if (!insert_to_instance_list(instance_key, service_payload_string) + .ok()) { + LOG(ERROR) << "Failed to insert to instance list: " << instance_key; + } + } + } + } + }); +} + +void ServiceRegister::init_election_thread() { + election_thread_ = std::make_unique([this]() { + std::pair service_info; + while (true) { + service_info = get_service_info_(); + if 
(service_info.first) { + break; + } + LOG(INFO) << "In initial election thread, service info is not ready"; + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + if (add_primary_until_success()) { + is_primary_.store(true); + } + LOG(INFO) << "Start to watch primary key: " + << get_service_primary_key(service_info.second.graph_id); + + watcher_ = std::make_unique( + client_.get(), get_service_primary_key(service_info.second.graph_id), + [&, graph_id = service_info.second.graph_id](etcd::Response resp) { + if (!resp.is_ok()) { + LOG(ERROR) << "Failed to watch primary key: " + << resp.error_message(); + std::this_thread::sleep_for(std::chrono::seconds(1)); + } else { + if (resp.action() == "delete") { + LOG(INFO) << "Got delete events size: " << resp.events().size(); + for (auto& event : resp.events()) { + process_delete_events(event, graph_id); + } + } else { + LOG(INFO) << "Got action: " << resp.action() << ", just skip"; + } + } + }, + false); + }); +} + +void ServiceRegister::process_delete_events(const etcd::Event& event, + const std::string graph_id) { + if (event.event_type() != etcd::Event::EventType::DELETE_) { + LOG(ERROR) << "Expect delete event, but got: " + << std::to_string(event.event_type()); + return; + } + auto primary_key = get_service_primary_key(graph_id); + if (!event.has_kv()) { + LOG(ERROR) << "Event has no kv: "; + return; + } + auto kv = event.kv(); + if (kv.key() == primary_key) { + // If somehow myself is primary, then try to add primary key again + if (is_primary_.load()) { + is_primary_.store(false); + } + if (add_primary_until_success()) { + LOG(INFO) << "Successfully add primary key after delete event: " + << kv.key(); + is_primary_.store(true); + } else { + LOG(INFO) << "Failed to add primary key after delete event: " << kv.key() + << ", maybe other node is primary"; + } + } else { + LOG(INFO) << "Unknown delete event, key: " << kv.key() + << ", primary key: " << primary_key; + } +} + +bool 
ServiceRegister::add_primary_until_success() { + auto ip = gs::get_local_ip(); + auto service_info = get_service_info_(); + if (!service_info.first) { + LOG(INFO) << "Service info is not ready, skip add primary"; + return false; + } + auto primary_key = get_service_primary_key(service_info.second.graph_id); + auto retry = MAX_RETRY; + LOG(INFO) << "Try to add primary key for service: " << primary_key; + while (retry > 0) { + auto get_resp = client_->get(primary_key); + if (get_resp.is_ok() && !get_resp.value().as_string().empty()) { + LOG(INFO) << "Primary key already exists: " << primary_key; + return false; + } + LOG(INFO) << "Try lock: " << primary_key; + auto lock = client_->lock_with_lease(primary_key, lease_id_); + if (lock.is_ok()) { + auto add_resp = client_->add(primary_key, ip, lease_id_); + if (add_resp.is_ok()) { + LOG(INFO) << "Add primary key success: " << primary_key; + return true; + } + LOG(ERROR) << "Failed to add primary key: " << primary_key; + } else { + LOG(ERROR) << "Failed to lock primary key: " << primary_key; + } + std::this_thread::sleep_for(std::chrono::seconds(1)); + retry--; + } + LOG(ERROR) << "Max retry reached, failed to add primary key: " << primary_key; + return false; +} + +gs::Status ServiceRegister::insert_to_instance_list(const std::string& key, + const std::string& value) { + LOG(INFO) << "Insert to instance list: " << key << ", value: " << value; + INSERT_OR_UPDATE_ETCD_KEY_VALUE(client_, key, value, lease_id_, MAX_RETRY); + return gs::Status::OK(); +} + +} // namespace server + +#endif diff --git a/flex/engines/http_server/service_register.h b/flex/engines/http_server/service_register.h new file mode 100644 index 000000000000..eaa122039a17 --- /dev/null +++ b/flex/engines/http_server/service_register.h @@ -0,0 +1,226 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifdef ENABLE_SERVICE_REGISTER + +#ifndef FLEX_ENGINES_HTTP_SERVICE_SERVICE_REGISTER_H_ +#define FLEX_ENGINES_HTTP_SERVICE_SERVICE_REGISTER_H_ + +#include +#include +#include +#include +#include + +#include "flex/engines/http_server/types.h" +#include "flex/third_party/etcd-cpp-apiv3/etcd/Client.hpp" +#include "flex/third_party/etcd-cpp-apiv3/etcd/KeepAlive.hpp" +#include "flex/third_party/etcd-cpp-apiv3/etcd/Watcher.hpp" +#include "flex/third_party/etcd-cpp-apiv3/etcd/v3/Transaction.hpp" +#include "flex/utils/result.h" + +#include +#include +#include +#include +#include +#include +#include "flex/third_party/etcd-cpp-apiv3/etcd/v3/V3Response.hpp" + +namespace server { + +struct ServiceMetrics { + std::string snapshot_id; + ServiceMetrics() = default; + ServiceMetrics(const std::string& snapshot_id) : snapshot_id(snapshot_id) {} + + inline std::string to_string() const { return "\"snapshot_id\": \"" + snapshot_id + "\""; } +}; + +struct ServiceRegisterPayload { + std::string endpoint; // ip:port + ServiceMetrics metrics; // service metrics + + ServiceRegisterPayload() = default; + ServiceRegisterPayload(const std::string& endpoint, const ServiceMetrics& metrics) + : endpoint(endpoint), metrics(metrics) {} + + std::string to_string() const { + rapidjson::Document json(rapidjson::kObjectType); + json.AddMember("endpoint", rapidjson::Value(endpoint.c_str(), json.GetAllocator()).Move(), + json.GetAllocator()); + json.AddMember("metrics", + rapidjson::Value(metrics.to_string().c_str(), json.GetAllocator()).Move(), + json.GetAllocator()); + 
rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + json.Accept(writer); + return buffer.GetString(); + } +}; + +struct AllServiceRegisterPayload { + std::unordered_map + services; // service name to service payload + std::string graph_id; + + std::string to_string() const { + std::string res = "{"; + for (const auto& [name, payload] : services) { + res += "\"" + name + "\": " + payload.to_string() + ", "; + } + if (!services.empty()) { + res.pop_back(); + } + res += "}"; + return res; + } +}; + +#define INSERT_OR_UPDATE_ETCD_KEY_VALUE(client, key, value, lease_id, retry) \ + { \ + int _retry = retry; \ + while (_retry-- > 0) { \ + auto _resp = client->put(key, value, lease_id); \ + if (_resp.is_ok()) { \ + return gs::Status::OK(); \ + } else { \ + continue; \ + } \ + } \ + LOG(ERROR) << "Failed to insert or update key: " << key; \ + return gs::Status(gs::StatusCode::INTERNAL_ERROR, "Failed to insert or update key: " + key); \ + } + +#define INSERT_IF_ETCD_KEY_VALUE(client, key, value, lease_id, retry) \ + { \ + int _retry = retry; \ + while (_retry-- > 0) { \ + auto _resp = client->add(key, value, lease_id); \ + if (_resp.is_ok()) { \ + return gs::Status::OK(); \ + } else { \ + continue; \ + } \ + } \ + LOG(ERROR) << "Failed to insert key: " << key; \ + return gs::Status(gs::StatusCode::INTERNAL_ERROR, "Failed to insert key: " + key); \ + } + +/** + * A wapper of a thread that periodically register the service to master. + * . 
+├── graph_1 +│ ├── instance_list +│ │ ├── cypher +│ │ │ ├── 11.12.13.14_7687 +│ │ │ └── 22.12.13.14_7687 +│ | ├── gremlin +│ │ │ ├── 11.12.13.14_12314 +│ │ │ └── 22.12.13.14_12314 +│ | └── procedure +│ | ├── 11.12.13.14_10000 +│ | └── 22.12.13.14_10000 +│ │ +| └─── primary +└── metadata + ├── graph_meta + │ ├── graph_1 + │ └── graph_2 + ├── job_meta + │ └── job_1 + └── plugin_meta + └── plugin_1 + */ +class ServiceRegister { + public: + static constexpr const char* PRIMARY_SUFFIX = "primary"; + static constexpr const char* INSTANCE_LIST = "instance_list"; + static constexpr const char* SERVICE_NAME = "service"; + static constexpr const int32_t MAX_RETRY = 5; + ServiceRegister(const std::string& etcd_endpoint, const std::string& namespace_, + const std::string& instance_name, + std::function()> get_service_info, + int interval_seconds = 10) + : etcd_endpoint_(etcd_endpoint), + namespace_(namespace_), + instance_name_(instance_name), + interval_seconds_(interval_seconds), + ttl_seconds_(interval_seconds_ + 1), + get_service_info_(get_service_info), + lease_id_(0) {} + + ~ServiceRegister() { Stop(); } + + /** + * Start the service register thread. + */ + void Start(); + + void Stop(); + + private: + void init_register_thread(); + void init_election_thread(); + void init_lease(); + void process_delete_events(const etcd::Event& event, const std::string graph_id); + // Set primary etcd key-value pair until the primary is set by use or by other + // nodes. If it is set by us, then we are primary node, return true. Else + // return false. 
+ bool add_primary_until_success(); + + // Should align with service_registry.py + inline std::string get_service_instance_list_key(const std::string& service_name, + const std::string& endpoint, + const std::string& graph_id) { + return "/" + namespace_ + "/" + instance_name_ + "/" + std::string(SERVICE_NAME) + "/" + + graph_id + "/" + INSTANCE_LIST + "/" + service_name + "/" + endpoint; + } + + inline std::string get_service_primary_key(const std::string& graph_id) { + return "/" + namespace_ + "/" + instance_name_ + "/" + std::string(SERVICE_NAME) + "/" + + graph_id + "/" + PRIMARY_SUFFIX; + } + + gs::Status insert_to_instance_list(const std::string& key, const std::string& value); + + std::string etcd_endpoint_; + std::string namespace_; + std::string instance_name_; + int interval_seconds_; + int ttl_seconds_; // considering the network latency, the ttl should be a bit + // larger than the interval_seconds + std::atomic running_{false}; + std::function()> get_service_info_; + std::function handler_; + + // A thread periodically wakeup and register the service itself to master. 
+ std::unique_ptr service_register_thread_; + std::unique_ptr election_thread_; + std::unique_ptr client_; + + std::mutex mutex_; + std::condition_variable cv_; + int64_t lease_id_; + std::unique_ptr keep_alive_; + + std::atomic is_primary_{false}; + std::unique_ptr watcher_; +}; + +} // namespace server + +#endif // FLEX_ENGINES_HTTP_SERVICE_SERVICE_REGISTER_H_ + +#endif // ENABLE_SERVICE_REGISTER \ No newline at end of file diff --git a/flex/engines/http_server/types.h b/flex/engines/http_server/types.h index 74b0e8f8f09b..a4a23368fc3e 100644 --- a/flex/engines/http_server/types.h +++ b/flex/engines/http_server/types.h @@ -20,7 +20,7 @@ #include #include #include -#include "flex/utils/service_utils.h" +#include "flex/utils/result.h" #include diff --git a/flex/interactive/examples/modern_graph/graph.yaml b/flex/interactive/examples/modern_graph/graph.yaml index 6551adf93cc9..e3fe3e3154ac 100644 --- a/flex/interactive/examples/modern_graph/graph.yaml +++ b/flex/interactive/examples/modern_graph/graph.yaml @@ -1,7 +1,9 @@ +id: 1 name: modern_graph # then must have a modern dir under ${data} directory version: v0.1 -store_type: mutable_csr # v6d, groot, gart +store_type: mutable_csr # v6d, groot, gart description: A graph with 2 vertex types and 2 edge types +remote_path: oss://graphscope/interactive/63/1742371604887 schema: vertex_types: - type_id: 0 @@ -73,4 +75,4 @@ schema: - property_id: 0 property_name: weight property_type: - primitive_type: DT_DOUBLE \ No newline at end of file + primitive_type: DT_DOUBLE diff --git a/flex/interactive/sdk/master/README.md b/flex/interactive/sdk/master/README.md index c2634b126972..8e6c44ffef36 100644 --- a/flex/interactive/sdk/master/README.md +++ b/flex/interactive/sdk/master/README.md @@ -1,4 +1,10 @@ -# OpenAPI generated server +# Flex Interactive Master Server + +Master Server is controls the whole deployment of Interactive in k8s. It +- Watch the service registry, informing client of the routing info. 
+- Serve all admin request, forwarding them to desired pod. +- Launching pod to execute jobs like bulkloading, checkpointing. +- Responsible for launching new pods. ## Overview This server was generated by the [OpenAPI Generator](https://openapi-generator.tech) project. By using the diff --git a/flex/interactive/sdk/master/gs_interactive_admin/VERSION b/flex/interactive/sdk/master/gs_interactive_admin/VERSION new file mode 100644 index 000000000000..26bea73e8119 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/VERSION @@ -0,0 +1 @@ +0.31.0 diff --git a/flex/interactive/sdk/master/gs_interactive_admin/__main__.py b/flex/interactive/sdk/master/gs_interactive_admin/__main__.py index 2363c048d7b1..6fba02a25d23 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/__main__.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/__main__.py @@ -16,21 +16,101 @@ # limitations under the License. # +import base64 +import string import connexion +import argparse +import logging +import random +import os -from gs_interactive_admin import encoder +from gs_interactive_admin.encoder import JSONEncoder +from gs_interactive_admin.core.config import Config +from gs_interactive_admin.core.service_discovery.service_registry import ( + initialize_service_registry, +) +from gs_interactive_admin.core.metadata.metadata_store import ( + init_metadata_store, + get_metadata_store, +) +from gs_interactive_admin.core.service.service_manager import init_service_manager +from gs_interactive_admin.core.job.job_manager import init_job_manager +from gs_interactive_admin.core.procedure.procedure_manager import init_procedure_manager + + +def setup_args_parsing(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--config-file", type=str, help="The config file path in yaml or json format" + ) + return parser.parse_args() + + +def config_logging(log_level): + """Set log level basic on config. 
+ Args: + log_level (str): Log level of stdout handler + """ + logging.basicConfig(level=logging.INFO) + + # `NOTSET` is special as it doesn't show log in Python + if isinstance(log_level, str): + log_level = getattr(logging, log_level.upper()) + if log_level == logging.NOTSET: + log_level = logging.DEBUG + + logger = logging.getLogger("interactive") + logger.setLevel(log_level) + + +def initialize_global_variables(config, config_file): + """ + Initialize global variables. All global variables should have two methods: + - initialize_xxx: Initialize the global variable + - get_xxx: Get the global variable + """ + initialize_service_registry(config) + init_metadata_store(config) + # Should be placed after init_metadata_store + init_service_manager(config) + init_job_manager(config, get_metadata_store()) + init_procedure_manager(config, config_file_path=config_file) + + +def preprocess_config(config: Config): + if config.master.instance_name is None: + if os.environ.get("MASTER_INSTANCE_NAME"): + config.master.instance_name = os.environ.get("MASTER_INSTANCE_NAME") + else: + # generate a random instance name with six characters + config.master.instance_name = "gs-interactive-{}".format( + "".join(random.choices(string.ascii_lowercase + string.digits, k=6)) + ) + logging.info("Generated instance name: %s", config.master.instance_name) def main(): + + args = setup_args_parsing() + config: Config = None + if not args.config_file: + raise RuntimeError("Must specify a config or config-file") + + config = Config.load(args.config_file) + preprocess_config(config) + + config_logging(config.log_level) + initialize_global_variables(config, args.config_file) + app = connexion.App(__name__, specification_dir="./openapi/") - app.app.json_encoder = encoder.JSONEncoder app.add_api( "openapi.yaml", arguments={"title": "GraphScope Interactive API v0.3"}, pythonic_params=True, ) + app.app.json_encoder = JSONEncoder - app.run(port=8080) + app.run(port=config.master.port, debug=True) if 
__name__ == "__main__": diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_graph_management_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_graph_management_controller.py index d02f40c87a37..e26e1478c583 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_graph_management_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_graph_management_controller.py @@ -15,46 +15,47 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import logging +import connexion from typing import Dict from typing import Tuple from typing import Union +import logging -import connexion - -from gs_interactive_admin import util -from gs_interactive_admin.models.api_response_with_code import ( # noqa: E501 +from gs_interactive_admin.models.api_response_with_code import ( APIResponseWithCode, -) +) # noqa: E501 from gs_interactive_admin.models.create_edge_type import CreateEdgeType # noqa: E501 -from gs_interactive_admin.models.create_graph_request import ( # noqa: E501 +from gs_interactive_admin.models.create_graph_request import ( CreateGraphRequest, -) -from gs_interactive_admin.models.create_graph_response import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.create_graph_response import ( CreateGraphResponse, -) -from gs_interactive_admin.models.create_vertex_type import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.create_vertex_type import ( CreateVertexType, -) -from gs_interactive_admin.models.get_graph_response import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.get_graph_response import ( GetGraphResponse, -) -from gs_interactive_admin.models.get_graph_schema_response import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.get_graph_schema_response import ( GetGraphSchemaResponse, -) -from 
gs_interactive_admin.models.get_graph_statistics_response import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.get_graph_statistics_response import ( GetGraphStatisticsResponse, -) +) # noqa: E501 from gs_interactive_admin.models.job_response import JobResponse # noqa: E501 from gs_interactive_admin.models.schema_mapping import SchemaMapping # noqa: E501 from gs_interactive_admin.models.snapshot_status import SnapshotStatus # noqa: E501 +from gs_interactive_admin import util +from gs_interactive_admin.core.metadata.metadata_store import get_metadata_store +from gs_interactive_admin.core.job.job_manager import get_job_manager +from gs_interactive_admin.core.service.service_manager import get_service_manager logger = logging.getLogger("interactive") - def create_dataloading_job(graph_id, schema_mapping): # noqa: E501 """create_dataloading_job + TODO: currently we launch the job in master, we should launch the job in a temporary pod in the future. Create a dataloading job # noqa: E501 @@ -66,29 +67,15 @@ def create_dataloading_job(graph_id, schema_mapping): # noqa: E501 :rtype: Union[JobResponse, Tuple[JobResponse, int], Tuple[JobResponse, int, Dict[str, str]] """ if connexion.request.is_json: - schema_mapping = SchemaMapping.from_dict( # noqa: F841 - connexion.request.get_json() - ) # noqa: E501 - return "do some magic!" - - -def create_edge_type(graph_id, create_edge_type=None): # noqa: E501 - """create_edge_type - - Create a edge type # noqa: E501 - - :param graph_id: - :type graph_id: str - :param create_edge_type: - :type create_edge_type: dict | bytes - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - if connexion.request.is_json: - create_edge_type = CreateEdgeType.from_dict( # noqa: F841 + schema_mapping = SchemaMapping.from_dict( connexion.request.get_json() ) # noqa: E501 - return "do some magic!" 
+ job_id = get_job_manager().create_dataloading_job( + graph_id=graph_id, schema_mapping=schema_mapping.to_dict() + ) + return JobResponse(job_id=job_id) + else: + raise RuntimeError("Invalid request") def create_graph(create_graph_request): # noqa: E501 @@ -102,78 +89,35 @@ def create_graph(create_graph_request): # noqa: E501 :rtype: Union[CreateGraphResponse, Tuple[CreateGraphResponse, int], Tuple[CreateGraphResponse, int, Dict[str, str]] """ if connexion.request.is_json: - create_graph_request = CreateGraphRequest.from_dict( # noqa: F841 - connexion.request.get_json() - ) # noqa: E501 - return "do some magic!" - - -def create_vertex_type(graph_id, create_vertex_type): # noqa: E501 - """create_vertex_type - - Create a vertex type # noqa: E501 - - :param graph_id: - :type graph_id: str - :param create_vertex_type: - :type create_vertex_type: dict | bytes - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - if connexion.request.is_json: - create_vertex_type = CreateVertexType.from_dict( # noqa: F841 + create_graph_request = CreateGraphRequest.from_dict( connexion.request.get_json() ) # noqa: E501 - return "do some magic!" - - -def delete_edge_type( - graph_id, type_name, source_vertex_type, destination_vertex_type -): # noqa: E501 - """delete_edge_type - - Delete an edge type by name # noqa: E501 - - :param graph_id: - :type graph_id: str - :param type_name: - :type type_name: str - :param source_vertex_type: - :type source_vertex_type: str - :param destination_vertex_type: - :type destination_vertex_type: str - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - return "do some magic!" 
+ graph_id = get_metadata_store().create_graph_meta( + create_graph_request.to_dict() + ) + return CreateGraphResponse(graph_id=graph_id) + else: + raise RuntimeError("Invalid request") def delete_graph(graph_id): # noqa: E501 """delete_graph Delete a graph by id # noqa: E501 + TODO: Should we stop the service before we delete the graph? :param graph_id: The id of graph to delete :type graph_id: str :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ - return "do some magic!" - - -def delete_vertex_type(graph_id, type_name): # noqa: E501 - """delete_vertex_type - - Delete a vertex type by name # noqa: E501 - - :param graph_id: - :type graph_id: str - :param type_name: - :type type_name: str - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - return "do some magic!" + # Before we delete graph, we need to make sure the service on the graph has been stopped. + if get_service_manager().is_graph_running(graph_id): + # bad request + return APIResponseWithCode( + code=400, message=f"The service on the graph {graph_id} has not been stopped" + ) + return get_metadata_store().delete_graph_meta(graph_id) def get_graph(graph_id): # noqa: E501 @@ -186,7 +130,7 @@ def get_graph(graph_id): # noqa: E501 :rtype: Union[GetGraphResponse, Tuple[GetGraphResponse, int], Tuple[GetGraphResponse, int, Dict[str, str]] """ - return "do some magic!" + return get_metadata_store().get_graph_meta(graph_id) def get_graph_statistic(graph_id): # noqa: E501 @@ -199,7 +143,9 @@ def get_graph_statistic(graph_id): # noqa: E501 :rtype: Union[GetGraphStatisticsResponse, Tuple[GetGraphStatisticsResponse, int], Tuple[GetGraphStatisticsResponse, int, Dict[str, str]] """ - return "do some magic!" 
+ res = get_metadata_store().get_graph_statistics(graph_id) + logger.info(f"Get graph statistics response: {res}") + return GetGraphStatisticsResponse.from_dict(res) def get_schema(graph_id): # noqa: E501 @@ -212,33 +158,64 @@ def get_schema(graph_id): # noqa: E501 :rtype: Union[GetGraphSchemaResponse, Tuple[GetGraphSchemaResponse, int], Tuple[GetGraphSchemaResponse, int, Dict[str, str]] """ - return "do some magic!" + return get_metadata_store().get_graph_schema(graph_id) -def get_snapshot_status(graph_id, snapshot_id): # noqa: E501 - """get_snapshot_status +def list_graphs(): # noqa: E501 + """list_graphs + + List all graphs # noqa: E501 + + + :rtype: Union[List[GetGraphResponse], Tuple[List[GetGraphResponse], int], Tuple[List[GetGraphResponse], int, Dict[str, str]] + """ + return dict(get_metadata_store().get_all_graph_meta()) - Get the status of a snapshot by id # noqa: E501 + +################################################################ +def create_edge_type(graph_id, create_edge_type=None): # noqa: E501 + """create_edge_type + + Create a edge type # noqa: E501 :param graph_id: :type graph_id: str - :param snapshot_id: - :type snapshot_id: int + :param create_edge_type: + :type create_edge_type: dict | bytes - :rtype: Union[SnapshotStatus, Tuple[SnapshotStatus, int], Tuple[SnapshotStatus, int, Dict[str, str]] + :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ - return "do some magic!" 
+ raise RuntimeError("Not supported") -def list_graphs(): # noqa: E501 - """list_graphs +def create_vertex_type(graph_id, create_vertex_type): # noqa: E501 + """create_vertex_type - List all graphs # noqa: E501 + Create a vertex type # noqa: E501 + :param graph_id: + :type graph_id: str + :param create_vertex_type: + :type create_vertex_type: dict | bytes - :rtype: Union[List[GetGraphResponse], Tuple[List[GetGraphResponse], int], Tuple[List[GetGraphResponse], int, Dict[str, str]] + :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] + """ + raise RuntimeError("Not supported") + + +def update_vertex_type(graph_id, create_vertex_type): # noqa: E501 + """update_vertex_type + + Update a vertex type to add more properties # noqa: E501 + + :param graph_id: + :type graph_id: str + :param create_vertex_type: + :type create_vertex_type: dict | bytes + + :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ - return "do some magic!" + raise RuntimeError("Not supported") def update_edge_type(graph_id, create_edge_type): # noqa: E501 @@ -253,27 +230,55 @@ def update_edge_type(graph_id, create_edge_type): # noqa: E501 :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ - if connexion.request.is_json: - create_edge_type = CreateEdgeType.from_dict( # noqa: F841 - connexion.request.get_json() - ) # noqa: E501 - return "do some magic!" 
+ raise RuntimeError("Not supported") -def update_vertex_type(graph_id, create_vertex_type): # noqa: E501 - """update_vertex_type +def delete_edge_type( + graph_id, type_name, source_vertex_type, destination_vertex_type +): # noqa: E501 + """delete_edge_type - Update a vertex type to add more properties # noqa: E501 + Delete an edge type by name # noqa: E501 :param graph_id: :type graph_id: str - :param create_vertex_type: - :type create_vertex_type: dict | bytes + :param type_name: + :type type_name: str + :param source_vertex_type: + :type source_vertex_type: str + :param destination_vertex_type: + :type destination_vertex_type: str :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ - if connexion.request.is_json: - create_vertex_type = CreateVertexType.from_dict( # noqa: F841 - connexion.request.get_json() - ) # noqa: E501 - return "do some magic!" + raise RuntimeError("Not supported") + + +def get_snapshot_status(graph_id, snapshot_id): # noqa: E501 + """get_snapshot_status + + Get the status of a snapshot by id # noqa: E501 + + :param graph_id: + :type graph_id: str + :param snapshot_id: + :type snapshot_id: int + + :rtype: Union[SnapshotStatus, Tuple[SnapshotStatus, int], Tuple[SnapshotStatus, int, Dict[str, str]] + """ + raise RuntimeError("Not supported") + + +def delete_vertex_type(graph_id, type_name): # noqa: E501 + """delete_vertex_type + + Delete a vertex type by name # noqa: E501 + + :param graph_id: + :type graph_id: str + :param type_name: + :type type_name: str + + :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] + """ + raise RuntimeError("Not supported") diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_job_management_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_job_management_controller.py index 68e74734d166..b009bc1f6ab1 100644 --- 
a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_job_management_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_job_management_controller.py @@ -15,18 +15,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # - -import logging +import connexion from typing import Dict from typing import Tuple from typing import Union -import connexion - -from gs_interactive_admin import util from gs_interactive_admin.models.job_status import JobStatus # noqa: E501 - -logger = logging.getLogger("interactive") +from gs_interactive_admin import util +from gs_interactive_admin.core.job.job_manager import get_job_manager +import logging def delete_job_by_id(job_id): # noqa: E501 @@ -39,7 +36,7 @@ def delete_job_by_id(job_id): # noqa: E501 :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ - return "do some magic!" + return get_job_manager().delete_job_by_id(job_id) def get_job_by_id(job_id): # noqa: E501 @@ -52,7 +49,9 @@ def get_job_by_id(job_id): # noqa: E501 :rtype: Union[JobStatus, Tuple[JobStatus, int], Tuple[JobStatus, int, Dict[str, str]] """ - return "do some magic!" + logging.info("Get job by id: %s", job_id) + data = get_job_manager().get_job_by_id(job_id) + return JobStatus.from_dict(data) def list_jobs(): # noqa: E501 @@ -63,4 +62,6 @@ def list_jobs(): # noqa: E501 :rtype: Union[List[JobStatus], Tuple[List[JobStatus], int], Tuple[List[JobStatus], int, Dict[str, str]] """ - return "do some magic!" 
+ ret_list = [JobStatus.from_dict(data) for data in get_job_manager().list_jobs()] + logging.info("List jobs: %s", ret_list) + return ret_list diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_procedure_management_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_procedure_management_controller.py index 90b84abe683f..ea83c2cfe42f 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_procedure_management_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_procedure_management_controller.py @@ -16,31 +16,29 @@ # limitations under the License. # -import logging +import connexion from typing import Dict from typing import Tuple from typing import Union -import connexion - -from gs_interactive_admin import util -from gs_interactive_admin.models.api_response_with_code import ( # noqa: E501 +from gs_interactive_admin.models.api_response_with_code import ( APIResponseWithCode, -) -from gs_interactive_admin.models.create_procedure_request import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.create_procedure_request import ( CreateProcedureRequest, -) -from gs_interactive_admin.models.create_procedure_response import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.create_procedure_response import ( CreateProcedureResponse, -) -from gs_interactive_admin.models.get_procedure_response import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.get_procedure_response import ( GetProcedureResponse, -) -from gs_interactive_admin.models.update_procedure_request import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.update_procedure_request import ( UpdateProcedureRequest, -) +) # noqa: E501 +from gs_interactive_admin import util -logger = logging.getLogger("interactive") +from gs_interactive_admin.core.procedure.procedure_manager import ProcedureManager, 
get_procedure_manager def create_procedure(graph_id, create_procedure_request): # noqa: E501 @@ -56,10 +54,14 @@ def create_procedure(graph_id, create_procedure_request): # noqa: E501 :rtype: Union[CreateProcedureResponse, Tuple[CreateProcedureResponse, int], Tuple[CreateProcedureResponse, int, Dict[str, str]] """ if connexion.request.is_json: - create_procedure_request = CreateProcedureRequest.from_dict( # noqa: F841 + create_procedure_request = CreateProcedureRequest.from_dict( connexion.request.get_json() - ) # noqa: E501 - return "do some magic!" + ) + return get_procedure_manager().create_procedure( + graph_id=graph_id, create_procedure_request=create_procedure_request + ) + else: + raise RuntimeError("Invalid request") def delete_procedure(graph_id, procedure_id): # noqa: E501 @@ -74,7 +76,12 @@ def delete_procedure(graph_id, procedure_id): # noqa: E501 :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ - return "do some magic!" + if connexion.request.is_json: + return get_procedure_manager().delete_procedure( + graph_id=graph_id, procedure_id=procedure_id + ) + else: + raise RuntimeError("Invalid request") def get_procedure(graph_id, procedure_id): # noqa: E501 @@ -89,7 +96,12 @@ def get_procedure(graph_id, procedure_id): # noqa: E501 :rtype: Union[GetProcedureResponse, Tuple[GetProcedureResponse, int], Tuple[GetProcedureResponse, int, Dict[str, str]] """ - return "do some magic!" + if connexion.request.is_json: + return get_procedure_manager().get_procedure( + graph_id=graph_id, procedure_id=procedure_id + ) + else: + raise RuntimeError("Invalid request") def list_procedures(graph_id): # noqa: E501 @@ -102,7 +114,10 @@ def list_procedures(graph_id): # noqa: E501 :rtype: Union[List[GetProcedureResponse], Tuple[List[GetProcedureResponse], int], Tuple[List[GetProcedureResponse], int, Dict[str, str]] """ - return "do some magic!" 
+ if connexion.request.is_json: + return get_procedure_manager().list_procedures(graph_id=graph_id) + else: + raise RuntimeError("Invalid request") def update_procedure( @@ -122,7 +137,13 @@ def update_procedure( :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ if connexion.request.is_json: - update_procedure_request = UpdateProcedureRequest.from_dict( # noqa: F841 + update_procedure_request = UpdateProcedureRequest.from_dict( connexion.request.get_json() - ) # noqa: E501 - return "do some magic!" + ) + return get_procedure_manager().update_procedure( + graph_id=graph_id, + procedure_id=procedure_id, + update_procedure_request=update_procedure_request, + ) + else: + raise RuntimeError("Invalid request") diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_service_management_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_service_management_controller.py index 94cd3a867aa5..1897f212a187 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_service_management_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_service_management_controller.py @@ -15,38 +15,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - -import logging +import connexion from typing import Dict from typing import Tuple from typing import Union -import connexion - -from gs_interactive_admin import util -from gs_interactive_admin.models.api_response_with_code import ( # noqa: E501 +from gs_interactive_admin.models.api_response_with_code import ( APIResponseWithCode, -) +) # noqa: E501 from gs_interactive_admin.models.service_status import ServiceStatus # noqa: E501 -from gs_interactive_admin.models.start_service_request import ( # noqa: E501 +from gs_interactive_admin.models.start_service_request import ( StartServiceRequest, -) -from gs_interactive_admin.models.stop_service_request import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.stop_service_request import ( StopServiceRequest, -) - -logger = logging.getLogger("interactive") - - -def check_service_ready(): # noqa: E501 - """check_service_ready - - Check if the service is ready # noqa: E501 +) # noqa: E501 +from gs_interactive_admin import util - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - return "do some magic!" +from gs_interactive_admin.core.service.service_manager import get_service_manager def get_service_status(): # noqa: E501 @@ -57,10 +43,10 @@ def get_service_status(): # noqa: E501 :rtype: Union[ServiceStatus, Tuple[ServiceStatus, int], Tuple[ServiceStatus, int, Dict[str, str]] """ - return "do some magic!" + return get_service_manager().get_service_status() -def restart_service(): # noqa: E501 +def restart_service(start_service_request=None): # noqa: E501 """restart_service Start current service # noqa: E501 @@ -68,7 +54,13 @@ def restart_service(): # noqa: E501 :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ - return "do some magic!" 
+ if connexion.request.is_json: + start_service_request = StartServiceRequest.from_dict( + connexion.request.get_json() + ) # noqa: E501 + return get_service_manager().restart_service(start_service_request) + else: + raise RuntimeError("Invalid request") def start_service(start_service_request=None): # noqa: E501 @@ -82,10 +74,12 @@ def start_service(start_service_request=None): # noqa: E501 :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ if connexion.request.is_json: - start_service_request = StartServiceRequest.from_dict( # noqa: F841 + start_service_request = StartServiceRequest.from_dict( connexion.request.get_json() ) # noqa: E501 - return "do some magic!" + return get_service_manager().start_service(start_service_request) + else: + raise RuntimeError("Invalid request") def stop_service(stop_service_request=None): # noqa: E501 @@ -99,7 +93,20 @@ def stop_service(stop_service_request=None): # noqa: E501 :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] """ if connexion.request.is_json: - stop_service_request = StopServiceRequest.from_dict( # noqa: F841 + stop_service_request = StopServiceRequest.from_dict( connexion.request.get_json() ) # noqa: E501 - return "do some magic!" 
+ return get_service_manager().stop_service(stop_service_request) + else: + raise RuntimeError("Invalid request") + + +def check_service_ready(): # noqa: E501 + """check_service_ready + + Check if service is ready # noqa: E501 + + + :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] + """ + return get_service_manager().check_service_ready() diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_service_registry_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_service_registry_controller.py index c14a513d1248..4d84c067bf80 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_service_registry_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/admin_service_service_registry_controller.py @@ -16,16 +16,19 @@ # limitations under the License. # -import logging +import connexion from typing import Dict from typing import Tuple from typing import Union -import connexion - -from gs_interactive_admin import util -from gs_interactive_admin.models.graph_service_registry_record import ( # noqa: E501 +from gs_interactive_admin.models.graph_service_registry_record import ( GraphServiceRegistryRecord, +) # noqa: E501 +from gs_interactive_admin import util +import logging + +from gs_interactive_admin.core.service_discovery.service_registry import ( + get_service_registry, ) logger = logging.getLogger("interactive") @@ -43,7 +46,10 @@ def get_service_registry_info(graph_id, service_name): # noqa: E501 :rtype: Union[GraphServiceRegistryRecord, Tuple[GraphServiceRegistryRecord, int], Tuple[GraphServiceRegistryRecord, int, Dict[str, str]] """ - return "do some magic!" 
+ logger.info(f"get_service_registry_info: {graph_id}, {service_name}") + ret = get_service_registry().discover(graph_id, service_name) + logger.info(f"get_service_registry_info: {ret}") + return GraphServiceRegistryRecord.from_dict(ret) def list_service_registry_info(): # noqa: E501 @@ -52,6 +58,8 @@ def list_service_registry_info(): # noqa: E501 List all services registry # noqa: E501 - :rtype: Union[List[GraphServiceRegistryRecord], Tuple[List[GraphServiceRegistryRecord], int], Tuple[List[GraphServiceRegistryRecord], int, Dict[str, str]] + :rtype: Union[List[List[GraphServiceRegistryRecord]], Tuple[List[List[GraphServiceRegistryRecord]], int], Tuple[List[List[GraphServiceRegistryRecord]], int, Dict[str, str]] """ - return "do some magic!" + ret = get_service_registry().list_all() + logging.info(f"list_service_registry_info: {ret}") + return [GraphServiceRegistryRecord.from_dict(x) for x in ret] diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/graph_service_edge_management_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/graph_service_edge_management_controller.py index 46821f9fbbb2..ea1b641cff37 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/controllers/graph_service_edge_management_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/graph_service_edge_management_controller.py @@ -16,108 +16,27 @@ # limitations under the License. # -import logging -from typing import Dict -from typing import Tuple -from typing import Union +# Those methods should not be implemented in AdminService, but is still kept here, cause the python flask app relies on the openpai_interactive.yaml to launch the service, which needs these function definitions. +# To create_edge/delete_edge/get_edge/add_edge/update, send requests to query service. 
+def create_edge(): + raise NotImplementedError("create_edge is not implemented in admin service, please send to query service") -import connexion -from gs_interactive_admin import util -from gs_interactive_admin.models.api_response_with_code import ( # noqa: E501 - APIResponseWithCode, -) -from gs_interactive_admin.models.delete_edge_request import ( # noqa: E501 - DeleteEdgeRequest, -) -from gs_interactive_admin.models.edge_data import EdgeData # noqa: E501 -from gs_interactive_admin.models.edge_request import EdgeRequest # noqa: E501 +def create_edge_type(): + raise NotImplementedError("create_edge_type is not implemented in admin service, please send to query service") -logger = logging.getLogger("interactive") +def delete_edge(): + raise NotImplementedError("delete_edge is not implemented in admin service, please send to query service") -def add_edge(graph_id, edge_request): # noqa: E501 - """Add edge to the graph - Add the edge to graph. # noqa: E501 +def get_edge(): + raise NotImplementedError("get_edge is not implemented in admin service, please send to query service") - :param graph_id: - :type graph_id: str - :param edge_request: - :type edge_request: list | bytes - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - if connexion.request.is_json: - edge_request = [ # noqa: F841 - EdgeRequest.from_dict(d) for d in connexion.request.get_json() - ] # noqa: E501 - return "do some magic!" +def add_edge(): + raise NotImplementedError("add_edge is not implemented in admin service, please send to query service") -def delete_edge(graph_id, delete_edge_request): # noqa: E501 - """Remove edge from the graph - - Remove the edge from current graph. # noqa: E501 - - :param graph_id: - :type graph_id: str - :param delete_edge_request: The label and primary key values of the src and dst vertices, and the edge label. 
- :type delete_edge_request: list | bytes - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - if connexion.request.is_json: - delete_edge_request = [ # noqa: F841 - DeleteEdgeRequest.from_dict(d) for d in connexion.request.get_json() - ] # noqa: E501 - return "do some magic!" - - -def get_edge( - graph_id, - edge_label, - src_label, - src_primary_key_value, - dst_label, - dst_primary_key_value, -): # noqa: E501 - """Get the edge's properties with src and dst vertex primary keys. - - Get the properties for the specified vertex. # noqa: E501 - - :param graph_id: - :type graph_id: str - :param edge_label: The label name of querying edge. - :type edge_label: str - :param src_label: The label name of src vertex. - :type src_label: str - :param src_primary_key_value: The primary key value of src vertex. - :type src_primary_key_value: dict | bytes - :param dst_label: The label name of dst vertex. - :type dst_label: str - :param dst_primary_key_value: The value of dst vertex's primary key - :type dst_primary_key_value: dict | bytes - - :rtype: Union[EdgeData, Tuple[EdgeData, int], Tuple[EdgeData, int, Dict[str, str]] - """ - return "do some magic!" - - -def update_edge(graph_id, edge_request): # noqa: E501 - """Update edge's property - - Update the edge on the running graph. # noqa: E501 - - :param graph_id: - :type graph_id: str - :param edge_request: - :type edge_request: list | bytes - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - if connexion.request.is_json: - edge_request = [ # noqa: F841 - EdgeRequest.from_dict(d) for d in connexion.request.get_json() - ] # noqa: E501 - return "do some magic!" 
+def update_edge(): + raise NotImplementedError("update_edge is not implemented in admin service, please send to query service") diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/graph_service_vertex_management_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/graph_service_vertex_management_controller.py index a6d3cb1776ee..1045ead858a2 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/controllers/graph_service_vertex_management_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/graph_service_vertex_management_controller.py @@ -16,97 +16,31 @@ # limitations under the License. # -import logging -from typing import Dict -from typing import Tuple -from typing import Union +# Those methods should not be implemented in AdminService, but is still kept here, cause the python flask app relies on the openpai_interactive.yaml to launch the service, which needs these function definitions. +# To create_vertex/delete_vertex/get_vertex/add_vertex/update, send requests to query service. 
+def get_vertex(): + raise NotImplementedError("get_vertex is not implemented in admin service, please send to query service") -import connexion -from gs_interactive_admin import util -from gs_interactive_admin.models.api_response_with_code import ( # noqa: E501 - APIResponseWithCode, -) -from gs_interactive_admin.models.delete_vertex_request import ( # noqa: E501 - DeleteVertexRequest, -) -from gs_interactive_admin.models.vertex_data import VertexData # noqa: E501 -from gs_interactive_admin.models.vertex_edge_request import ( # noqa: E501 - VertexEdgeRequest, -) +def create_vertex(): + raise NotImplementedError("create_vertex is not implemented in admin service, please send to query service") -logger = logging.getLogger("interactive") +def delete_vertex(): + raise NotImplementedError("delete_vertex is not implemented in admin service, please send to query service") -def add_vertex(graph_id, vertex_edge_request): # noqa: E501 - """Add vertex (and edge) to the graph - Add the provided vertex (and edge) to the specified graph. # noqa: E501 +def update_vertex(): + raise NotImplementedError("update_vertex is not implemented in admin service, please send to query service") - :param graph_id: - :type graph_id: str - :param vertex_edge_request: - :type vertex_edge_request: dict | bytes - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - if connexion.request.is_json: - vertex_edge_request = VertexEdgeRequest.from_dict( # noqa: F841 - connexion.request.get_json() - ) # noqa: E501 - return "do some magic!" +def create_vertex_type(): + raise NotImplementedError("create_vertex_type is not implemented in admin service, please send to query service") -def delete_vertex(graph_id, delete_vertex_request): # noqa: E501 - """Remove vertex from the graph +def delete_vertex_type(): + raise NotImplementedError("delete_vertex_type is not implemented in admin service, please send to query service") - Remove the vertex from the specified graph. 
# noqa: E501 - :param graph_id: - :type graph_id: str - :param delete_vertex_request: The label and primary key values of the vertex to be deleted. - :type delete_vertex_request: list | bytes - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - if connexion.request.is_json: - delete_vertex_request = [ # noqa: F841 - DeleteVertexRequest.from_dict(d) for d in connexion.request.get_json() - ] # noqa: E501 - return "do some magic!" - - -def get_vertex(graph_id, label, primary_key_value): # noqa: E501 - """Get the vertex's properties with vertex primary key. - - Get the properties for the specified vertex. example: ```http GET /endpoint?param1=value1&param2=value2 HTTP/1.1 Host: example.com ``` # noqa: E501 - - :param graph_id: The id of the graph - :type graph_id: str - :param label: The label name of querying vertex. - :type label: str - :param primary_key_value: The primary key value of querying vertex. - :type primary_key_value: dict | bytes - - :rtype: Union[VertexData, Tuple[VertexData, int], Tuple[VertexData, int, Dict[str, str]] - """ - return "do some magic!" - - -def update_vertex(graph_id, vertex_edge_request): # noqa: E501 - """Update vertex's property - - Update the vertex with the provided properties to the specified graph. # noqa: E501 - - :param graph_id: - :type graph_id: str - :param vertex_edge_request: - :type vertex_edge_request: dict | bytes - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - if connexion.request.is_json: - vertex_edge_request = VertexEdgeRequest.from_dict( # noqa: F841 - connexion.request.get_json() - ) # noqa: E501 - return "do some magic!" 
+def add_vertex(): + raise NotImplementedError("add_vertex is not implemented in admin service, please send to query service") diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/query_service_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/query_service_controller.py index 78f13e10c456..fe2daf72eecd 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/controllers/query_service_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/query_service_controller.py @@ -16,69 +16,19 @@ # limitations under the License. # -from typing import Dict -from typing import Tuple -from typing import Union +# Those methods should not be implemented in AdminService, but is still kept here, cause the python flask app relies on the openpai_interactive.yaml to launch the service, which needs these function definitions. +# To run_adhoc_current/call_proc_current/call_proc/run_adhoc, send requests to query service. +def run_adhoc_current(): + raise NotImplementedError("run_adhoc_current is not implemented in admin service, please send to query service") -import connexion -from gs_interactive_admin import util -from gs_interactive_admin.models.api_response_with_code import ( # noqa: E501 - APIResponseWithCode, -) +def call_proc_current(): + raise NotImplementedError("call_proc_current is not implemented in admin service, please send to query service") -def call_proc(graph_id, body=None): # noqa: E501 - """run queries on graph +def call_proc(): + raise NotImplementedError("call_proc is not implemented in admin service, please send to query service") - After the procedure is created, user can use this API to run the procedure. # noqa: E501 - :param graph_id: - :type graph_id: str - :param body: - :type body: str - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - return "do some magic!" 
- - -def call_proc_current(body=None): # noqa: E501 - """run queries on the running graph - - Submit a query to the running graph. # noqa: E501 - - :param body: - :type body: str - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - return "do some magic!" - - -def run_adhoc(graph_id, body=None): # noqa: E501 - """Submit adhoc query to the Interactive Query Service. - - Submit a adhoc query to the running graph. The adhoc query should be represented by the physical plan: https://github.com/alibaba/GraphScope/blob/main/interactive_engine/executor/ir/proto/physical.proto # noqa: E501 - - :param graph_id: - :type graph_id: str - :param body: - :type body: str - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - return "do some magic!" - - -def run_adhoc_current(body=None): # noqa: E501 - """Submit adhoc query to the Interactive Query Service. - - Submit a adhoc query to the running graph. The adhoc query should be represented by the physical plan: https://github.com/alibaba/GraphScope/blob/main/interactive_engine/executor/ir/proto/physical.proto # noqa: E501 - - :param body: - :type body: str - - :rtype: Union[str, Tuple[str, int], Tuple[str, int, Dict[str, str]] - """ - return "do some magic!" 
+def run_adhoc(): + raise NotImplementedError("run_adhoc is not implemented in admin service, please send to query service") diff --git a/flex/interactive/sdk/master/gs_interactive_admin/controllers/utils_controller.py b/flex/interactive/sdk/master/gs_interactive_admin/controllers/utils_controller.py index 7d5b6ea4dbb2..741550e99e8d 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/controllers/utils_controller.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/controllers/utils_controller.py @@ -20,19 +20,17 @@ from typing import Tuple from typing import Union -import connexion - -from gs_interactive_admin import util -from gs_interactive_admin.models.api_response_with_code import ( # noqa: E501 +from gs_interactive_admin.models.api_response_with_code import ( APIResponseWithCode, -) -from gs_interactive_admin.models.upload_file_response import ( # noqa: E501 +) # noqa: E501 +from gs_interactive_admin.models.upload_file_response import ( UploadFileResponse, -) +) # noqa: E501 +from gs_interactive_admin.file_utils import upload_file_impl def upload_file(filestorage=None): # noqa: E501 - """upload_file + """upload_file. In k8s deployment, we may need to upload to a oss bucket, then download to the pod. # noqa: E501 @@ -41,4 +39,4 @@ def upload_file(filestorage=None): # noqa: E501 :rtype: Union[UploadFileResponse, Tuple[UploadFileResponse, int], Tuple[UploadFileResponse, int, Dict[str, str]] """ - return "do some magic!" 
+ return upload_file_impl(filestorage) diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/__init__.py b/flex/interactive/sdk/master/gs_interactive_admin/core/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/config.py b/flex/interactive/sdk/master/gs_interactive_admin/core/config.py new file mode 100644 index 000000000000..086f229d4d34 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/config.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from dataclasses import dataclass +from dataclasses import field +from typing import Union + +from simple_parsing import ArgumentParser +from simple_parsing.helpers import Serializable +from simple_parsing.helpers import list_field + +import os + +OSS_BUCKET_NAME = os.getenv("OSS_BUCKET_NAME", "graphscope") +OSS_BUCKET_DATA_DIR = os.getenv("OSS_BUCKET_DATA_DIR", "interactive") +OSS_ACCESS_KEY_ID = os.getenv("OSS_ACCESS_KEY_ID", "") +OSS_ACCESS_KEY_SECRET = os.getenv("OSS_ACCESS_KEY_SECRET", "") +OSS_ENDPOINT = os.getenv("OSS_ENDPOINT", "oss-cn-beijing.aliyuncs.com") +OSS_BUCKET_NAME = os.getenv("OSS_BUCKENT_NAME", "graphscope") + +INTERACTIVE_WORKSPACE = os.environ.get( + "INTERACTIVE_WORKSPACE", "/tmp/interactive_workspace" +) + +# The name of the script to load plan and generate code. +CODE_GEN_BIN = "load_plan_and_gen.sh" +CODE_GEN_TMP_DIR = os.environ.get("INTERACTIVE_CODE_GEN_WORKDIR", "/tmp/interactive_workspace/codegen") + + +@dataclass +class MetadataStore: + """ + Stores configurations for the metadata store. + """ + + uri: str = "" + + +@dataclass +class ComputeEngine: + """ + Stores configurations for the compute engine. + """ + + engine: str = "vineyard" + vineyard_socket: str = "vineyard.default" + vineyard_rpc_endpoint: str = "" + + thread_num_per_worker: int = 1 + memory_per_worker: str = "4Gi" + + metadata_store: MetadataStore = field(default_factory=MetadataStore) + wal_uri: str = f"file://{{GRAPH_DATA_DIR}}/wal" + + config_file_mount_path: str = "/opt/flex/share/interactive_config.yaml" + entrypoint_mount_path: str = "/etc/interactive/engine_entrypoint.sh" + + +@dataclass +class HttpService: + """ + Stores configurations for the http service. + """ + + default_listen_address: str = "localhost" + admin_port: int = 7777 + query_port: int = 10000 + max_content_length: str = "1GB" + + +@dataclass +class ServiceRegistry: + """ + Stores configurations for the service registry. 
+ """ + + type: str = "etcd" + endpoint: str = "http://localhost:2379" + ttl: int = 60 + + +@dataclass +class K8sLauncherConfig: + """ + Stores configurations for the k8s launcher. + """ + + # The namespace must be created before launching the interactive engine. + namespace: Union[str, None] = "default" + instance_prefix: str = "gs-interactive" + instance_id: str = "" # If instance_id is not empty, the launcher will use it as the instance_id. + default_replicas: int = 1 + config_file: Union[str, None] = None + + image_pull_policy: str = "Always" + image_registry: str = "registry.cn-hongkong.aliyuncs.com" + image_tag: str = "debug" + repository: str = "graphscope" + image_name: str = "interactive" + + default_container_name: str = "interactive" + + volume_claim_name: str = "interactive-workspace" + volume_mount_path: str = "/tmp/interactive" + volume_size: str = "1Gi" + volume_access_mode: str = "ReadWriteOnce" + volume_storage_class: str = "standard" + + node_selectors: dict = field(default_factory=dict) + affinity: dict = field(default_factory=dict) + tolerations: list = field(default_factory=list) + annotations: dict = field(default_factory=dict) + + service_type: str = "NodePort" + cluster_ip: str = "" # If service_type is ClusterIP, user could specify the cluster_ip + + update_strategy: str = "RollingUpdate" + engine_pod_annotations: dict = field(default_factory=dict) + service_account_name: str = "" + + engine_config_file_mount_path: str = "/opt/flex/share/interactive_config.yaml" + engine_entrypoint_mount_path: str = "/etc/interactive/engine_entrypoint.sh" + + +@dataclass +class Master: + port: int = 7776 + instance_name: str = "test" + service_registry: ServiceRegistry = field(default_factory=ServiceRegistry) + + k8s_launcher_config: K8sLauncherConfig = field(default_factory=K8sLauncherConfig) + launcher_type: str = "k8s" + entrypoint_mount_path: str = "/etc/interactive/master_entrypoint.sh" + config_file_mount_path: str = 
"/opt/flex/share/interactive_config.yaml" + + +@dataclass +class ConnectorConfig: + disabled: bool = False + port: int = 7687 + + +@dataclass +class CompilerEndpoint: + default_listen_address: str = "localhost" + bolt_connector: ConnectorConfig = field(default_factory=ConnectorConfig) + gremlin_connector: ConnectorConfig = ConnectorConfig(disabled=True, port=8182) + + +@dataclass +class ReaderUri: + uri: str = "" + interval : int = 1000 # ms + +@dataclass +class CompilerMetaReader: + schema: ReaderUri = field(default_factory=ReaderUri) + statistics: ReaderUri = field(default_factory=ReaderUri) + timeout: int = 1000 # ms + +@dataclass +class CompilerMeta: + reader : CompilerMetaReader = field(default_factory=CompilerMetaReader) + +@dataclass +class PlannerConfig: + is_on: bool = True + opt: str = "RBO" + rules: list = field(default_factory=list) + +@dataclass +class CompilerConfig: + endpoint: CompilerEndpoint = field(default_factory=CompilerEndpoint) + meta : CompilerMeta = field(default_factory=CompilerMeta) + planner: PlannerConfig = field(default_factory=PlannerConfig) + query_timeout: int = 40000 # ms + gremlin_script_language_name : str = "antlr_gremlin_calcite" + + +@dataclass +class Config(Serializable): + """ + Stores all configurations for Interactive. 
Corresponding to the yaml file https://github.com/alibaba/GraphScope/blob/main/flex/tests/hqps/interactive_config_standalone.yaml + """ + + log_level: str = "INFO" + verbose_level: int = 0 + + compute_engine: ComputeEngine = field(default_factory=ComputeEngine) + + compiler: CompilerConfig = field(default_factory=CompilerConfig) + + http_service: HttpService = field(default_factory=HttpService) + + workspace: str = "/tmp/interactive_workspace" + + master: Master = field(default_factory=Master) diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/job/__init__.py b/flex/interactive/sdk/master/gs_interactive_admin/core/job/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/job/job_manager.py b/flex/interactive/sdk/master/gs_interactive_admin/core/job/job_manager.py new file mode 100644 index 000000000000..5d94c111f004 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/job/job_manager.py @@ -0,0 +1,292 @@ +from gs_interactive_admin.core.metadata.metadata_store import IMetadataStore +from gs_interactive_admin.core.config import ( + Config, + OSS_BUCKET_NAME, + OSS_BUCKET_DATA_DIR, +) +from gs_interactive_admin.util import remove_nones, SubProcessRunner, OssReader, get_current_time_stamp_ms +from abc import ABCMeta +from abc import abstractmethod +import os +import shutil + +import logging +import subprocess +import time +import yaml + +logger = logging.getLogger("interactive") + + +class JobProcessCallback(object): + """ + This class is used as a callback function for the data loading subprocess. 
+ """ + + def __init__( + self, meta_store, graph_id, process_id, log_path, job_id, oss_graph_path + ): + self.metadata_store : IMetadataStore = meta_store + self.graph_id = graph_id + self.process_id = process_id + self.log_path = log_path + self.job_id = job_id + self.oss_graph_path = oss_graph_path + + def _update_remote_storage_path(self): + """ + This method should be called when the job is successfully finished. + We need to update the remote storage path of the graph, such that the graph can be accessed by the users. + """ + logger.info("Update remote storage path of the graph") + + def _update_remote_storage_path_of_graph(graph_meta: str): + old_meta = yaml.safe_load(graph_meta) + old_name = None + old_path = None + logger.info("old meta: %s", old_meta) + if "remote_path" in old_meta: + old_path = old_meta["remote_path"] + logger.info(f"old path: {old_path}") + + if old_path and old_path.startswith("oss://"): + # Get the object name + split_paths = old_path[5:].split("/") + old_name = split_paths[-1] + if old_name: + # new name should be larger than the old name in timestamp + new_name = self.oss_graph_path.split("/")[-1] + if new_name <= old_name: + logger.warning( + f"New path {self.oss_graph_path} is not larger than the old path {old_path}" + ) + return graph_meta + else: + old_meta["remote_path"] = self.oss_graph_path + res = yaml.dump(old_meta) + logger.info("new meta: %s", res) + return res + + self.metadata_store.update_graph_meta_with_func( + self.graph_id, _update_remote_storage_path_of_graph + ) + + def _update_graph_statistics(self): + # The statistics should be reported by bulk loader process by writing to a remote oss file. In master, we download the oss file and insert the content into metadata store. 
+ reader = OssReader() + logger.info(f"Update graph statistics for graph {self.graph_id}") + try: + statistics = reader.read(self.oss_graph_path + "_statistics.json") + self.metadata_store.create_graph_statistics(self.graph_id, statistics) + except Exception as e: + logger.error(f"Failed to update graph statistics: {e}") + + + def __call__(self, process: subprocess.CompletedProcess): + logger.info( + f"Job process {self.process_id} finished with code {process.returncode}" + ) + if process.returncode == 0: + status = "SUCCESS" + else: + status = "FAILED" + job_meta = { + "graph_id": self.graph_id, + "process_id": self.process_id, + "log": "@" + self.log_path, + "status": status, + "end_time": get_current_time_stamp_ms(), + "type": "BULK_LOADING", + } + logger.info(f"Update Job meta: {job_meta}") + self.res_code = self.metadata_store.update_job_meta( + job_id=self.job_id, job_meta=job_meta + ) + logger.info(f"Job meta Update with id {self.res_code}") + + # We should also update graph meta to update the remote storage path of the graph. 
+ if status == "SUCCESS": + self._update_remote_storage_path() + self._update_graph_statistics() + + +class JobManager(metaclass=ABCMeta): + def __init__(self, config: Config, metadata_store: IMetadataStore): + self.metadata_store = metadata_store + + @abstractmethod + def list_jobs(self): + pass + + @abstractmethod + def get_job_by_id(self, job_id): + pass + + @abstractmethod + def delete_job_by_id(self, job_id): + pass + + @abstractmethod + def create_dataloading_job(self, graph_id, schema_mapping): + pass + + +class DefaultJobManager(JobManager): + def __init__(self, config: Config, metadata_store: IMetadataStore): + super().__init__(config, metadata_store) + self._data_loading_processes = {} + self._process_call_backs = {} + + def list_jobs(self): + return self.metadata_store.get_all_job_meta() + + def get_job_by_id(self, job_id) -> dict: + job_meta_str = self.metadata_store.get_job_meta(job_id) + # convert the string to dict + data = yaml.load(job_meta_str, Loader=yaml.FullLoader) + logger.info(f"Get job by id: {job_id}, data: {data}") + if "log" in data: + if data["log"].startswith("@"): + log_path = data["log"][1:] + with open(log_path, "r") as f: + data["log"] = f.read() + return data + + def delete_job_by_id(self, job_id): + if job_id in self._data_loading_processes: + logger.info(f"Terminating job {job_id}") + self._data_loading_processes[job_id].terminate() + return f"Successfully deleted job {job_id}." + + def create_dataloading_job(self, graph_id, schema_mapping): + """ + Create a dataloading job which running in a child process. 
+ """ + + bulk_loader = self._get_bulk_loader() + temp_mapping_file, schema_mapping = self._dump_schema_mapping( + graph_id, schema_mapping + ) + temp_graph_file = self._dump_graph_schema(graph_id) + + # Create a log file for the process + log_path = os.path.join("/tmp", graph_id, "bulk_loader.log") + if ( + "loading_config" in schema_mapping + and "destination" in schema_mapping["loading_config"] + ): + oss_graph_path = schema_mapping["loading_config"]["destination"] + else: + cur_time_stamp = get_current_time_stamp_ms() + oss_graph_path = f"oss://{OSS_BUCKET_NAME}/{OSS_BUCKET_DATA_DIR}/{graph_id}/{cur_time_stamp}" + logger.info(f"oss_graph_path: {oss_graph_path}") + + cmds = [ + bulk_loader, + "-l", + temp_mapping_file, + "-g", + temp_graph_file, + "-d", + oss_graph_path, # The path where the graph data is stored + ] + logger.info(f"Running bulk loader with command {cmds}") + job_meta = self._new_job_meta( + graph_id=graph_id, + process_id=0, + log_path=log_path, + type="BULK_LOADING", + status="RUNNING", + ) + # Check whether a bulk loading process is already running, if so, return failed + for job_id, process in self._data_loading_processes.items(): + if process.is_alive(): + if process.graph_id == graph_id: + logger.info(f"Job {job_id} is already running for graph {graph_id}") + raise Exception( + f"Job {job_id} is already running for graph {graph_id}" + ) + job_id = self.metadata_store.create_job_meta(str(job_meta)) + logger.info(f"Data loading job created with {job_meta}") + runner = SubProcessRunner( + graph_id, + cmds, + JobProcessCallback( + self.metadata_store, graph_id, 0, log_path, job_id, oss_graph_path + ), + log_path, + ) + self._data_loading_processes[job_id] = runner + + runner.start() + logger.info(f"Job id {job_id} created for data loading job") + return job_id + + def _get_bulk_loader(self): + """ + Try to find the bulk loader in the current environment. 
+ """ + # First try find from PATH + if shutil.which("bulk_loader"): + return "bulk_loader" + # Then try to find from /opt/flex/bin and /opt/graphscope/bin, check it is excutable + if os.path.exists("/opt/flex/bin/bulk_loader") and os.access( + "/opt/flex/bin/bulk_loader", os.X_OK + ): + return "/opt/flex/bin/bulk_loader" + if os.path.exists("/opt/graphscope/bin/bulk_loader") and os.access( + "/opt/graphscope/bin/bulk_loader", os.X_OK + ): + return "/opt/graphscope/bin/bulk_loader" + + # Then try to find via the relative path, works for local development + relative_path = os.path.join( + os.path.dirname(__file__), "../../../../../../build/bin/bulk_loader" + ) + if os.path.exists(relative_path) and os.access(relative_path, os.X_OK): + return relative_path + raise RuntimeError("Cannot find bulk_loader in the current environment.") + + def _dump_schema_mapping(self, graph_id, schema_mapping): + # dump the schema_mapping to a temp file + schema_mapping = remove_nones(schema_mapping) + logger.info("schema mapping: %s", schema_mapping) + os.makedirs(os.path.join("/tmp", graph_id), exist_ok=True) + temp_mapping_file = os.path.join("/tmp", graph_id, "schema_mapping.yaml") + with open(temp_mapping_file, "w") as f: + # write the dict in yaml format + yaml.dump(schema_mapping, f) + return (temp_mapping_file, schema_mapping) + + def _dump_graph_schema(self, graph_id): + graph_metadata = self.metadata_store.get_graph_meta(graph_id) + logger.info("graph metadata: %s", graph_metadata) + temp_graph_file = os.path.join("/tmp", graph_id, "graph.yaml") + with open(temp_graph_file, "w") as f: + yaml.dump(graph_metadata, f, default_flow_style=False) + return temp_graph_file + + def _new_job_meta(self, graph_id, process_id, log_path, type, status): + return { + "graph_id": graph_id, + "process_id": process_id, + "log": "@" + log_path, + "status": status, + # in milliseconds timestamp + "start_time": get_current_time_stamp_ms(), + "end_time": 0, + "type": type, + } + + +job_manager = 
None + + +def get_job_manager(): + global job_manager + return job_manager + + +def init_job_manager(config: Config, metadata_store: IMetadataStore): + global job_manager + job_manager = DefaultJobManager(config, metadata_store) diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/__init__.py b/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/abstract_launcher.py b/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/abstract_launcher.py new file mode 100644 index 000000000000..d0e403a105f2 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/abstract_launcher.py @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from abc import ABCMeta +from abc import abstractmethod + +from gs_interactive_admin.core.config import Config + + +class InteractiveCluster(metaclass=ABCMeta): + """ + An abstraction for the interactive cluster, built by K8sLauncher, and expose the common operations to start, stop. + """ + + def __init__(self): + pass + + def __del__(self): + self.stop() + + @abstractmethod + def start(self): + """ + Start the cluster. + """ + pass + + @abstractmethod + def stop(self): + """ + Stop the cluster. 
+ """ + + @abstractmethod + def wait_pods_ready(self, timeout: int = 600): + """ + Wait until the service is ready. + """ + pass + + +class ILauncher(metaclass=ABCMeta): + """ + Define the interface for the launcher, Which is used to launch new deployments. + TODO: currently use graph_id as the unique identifier for the deployment, + but it may be changed to a more general identifier in the future. + """ + + @abstractmethod + def __init__(self, config: Config): + """ + Initialize the launcher. + """ + pass + + @abstractmethod + def launch_cluster(self, graph_id: str, config: Config) -> InteractiveCluster: + """ + Launch an interactive engine. return the cluster name. + """ + pass + + @abstractmethod + def update_cluster(self, graph_id: str, config: Config) -> bool: + """ + Update the cluster. For example, increase or decrease the number of replicas. + """ + pass + + @abstractmethod + def delete_cluster(self, graph_id: str) -> bool: + """ + Delete the cluster. + """ + pass + + @abstractmethod + def get_cluster_status(self, graph_id: str) -> str: + """ + Get the status of the cluster. + """ + pass + + @abstractmethod + def get_all_clusters(self) -> list: + """ + Get all the clusters. + """ + pass diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/k8s_launcher.py b/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/k8s_launcher.py new file mode 100644 index 000000000000..894db8423817 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/k8s_launcher.py @@ -0,0 +1,684 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from abc import ABCMeta +import os +from abc import abstractmethod +import logging +import json + +import random +import string +import yaml + +from gs_interactive_admin.util import convert_str_to_k8s_valid + +logger = logging.getLogger("interactive") + + +from kubernetes import client as kube_client +from kubernetes import config as kube_config +from kubernetes import watch as kube_watch +from kubernetes.client import AppsV1Api +from kubernetes.client import CoreV1Api +from kubernetes.client.rest import ApiException as K8SApiException +from kubernetes.config import ConfigException as K8SConfigException + + +from gs_interactive_admin.core.config import Config +from gs_interactive_admin.core.launcher.abstract_launcher import ( + InteractiveCluster, + ILauncher, +) +from gs_interactive_admin.core.launcher.k8s_utils import resolve_api_client +from gs_interactive_admin.version import __version__ + + +class InteractiveK8sCluster(InteractiveCluster): + def __init__(self, graph_id: str, config: Config): + super().__init__() + + self._started = False + self._graph_id = graph_id + self._namespace = config.master.k8s_launcher_config.namespace + self._instance_prefix = config.master.k8s_launcher_config.instance_prefix + # The instance name of the master + self._instance_id = f"{self._instance_prefix}-graph-{self._graph_id}" + self._master_instance_id = config.master.instance_name + logger.info(f"instance id {self._instance_id}") + self._config_file = config.master.k8s_launcher_config.config_file + self._default_replicas = 
config.master.k8s_launcher_config.default_replicas + self._config = config + self._initialized = True + + self._image_pull_policy = config.master.k8s_launcher_config.image_pull_policy + self._image_tag = config.master.k8s_launcher_config.image_tag + self._image_registry = config.master.k8s_launcher_config.image_registry + self._repository = config.master.k8s_launcher_config.repository + self._image_name = config.master.k8s_launcher_config.image_name + self._default_container_name = ( + config.master.k8s_launcher_config.default_container_name + ) + + self._cpu_request = config.compute_engine.thread_num_per_worker + self._cpu_limit = config.compute_engine.thread_num_per_worker + self._memory_request = config.compute_engine.memory_per_worker + self._memory_limit = config.compute_engine.memory_per_worker + self._node_selectors = config.master.k8s_launcher_config.node_selectors + self._annotations = config.master.k8s_launcher_config.annotations + self._engine_pod_annotations = ( + config.master.k8s_launcher_config.engine_pod_annotations + ) + self._workspace = config.workspace + + self._admin_port = config.http_service.admin_port + self._query_port = config.http_service.query_port + self._cypher_port = config.compiler.endpoint.bolt_connector.port + + self._service_type = config.master.k8s_launcher_config.service_type + self._cluster_ip = config.master.k8s_launcher_config.cluster_ip + + self._update_strategy = config.master.k8s_launcher_config.update_strategy + + self._service_account_name = ( + config.master.k8s_launcher_config.service_account_name + ) + + self._engine_config_file_mount_path = ( + config.master.k8s_launcher_config.engine_config_file_mount_path + ) + self._engine_entrypoint_mount_path = ( + config.master.k8s_launcher_config.engine_entrypoint_mount_path + ) + + # Some preprocessing + if self._config_file is not None: + self._config_file = os.environ.get("KUBECONFIG", "~/.kube/config") + + self._api_client = resolve_api_client(self._config_file) + 
self._core_api = kube_client.CoreV1Api(self._api_client) + self._apps_api = kube_client.AppsV1Api(self._api_client) + + @property + def namespace(self): + return self._namespace + + @property + def image_full_name(self): + return f"{self._image_registry}/{self._repository}/{self._image_name}:{self._image_tag}" + + @property + def instance_id(self): + # Full name + return self._instance_id + + @property + def master_instance_id(self): + return self._master_instance_id + + @property + def master_config_map_name(self): + """ + The master_instance_id is like {deployment_name}-{master}. We need to remote the master + """ + deployment_name_list = self.master_instance_id.split("-")[:-1] + deployment_name = "-".join(deployment_name_list) + return f"{deployment_name}-config" + + @property + def config_map_name(self): + return f"{self.instance_id}-config" + + @property + def engine_stateful_set_name(self): + return f"{self.instance_id}-engine" + + @property + def engine_service_name(self): + return f"{self.engine_stateful_set_name}-headless" + + @property + def node_selectors(self): + return self._node_selectors + + @property + def admin_port(self): + return self._admin_port + + @property + def query_port(self): + return self._query_port + + @property + def engine_pod_annotations(self): + return self._engine_pod_annotations + + @property + def service_account_name(self): + return self._service_account_name + + @property + def cypher_port(self): + return self._cypher_port + + @property + def update_strategy(self): + return self._update_strategy + + @property + def statefulset_labels(self): + return { + "app.kubernetes.io/name": "graphscope-interactive", + "app.kubernetes.io/instance": self.instance_id, + "app.kubernetes.io/version": __version__, + "app.kubernetes.io/component": "engine", + } + + @property + def selector_labels(self): + return { + "app.kubernetes.io/name": "graphscope-interactive", + "app.kubernetes.io/instance": self.instance_id, + } + + @property + def 
engine_config_file_mount_path(self): + return self._engine_config_file_mount_path + + @property + def engine_entrypoint_mount_path(self): + return self._engine_entrypoint_mount_path + + @property + def annotations(self): + return self._annotations + + @property + def service_type(self): + return self._service_type + + @property + def cluster_ip(self): + return self._cluster_ip + + @property + def engine_envs(self): + envs = os.environ.copy() + return [kube_client.V1EnvVar(name=k, value=v) for k, v in envs.items()] + + def start(self, custom_graph_schema_mount_path, custom_graph_statistics_mount_path, additional_config_map=None): + """ + Start the cluster. + + full_config_map: A key value pair, which will be added to the configmap. The key should be a string, and the value should be a string or a dict. + """ + # self.stop() + + logger.info( + f"Creating the interactive cluster with image {self.image_full_name}" + ) + self._create_config_map(custom_graph_schema_mount_path, custom_graph_statistics_mount_path, additional_config_map) + self._create_interactive_server_sts( + custom_graph_schema_mount_path, additional_config_map + ) + self._create_interactive_service() + + # We just need to create the stateful set. No load balancer is needed. + self._started = True + return True + + def stop(self): + """ + Stop the cluster. 
Exit the stateful set and pods + """ + # if not self._started: + # return True + logger.info( + f"Stopping the interactive cluster {self.instance_id}, namespace {self.namespace}, stateful set {self.engine_stateful_set_name}" + ) + + self._apps_api.delete_namespaced_stateful_set( + name=self.engine_stateful_set_name, + namespace=self.namespace, + body=kube_client.V1DeleteOptions(grace_period_seconds=0), + ) + + self._core_api.delete_namespaced_service( + name=self.engine_service_name, + namespace=self.namespace, + body=kube_client.V1DeleteOptions(grace_period_seconds=0), + ) + + self._core_api.delete_namespaced_config_map( + name=self.config_map_name, + namespace=self.namespace, + body=kube_client.V1DeleteOptions(grace_period_seconds=0), + ) + + self._core_api.delete_persistent_volume_claim( + name=self._config.master.k8s_launcher_config.volume_claim_name, + namespace=self.namespace, + body=kube_client.V1DeleteOptions(grace_period_seconds=0), + ) + + def is_ready(self): + """ + Check whether the cluster is ready. + """ + stateful_set = self._apps_api.read_namespaced_stateful_set( + name=self.engine_stateful_set_name, namespace=self.namespace + ) + logger.info( + f"Stateful set ready replicas: {stateful_set.status.ready_replicas}, desired replicas: {stateful_set.spec.replicas}" + ) + return stateful_set.status.ready_replicas == stateful_set.spec.replicas + + def wait_pods_ready(self, timeout: int = 600): + """ + Wait for the pods to be ready. 
+ """ + w = kube_watch.Watch() + try: + for event in w.stream( + self._apps_api.list_namespaced_stateful_set, + namespace=self.namespace, + label_selector=f"app.kubernetes.io/instance={self.instance_id}", + timeout_seconds=timeout, + ): + logger.info(f"Event: {event}") + except Exception as e: + logger.error( + f"Failed to watch the stateful set {self.engine_stateful_set_name}, error: {e}" + ) + return False + finally: + w.stop() + + def _create_config_map(self, custom_graph_mount_path, custom_graph_statistics_mount_path, additional_config_map: list[tuple]): + logger.info(f"Creating config map for the interactive servers") + full_config_map = {} + + #logger.info(f"new config {config}") + # full_config_map["interactive_config.yaml"] = yaml.dump(config) + # full_config_map["engine_entrypoint.sh"] = f""" + # #!/bin/bash + # # This should be the entrypoint of the engine instance + # echo "Starting engine instance..." + # echo "using configfile: {self.engine_config_file_mount_path}" + # echo "Workspace: {self._workspace} " + # mkdir -p {self._workspace}/conf + # # /opt/flex/bin/entrypoint.sh -w {self._workspace} + # sleep infinity + # """, + # full_config_map["engine_entrypoint.sh"] = ( + # "echo 'Starting engine instance...'; echo 'using configfile: /tmp/interactive/workspace/interactive_config.yaml'; echo 'Workspace: /tmp/interactive/workspace'; mkdir -p /tmp/interactive/workspace/conf; sleep infinity" + # ) + if additional_config_map is not None: + # additional_config_map is a list of tuple, each tuple contains the key(configName), file_path, and the content + for key, file_path, sub_path, content in additional_config_map: + full_config_map[key] = content + logger.info(f"full config map {full_config_map}") + json_str = json.dumps(full_config_map) + logger.info(f"json str {json_str}") + + config_map = kube_client.V1ConfigMap( + api_version="v1", + kind="ConfigMap", + metadata=kube_client.V1ObjectMeta( + name=self.config_map_name, + namespace=self.namespace, + 
labels=self.statefulset_labels, + ), + data=full_config_map, + ) + logger.info(f"config map {config_map}") + resp = self._core_api.create_namespaced_config_map( + namespace=self.namespace, body=config_map + ) + logger.info(f"Config map created. resp={resp}") + + def _create_interactive_service(self): + """ + Create the service for the interactive servers, the service type is by default NodePort. + """ + service_name = self.engine_service_name + logger.info(f"Creating service {service_name}") + service = kube_client.V1Service( + api_version="v1", + kind="Service", + metadata=kube_client.V1ObjectMeta( + name=service_name, + namespace=self.namespace, + labels=self.statefulset_labels, + ), + spec=kube_client.V1ServiceSpec( + type=self.service_type, + selector=self.selector_labels, + ports=[ + kube_client.V1ServicePort( + name="admin-port", + port=self.admin_port, + target_port=self.admin_port, + ), + kube_client.V1ServicePort( + name="query-port", + port=self.query_port, + target_port=self.query_port, + ), + kube_client.V1ServicePort( + name="cypher-port", + port=self.cypher_port, + target_port=self.cypher_port, + ), + ], + ), + ) + if self.service_type == "ClusterIP" and self.cluster_ip is not None: + service.spec.cluster_ip = self.cluster_ip + + resp = self._core_api.create_namespaced_service( + namespace=self.namespace, body=service + ) + logger.info(f"Service created. resp={resp}") + + def _create_interactive_server_sts( + self, custom_graph_file_mount_path, additional_config_map: list[tuple] + ): + stateful_set = self._generate_engine_stateful_set( + custom_graph_file_mount_path, additional_config_map=additional_config_map + ) + logger.info(f"Succeed to create stateful set {stateful_set.metadata}") + + logger.info(f"Creating namespaced stateful set {self.namespace}") + resp = self._apps_api.create_namespaced_stateful_set( + namespace=self.namespace, body=stateful_set + ) + logger.info(f"Stateful set created. 
resp={resp}") + + def _generate_engine_stateful_set( + self, custom_graph_file_mount_path: str, additional_config_map: list[tuple] + ): + stateful_set_name = self.engine_stateful_set_name + volume_claim_template = self._get_volume_claim_template() + volumes, volumes_mounts = self._get_volumes( + additional_config_map=additional_config_map + ) + stateful_set_template_spec = self._get_engine_template_spec( + custom_graph_file_mount_path, volumes, volumes_mounts + ) + replicas = self._default_replicas + service_name = self.engine_service_name + logger.info( + f"Creating stateful set {stateful_set_name} with {replicas} replicas" + ) + spec = kube_client.V1StatefulSetSpec( + selector=kube_client.V1LabelSelector(match_labels=self.selector_labels), + service_name=service_name, + replicas=replicas, + template=stateful_set_template_spec, + volume_claim_templates=[volume_claim_template], + update_strategy=kube_client.V1StatefulSetUpdateStrategy( + type=self.update_strategy + ), + ) + logger.info(f"Stateful set spec: {spec}") + return kube_client.V1StatefulSet( + api_version="apps/v1", + kind="StatefulSet", + metadata=kube_client.V1ObjectMeta( + name=stateful_set_name, + namespace=self.namespace, + labels=self.statefulset_labels, + annotations=self.annotations, + ), + spec=spec, + ) + + def _get_engine_template_spec( + self, custom_graph_mount_path: str, volumes: list, volume_mounts: list + ): + """ + Get the template spec for the engine. 
+ """ + container = self._get_container_spec(custom_graph_mount_path, volume_mounts) + pod_spec = kube_client.V1PodTemplateSpec( + metadata=kube_client.V1ObjectMeta( + annotations=self.engine_pod_annotations, labels=self.statefulset_labels + ), + spec=kube_client.V1PodSpec( + containers=[container], + restart_policy="Always", + termination_grace_period_seconds=10, + volumes=volumes, + service_account_name=self.service_account_name, + ), + ) + if self.node_selectors is not None: + pod_spec.node_selector = self.node_selectors + return pod_spec + + def _get_container_spec( + self, custom_graph_file_mount_path: str, volume_mounts: list + ): + return kube_client.V1Container( + name=self._default_container_name, + image=self.image_full_name, + image_pull_policy=self._image_pull_policy, + volume_mounts=volume_mounts, + env=self.engine_envs, + command=[self.engine_entrypoint_mount_path], + args=[custom_graph_file_mount_path], + resources=kube_client.V1ResourceRequirements( + requests={"cpu": self._cpu_request, "memory": self._memory_request}, + limits={"cpu": self._cpu_limit, "memory": self._memory_limit}, + ), + readiness_probe=kube_client.V1Probe( + http_get=kube_client.V1HTTPGetAction( + path="/v1/service/ready", + port=self.query_port, + ), + initial_delay_seconds=5, + period_seconds=10, + ), + ports=[ + kube_client.V1ContainerPort( + name="admin-port", container_port=self.admin_port + ), + kube_client.V1ContainerPort( + name="query-port", container_port=self.query_port + ), + kube_client.V1ContainerPort( + name="cypher-port", container_port=self.cypher_port + ), + ], + ) + + def _get_volume_claim_template(self): + """ + Create the volume and pvc spec for the engine. 
+ """ + pvc = kube_client.V1PersistentVolumeClaim( + api_version="v1", + kind="PersistentVolumeClaim", + metadata=kube_client.V1ObjectMeta( + name=self._config.master.k8s_launcher_config.volume_claim_name + ), + spec=kube_client.V1PersistentVolumeClaimSpec( + access_modes=[ + self._config.master.k8s_launcher_config.volume_access_mode + ], + storage_class_name=self._config.master.k8s_launcher_config.volume_storage_class, + resources=kube_client.V1ResourceRequirements( + requests={ + "storage": self._config.master.k8s_launcher_config.volume_size + } + ), + ), + ) + # mount to the container + # volume_mount = kube_client.V1VolumeMount( + # name=self._config.master.k8s_launcher_config.volume_claim_name, + # mount_path=self._config.master.k8s_launcher_config.volume_mount_path, + # ) + return pvc + + def _get_volumes(self, additional_config_map: list[tuple]): + """ + Get the volumes and volume mounts for engine pod and container. + Volumes: Config and engine-entrypoint + """ + # config_volume = kube_client.V1Volume( + # name="config-volume", + # config_map=kube_client.V1ConfigMapVolumeSource( + # name=self.master_config_map_name, + # default_mode=0o755, + # ), + # ) + engine_entrypoint_volume = kube_client.V1Volume( + name="engine-entrypoint", + config_map=kube_client.V1ConfigMapVolumeSource( + name=self.master_config_map_name, + default_mode=0o755, + ), + ) + volumes = [engine_entrypoint_volume] + # config_volume_mount = kube_client.V1VolumeMount( + # name="config-volume", + # mount_path=self.engine_config_file_mount_path, + # sub_path="interactive_config.yaml", + # ) + engine_entrypoint_volume_mount = kube_client.V1VolumeMount( + name="engine-entrypoint", + mount_path=self.engine_entrypoint_mount_path, + sub_path="engine_entrypoint.sh", + ) + volume_mounts = [engine_entrypoint_volume_mount] + for key, file_path,sub_path, value in additional_config_map: + config_volume = kube_client.V1Volume( + name=convert_str_to_k8s_valid(key), + 
config_map=kube_client.V1ConfigMapVolumeSource( + name=self.config_map_name, + default_mode=0o755, + ), + ) + config_volume_mount = kube_client.V1VolumeMount( + name=convert_str_to_k8s_valid(key), + mount_path=file_path, + sub_path=sub_path, + ) + volumes.append(config_volume) + volume_mounts.append(config_volume_mount) + return volumes, volume_mounts + + +class K8sLauncher(ILauncher): + """ + The implementation for launching interactive engines in k8s. + Note that the master could be outside the k8s cluster or inside the k8s cluster. + Currently, we consider the master is outside the k8s cluster. + + No state should be stored in the launcher, because the launcher may be re-initialized. + """ + + def __init__(self, config: Config): + """ + Initialize the launcher. + """ + self._config = config + self._config_file = self._config.master.k8s_launcher_config.config_file + if self._config_file is not None: + self._config_file = os.environ.get("KUBECONFIG", "~/.kube/config") + + self._api_client = resolve_api_client(self._config_file) + self._core_api = kube_client.CoreV1Api(self._api_client) + self._apps_api = kube_client.AppsV1Api(self._api_client) + + def launch_cluster( + self, + graph_id: str, + config: Config, + custom_graph_schema_mount_path: str, + custom_graph_statistics_mount_path: str, + wait_service_ready=False, + wait_service_ready_timeout=30, + additional_config=list[tuple], + ) -> InteractiveCluster: + """ + Launch a new interactive cluster, which contains a master pod and some standby pod. + The started pods will serve for graph with the given graph_id. + + full_config_map: A key value pair, which will be added to the configmap. The key should be a string, and the value should be a string or a dict. + """ + # First check whether there is already a cluster for the graph. 
+ self._check_whether_cluster_exists(graph_id) + + cur_config = self._config + # override the default config with the given config + if config is not None: + logger.info("Override the default config with the given config.") + cur_config = config + logger.debug(f"Launch a new cluster for graph {graph_id}, config: {cur_config}") + + # Generating the deployment config for the given graph. + cluster = InteractiveK8sCluster(graph_id, cur_config) + + # Start the cluster + cluster.start( + custom_graph_schema_mount_path=custom_graph_schema_mount_path, + custom_graph_statistics_mount_path=custom_graph_statistics_mount_path, + additional_config_map=additional_config, + ) + + # Wait for the cluster to be ready + if wait_service_ready: + if not cluster.wait_pods_ready(timeout=wait_service_ready_timeout): + raise Exception( + f"Failed to wait the pods to be ready for graph {graph_id}" + ) + + return cluster + + def update_cluster(self, graph_id: str, config: Config) -> bool: + pass + + def delete_cluster(self, graph_id: str) -> bool: + pass + + def get_cluster_status(self, instance_id: str) -> str: + """ + Get the status of the cluster, with the given instance_id(the statueful set name). 
+ """ + if instance_id is None: + return None + try: + stateful_set = self._apps_api.read_namespaced_stateful_set( + name=instance_id, + namespace=self._config.master.k8s_launcher_config.namespace, + ) + return stateful_set.status + except Exception as e: + logger.warning( + f"Failed to get the status of the cluster {instance_id}, error: {e}" + ) + return None + + def get_all_clusters(self) -> list: + pass + + def _check_whether_cluster_exists(self, graph_id: str): + pass diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/k8s_utils.py b/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/k8s_utils.py new file mode 100644 index 000000000000..7a7beb148f04 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/launcher/k8s_utils.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from kubernetes import client as kube_client +from kubernetes import config as kube_config +from kubernetes.client.rest import ApiException as K8SApiException + +import os + + +# Refer from: https://github.com/alibaba/GraphScope/blob/main/python/graphscope/deploy/kubernetes/utils.py +def resolve_api_client(k8s_config_file=None): + """Get ApiClient from predefined locations. + + Args: + k8s_config_file(str): Path to kubernetes config file. + + Raises: + RuntimeError: K8s api client resolve failed. 
+ + Returns: + An kubernetes ApiClient object, initialized with the client args. + + The order of resolution as follows: + 1. load from kubernetes config file or, + 2. load from incluster configuration or, + 3. set api address from env if `KUBE_API_ADDRESS` exist. + RuntimeError will be raised if resolution failed. + """ + try: + # load from kubernetes config file + kube_config.load_kube_config(k8s_config_file) + except: # noqa: E722 + try: + # load from incluster configuration + kube_config.load_incluster_config() + except Exception as e: # noqa: E722 + if "KUBE_API_ADDRESS" in os.environ: + # try to load from env `KUBE_API_ADDRESS` + config = kube_client.Configuration() + config.host = os.environ["KUBE_API_ADDRESS"] + return kube_client.ApiClient(config) + raise RuntimeError("Resolve kube api client failed, exception: %s" % e) + return kube_client.ApiClient() diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/metadata/__init__.py b/flex/interactive/sdk/master/gs_interactive_admin/core/metadata/__init__.py new file mode 100644 index 000000000000..618dda69a7be --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/metadata/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import logging
from abc import ABCMeta
from abc import abstractmethod
from typing import Optional, TYPE_CHECKING

if TYPE_CHECKING:  # type-checking only: avoids a hard runtime dependency cycle
    from gs_interactive_admin.util import MetaKeyHelper

logger = logging.getLogger("interactive")


class AbstractKeyValueStore(metaclass=ABCMeta):
    """Abstract interface for the key-value store that persists Interactive metadata."""

    @abstractmethod
    def open(self):
        """Open a connection to the key value store."""

    @abstractmethod
    def close(self):
        """Close the connection to the key value store."""

    @abstractmethod
    def insert(self, key, value) -> str:
        """Insert a key-value pair; ``key`` is a full-pathed key. Returns the key."""

    @abstractmethod
    def insert_with_prefix(self, prefix, value) -> tuple:
        """Insert ``value`` under an auto-generated key below ``prefix``.

        Returns (generated_key, generated_id_as_string).
        """

    @abstractmethod
    def get(self, key) -> Optional[str]:
        """Return the value stored under ``key``, or None if absent."""

    @abstractmethod
    def get_with_prefix(self, prefix) -> list:
        """Return all (key, value) pairs whose key starts with ``prefix``."""

    @abstractmethod
    def delete(self, key) -> bool:
        """Delete a single key-value pair."""

    @abstractmethod
    def delete_with_prefix(self, prefix) -> bool:
        """Delete every key-value pair whose key starts with ``prefix``."""

    @abstractmethod
    def update(self, key, value):
        """Replace the value stored under ``key``."""

    @abstractmethod
    def update_with_func(self, key, func):
        """Replace the value stored under ``key`` with ``func(current_value)``."""

    @abstractmethod
    def add_watch_prefix_callback(self, prefix, callback):
        """Register ``callback`` for changes below ``prefix``; returns a watch id."""

    @abstractmethod
    def cancel_watch(self, watch_id):
        """Cancel a watch previously registered with add_watch_prefix_callback."""


class ETCDKeyValueStore(AbstractKeyValueStore):
    """Key-value store backed by etcd (via the ``etcd3`` client).

    TODO: Currently both this store and MetaKeyHelper carry the
    namespace/instance-name prefix; only one of them should own it.
    """

    def __init__(self, meta_key_helper: "Optional[MetaKeyHelper]", host="localhost", port=2379):
        # Fixed annotation: the original declared ``meta_key_helper : None``,
        # which is not a valid type for a possibly-present helper.
        self._meta_key_helper = meta_key_helper
        self._host = host
        self._port = port
        self._client = None  # created lazily in open()
        # Directory (below root) holding the auto-increment counters used by
        # insert_with_prefix / _get_next_key.
        self.inc_id_dir = "/inc_id"

    @classmethod
    def create(cls, meta_key_helper, host: str, port: int):
        """Factory: build a store from an explicit host and port."""
        return cls(meta_key_helper, host, port)

    @classmethod
    def create_from_endpoint(cls, meta_key_helper, endpoint: str):
        """Factory: build a store from an endpoint of the form ``http://host:port``.

        Raises:
            ValueError: if the endpoint does not start with ``http://``.
        """
        if not endpoint.startswith("http://"):
            raise ValueError("Invalid endpoint format.")
        host, port = endpoint[len("http://"):].split(":")
        return cls(meta_key_helper, host, int(port))

    @property
    def root(self):
        # Fall back to "/" when no key helper is configured.
        if self._meta_key_helper is None:
            return "/"
        return self._meta_key_helper.root

    def _get_full_key(self, keys):
        """Join ``keys`` (a str or a list of str) below the root prefix."""
        if isinstance(keys, str):
            return self.root + "/" + keys
        elif isinstance(keys, list):
            return self.root + "/" + "/".join(keys)
        else:
            raise ValueError("Invalid key type.")

    def _get_next_key(self, prefix: str):
        """Atomically allocate the next integer id below ``prefix``.

        A counter key is kept under ``inc_id_dir`` and bumped with
        compare-and-swap, retrying up to 10 times on contention.

        Returns:
            (sub_key, id): sub_key is ``prefix + "/" + str(id)``.

        Raises:
            RuntimeError: if the CAS loop exhausts its retries.
        """
        full_key = self._get_full_key([self.inc_id_dir, prefix])
        # Initialize the counter on first use; no-op if it already exists.
        self._client.put_if_not_exists(full_key, "1")
        max_retry = 10
        while max_retry > 0:
            cur_value = int(self._client.get(full_key)[0])
            if self._client.replace(full_key, str(cur_value), str(cur_value + 1)):
                return prefix + "/" + str(cur_value), cur_value
            max_retry -= 1
        raise RuntimeError("Failed to get next key.")

    def open(self):
        """Connect to etcd; must be called before any read/write method."""
        # Imported lazily so this module can be imported (and unit-tested)
        # without etcd3 installed.
        import etcd3

        self._client = etcd3.client(host=self._host, port=self._port)

    def close(self):
        # Guard: close() may be reached (e.g. via __del__ paths) before open().
        if self._client is not None:
            self._client.close()
            self._client = None
            logger.info("ETCD connection closed.")

    def insert(self, key, value) -> str:
        """Insert ``value`` under the full-pathed ``key``; returns the key.

        Raises:
            ValueError: if ``key`` does not start with the root prefix.
        """
        logger.info(f"Inserting key {key} with value {value}")
        if not key.startswith(self.root):
            raise ValueError(f"Key must start with.: {self.root}")
        self._client.put(key, value)
        return key

    def get(self, key) -> Optional[str]:
        """Return the value under the full-pathed ``key``, or None if absent.

        Raises:
            ValueError: if ``key`` does not start with the root prefix.
        """
        logger.info(f"Getting key {key}")
        if not key.startswith(self.root):
            raise ValueError(f"Key must start with.: {self.root}")
        ret = self._client.get(key)
        if not ret[0]:
            return None
        return ret[0].decode("utf-8")

    def insert_with_prefix(self, prefix, value) -> tuple:
        """Insert ``value`` under an auto-generated increasing key below ``prefix``.

        Returns (generated_key, generated_id_as_string).
        NOTE(review): unlike insert(), the generated key is NOT validated
        against self.root here -- callers appear to pass an already-rooted
        prefix; confirm against MetaKeyHelper.
        """
        next_key, next_val = self._get_next_key(prefix)
        logger.info(f"Inserting key {next_key} with value {value}")
        self._client.put(next_key, value)
        return next_key, str(next_val)

    def get_with_prefix(self, prefix) -> list:
        """Return [(sub_key, value), ...] for all keys below ``prefix``.

        Returned keys are stripped of the root prefix (and its separator).
        """
        logger.info("Getting keys with prefix: " + self._get_full_key(prefix))
        ret = self._client.get_prefix(self._get_full_key(prefix))
        return [
            (
                pair[1].key.decode("utf-8")[len(self.root) + 1:],
                pair[0].decode("utf-8"),
            )
            for pair in ret
        ]

    def delete(self, key) -> bool:
        """Delete the key-value pair under the full-pathed ``key``."""
        return self._client.delete(key)

    def delete_with_prefix(self, prefix) -> bool:
        """Delete every key-value pair below the full-pathed ``prefix``."""
        return self._client.delete_prefix(prefix)

    def update(self, key, new_value):
        """Replace the value of an existing full-pathed ``key``.

        Raises:
            ValueError: if the key does not exist.
        """
        # Bug fix: etcd3's client.get() returns a (value, metadata) tuple,
        # which is truthy even when the key is absent, so the original
        # ``if not self._client.get(key)`` never detected a missing key.
        cur_value = self.get(key)
        if cur_value is None:
            raise ValueError("Key does not exist.")
        logger.info(f"Updating key {key} from {cur_value} to {new_value}")
        return self._client.replace(key, cur_value, new_value)

    def update_with_func(self, key, func):
        """Replace the value of ``key`` with ``func(current_value)``.

        Raises:
            ValueError: if the key does not exist.
        """
        # Same existence-check fix as update().
        cur_value = self.get(key)
        if cur_value is None:
            raise ValueError("Key does not exist.")
        new_value = func(cur_value)
        return self._client.replace(key, cur_value, new_value)

    def add_watch_prefix_callback(self, prefix, callback):
        """Watch all keys below ``prefix``; returns the watch id."""
        logger.info("Adding watch on prefix: " + prefix)
        return self._client.add_watch_prefix_callback(prefix, callback)

    def cancel_watch(self, watch_id):
        """Cancel a watch by id."""
        logger.info("Cancelling watch with id: " + str(watch_id))
        return self._client.cancel_watch(watch_id)
+ """ + pass + + @abstractmethod + def get_graph_meta(self, graph_id: str): + """ + Get the metadata for a graph. + """ + pass + + @abstractmethod + def get_graph_schema(self, graph_id: str): + """ + Get the schema for a graph. + """ + pass + + @abstractmethod + def get_graph_statistics(self, graph_id: str): + """ + Get the statistics for a graph. + """ + pass + + @abstractmethod + def create_graph_statistics(self, graph_id: str, statistics: str): + """ + Create the statistics for a graph. + """ + pass + + @abstractmethod + def get_all_graph_meta(self): + """ + Get the metadata for all graphs. + """ + pass + + @abstractmethod + def delete_graph_meta(self, graph_id: str): + """ + Delete the metadata for a graph. + """ + pass + + @abstractmethod + def update_graph_meta(self, graph_id: str, graph_meta: dict): + """ + Update the metadata for a graph. + """ + pass + + @abstractmethod + def update_graph_meta_with_func(self, graph_id: str, func): + """ + Update the metadata for a graph with a function. + """ + pass + + @abstractmethod + def create_job_meta(self, job_meta: dict): + """ + Create the metadata for a job. + """ + pass + + @abstractmethod + def get_job_meta(self, job_id: str): + """ + Get the metadata for a job. + """ + pass + + @abstractmethod + def get_all_job_meta(self): + """ + Get the metadata for all jobs. + """ + pass + + @abstractmethod + def delete_job_meta(self, job_id: str): + """ + Delete the metadata for a job. + """ + pass + + @abstractmethod + def update_job_meta(self, job_id: str, job_meta: dict): + """ + Update the metadata for a job. + """ + pass + + @abstractmethod + def create_plugin_meta(self, graph_id: str, plugin_meta: dict): + """ + Create the metadata for a plugin. + """ + pass + + @abstractmethod + def get_plugin_meta(self, graph_id: str, plugin_id: str): + """ + Get the metadata for a plugin. + """ + pass + + @abstractmethod + def get_all_plugin_meta(self, graph_id: str): + """ + Get the metadata for all plugins. 
+ """ + pass + + @abstractmethod + def delete_plugin_meta(self, graph_id: str, plugin_id: str): + """ + Delete the metadata for a plugin. + """ + pass + + @abstractmethod + def update_plugin_meta(self, graph_id: str, plugin_id: str, plugin_meta: dict): + """ + Update the metadata for a plugin. + """ + pass + + @abstractmethod + def delete_plugin_meta_by_graph_id(self, graph_id: str): + """ + Delete the metadata for all plugins of a graph. + """ + pass + + @abstractmethod + def set_graph_status(self, graph_id: str, status: str): + """ + Set the status for a graph. + """ + pass + + +class DefaultMetadataStore(IMetadataStore): + """ + The default implementation of the metadata store. + """ + + def __init__( + self, + kv_store_handle: AbstractKeyValueStore, + key_helper: MetaKeyHelper, + ): + self._kv_store_handle : AbstractKeyValueStore = kv_store_handle + self._meta_key_helper = key_helper + + def open(self): + self._kv_store_handle.open() + + def close(self): + self._kv_store_handle.close() + + def __del__(self): + self.close() + + def create_graph_meta(self, graph_meta: dict) -> str: + graph_meta = remove_nones(graph_meta) + logger.info( + "Creating graph meta prefix %s, value : %s" + % (self._meta_key_helper.graph_meta_prefix(), graph_meta) + ) + full_key, key_id = self._kv_store_handle.insert_with_prefix( + self._meta_key_helper.graph_meta_prefix(), str(graph_meta) + ) + logger.info( + "Created graph meta prefix %s, key_id : %s" + % (self._meta_key_helper.graph_meta_prefix(), key_id) + ) + # add id field back into the graph meta + graph_meta["id"] = key_id + self.update_graph_meta(key_id, str(graph_meta)) + return key_id + + def get_graph_meta(self, graph_id: str) -> dict: + meta_str = self._kv_store_handle.get( + "/".join([self._meta_key_helper.graph_meta_prefix(), graph_id]) + ) + if meta_str is None: + logger.info("Graph meta not found for graph id %s" % graph_id) + return None + # convert the string to dict + res = yaml.safe_load(meta_str) + if "_schema" in 
res: + res["schema"] = res["_schema"] + del res["_schema"] + # Remove all key-value pairs that value is None, recursively. + res = remove_nones(res) + return res + + def get_graph_statistics(self, graph_id: str) -> dict: + """ + Get the statistic info for a graph. + Args: + graph_id (str): The unique identifier of the graph + """ + logger.info(f"Getting graph statistics fo {graph_id}") + json_str = self._kv_store_handle.get( + self._meta_key_helper.graph_statistics_key(graph_id) + ) + logger.info("Got graph statistics: %s" % json_str) + # convert the string to dict + # TODO: try to debug failure + res = json.loads(json_str) + return res + + def create_graph_statistics(self, graph_id: str, statistics: str) -> bool: + """Create graph statistics + + Args: + graph_id (str): graph_id + statistics (str): statistics info + { + "total_vertex_count": 0, + "total_edge_count": 0, + "vertex_type_statistics: [ + "type_id": 0, + "type_name": "person", + "count": 0 + ], + "edge_type_statistics": [ + "type_id": 0, + "type_name": "knows", + "vertex_type_pair_statistics": [ + { + "source_vertex": 0, + "destination_vertex": 0, + "count": 0 + } + ] + ] + } + + Returns: + bool: success or not + """ + logger.info(f"Creating graph statistics for {graph_id}") + return self._kv_store_handle.insert( + self._meta_key_helper.graph_statistics_key(graph_id), statistics + ) + + def get_graph_schema(self, graph_id: str) -> dict: + meta = self.get_graph_meta(graph_id) + if "schema" in meta: + return meta["schema"] + else: + raise RuntimeError(f"Internal error, schema not found in graph meta, graph_id {graph_id}") + + def get_all_graph_meta(self) -> list: + logger.info( + "Getting all graph meta prefix %s" + % self._meta_key_helper.graph_meta_prefix() + ) + return self._kv_store_handle.get_with_prefix( + self._meta_key_helper.graph_meta_prefix() + ) + + def delete_graph_meta(self, graph_id: str) -> bool: + logger.info( + "Deleting graph meta prefix %s, id %s" + % 
(self._meta_key_helper.graph_meta_prefix(), graph_id) + ) + return self._kv_store_handle.delete( + "/".join([self._meta_key_helper.graph_meta_prefix(), graph_id]) + ) + + def update_graph_meta(self, graph_id: str, graph_meta: dict) -> bool: + logger.info( + "Updating graph meta prefix %s, id %s, value %s" + % (self._meta_key_helper.graph_meta_prefix(), graph_id, graph_meta) + ) + return self._kv_store_handle.update( + "/".join([self._meta_key_helper.graph_meta_prefix(), graph_id]), graph_meta + ) + + def update_graph_meta_with_func(self, graph_id, func): + logger.info( + "Updating graph meta prefix %s, id %s with function" + % (self._meta_key_helper.graph_meta_prefix(), graph_id) + ) + return self._kv_store_handle.update_with_func( + "/".join([self._meta_key_helper.graph_meta_prefix(), graph_id]), func + ) + + def create_job_meta(self, job_meta: dict) -> str: + logger.info( + "Creating job meta prefix %s, value %s" + % (self._meta_key_helper.job_meta_prefix(), job_meta) + ) + full_key, key_id = self._kv_store_handle.insert_with_prefix( + self._meta_key_helper.job_meta_prefix(), job_meta + ) + return key_id + + def get_job_meta(self, job_id: str) -> dict: + logger.info( + "Getting job meta prefix %s, id %s" + % (self._meta_key_helper.job_meta_prefix(), job_id) + ) + return self._kv_store_handle.get( + "/".join([self._meta_key_helper.job_meta_prefix(), job_id]) + ) + + def get_all_job_meta(self) -> list: + return self._kv_store_handle.get_with_prefix( + self._meta_key_helper.job_meta_prefix() + ) + + def delete_job_meta(self, job_id: str) -> bool: + return self._kv_store_handle.delete( + self._meta_key_helper.job_meta_prefix(), job_id + ) + + def update_job_meta(self, job_id: str, job_meta: dict) -> bool: + return self._kv_store_handle.update( + "/".join([self._meta_key_helper.job_meta_prefix(), job_id]), str(job_meta) + ) + + def create_plugin_meta(self, graph_id: str, plugin_meta: dict) -> str: + logger.info( + "Creating plugin meta prefix %s, value %s" + % 
(self._meta_key_helper.plugin_meta_prefix(graph_id), plugin_meta) + ) + full_key, key_id = self._kv_store_handle.insert_with_prefix( + self._meta_key_helper.plugin_meta_prefix(graph_id), plugin_meta + ) + return key_id + + def get_plugin_meta(self, graph_id: str, plugin_id: str) -> str: + logger.info( + "Getting plugin meta prefix %s, graph id %s, id %s" + % (self._meta_key_helper.plugin_meta_prefix(graph_id), graph_id, plugin_id) + ) + return self._kv_store_handle.get( + "/".join([self._meta_key_helper.plugin_meta_prefix(graph_id), plugin_id]) + ) + + def get_all_plugin_meta(self, graph_id: str) -> list: + """ returns list of string + + Args: + graph_id (str): _description_ + + Returns: + list: _description_ + """ + logger.info( + "Getting all plugin meta prefix %s" + % self._meta_key_helper.plugin_meta_prefix(graph_id) + ) + return self._kv_store_handle.get_with_prefix( + self._meta_key_helper.plugin_meta_prefix(graph_id) + ) + + def delete_plugin_meta(self, graph_id: str, plugin_id: str) -> bool: + logger.info( + "Deleting plugin meta prefix %s, id %s" + % (self._meta_key_helper.plugin_meta_prefix(graph_id), plugin_id) + ) + return self._kv_store_handle.delete( + "/".join([self._meta_key_helper.plugin_meta_prefix(graph_id), plugin_id]) + ) + + def update_plugin_meta( + self, graph_id: str, plugin_id: str, plugin_meta: dict + ) -> bool: + logger.info( + "Updating plugin meta prefix %s, id %s, value %s" + % (self._meta_key_helper.plugin_meta_prefix(graph_id), plugin_id, plugin_meta) + ) + return self._kv_store_handle.update( + "/".join([self._meta_key_helper.plugin_meta_prefix(graph_id), plugin_id]), + plugin_meta, + ) + + def delete_plugin_meta_by_graph_id(self, graph_id: str) -> bool: + # get all plugins metas. 
+ logger.info("Deleting plugin meta by graph id %s" % graph_id) + plugin_metas = self.get_all_plugin_meta(graph_id) + for plugin_meta in plugin_metas: + if plugin_meta["graph_id"] == graph_id: + self.delete_plugin_meta(graph_id, plugin_meta["plugin_id"]) + return True + + def set_graph_status(self, graph_id, status): + """ + Use a key-value pair to store the status of a graph. + """ + return self._kv_store_handle.update( + self._meta_key_helper.graph_status_key(graph_id), status + ) + + +metadata_store = None + + +def get_metadata_store(): + global metadata_store + return metadata_store + + +default_graph_def = { + "name": "modern_graph", + "description": "This is a test graph", + "schema": { + "vertex_types": [ + { + "type_name": "person", + "properties": [ + { + "property_name": "id", + "property_type": {"primitive_type": "DT_SIGNED_INT64"}, + }, + { + "property_name": "name", + "property_type": {"string": {"var_char": {"max_length": 16}}}, + }, + { + "property_name": "age", + "property_type": {"primitive_type": "DT_SIGNED_INT32"}, + }, + ], + "primary_keys": ["id"], + } + ], + "edge_types": [ + { + "type_name": "knows", + "vertex_type_pair_relations": [ + { + "source_vertex": "person", + "destination_vertex": "person", + "relation": "MANY_TO_MANY", + } + ], + "properties": [ + { + "property_name": "weight", + "property_type": {"primitive_type": "DT_DOUBLE"}, + } + ], + "primary_keys": [], + } + ], + }, +} + +default_graph_statistics = { + "total_vertex_count": 6, + "total_edge_count": 6, + "vertex_type_statistics": [ + { + "type_id": 0, + "type_name": "person", + "count": 4 + }, + { + "type_id": 1, + "type_name": "software", + "count": 2 + } + ], + "edge_type_statistics": [ + { + "type_id": 0, + "type_name": "knows", + "vertex_type_pair_statistics": [ + { + "source_vertex": "person", + "destination_vertex": "person", + "count": 2 + } + ] + }, + { + "type_id": 1, + "type_name": "created", + "vertex_type_pair_statistics": [ + { + "source_vertex": "person", + 
"destination_vertex": "software", + "count": 4 + } + ] + } + ] +} + +def __make_default_graph_meta(metadata_store: IMetadataStore): + if metadata_store.get_graph_meta("1") is None: + key_id = metadata_store.create_graph_meta(default_graph_def) + # Expect the key_id is 1 + if key_id != "1": + raise ValueError("The key_id is not 1: %s" % key_id) + logger.info("Created graph meta with key_id 1") + metadata_store.create_graph_statistics("1", str(default_graph_statistics)) + logger.info("Created graph statistics with key_id 1") + # NOTE: The bulk_loading process will be automatically triggered when the engine pod are launched. + + +def init_metadata_store(config: Config): + global metadata_store + if config.compute_engine.metadata_store.uri.startswith("http://"): + # we assume is etcd key-value store + key_helper = MetaKeyHelper( + namespace=config.master.k8s_launcher_config.namespace, + instance_name=config.master.instance_name, + ) + etcd_metadata_store = ETCDKeyValueStore.create_from_endpoint( + key_helper, config.compute_engine.metadata_store.uri + ) + metadata_store = DefaultMetadataStore(etcd_metadata_store, key_helper) + metadata_store.open() + + # Check whether default graph's metadata exists, if not, create it. 
+ __make_default_graph_meta(metadata_store) + else: + raise ValueError( + "Unsupported metadata store URI: %s" + % config.compute_engine.metadata_store.uri + ) diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/procedure/__init__.py b/flex/interactive/sdk/master/gs_interactive_admin/core/procedure/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/procedure/procedure_manager.py b/flex/interactive/sdk/master/gs_interactive_admin/core/procedure/procedure_manager.py new file mode 100644 index 000000000000..4a22370e125a --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/procedure/procedure_manager.py @@ -0,0 +1,342 @@ +from abc import ABCMeta +from abc import abstractmethod + +import logging +import os + +from gs_interactive_admin.core.config import Config, CODE_GEN_BIN,CODE_GEN_TMP_DIR +from gs_interactive_admin.util import dump_file,check_field_in_dict +from gs_interactive_admin.core.metadata.metadata_store import get_metadata_store +from gs_interactive_admin.core.metadata.metadata_store import IMetadataStore +from gs_interactive_admin.models.create_procedure_request import CreateProcedureRequest +from gs_interactive_admin.models.get_procedure_response import GetProcedureResponse +from gs_interactive_admin.models.update_procedure_request import UpdateProcedureRequest +from gs_interactive_admin.util import get_current_time_stamp_ms +import subprocess +import yaml + +logger = logging.getLogger("interactive") + + +class ProcedureManager(metaclass=ABCMeta): + """The interface of procedure manager. + + Args: + metaclass (_type_, optional): _description_. Defaults to ABCMeta. 
+ """ + @abstractmethod + def open(self): + """Open the procedure manager.""" + pass + + @abstractmethod + def close(self): + """Close the procedure manager.""" + pass + + @abstractmethod + def create_procedure(self, graph_id, create_procedure_request): + """Create a procedure on a graph. + + Args: + graph_id (str): The id of the graph. + create_procedure_request (CreateProcedureRequest): The request to create a procedure. + + Returns: + CreateProcedureResponse: The response of the creation. + """ + + + @abstractmethod + def delete_procedure(self, graph_id, procedure_id): + """Delete a procedure on a graph by id. + + Args: + graph_id (str): The id of the graph. + procedure_id (str): The id of the procedure. + + Returns: + str: The result of the deletion. + """ + pass + + @abstractmethod + def get_procedure(self, graph_id, procedure_id): + """Get a procedure by id. + + Args: + graph_id (str): The id of the graph. + procedure_id (str): The id of the procedure. + + Returns: + GetProcedureResponse: The response of the procedure. + """ + pass + + @abstractmethod + def list_procedures(self, graph_id): + """List all procedures on a graph. + + Args: + graph_id (str): The id of the graph. + + Returns: + List[GetProcedureResponse]: The response of all procedures. + """ + pass + + @abstractmethod + def update_procedure(self, graph_id, procedure_id, update_procedure_request): + """Update a procedure on a graph. + + Args: + graph_id (str): The id of the graph. + procedure_id (str): The id of the procedure. + update_procedure_request (UpdateProcedureRequest): The request to update a procedure. + + Returns: + UpdateProcedureResponse: The response of the update. + """ + pass + + +class DefaultProcedureManager(ProcedureManager): + """The default implementation of procedure manager. + + Args: + ProcedureManager (_type_): The interface of procedure manager. 
+ """ + def __init__(self, metadata_manager : IMetadataStore, config_file_path : str): + self._metadata_manager = metadata_manager + self._config_file_path = config_file_path + self._procedure_builder = None + self._builtin_proc_names = ["count_vertices", "pagerank", "k_neighbors", "shortest_path_among_three"] + + def open(self): + pass + + def close(self): + pass + + def create_procedure(self, graph_id : str, create_procedure_request : CreateProcedureRequest): + if not graph_id or graph_id == "": + raise RuntimeError("The graph id is None.") + if not create_procedure_request: + raise RuntimeError("The create procedure request is None.") + if self._metadata_manager.get_graph_meta(graph_id) is None: + raise RuntimeError(f"The graph {graph_id} does not exist.") + if self._metadata_manager.get_plugin_meta(graph_id, create_procedure_request.name) is not None: + raise RuntimeError(f"The procedure {create_procedure_request.name} already exists.") + logger.info(f"Creating procedure {create_procedure_request.name} on graph {graph_id}") + proc_name = create_procedure_request.name + if proc_name in self._builtin_proc_names: + raise RuntimeError(f"The procedure name {proc_name} is reserved, please use another name.") + request_dict = create_procedure_request.to_dict() + request_dict["id"] = request_dict["name"] + request_dict["bound_graph"] = graph_id + request_dict["creation_time"] = get_current_time_stamp_ms() + request_dict["update_time"] = get_current_time_stamp_ms() + request_dict["enable"] = True + + logger.info("Creating procedure with request: %s", request_dict) + plugin_key = self._metadata_manager.create_plugin_meta(graph_id, str(request_dict)) + logger.info("Created plugin meta: %s", plugin_key) + + if not self._generate_procedure(graph_id, request_dict, plugin_key, self._config_file_path): + logger.error(f"Failed to generate procedure {proc_name}") + self._metadata_manager.delete_plugin_meta(graph_id, proc_name) + raise RuntimeError(f"Failed to generate procedure 
{proc_name}") + logger.info(f"Successfully created procedure {proc_name}") + return plugin_key + + + def delete_procedure(self, graph_id, procedure_id): + """_summary_ + + Args: + graph_id (_type_): _description_ + procedure_id (_type_): _description_ + """ + if not graph_id or graph_id == "": + raise RuntimeError("The graph id is None.") + if not procedure_id or procedure_id == "": + raise RuntimeError("The procedure id is None.") + if self._metadata_manager.get_graph_meta(graph_id) is None: + raise RuntimeError(f"The graph {graph_id} does not exist.") + if self._metadata_manager.get_plugin_meta(graph_id, procedure_id) is None: + raise RuntimeError(f"The procedure {procedure_id} does not exist.") + logger.info(f"Deleting procedure {procedure_id} on graph {graph_id}") + + # delete the procedure from meta store + if not self._metadata_manager.delete_plugin_meta(graph_id, procedure_id): + raise RuntimeError(f"Failed to delete procedure {procedure_id}") + + # delete the procedure from remote storage + if not self._delete_procedure_from_remote_storage(graph_id, procedure_id): + raise RuntimeError(f"Failed to delete procedure {procedure_id} from remote storage") + logger.info(f"Successfully deleted procedure {procedure_id}") + return f"Successfully deleted procedure {procedure_id}" + + def get_procedure(self, graph_id, procedure_id): + if not graph_id or graph_id == "": + raise RuntimeError("The graph id is None.") + if not procedure_id or procedure_id == "": + raise RuntimeError("The procedure id is None.") + if self._metadata_manager.get_graph_meta(graph_id) is None: + raise RuntimeError(f"The graph {graph_id} does not exist.") + logger.info(f"Getting procedure {procedure_id} on graph {graph_id}") + plugin_meta = self._metadata_manager.get_plugin_meta(graph_id, procedure_id) + if plugin_meta is None: + raise RuntimeError(f"Cannot find the procedure {procedure_id}") + # parse the plugin_meta from str to dict + plugin_meta = yaml.safe_load(plugin_meta) + 
logger.info(f"Got plugin meta: {plugin_meta}") + return GetProcedureResponse.from_dict(plugin_meta) + + def list_procedures(self, graph_id): + if not graph_id or graph_id == "": + raise RuntimeError("The graph id is None.") + if self._metadata_manager.get_graph_meta(graph_id) is None: + raise RuntimeError(f"The graph {graph_id} does not exist.") + logger.info(f"Listing procedures on graph {graph_id}") + plugin_metas = self._metadata_manager.get_all_plugin_meta(graph_id) + if plugin_metas is None: + raise RuntimeError(f"Cannot find the procedures on graph {graph_id}") + results = [] + for plugin_meta in plugin_metas: + results.append(GetProcedureResponse.from_dict(yaml.safe_load(plugin_meta))) + return + + def update_procedure(self, graph_id, procedure_id, update_procedure_request): + if not graph_id or graph_id == "": + raise RuntimeError("The graph id is None.") + if not procedure_id or procedure_id == "": + raise RuntimeError("The procedure id is None.") + if not update_procedure_request: + raise RuntimeError("The update procedure request is None.") + if self._metadata_manager.get_graph_meta(graph_id) is None: + raise RuntimeError(f"The graph {graph_id} does not exist.") + if self._metadata_manager.get_plugin_meta(graph_id, procedure_id) is None: + raise RuntimeError(f"The procedure {procedure_id} does not exist.") + logger.info(f"Updating procedure {procedure_id} on graph {graph_id}") + new_description = update_procedure_request.description + old_plugin_meta = self._metadata_manager.get_plugin_meta(graph_id, procedure_id) + if old_plugin_meta is None: + raise RuntimeError(f"Cannot find the procedure {procedure_id}") + old_plugin_meta = yaml.safe_load(old_plugin_meta) + logger.info(f"Got old plugin meta: {old_plugin_meta}") + old_plugin_meta["description"] = new_description + self._metadata_manager.update_plugin_meta(graph_id, procedure_id, str(old_plugin_meta)) + logger.info(f"Updated plugin meta: {old_plugin_meta}") + return f"Successfully updated procedure 
{procedure_id}" + + + def _generate_procedure(self, graph_id, request_dict, plugin_key, config_file_path): + # Check whether the request is valid + self._check_request(request_dict) + builder_path = self._get_procedure_builder_path() + logger.info(f"Generating procedure with builder path: {builder_path}") + query_str = request_dict["query"] + if query_str is None or query_str == "": + raise RuntimeError("The query is None or empty.") + if "description" not in request_dict: + request_dict["description"] = "A procedure generated by FLEX" + query_file_path = self._dump_query_to_file(query_str, request_dict["type"], plugin_key) + desc_file_path = f"{CODE_GEN_TMP_DIR}/{plugin_key}.desc" + output_dir=f"{CODE_GEN_TMP_DIR}/{plugin_key}" + if not os.path.exists(output_dir): + os.makedirs(output_dir) + dump_file(request_dict['description'], desc_file_path) + command = [ + CODE_GEN_BIN, + "-e=hqps", + f"-i={query_file_path}", + f"-o={output_dir}", + f"--procedure_name={request_dict['name']}", + f"-w={CODE_GEN_TMP_DIR}", + f"--ir_conf={config_file_path}", + f"--graph_schema_path={self._dump_schema_to_file(graph_id, plugin_key)}", + ] + if "description" in request_dict and request_dict["description"] is not None: + command.append(f"--procedure_desc=={desc_file_path}") + + logger.info(f"Generating procedure with command: {command}") + log_file = f"{CODE_GEN_TMP_DIR}/{plugin_key}.log" + with open(log_file, "w") as log: + process = subprocess.Popen(command, stdout = log, stderr = log) + logger.info(f"Started process {process.pid}") + process.wait() + logger.info(f"Finished process {process.pid} with code {process.returncode}") + if process.returncode != 0: + logger.error(f"Failed to generate procedure {request_dict['name']}") + return False + return True + + + + + def _dump_schema_to_file(self, graph_id, plugin_key): + schema_file = f"{CODE_GEN_TMP_DIR}/{plugin_key}.schema" + schema = self._metadata_manager.get_graph_schema(graph_id) + if schema is None: + raise 
RuntimeError(f"Cannot find the schema of graph {graph_id}") + return dump_file(schema, schema_file) + + def _dump_query_to_file(self, query_str, query_type, plugin_key): + query_file = f"{CODE_GEN_TMP_DIR}/{plugin_key}" + if query_type.lower() == "cypher": + query_file += ".cypher" + elif query_type.lower() == "cpp" or query_type.lower() == "cc": + query_file += ".cc" + else: + raise RuntimeError(f"Invalid query type: {query_type}") + return dump_file(query_str, query_file) + + def _get_procedure_builder_path(self): + """Get the path to CODE_GEN_BIN.""" + # Try from FLEX_HOME + if os.environ.get("FLEX_HOME"): + bin_path = os.path.join(os.environ.get("FLEX_HOME"), "bin", CODE_GEN_BIN) + if os.path.exists(bin_path): + return bin_path + # Try from /opt/flex/bin + bin_path = os.path.join("/opt/flex/bin", CODE_GEN_BIN) + if os.path.exists(bin_path): + return bin_path + # Try to find via the relative path + bin_path = os.path.join(os.path.dirname(__file__), "../../../../../../bin", CODE_GEN_BIN) + if os.path.exists(bin_path): + return bin_path + raise RuntimeError(f"Cannot find the code gen bin: {CODE_GEN_BIN}") + + def _check_request(self, request_dict): + """check create procedure request. 
+ + Args: + request_dict (_type_): _description_ + + Returns: + _type_: _description_ + """ + check_field_in_dict(request_dict, "bound_graph") + check_field_in_dict(request_dict, "name") + check_field_in_dict(request_dict, "description") + check_field_in_dict(request_dict, "enable") + check_field_in_dict(request_dict, "query") + check_field_in_dict(request_dict, "type") + _type = request_dict["type"] + if _type not in ["cypher", "CYPHER", "CPP", "cpp"]: + raise RuntimeError(f"Invalid procedure type: {_type}") + + + + +procedure_manager = None + +def get_procedure_manager() -> ProcedureManager: + global procedure_manager + return procedure_manager + +def init_procedure_manager(config: Config, config_file_path: str): + global procedure_manager + procedure_manager = DefaultProcedureManager(get_metadata_store(), config_file_path) \ No newline at end of file diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/service/__init__.py b/flex/interactive/sdk/master/gs_interactive_admin/core/service/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/service/service_manager.py b/flex/interactive/sdk/master/gs_interactive_admin/core/service/service_manager.py new file mode 100644 index 000000000000..56ae9f38bbf8 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/service/service_manager.py @@ -0,0 +1,156 @@ +import time +from gs_interactive_admin.core.config import Config +import logging + +from gs_interactive_admin.models.start_service_request import StartServiceRequest +from gs_interactive_admin.core.launcher.k8s_launcher import K8sLauncher +from gs_interactive_admin.core.launcher.abstract_launcher import InteractiveCluster +from gs_interactive_admin.core.metadata.metadata_store import get_metadata_store +import yaml +from gs_interactive_admin.util import get_current_time_stamp_ms + +logger = logging.getLogger("interactive") + + +class ServiceManager(object): + 
def __init__(self, config: Config): + self._query_port = config.http_service.query_port + self._admin_port = config.master.port + # get start time in unix timestamp + self._start_time = get_current_time_stamp_ms() + self._bolt_port = config.compiler.endpoint.bolt_connector.port + self._config = config + if config.master.k8s_launcher_config: + logger.info("Using K8sLauncher") + self._launcher = K8sLauncher(config) + + self._clusters = {} # graph_id -> cluster + self._metadata_store = get_metadata_store() + + def check_service_ready(self): + return "Service is ready" + + def get_service_status(self): + return { + "status": "running", + "hqps_port": self._query_port, + "bolt_port": self._bolt_port, + "statistics_enabled": False, + "graph": {}, + "start_time": self._start_time, + "deploy_mode": "k8s", + } + + def start_service(self, start_service_request: StartServiceRequest): + """ + Start service on a specified graph. This assumes the graph has already been created. + A new couple of pods will be created to serve the graph. + TODO: Avoid creating new pods if the graph is already running. + TODO: Delete the pod if the graph is deleted. 
+ + Args: + start_service_request (StartServiceRequest): _description_ + + Returns: + _type_: _description_ + """ + graph_id = start_service_request.graph_id + logger.info("Starting service for graph %s", graph_id) + if graph_id is None or graph_id == "": + return "Invalid graph id" + + graph_meta = self._metadata_store.get_graph_meta(graph_id) + if graph_meta is None: + raise RuntimeError(f"Graph {graph_id} does not exist") + + # check whether the graph has been loaded with data + if "remote_path" not in graph_meta: + raise RuntimeError(f"Graph {graph_id} has not been loaded with data") + + # we need serialize the graph_meta into a yaml file, and mount it to the pod that we are going to create + custom_graph_name = f"graph-{graph_id}.yaml" + custom_graph_file_mount_path = f"/etc/interactive/{custom_graph_name}" + custom_graph_file_sub_path = custom_graph_name + custom_graph_statistics_mount_path = f"{self._config.workspace}/data/gs_interactive_default_graph/indices/statistics.json" + custom_graph_file_data = yaml.dump(graph_meta, default_flow_style=False) + logger.info("Custom graph file data: %s", custom_graph_file_data) + + custom_engine_config_name = "interactive_config.yaml" + custom_engine_config_mount_path = "/opt/flex/share/interactive_config.yaml" + custom_engine_config_sub_path = "interactive_config.yaml" + custom_engine_config_data = self._config.to_dict() + custom_engine_config_data["compiler"]["meta"]["reader"]["schema"]["uri"] = custom_graph_file_mount_path + custom_engine_config_data["compiler"]["meta"]["reader"]["statistics"]["uri"] = custom_graph_statistics_mount_path + custom_engine_config_data = yaml.dump(custom_engine_config_data, default_flow_style=False) + + cluster = self._launcher.launch_cluster( + graph_id=graph_id, + config=self._config, + custom_graph_schema_mount_path=custom_graph_file_mount_path, + custom_graph_statistics_mount_path=custom_graph_statistics_mount_path, + wait_service_ready=False, + additional_config=[ + ( + 
custom_graph_name, + custom_graph_file_mount_path, + custom_graph_file_sub_path, + custom_graph_file_data, + ), + ( + custom_engine_config_name, + custom_engine_config_mount_path, + custom_engine_config_sub_path, + custom_engine_config_data, + ) + ], + ) + + self._clusters[graph_id] = cluster + return "Service started successfully" + + def stop_service(self, stop_service_request: StartServiceRequest): + """ + Stop the service for a specified graph. + + Args: + stop_service_request (StartServiceRequest): _description_ + + Returns: + _type_: _description_ + """ + graph_id = stop_service_request.graph_id + logger.info("Stopping service for graph %s", graph_id) + if graph_id is None or graph_id == "": + raise RuntimeError("graph_id is empty") + + if graph_id not in self._clusters: + raise RuntimeError(f"The specified graph {graph_id} is not running") + + cluster: InteractiveCluster = self._clusters[graph_id] + cluster.stop() + del self._clusters[graph_id] + return "Service stopped successfully" + + def is_graph_running(self, graph_id: str): + """Returns whether a graph is serving in this deployment. 
+ + Args: + graph_id (str): _description_ + + Returns: + _type_: _description_ + """ + + +service_manager = None + + +def get_service_manager(): + global service_manager + return service_manager + + +def init_service_manager(config: Config): + global service_manager + service_manager = ServiceManager(config) + return service_manager diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/service_discovery/__init__.py b/flex/interactive/sdk/master/gs_interactive_admin/core/service_discovery/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/flex/interactive/sdk/master/gs_interactive_admin/core/service_discovery/service_registry.py b/flex/interactive/sdk/master/gs_interactive_admin/core/service_discovery/service_registry.py new file mode 100644 index 000000000000..fcb0ba876733 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/core/service_discovery/service_registry.py @@ -0,0 +1,584 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +from abc import ABCMeta +from abc import abstractmethod + +import json +import logging +import etcd3 + +import etcd3.events +import etcd3.watch + +from gs_interactive_admin import util +from gs_interactive_admin.core.metadata.kv_store import ETCDKeyValueStore +from etcd3.events import PutEvent +from etcd3.events import DeleteEvent +from gs_interactive_admin.core.config import Config +from gs_interactive_admin.util import META_SERVICE_KEY, MetaKeyHelper + + +from gs_interactive_admin.models.base_model import Model + +logger = logging.getLogger("interactive") + +ETCD_RETRY_TIMES = 3 + + +def like_endpoint(ip_port: str): + """Expect ip_port to be a ip or ip+port""" + if ip_port is None: + return False + parts = ip_port.split(":") + if len(parts) == 1: + return True + if len(parts) == 2: + return parts[1].isdigit() + return False + + +class IServiceRegistry(metaclass=ABCMeta): + """ + An abstraction for service registry. + """ + + @abstractmethod + def discover(self, graph_id, service_name): + """ + Discover a service. + """ + pass + + @abstractmethod + def start(self): + """ + start a service. + """ + pass + + @abstractmethod + def stop(self): + """ + stop a service. + """ + pass + + +class ServiceInstance(Model): + """ + A service instance. + """ + + def __init__(self, endpoint, metrics: str = None): + self._endpoint = endpoint + self._metrics = metrics + + self.openapi_types = {"endpoint": str, "metrics": str} + + self.attribute_map = {"endpoint": "endpoint", "metrics": "metrics"} + + @property + def endpoint(self): + return self._endpoint + + @property + def metrics(self): + return self._metrics + + @metrics.setter + def metrics(self, metrics: str): + self._metrics = metrics + + @endpoint.setter + def endpoint(self, endpoint): + self._endpoint = endpoint + + @classmethod + def from_dict(cls, dikt) -> "ServiceInstance": + """Returns the dict as a model + + :param dikt: A dict. + :type: dict + :return: The ServiceInstance of this ServiceInstance. 
# noqa: E501 + :rtype: ServiceInstance + """ + return util.deserialize_model(dikt, cls) + + def __eq__(self, other): + if isinstance(other, ServiceInstance): + return self._endpoint == other._endpoint and self._metrics == other._metrics + return False + + def __hash__(self): + return hash((self._endpoint, self._metrics)) + + def __str__(self): + return "ServiceInstance(%s:%s)" % (self._endpoint, self._metrics) + + def __repr__(self): + return self.__str__() + + def to_dict(self): + return {"endpoint": self._endpoint, "metrics": self._metrics} + + +class DiscoverResult(Model): + """ + The result of the service discovery. + ├────── instance_list + │ │ ├── cypher + │ │ │ ├── 11.12.13.14_7687 + │ │ │ └── 22.12.13.14_7687 + │ | ├── gremlin + │ │ │ ├── 11.12.13.14_12314 + │ │ │ └── 22.12.13.14_12314 + │ | └─-- procedure + │ | ├── 11.12.13.14_10000 + │ | └── 22.12.13.14_10000 + │ │ + | └── primary + """ + + def __init__(self): + self._instance_list = dict() + self._primary: str = None + + self.openapi_types = {"instance_list": dict, "primary": str} + + self.attribute_map = {"instance_list": "instance_list", "primary": "primary"} + + @property + def primary(self): + return self._primary + + def set_primary(self, primary: str): + logger.info(f"----Set primary: {primary}") + self._primary = primary + + @property + def instance_list(self): + return self._instance_list + + @instance_list.setter + def instance_list(self, instance_list: dict): + self._instance_list = instance_list + + @classmethod + def from_dict(cls, dikt) -> "DiscoverResult": + """Returns the dict as a model + + :param dikt: A dict. + :type: dict + :return: The DiscoverResult of this DiscoverResult. # noqa: E501 + :rtype: DiscoverResult + """ + return util.deserialize_model(dikt, cls) + + def add_instance(self, service_name: str, end_point, metrics: str): + """Add a service instance to the instance list of the service name. + + Args: + service_name (str): The name of the service. 
+ end_point (_type_): The endpoint of the service instance. + metrics (str): The metrics of the service instance. + """ + if service_name not in self._instance_list: + self._instance_list[service_name] = set() + self._instance_list[service_name].add(ServiceInstance(end_point, metrics)) + + def get_instance_list(self): + return self._instance_list + + def get_instance_list(self, service_name: str) -> list: + if service_name not in self._instance_list: + return [] + return list(self._instance_list[service_name]) + + def find_service(self, service_name: str) -> bool: + return service_name in self._instance_list + + def to_dict(self): + ret = {} + logger.info(f"-----to dict {self._primary}") + if self._primary is not None: + ret["primary"] = self._primary + if self._instance_list is not None: + ret["instance_list"] = {} + for k, v in self._instance_list.items(): + ret["instance_list"][k] = list(v) + return ret + + +class GlobalServiceDiscovery(Model): + def __init__(self): + """ + ├── graph_1 + │ ├── instance_list + │ │ ├── cypher + │ │ │ ├── 11.12.13.14_7687 + │ │ │ └── 22.12.13.14_7687 + │ | ├── gremlin + │ │ │ ├── 11.12.13.14_12314 + │ │ │ └── 22.12.13.14_12314 + │ | └─-- procedure + │ | ├── 11.12.13.14_10000 + │ | └── 22.12.13.14_10000 + │ │ + | └── primary + """ + self._map = {} + """ + { + graph_id: { + primary: primary_ip + instance_list: { + cypher: [ + { + endpoint: xxxx + metrics { + snapshot: xxxx + } + }, + { + endpoint: xxxx + metrics { + snapshot: xxxx + }, + }] + } + } + } + """ + + def add_discovery_instance(self, graph_id, service_name, endpoint, metrics): + if graph_id not in self._map: + self._map[graph_id] = DiscoverResult() + self._map[graph_id].add_instance(service_name, endpoint, metrics) + + def set_primary_instance(self, graph_id: str, primary_ip: str): + """Expect primary_ip to be a single string + + Args: + graph_id (_type_): The unique identifier for the graph + primary_ip (str): A single string + """ + if graph_id not in self._map: + 
self._map[graph_id] = DiscoverResult() + logger.info(f"Set primary instance: {graph_id}, {primary_ip}") + self._map[graph_id].set_primary(primary_ip) + + def get(self, graph_id, service_name) -> dict: + """In raw storage, we store primary as a ip, when fetching, we should return it as a ServiceInstance + + Args: + graph_id (_type_): _description_ + service_name (_type_): _description_ + + Returns: + dict: { + primary: ServiceInstance, + instance_list: [ServiceInstance] + } + """ + if graph_id not in self._map: + return None + if not self._map[graph_id].find_service(service_name): + logger.error(f"Service not found: {service_name}") + return None + # TODO: FIX me + instance_list = self._map[graph_id].get_instance_list(service_name) + primary = self._map[graph_id].primary + for i in range(len(instance_list)): + if instance_list[i].endpoint.startswith(primary): + primary = instance_list[i] + return { + "graph_id": graph_id, + "service_registry": { + "service_name": service_name, + "primary": primary.to_dict(), + "instances": [x.to_dict() for x in instance_list], + }, + } + logger.error( + f"Primary instance not found in instance list: {primary}, {instance_list}" + ) + return { + "graph_id": graph_id, + "service_registry": { + "primary": None, + "serivce_name": service_name, + "instances": [x.to_dict() for x in instance_list], + }, + } + + def list_all(self) -> list: + """ + + Returns: + dict: { + graph_id: { + service_name: { + primary: ServiceInstance, + instance_list: [ServiceInstance] + } + } + } + """ + ori_dict = self.to_dict() + ret = [] + for graph_id, registry_info in ori_dict.items(): + cur = {} + cur["graph_id"] = graph_id + cur["service_registry"] = {} + primary_ip = registry_info.primary + logger.info(f"Found primary ip {primary_ip} for {graph_id}") + for service_name, instance_list in registry_info.instance_list.items(): + logger.info( + f"Found service {service_name} for {graph_id}, {instance_list}" + ) + cur["service_registry"]["service_name"] = 
service_name + if primary_ip: + for instance in instance_list: + if instance.endpoint.startswith(primary_ip): + cur["service_registry"]["primary"] = instance.to_dict() + logger.info(f"Found primary instance {instance} for {graph_id}, {service_name}") + # for instance in instance_list: + # if instance.endpoint.startswith(primary_ip): + # cur["service_registry"]["primary"] = instance.to_dict() + # logger.info(f"Found primary instance {instance} for {graph_id}, {service_name}") + cur["service_registry"]["instances"] = [x.to_dict() for x in instance_list] + ret.append(cur) + return ret + + def remove_discovery(self, graph_id, service_name): + logger.info(f"Remove discovery: {graph_id}, {service_name}") + if graph_id in self._map and service_name in self._map[graph_id]: + del self._map[graph_id][service_name] + + def remove_primary_instance(self, graph_id, service_name): + logger.info( + f'Remove primary instance: {graph_id}, {self._map[graph_id]["primary"]}' + ) + if graph_id in self._map: + del self._map[graph_id]["primary"] + + def to_dict(self): + return dict(self._map) + + +class EtcdServiceRegistry(IServiceRegistry): + """ + Implement the service registry using etcd. 
+ """ + + def __init__( + self, + etcd_host="localhost", + etcd_port=2379, + namespace="interactive", + instance_name="default", + ): + logger.info("namespace: %s, instance_name: %s", namespace, instance_name) + self._namespace = namespace + self._instance_name = instance_name + self._key_helper = MetaKeyHelper( + namespace=namespace, instance_name=instance_name + ) + self._etcd_kv_store = ETCDKeyValueStore.create( + self._key_helper, etcd_host, etcd_port + ) + self._etcd_kv_store.open() + + self._global_discovery = GlobalServiceDiscovery() + self._cancel_watch_handler = None + + @property + def namespace(self): + return self._namespace + + @property + def instance_name(self): + return self._instance_name + + def __del__(self): + self.stop() + logger.info("EtcdServiceRegistry is closed.") + + def start(self): + """ + Start watching the service registry, and will be kept updated with watch mechanism. + Watch all changes in the service registry. + """ + logger.info( + "Start watching the service registry on %s", + self._key_helper.service_prefix(), + ) + + def service_watch_call_back(event): + """ + Handling all the watch events. Should handle the events in a sequential way. 
+ """ + logger.info("Got event: %s", event) + if event is None: + return + if isinstance(event, PutEvent): + logger.info("Put event: %s", event) + self._handle_put_event(event) + elif isinstance(event, DeleteEvent): + logger.info("Delete event: %s", event) + self._handle_delete_event(event) + elif isinstance(event, etcd3.watch.WatchResponse): + logger.info("Watch response: %s", event.events) + for e in event.events: + if isinstance(e, PutEvent): + self._handle_put_event(e) + elif isinstance(e, DeleteEvent): + self._handle_delete_event(e) + else: + raise ValueError("Invalid event type: %s", event) + + # When we start, we need to first scan whether primary is set, then we watch + # the service registry + self._etcd_kv_store.get_with_prefix(META_SERVICE_KEY) + for key, value in self._etcd_kv_store.get_with_prefix(META_SERVICE_KEY): + logger.info(f"Get key: {key}, value: {value}") + # The returned key is after prefix, append the prefix + key = self._etcd_kv_store.root + "/" + key + self._handle_put_event_impl(key, value) + + logger.info("Watch prefix: %s", self._key_helper.service_prefix()) + self._cancel_watch_handler = self._etcd_kv_store.add_watch_prefix_callback( + self._key_helper.service_prefix(), service_watch_call_back + ) + logger.info("Watch handler: %s", self._cancel_watch_handler) + + def stop(self): + """ + Stop watching the service registry. + """ + if self._cancel_watch_handler is not None: + logger.info("Stop watching the service registry.") + cancel_res = self._etcd_kv_store.cancel_watch(self._cancel_watch_handler) + self._cancel_watch_handler = None + if self._etcd_kv_store is not None: + self._etcd_kv_store.close() + self._etcd_kv_store = None + + def discover(self, graph_id: str, service_name: str) -> dict: + """ + Manually discover the registered services for a graph for a given service. + + param graph_id: the unique graph id + param service_name: the name of the service, e.g., gremlin, cypher, etc. 
+ + return: True if the service is registered successfully, False otherwise + """ + return self._global_discovery.get(graph_id, service_name) + + def list_all(self) -> dict: + """ + List all services in the service registry. + """ + return self._global_discovery.list_all() + + def _handle_put_event_impl(self, key, value): + graph_id, service_name, ip_port = self._try_decode_key(key) + if graph_id is None: + return + logger.info( + "Put event: graph_id=%s, service_name=%s, endpoint=%s, value=%s", + graph_id, + service_name, + ip_port, + value, + ) + # check whether ip_port is like ip or ip_port + if ip_port is None: + self._global_discovery.set_primary_instance(graph_id, value) + else: + self._global_discovery.add_discovery_instance( + graph_id, service_name, ip_port, value + ) + + def _handle_put_event(self, event: PutEvent): + """ + Handle the put event. + """ + value = event.value.decode("utf-8") + key = event.key.decode("utf-8") + return self._handle_put_event_impl(key, value) + + def _handle_delete_event(self, event: DeleteEvent): + """ + Handle the delete event. + """ + key = event.key.decode("utf-8") + graph_id, service_name, ip_port = self._try_decode_key(key) + logger.info( + "Delete event: graph_id=%s, service_name=%s, endpoint=%s", + graph_id, + service_name, + ip_port, + ) + if ip_port is None: + self._global_discovery.remove_primary_instance(graph_id) + else: + self._global_discovery.remove_discovery(graph_id, service_name) + + def _try_decode_key(self, key): + """ + Try to decode the key to get the graph_id and service_name. + If the key is instance_list key, return the graph_id, service_name, and ip_port. + If the key is primary key, return the graph_id and service_name and None. 
+ """ + _tuple = self._key_helper.decode_service_key(key) + if _tuple is None: + return None, None, None + if len(_tuple) != 3: + raise RuntimeError("Expect 3 parts, but got %d", len(_tuple)) + return _tuple + + +service_registry = None + + +def initialize_service_registry(config: Config): + global service_registry + if config.master.service_registry.type == "etcd": + # get ip and port from http://ip:port + endpoint = config.master.service_registry.endpoint + endpoint = endpoint.startswith("http://") and endpoint[7:] or endpoint + ip, port = endpoint.split(":") + service_registry = EtcdServiceRegistry( + ip, + int(port), + config.master.k8s_launcher_config.namespace, + config.master.instance_name, + ) + else: + raise ValueError( + "Invalid service registry type: %s", config.master.service_registry.type + ) + service_registry.start() + + +def get_service_registry(): + global service_registry + return service_registry diff --git a/flex/interactive/sdk/master/gs_interactive_admin/encoder.py b/flex/interactive/sdk/master/gs_interactive_admin/encoder.py index 3e8dbd3084d9..dbf295f301c0 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/encoder.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/encoder.py @@ -25,6 +25,8 @@ class JSONEncoder(FlaskJSONEncoder): include_nulls = False def default(self, o): + if isinstance(o, set): + return list(o) if isinstance(o, Model): dikt = {} for attr in o.openapi_types: diff --git a/flex/interactive/sdk/master/gs_interactive_admin/file_utils.py b/flex/interactive/sdk/master/gs_interactive_admin/file_utils.py new file mode 100644 index 000000000000..d1f6d593db6f --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/file_utils.py @@ -0,0 +1,44 @@ +import datetime + +import typing +import subprocess +import threading +import os +from gs_interactive_admin import typing_utils +import logging +import re +from gs_interactive_admin.core.config import INTERACTIVE_WORKSPACE +from 
gs_interactive_admin.models.upload_file_response import UploadFileResponse + +logger = logging.getLogger("interactive") + + +def parse_file_metadata(location: str) -> dict: + """ + Args: + location: optional values: + odps://path/to/file, hdfs://path/to/file, file:///path/to/file + /home/graphscope/path/to/file + """ + metadata = {"datasource": "file"} + path = location + pattern = r"^(odps|hdfs|file|oss|s3)?://([\w/.-]+)$" + match = re.match(pattern, location) + if match: + datasource = match.group(1) + metadata["datasource"] = datasource + if datasource == "file": + path = match.group(2) + if metadata["datasource"] == "file": + _, file_extension = os.path.splitext(path) + metadata["file_type"] = file_extension[1:] + return metadata + + +def upload_file_impl(filestorage) -> str: + filepath = os.path.join(INTERACTIVE_WORKSPACE, filestorage.filename) + if not os.path.exists(INTERACTIVE_WORKSPACE): + os.makedirs(INTERACTIVE_WORKSPACE) + filestorage.save(filepath) + metadata = parse_file_metadata(filepath) + return UploadFileResponse.from_dict({"file_path": filepath, "metadata": metadata}) diff --git a/flex/interactive/sdk/master/gs_interactive_admin/test/__init__.py b/flex/interactive/sdk/master/gs_interactive_admin/test/__init__.py index de57499a3cc0..e91a9030dc34 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/test/__init__.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/test/__init__.py @@ -21,13 +21,24 @@ from flask_testing import TestCase from gs_interactive_admin.encoder import JSONEncoder +from gs_interactive_admin.core.service_discovery.service_registry import ( + initialize_service_registry, +) +from gs_interactive_admin.core.config import Config class BaseTestCase(TestCase): def create_app(self): + logging.basicConfig(level=logging.INFO) + config = Config() + config.master.service_registry.ttl = 3 + + initialize_service_registry(config) logging.getLogger("connexion.operation").setLevel("ERROR") + 
logging.getLogger("interactive").setLevel("INFO") app = connexion.App(__name__, specification_dir="../openapi/") app.app.json_encoder = JSONEncoder app.add_api("openapi.yaml", pythonic_params=True) + return app.app diff --git a/flex/interactive/sdk/master/gs_interactive_admin/test/conftest.py b/flex/interactive/sdk/master/gs_interactive_admin/test/conftest.py new file mode 100644 index 000000000000..8e50de7faef2 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/test/conftest.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from neo4j import Session as Neo4jSession + +#TODO: We should not use two conftest.py. 
Merge this with python/gs_interactive/tests/conftest.py +def call_procedure(neo4j_sess: Neo4jSession, graph_id: str, proc_name: str, *args): + query = "CALL " + proc_name + "(" + ",".join([str(item) for item in args]) + ")" + result = neo4j_sess.run(query) + for record in result: + print(record) \ No newline at end of file diff --git a/flex/interactive/sdk/master/gs_interactive_admin/test/remove_k8s_resources.sh b/flex/interactive/sdk/master/gs_interactive_admin/test/remove_k8s_resources.sh new file mode 100755 index 000000000000..7b4c527854af --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/test/remove_k8s_resources.sh @@ -0,0 +1,13 @@ + +# Expect 1 args +if [ $# -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi +alias ktl="kubectl -n default" +graph_id=$1 + +ktl delete svc default-graph-${graph_id}-engine-headless +ktl delete sts default-graph-${graph_id}-engine +ktl delete ConfigMap default-graph-${graph_id}-config +ktl delete pvc interactive-workspace-default-graph-${graph_id}-engine-0 \ No newline at end of file diff --git a/flex/interactive/sdk/master/gs_interactive_admin/test/test_end_to_end.py b/flex/interactive/sdk/master/gs_interactive_admin/test/test_end_to_end.py new file mode 100644 index 000000000000..8f231a1eb2d0 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/test/test_end_to_end.py @@ -0,0 +1,275 @@ +import unittest + +from flask import json +import pytest +import logging +import os +import sys +import time + +sys.path.append(os.path.join(os.path.dirname(__file__), "../../../python/")) + +logger = logging.getLogger("interactive") + +from gs_interactive_admin.models.job_status import JobStatus # noqa: E501 +from gs_interactive_admin.test import BaseTestCase + +from gs_interactive.client.driver import Driver # noqa: E402 +from gs_interactive.client.status import StatusCode # noqa: E402 +from gs_interactive_admin.test.conftest import call_procedure +from neo4j import GraphDatabase + +# Must use the models on SDK 
side +from gs_interactive.models import * + +test_graph_def = { + "name": "modern_graph", + "description": "This is a test graph", + "schema": { + "vertex_types": [ + { + "type_name": "person", + "properties": [ + { + "property_name": "id", + "property_type": {"primitive_type": "DT_SIGNED_INT64"}, + }, + { + "property_name": "name", + "property_type": {"string": {"var_char": {"max_length": 16}}}, + }, + { + "property_name": "age", + "property_type": {"primitive_type": "DT_SIGNED_INT32"}, + }, + ], + "primary_keys": ["id"], + } + ], + "edge_types": [ + { + "type_name": "knows", + "vertex_type_pair_relations": [ + { + "source_vertex": "person", + "destination_vertex": "person", + "relation": "MANY_TO_MANY", + } + ], + "properties": [ + { + "property_name": "weight", + "property_type": {"primitive_type": "DT_DOUBLE"}, + } + ], + } + ], + }, +} + + +class TestEndToEnd(unittest.TestCase): + """A comprehensive test case contains creating graph, importing data, running queries, and deleting graph""" + + def setup_class(self): + if os.environ.get("INTERACTIVE_ADMIN_ENDPOINT") is None: + raise Exception("INTERACTIVE_ADMIN_ENDPOINT is not set") + self._endpoint = os.environ.get("INTERACTIVE_ADMIN_ENDPOINT") + logger.info("Endpoint: %s", self._endpoint) + self._driver = Driver(self._endpoint) + self._sess = self._driver.getDefaultSession() + self._graph_id = "33" + + def test1(self): + # self.create_graph() + # self.import_data() + # self.get_statistics() + # self.start_service() + # self.call_builtin_procedure() + self.create_cpp_procedure() + # self.start_service_on_graph(self._graph_id) + # self.call_cpp_procedure() + # self.stop_service() + # self.callVertexEdgeQuery() + + def start_service_on_graph(self, graph_id): + resp = self._sess.start_service( + start_service_request=StartServiceRequest(graph_id=graph_id) + ) + assert resp.is_ok() + logger.info(f"Launching service resp {resp}") + # get service status + resp = self._sess.get_service_status() + assert resp.is_ok() + 
+ def create_graph(self): + create_graph_request = CreateGraphRequest.from_dict(test_graph_def) + resp = self._sess.create_graph(create_graph_request) + assert resp.is_ok() + graph_id = resp.get_value().graph_id + print("Graph id: ", graph_id) + self._graph_id = graph_id + + def import_data(self): + assert os.environ.get("FLEX_DATA_DIR") is not None + location = os.environ.get("FLEX_DATA_DIR") + person_csv_path = "@/{}/person.csv".format(location) + knows_csv_path = "@/{}/person_knows_person.csv".format(location) + print("test bulk loading: ", self._graph_id, person_csv_path, knows_csv_path) + schema_mapping = SchemaMapping( + loading_config=SchemaMappingLoadingConfig( + data_source=SchemaMappingLoadingConfigDataSource(scheme="file"), + import_option="init", + format=SchemaMappingLoadingConfigFormat(type="csv"), + ), + vertex_mappings=[ + VertexMapping(type_name="person", inputs=[person_csv_path]) + ], + edge_mappings=[ + EdgeMapping( + type_triplet=EdgeMappingTypeTriplet( + edge="knows", + source_vertex="person", + destination_vertex="person", + ), + inputs=[knows_csv_path], + ) + ], + ) + resp = self._sess.bulk_loading(self._graph_id, schema_mapping) + assert resp.is_ok() + job_id = resp.get_value().job_id + assert self.waitJobFinish(job_id) + logger.info("Successfully import data") + + def get_statistics(self): + resp = self._sess.get_graph_statistics(self._graph_id) + assert resp.is_ok() + print("Statistics: ", resp.get_value()) + + def start_service(self): + resp = self._sess.start_service( + start_service_request=StartServiceRequest(graph_id=self._graph_id) + ) + assert resp.is_ok() + logger.info(f"Launching service resp {resp}") + # get service status + resp = self._sess.get_service_status() + assert resp.is_ok() + + def stop_service(self): + # logger.info("Stopping service") + # # logger.info(f"Wil wait 10 seconds before stopping service") + # # time.sleep(200) + resp = self._sess.stop_service(graph_id=self._graph_id) + logger.info(f"Stop service resp 
{resp}") + + def waitJobFinish(self, job_id: str): + assert job_id is not None + while True: + resp = self._sess.get_job(job_id) + assert resp.is_ok() + status = resp.get_value().status + print("job status: ", status) + if status == "SUCCESS": + print(resp.get_value()) + return True + elif status == "FAILED": + return False + else: + time.sleep(1) + + def call_builtin_procedure(self): + """Test calling builtin procedure. The request should distributed to all valid backend servers in a roundrobin or a random manner.""" + all_endpoints = self._driver.getNeo4jEndpoints(self._graph_id) + logger.info("All endpoints: %s", all_endpoints) + for endpoint in all_endpoints: + logger.info("Endpoint: %s", endpoint) + neo4j_driver = GraphDatabase.driver(endpoint, auth=None) + with neo4j_driver.session() as session: + logger.info(f"calling procedure count vertices on graph {self._graph_id}") + call_procedure( + session, + self._graph_id, + "count_vertices", + '"person"', + ) + + def create_cpp_procedure(self): + self._cpp_proc_name = "test_procedure_cpp" + app_path = os.path.join( + os.path.dirname(__file__), "../../../java/src/test/resources/sample_app.cc" + ) + if not os.path.exists(app_path): + raise Exception("sample_app.cc not found") + with open(app_path, "r") as f: + app_content = f.read() + + create_proc_request = CreateProcedureRequest( + name=self._cpp_proc_name, + description="test procedure", + query=app_content, + type="cpp", + ) + resp = self._sess.create_procedure(self._graph_id, create_proc_request) + assert resp.is_ok() + print("create procedure: ", resp.get_value()) + + def call_cpp_procedure(self): + """ + Test directly submit procedure request to query port. + This will fetch the procedure's endpoints in a certain manner(roundrobin or random) and submit the request to the endpoint. 
+ """ + req = QueryRequest( + query_name=self._cpp_proc_name, + arguments=[ + TypedValue( + type=GSDataType(PrimitiveType(primitive_type="DT_SIGNED_INT32")), + value=1, + ) + ], + ) + resp = self._sess.call_procedure(graph_id=self._graph_id, params=req) + assert resp.is_ok() + print("call procedure result: ", resp.get_value()) + + def callVertexEdgeQuery(self): + """ + Vertex/Edge Queries could be splited into two kinds: + - Write: insert/update/delete + - Read: get + For Write queries, we need to send to the primary server. + For Read queries, we could send to any server. + """ + # get vertex + resp = self._sess.get_vertex( + graph_id=self._graph_id, + label="person", + primary_key_value="1" # marko + ) + assert resp.is_ok() + vertex = resp.get_value() + assert vertex.to_dict == { + "label": "person", + "values": [ + {"name": "id", "value": 1}, + {"name": "name", "value": "marko"}, + {"name": "age", "value": 29}, + ], + } + + def tearDown(self): + if self._graph_id is not None: + rep1 = self.stop_service() + print("stop service: ", rep1) + + def teardown_class(self): + pass + # if self._graph_id is not None: + # rep2 = self._sess.delete_graph(self._graph_id) + # print("delete graph: ", rep2) + + + +if __name__ == "__main__": + unittest.main() diff --git a/flex/interactive/sdk/master/gs_interactive_admin/test/test_get_service_info.py b/flex/interactive/sdk/master/gs_interactive_admin/test/test_get_service_info.py new file mode 100644 index 000000000000..88e05589b75d --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/test/test_get_service_info.py @@ -0,0 +1,138 @@ +import time +import unittest + +from flask import json +import os +import logging +import etcd3 + +from gs_interactive_admin.models.job_status import JobStatus # noqa: E501 +from gs_interactive_admin.test import BaseTestCase + +from gs_interactive_admin.util import MetaKeyHelper +from gs_interactive_admin.core.service_discovery.service_registry import ( + get_service_registry, +) + 
+logger = logging.getLogger("interactive") + + +class TestServiceRegistryServer(BaseTestCase): + """AdminServiceJobManagementController integration test stubs""" + + def setUp(self): + # get the field from the parent class + if os.environ.get("ETCD_ENDPOINT") is None: + raise Exception("ETCD_ENDPOINT is not set") + self.etcd_endpoint = os.environ["ETCD_ENDPOINT"] + self.host, self.port = self.etcd_endpoint.split(":") + self.etcd_client = etcd3.client(host=self.host, port=int(self.port)) + self._helper = MetaKeyHelper( + namespace=get_service_registry().namespace, + instance_name=get_service_registry().instance_name, + ) + + def send_request(self, method, url, data=None): + headers = { + "Accept": "application/json", + } + logger.info("sending request to %s", url) + response = self.client.open( + url, method=method, headers=headers, data=json.dumps(data) + ) + logger.info("response : %s", response.json) + return response + + def test_get_service_registry_info(self): + """ + Test case for get_service_registry_info + """ + resp = self.send_request(method="get", url="/v1/service/registry") + assert resp.status_code == 200 and resp.json == {} + + service_key1 = ( + self._helper.service_instance_list_prefix( + "graph_id_example", "service_name_example" + ) + + "/example.interactive.com:7687" + ) + primary_key1 = self._helper.service_primary_key( + "graph_id_example", "service_name_example" + ) + service_key2 = ( + self._helper.service_instance_list_prefix( + "graph_id_example", "service_name_example" + ) + + "/example.interactive.com:7688" + ) + primary_key2 = self._helper.service_primary_key( + "graph_id_example", "service_name_example" + ) + mock_metrics1 = '{"endpoint": "example.interactive.com:7687", "service_name": "service_name_example", "snapshot_id": "0"}' + mock_metrics2 = '{"endpoint": "example.interactive.com:7688", "service_name": "service_name_example", "snapshot_id": "0"}' + + self.etcd_client.put(service_key1, mock_metrics1) + 
self.etcd_client.put(primary_key1, mock_metrics1) + time.sleep(1) + # registry should immediately get the updated data + resp = self.send_request(method="get", url="/v1/service/registry") + assert resp.status_code == 200 and resp.json == { + "graph_id_example": { + "service_name_example": { + "instance_list": [ + { + "endpoint": "example.interactive.com:7687", + "metrics": '{"endpoint": "example.interactive.com:7687", "service_name": "service_name_example", "snapshot_id": "0"}', + }, + ], + "primary": { + "endpoint": "example.interactive.com:7687", + "metrics": '{"endpoint": "example.interactive.com:7687", "service_name": "service_name_example", "snapshot_id": "0"}', + }, + } + } + } + + # insert another service instance + self.etcd_client.put(service_key2, mock_metrics2) + time.sleep(1) + # registry should immediately get the updated data + resp = self.send_request(method="get", url="/v1/service/registry") + # sort the resp.json + sorted_resp = resp.json + for graph_id in sorted_resp: + for service_name in sorted_resp[graph_id]: + sorted_resp[graph_id][service_name]["instance_list"] = sorted( + sorted_resp[graph_id][service_name]["instance_list"], + key=lambda x: x["endpoint"], + reverse=False, + ) + + assert resp.status_code == 200 and sorted_resp == { + "graph_id_example": { + "service_name_example": { + "instance_list": [ + { + "endpoint": "example.interactive.com:7687", + "metrics": '{"endpoint": "example.interactive.com:7687", "service_name": "service_name_example", "snapshot_id": "0"}', + }, + { + "endpoint": "example.interactive.com:7688", + "metrics": '{"endpoint": "example.interactive.com:7688", "service_name": "service_name_example", "snapshot_id": "0"}', + }, + ], + "primary": { + "endpoint": "example.interactive.com:7687", + "metrics": '{"endpoint": "example.interactive.com:7687", "service_name": "service_name_example", "snapshot_id": "0"}', + }, + } + } + } + + def teardown(self): + self.etcd_client.delete_prefix(self._helper.service_prefix()) + 
self.etcd_client.close() + + +if __name__ == "__main__": + unittest.main() diff --git a/flex/interactive/sdk/master/gs_interactive_admin/test/test_k8s_launcher.py b/flex/interactive/sdk/master/gs_interactive_admin/test/test_k8s_launcher.py new file mode 100644 index 000000000000..dec4979a446c --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/test/test_k8s_launcher.py @@ -0,0 +1,62 @@ +import time +import unittest +import pytest + +from flask import json +import os +import logging +import etcd3 + +from gs_interactive_admin.models.job_status import JobStatus # noqa: E501 +from gs_interactive_admin.test import BaseTestCase +from gs_interactive_admin.core.launcher.k8s_launcher import K8sLauncher + +from gs_interactive_admin.core.config import Config + +logger = logging.getLogger("interactive") + + +class TestLaunchK8sCluster(unittest.TestCase): + """AdminServiceJobManagementController integration test stubs""" + + def setup_class(self): + self._config = Config() + self._k8s_launcher = K8sLauncher(self._config) + self._config.master.instance_name = "test" + self._config.master.k8s_launcher_config.namespace = "default" + self._config.master.k8s_launcher_config.default_replicas = 1 + self._cluster = None + + def test_launch_cluster(self): + self._cluster = self._k8s_launcher.launch_cluster( + "test", self._config, wait_service_ready=False + ) + max_wait = 60 + while max_wait > 0: + if self._cluster.is_ready(): + break + time.sleep(1) + max_wait -= 1 + + assert self._cluster.is_ready() + logger.info("Cluster is ready") + + instance_id = self._cluster.instance_id + logger.info(f"instance_id {instance_id}") + + # Now stop the cluster + self._cluster.stop() + logger.info("Cluster is stopped") + + # Check from k8s_launcher + clusters = self._k8s_launcher.get_cluster_status(instance_id) + assert clusters is None + + def teardown_class(self): + # if self._cluster: + # self._cluster.stop() + pass + + +if __name__ == "__main__": + unittest.main() diff --git 
a/flex/interactive/sdk/master/gs_interactive_admin/test/test_kv_store.py b/flex/interactive/sdk/master/gs_interactive_admin/test/test_kv_store.py new file mode 100644 index 000000000000..09f6899d29b7 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/test/test_kv_store.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import unittest + +import os +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) +from flask import json +import logging + +from gs_interactive_admin.models.job_status import JobStatus # noqa: E501 +from gs_interactive_admin.test import BaseTestCase +from gs_interactive_admin.core.metadata.kv_store import ETCDKeyValueStore + + +class TestEtcdKVStore(unittest.TestCase): + def setup_class(self): + # Read etcd server endpoint from environment variable + if "ETCD_ENDPOINT" not in os.environ: + raise Exception("ETCD_ENDPOINT is not set") + self.etcd_endpoint = os.environ["ETCD_ENDPOINT"] + host, port = self.etcd_endpoint.split(":") + self.etcd_kv_store = ETCDKeyValueStore(host, int(port)) + self.etcd_kv_store.open() + # config logging + logging.basicConfig(level=logging.INFO) + + def test_insert(self): + key = "test_key" + value = "test_value" + self.etcd_kv_store.insert(key, value) + assert self.etcd_kv_store.get(key) == value + # delete the key + assert 
self.etcd_kv_store.delete(key) + assert self.etcd_kv_store.get(key) is None + + def test_insert_without_key(self): + value1 = "test_value" + value2 = "test_value2" + key1, key_id = self.etcd_kv_store.insert_with_prefix("prefix", value1) + key2, key_id = self.etcd_kv_store.insert_with_prefix("prefix", value2) + print("key1: ", key1) + print("key2: ", key2) + assert self.etcd_kv_store.get(key1) == value1 + assert self.etcd_kv_store.get(key2) == value2 + # delete the key + assert self.etcd_kv_store.delete(key1) + assert self.etcd_kv_store.get(key1) is None + value3 = "test_value3" + key3, key_id = self.etcd_kv_store.insert_with_prefix("prefix", value3) + assert self.etcd_kv_store.get(key3) == value3 + + kv_tuples = self.etcd_kv_store.get_with_prefix("prefix") + assert kv_tuples == [(key2, value2), (key3, value3)] + + def test_delete(self): + key = "prefix/test_key" + value = "test_value" + self.etcd_kv_store.insert(key, value) + assert self.etcd_kv_store.get(key) == value + key2 = "prefix/test_key2" + value2 = "test_value2" + self.etcd_kv_store.insert(key2, value2) + assert self.etcd_kv_store.get(key2) == value2 + + self.etcd_kv_store.delete_with_prefix("prefix") + assert self.etcd_kv_store.get(key) is None + assert self.etcd_kv_store.get(key2) is None + + def test_update(self): + key = "prefix/test_key" + value = "test_value" + self.etcd_kv_store.insert(key, value) + assert self.etcd_kv_store.get(key) == value + value2 = "test_value2" + self.etcd_kv_store.update(key, value2) + assert self.etcd_kv_store.get(key) == value2 + + assert self.etcd_kv_store.update_with_func(key, lambda x: x + "2") + assert self.etcd_kv_store.get(key) == value2 + "2" + + self.etcd_kv_store.delete(key) + assert self.etcd_kv_store.get(key) is None + + def teardown_class(self): + self.etcd_kv_store.delete_with_prefix("/") + self.etcd_kv_store.close() diff --git a/flex/interactive/sdk/master/gs_interactive_admin/test/test_service_discovery.py 
b/flex/interactive/sdk/master/gs_interactive_admin/test/test_service_discovery.py new file mode 100644 index 000000000000..e38bb0b95df8 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/test/test_service_discovery.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import unittest + +import os +import sys + +sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) +from flask import json +import logging +import threading +import time +import etcd3 + +from gs_interactive_admin.models.job_status import JobStatus # noqa: E501 +from gs_interactive_admin.core.service_discovery.service_registry import ( + EtcdServiceRegistry, +) +from gs_interactive_admin.util import MetaKeyHelper +from gs_interactive_admin.core.service_discovery.service_registry import ServiceInstance + +logger = logging.getLogger("interactive") + + +class TestServiceDiscovery(unittest.TestCase): + def setUp(self): + # Read etcd server endpoint from environment variable + if "ETCD_ENDPOINT" not in os.environ: + raise Exception("ETCD_ENDPOINT is not set") + self.etcd_endpoint = os.environ["ETCD_ENDPOINT"] + host, port = self.etcd_endpoint.split(":") + self.etcd_client = etcd3.client(host=host, port=int(port)) + self.etcd_key_helper = MetaKeyHelper(namespace="test", instance_name="test") + self.registry = EtcdServiceRegistry(host, int(port), 
"test", "test") + # config logging + logging.basicConfig(level=logging.INFO) + + def test_discover(self): + mock_endpoint = "11.12.234.38:7687" + mock_metrics = '{"endpoint": "11.12.234.38:7687", "service_name": "cypher","snapshot_id": "0"}' + + def mock_service(): + """ + Mock the service registration and delete. + """ + service_instance_list_prefix = ( + self.etcd_key_helper.service_instance_list_prefix("0", "cypher") + ) + primary_key = self.etcd_key_helper.service_primary_key("0", "cypher") + service_key = service_instance_list_prefix + "/" + mock_endpoint + self.etcd_client.put(service_key, mock_metrics) + self.etcd_client.put(primary_key, mock_metrics) + logging.info( + "Mock service registered, key: %s, value: %s", service_key, mock_metrics + ) + + self.registry.start() + pre_registry = self.registry.discover("0", "cypher") + assert pre_registry is None or pre_registry == {} + # mock_service() + t = threading.Thread(target=mock_service) + t.start() + time.sleep(2) + post_registry = self.registry.discover("0", "cypher") + logger.info("post_registry: %s", post_registry) + assert post_registry is not None + assert ( + post_registry["primary"] + == ServiceInstance(mock_endpoint, mock_metrics).to_dict() + ) + logger.info("post_registry: %s", post_registry) + assert post_registry["instance_list"] == [ + ServiceInstance(mock_endpoint, mock_metrics).to_dict() + ] + self.registry.stop() + t.join() + + def teardown(self): + self.registry.stop() + self.etcd_client.delete_prefix("/") + self.etcd_client.close() + logging.info("Clean up etcd.") diff --git a/flex/interactive/sdk/master/gs_interactive_admin/util.py b/flex/interactive/sdk/master/gs_interactive_admin/util.py index f4be60a3b0fb..889778c0b652 100644 --- a/flex/interactive/sdk/master/gs_interactive_admin/util.py +++ b/flex/interactive/sdk/master/gs_interactive_admin/util.py @@ -16,9 +16,84 @@ # limitations under the License. 
# import datetime -import typing +import typing +import subprocess +import threading +import os +import oss2 from gs_interactive_admin import typing_utils +import logging +import yaml + +from gs_interactive_admin.core.config import OSS_BUCKET_NAME, OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET, OSS_ENDPOINT + +logger = logging.getLogger("interactive") + +def dump_file(data, file_path): + if isinstance(data, str): + with open(file_path, "w") as f: + f.write(data) + return file_path + elif isinstance(data, dict): + # dump as yaml + with open(file_path, "w") as f: + yaml.dump(data, f) + return file_path + else: + raise ValueError(f"Invalid data type: {type(data)}") + +def convert_str_to_k8s_valid(str): + """ + Convert a string to a valid k8s name. + """ + return str.replace("_", "-").replace(".", "-").replace("/", "-").replace(":", "-") + +class SubProcessRunner(object): + def __init__(self, graph_id, command, callback, log_file): + self._graph_id = graph_id + self.command = command + self.callback = callback + self.log_file = log_file + self.process_id = None + self.thread = None + + @property + def graph_id(self): + return self._graph_id + + def start(self): + def target(): + with open(self.log_file, "w") as log: + process = subprocess.Popen(self.command, stdout=log, stderr=log) + self.process_id = process.pid + + process.wait() + logger.info( + f"Job process {self.process_id} finished with code {process.returncode}, calling callback" + ) + self.callback(process) + + self.thread = threading.Thread(target=target) + self.thread.start() + return self.thread, self.process_id + + def is_alive(self): + return self.thread.is_alive() + + +def remove_nones(data: dict): + """ + Recursively remove None values from a dictionary. 
+ """ + if isinstance(data, dict): + return { + key: remove_nones(value) for key, value in data.items() if value is not None + } + elif isinstance(data, list): + return [remove_nones(item) for item in data] + else: + return data def _deserialize(data, klass): @@ -164,3 +239,111 @@ def _deserialize_dict(data, boxed_type): :rtype: dict """ return {k: _deserialize(v, boxed_type) for k, v in data.items()} + + +META_SERVICE_KEY = "service" +INSTANCE_LIST_KEY = "instance_list" +META_PRIMARY_KEY = "primary" +METADATA_KEY = "metadata" +GRAPH_META_KEY = "graph_meta" +JOB_META_KEY = "job_meta" +PLUGIN_META_KEY = "plugin_meta" +STATUS_META_KEY = "status" +STATISTICS_KEY = "statistics" + + +class MetaKeyHelper(object): + def __init__(self, namespace="interactive", instance_name="default"): + self.namespace = namespace + self.instance_name = instance_name + self._root = "/" + "/".join([namespace, instance_name]) + self._service_root = "/" + "/".join( + [namespace, instance_name, META_SERVICE_KEY] + ) + self._meta_root = "/" + "/".join([namespace, instance_name, METADATA_KEY]) + + @property + def root(self): + return self._root + + def graph_meta_prefix(self): + return "/".join([self._meta_root, GRAPH_META_KEY]) + + def plugin_meta_prefix(self, graph_id): + """Plugin is unique for graph scope, not global. 
+ + Returns: + _type_: _description_ + """ + return "/".join([self._meta_root, graph_id + "_" + PLUGIN_META_KEY]) + + def job_meta_prefix(self): + return "/".join([self._meta_root, JOB_META_KEY]) + + def service_prefix(self): + return self._service_root + + def service_instance_list_prefix(self, graph_id, service_name): + return "/".join([self._service_root, graph_id, INSTANCE_LIST_KEY, service_name]) + + def service_primary_key(self, graph_id, service_name): + return "/".join([self._service_root, graph_id, META_PRIMARY_KEY, service_name]) + + def graph_status_key(self, graph_id): + return "/".join([self._meta_root, STATUS_META_KEY, graph_id]) + + def graph_statistics_key(self, graph_id): + return "/".join([self._meta_root, STATISTICS_KEY, graph_id]) + + def decode_service_key(self, key): + """ + Decode a key into instance_list_key or primary_key. + /namespace/instance_name/service/graph_id/instance_list/service_name/ip:port + /namespace/instance_name/service/graph_id/primary + return graph_id, service_name, endpoint, + """ + keys = key.split("/") + keys = list(filter(None, keys)) + if len(keys) > 7 or len(keys) < 5: + raise ValueError(f"Invalid key: {keys}") + logger.info(f"keys {keys}") + key_type = keys[4] + if key_type == INSTANCE_LIST_KEY: + # graph_id, service_name, endpoint + return keys[3], keys[5], keys[6] + elif key_type == META_PRIMARY_KEY: + if len(keys) == 5: + return keys[3], None, None + else: + logger.warning(f"Got invalid key: {keys}") + return None + else: + raise ValueError(f"Invalid key type: {key_type}, {keys}") + + +def get_current_time_stamp_ms(): + return int(datetime.datetime.now().timestamp() * 1000) + +def check_field_in_dict(data, field): + if field not in data: + raise ValueError(f"Field {field} not found in {data}") + +class OssReader(object): + def __init__(self): + self._auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET) + self._bucket = oss2.Bucket(self._auth, OSS_ENDPOINT, OSS_BUCKET_NAME) + + def read(self, key): + """ + 
Read a file under the bucket. + """ + # remove the oss:// prefix if exists + if key.startswith("oss://"): + key = key[6:] + # remove the bucket name if exists + if key.startswith(OSS_BUCKET_NAME): + key = key[len(OSS_BUCKET_NAME) + 1:] + else: + logger.warning(f"Key {key} does not start with bucket name {OSS_BUCKET_NAME}") + logger.warning(f"Trying to read {key}") + return self._bucket.get_object(key).read().decode("utf-8") diff --git a/flex/interactive/sdk/master/gs_interactive_admin/version.py b/flex/interactive/sdk/master/gs_interactive_admin/version.py new file mode 100644 index 000000000000..e484de43eca2 --- /dev/null +++ b/flex/interactive/sdk/master/gs_interactive_admin/version.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +from packaging import version + +version_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "VERSION") + +with open(version_file_path, "r", encoding="utf-8") as fp: + sv = version.parse(fp.read().strip()) + __is_prerelease__ = sv.is_prerelease + __version__ = str(sv) + +__version_tuple__ = (v for v in __version__.split(".")) + +del version_file_path diff --git a/flex/interactive/sdk/python/gs_interactive/client/driver.py b/flex/interactive/sdk/python/gs_interactive/client/driver.py index c0584fcf8427..7b0895ef2cd8 100644 --- a/flex/interactive/sdk/python/gs_interactive/client/driver.py +++ b/flex/interactive/sdk/python/gs_interactive/client/driver.py @@ -19,13 +19,17 @@ import os from gremlin_python.driver.client import Client +from gs_interactive.models.graph_service_registry_record import GraphServiceRegistryRecord from neo4j import GraphDatabase from neo4j import Session as Neo4jSession +import logging from gs_interactive.client.session import DefaultSession from gs_interactive.client.session import Session +logger = logging.getLogger("interactive") + class Driver: """ The main entry point for the Interactive SDK. With the Interactive endpoints provided, @@ -157,6 +161,49 @@ def getNeo4jSessionImpl(self, **config) -> Neo4jSession: self._neo4j_driver = GraphDatabase.driver(self._cypher_endpoint, auth=None) return self._neo4j_driver.session(**config) + def getNeo4jEndpoints(self, graph_id: str) -> list[str]: + """ + Get all available neo4j endpoints. 
Only works if the admin service is running in k8s mode + Returns: + list[str]: list of all available neo4j endpoints + """ + service_status = self.getDefaultSession().get_service_status() + if service_status.is_ok(): + service_status_val = service_status.get_value() + if service_status_val.deploy_mode == "k8s": + return self._get_neo4j_endpoints_from_service_registry(graph_id) + elif service_status_val.deploy_mode == "standalone": + raise ValueError( + "getNeo4jEndpoints are not available in standalone deployment mode, call getNeo4jEndpoint() to get the endpoint" + ) + else: + raise ValueError( + "Failed to get service status " + service_status.get_status_message() + ) + + def _get_neo4j_endpoints_from_service_registry(self, graph_id: str) -> list[str]: + """ + Args: + graph_id (str): The graph id to get the endpoints for + + Returns: + list[str]: List of all available neo4j endpoints + """ + service_registry = self.getDefaultSession().get_service_registry_info(graph_id=graph_id, service_name="cypher") + if service_registry.is_ok(): + service_registry_val : GraphServiceRegistryRecord = service_registry.get_value() + logger.info(f"registry val {service_registry_val}") + if service_registry_val.graph_id != graph_id: + raise RuntimeError(f"Internal error: graph_id mismatch {graph_id}, {service_registry_val.graph_id}") + if service_registry_val.service_registry is None: + raise RuntimeError(f"Internal error: service_registry is None") + #NOTE: Currently we return all cypher endpoints, didn't take care of the primary one + return ["bolt://" + instance.endpoint for instance in service_registry_val.service_registry.instances] + else: + raise ValueError( + f"Failed to get service registry {service_registry.get_status_message()}" + ) + def getNeo4jEndpoint(self) -> str: """ Get the bolt endpoint from the service status endpoint. 
@@ -164,8 +211,14 @@ def getNeo4jEndpoint(self) -> str: """ service_status = self.getDefaultSession().get_service_status() if service_status.is_ok(): - bolt_port = service_status.get_value().bolt_port - return "bolt://" + self._host + ":" + str(bolt_port) + service_status_val = service_status.get_value() + if service_status_val.deploy_mode == "standalone": + bolt_port = service_status.get_value().bolt_port + return "bolt://" + self._host + ":" + str(bolt_port) + elif service_status_val.deploy_mode == "k8s": + raise ValueError( + "Neo4j endpoint is not available in k8s deployment mode, call getNeo4jEndpoints(graph_id) to get all endpoints for the specified graph" + ) else: raise ValueError( "Failed to get service status " + service_status.get_status_message() diff --git a/flex/interactive/sdk/python/gs_interactive/client/session.py b/flex/interactive/sdk/python/gs_interactive/client/session.py index 9076e6e685ee..813917699e05 100644 --- a/flex/interactive/sdk/python/gs_interactive/client/session.py +++ b/flex/interactive/sdk/python/gs_interactive/client/session.py @@ -18,10 +18,15 @@ from abc import ABCMeta from abc import abstractmethod +from enum import Enum from typing import Any from typing import List from typing import Optional from typing import Union +import threading +import time + +import logging from pydantic import Field from pydantic import StrictBytes @@ -33,6 +38,7 @@ from gs_interactive.api import AdminServiceJobManagementApi from gs_interactive.api import AdminServiceProcedureManagementApi from gs_interactive.api import AdminServiceServiceManagementApi +from gs_interactive.api import AdminServiceServiceRegistryApi from gs_interactive.api import GraphServiceEdgeManagementApi from gs_interactive.api import GraphServiceVertexManagementApi from gs_interactive.api import QueryServiceApi @@ -63,6 +69,7 @@ from gs_interactive.models import QueryRequest from gs_interactive.models import SchemaMapping from gs_interactive.models import ServiceStatus +from 
gs_interactive.models import GraphServiceRegistryRecord from gs_interactive.models import SnapshotStatus from gs_interactive.models import StartServiceRequest from gs_interactive.models import StopServiceRequest @@ -73,6 +80,12 @@ from gs_interactive.models import VertexRequest +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger("interactive") + +logger.setLevel(logging.INFO) + + class EdgeInterface(metaclass=ABCMeta): @abstractmethod def add_edge( @@ -317,6 +330,31 @@ class QueryServiceInterface: def get_service_status(self) -> Result[ServiceStatus]: raise NotImplementedError + @abstractmethod + def list_service_registry_info(self) -> Result[list[GraphServiceRegistryRecord]]: + """_summary_ + List all graph services in the service registry. + + Returns: + Result[list[GraphServiceRegistryRecord]]: The result containing the list of graph services in the service registry. + """ + raise NotImplementedError + + @abstractmethod + def get_service_registry_info( + self, graph_id: str, service_name: str + ) -> Result[GraphServiceRegistryRecord]: + """ + Get the routing information of a specified graph service. + Args: + graph_id (_type_): Graph ID. + service_name (_type_): Service name: cypher/procedure + + Returns: + _type_: The routing information of the specified graph service. + """ + raise NotImplementedError + @abstractmethod def start_service( self, @@ -370,13 +408,28 @@ class Session( pass +class DeploymentKind(Enum): + STANDALONE = "standalone" + K8S = "k8s" + NOT_SET = "not_set" + + class DefaultSession(Session): """ The default session implementation for Interactive SDK. It provides the implementation of all service APIs. + + The default session could connect to two kinds of admin services: standalone or k8s. + If the admin service is running in standalone mode, there will only be one stored_proc_uri. + If the admin service is running in k8s mode, there will be multiple stored_proc_uris. 
""" - def __init__(self, admin_uri: str, stored_proc_uri: str = None): + def __init__( + self, + admin_uri: str, + stored_proc_uri: str = None, + service_registry_fetching_interval: int = 1, + ): """ Construct a new session using the specified admin_uri and stored_proc_uri. @@ -384,31 +437,94 @@ def __init__(self, admin_uri: str, stored_proc_uri: str = None): admin_uri (str): the uri for the admin service. stored_proc_uri (str, optional): the uri for the stored procedure service. If not provided,the uri will be read from the service status. + service_registry_fetching_interval (int, optional): the interval for fetching the service registry info. + The default value is 1 second. """ self._admin_uri = admin_uri self._client = ApiClient(Configuration(hosts=admin_uri)) + self._deploy_mode = DeploymentKind.NOT_SET + self._service_registry_fetching_interval = service_registry_fetching_interval self._graph_api = AdminServiceGraphManagementApi(self._client) self._job_api = AdminServiceJobManagementApi(self._client) self._procedure_api = AdminServiceProcedureManagementApi(self._client) self._service_api = AdminServiceServiceManagementApi(self._client) + self._service_registry_api = AdminServiceServiceRegistryApi(self._client) self._utils_api = UtilsApi(self._client) - if stored_proc_uri is None: - service_status = self.get_service_status() - if not service_status.is_ok(): - raise Exception( - "Failed to get service status: ", - service_status.get_status_message(), - ) - service_port = service_status.get_value().hqps_port - # replace the port in uri - splitted = admin_uri.split(":") - splitted[-1] = str(service_port) - stored_proc_uri = ":".join(splitted) - self._query_client = ApiClient(Configuration(hosts=stored_proc_uri)) + self._query_client = None + self._stop_event = None + self._init_stored_proc_uri(stored_proc_uri) + + # Holds the latest service registry info + self._latest_service_registry_info = [] + """ + [ + { + "graph_id": "1", + "service_registry": { + 
"service_name": "cypher + "instances": [ + { + "endpoint": "11.22.3.4:7687", + "metrics": { + "snapshot_id": "1", + } + }, + { + "endpoint": "11.22.3.5:7687", + "metrics": { + "snapshot_id": "2", + } + }, + ], + "primary": { + "endpoint": "11.22.3.5:7687", + "metrics": { + "snapshot_id": "2", + }, + }, + } + }, + ] + """ + + def _init_stored_proc_uri(self, stored_proc_uri: str): + service_status = self.get_service_status() + if not service_status.is_ok(): + raise Exception( + "Failed to get service status: ", + service_status.get_status_message(), + ) + status_val = service_status.get_value() + self._deploy_mode = DeploymentKind(status_val.deploy_mode) + if self._deploy_mode == DeploymentKind.STANDALONE: + if stored_proc_uri is None: + service_port = service_status.get_value().hqps_port + # replace the port in uri + splitted = self._admin_uri.split(":") + splitted[-1] = str(service_port) + stored_proc_uri = ":".join(splitted) + self._query_client = ApiClient(Configuration(hosts=[stored_proc_uri])) + elif self._deploy_mode == DeploymentKind.K8S: + # Ignore the stored_proc_uri if the deploy mode is k8s + self._query_client = ApiClient( + Configuration(hosts=self._get_stored_proc_uris_from_service_registry()) + ) + else: + raise Exception("Unknown deployment mode: ", self._deploy_mode) + self._query_api = QueryServiceApi(self._query_client) self._edge_api = GraphServiceEdgeManagementApi(self._query_client) self._vertex_api = GraphServiceVertexManagementApi(self._query_client) + + if self._deploy_mode == DeploymentKind.K8S: + # A background thread which fetches the service registry info periodically + self._stop_event = threading.Event() + self._service_registry_info_fetcher = threading.Thread( + target=self._fetch_service_registry_info, + args=(self._service_registry_fetching_interval), + ) + self._service_registry_info_fetcher.start() def __enter__(self): self._client.__enter__() @@ -416,7 +532,15 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): 
self._client.__exit__(exc_type=exc_type, exc_value=exc_val, traceback=exc_tb) - + + def __del__(self): + if self._stop_event: + self._stop_event.set() + if self._service_registry_info_fetcher: + self._service_registry_info_fetcher.join() + + + @property def admin_uri(self): return self._admin_uri @@ -988,6 +1112,24 @@ def call_procedure_current_raw(self, params: bytes) -> Result[str]: except Exception as e: return Result.from_exception(e) + def get_service_registry_info(self, graph_id: str, service_name: str) -> Result[GraphServiceRegistryRecord]: + """Get the routing info for a specified service on a specified graph. + + Args: + graph_id (str): Graph ID. + service_name (str): Service name: cypher/procedure + """ + try: + response = self._service_registry_api.get_service_registry_info_with_http_info( + graph_id, service_name + ) + return Result.from_response(response) + except Exception as e: + return Result.from_exception(e) + + def list_service_registry_info(self): + raise NotImplementedError + def get_service_status(self) -> Result[ServiceStatus]: """ Get the status of the service. @@ -1024,7 +1166,7 @@ def stop_service(self, graph_id: str = None) -> Result[str]: """ try: - req = StopServiceRequest() + req = StopServiceRequest(graph_id) if graph_id: req.graph_id = graph_id response = self._service_api.stop_service_with_http_info(req) @@ -1267,3 +1409,86 @@ def ensure_param_str(self, param_name: str, param): + str(param) ) return param + + def _get_stored_proc_uris_from_service_registry(self): + """Define a generator which periodically fetches the service registry info, and yields all the uris. + + Raises: + Exception: _description_ + + Yields: + dict: A dict contains primary uri and all available uris. 
+ """ + + def _graph_instance_list_generator(graph_id: str, to_primary: bool = False): + for g_svc_record in self._latest_service_registry_info: + if "graph_id" in g_svc_record and g_svc_record["graph_id"] == graph_id: + if to_primary: + if g_svc_record["service_registry"]["primary"]: + logger.info( + f"Found primary stored_proc_uri for graph_id: {graph_id}, stored_proc_uri: {g_svc_record['service_registry']['primary']['endpoint']}" + ) + yield [ + g_svc_record["service_registry"]["primary"]["endpoint"] + ] + else: + raise Exception( + f"Failed to find primary stored_proc_uri for graph_id: {graph_id}" + ) + else: + if g_svc_record["service_registry"]["instances"]: + endpoints = [ + instance["endpoint"] + for instance in g_svc_record["service_registry"][ + "instances" + ] + ] + logger.info( + f"Found stored_proc_uris for graph_id: {graph_id}, stored_proc_uris: {endpoints}" + ) + yield endpoints + else: + raise Exception( + f"Failed to find stored_proc_uri for graph_id: {graph_id}" + ) + raise Exception(f"Failed to find stored_proc_uri for graph_id: {graph_id}") + + return _graph_instance_list_generator + + def _fetch_service_registry_info(self,interval: int): + while not self._stop_event.is_set(): + try: + response = self.list_service_registry_info() + if response.is_ok(): + self._latest_service_registry_info = self._parse_registry_info( + response.get_value() + ) + else: + logger.error( + "Failed to fetch service registry info: ", + response.get_status_message(), + ) + except Exception as e: + print("Failed to fetch service registry info: ", e) + time.sleep(interval) + + def _parse_registry_info( + self, service_registry_info: list[GraphServiceRegistryRecord] + ): + """_summary_ + + Args: + service_registry_info (list[GraphServiceRegistryRecord]): _description_ + + Returns: + _type_: _description_ + """ + result = [] + for record in service_registry_info: + result.append( + { + "graph_id": record.graph_id, + "service_registry": record.service_registry.to_dict(), + 
} + ) + return result diff --git a/flex/interactive/sdk/python/gs_interactive/client/status.py b/flex/interactive/sdk/python/gs_interactive/client/status.py index 26745c230f5f..4f572b728cdb 100644 --- a/flex/interactive/sdk/python/gs_interactive/client/status.py +++ b/flex/interactive/sdk/python/gs_interactive/client/status.py @@ -111,6 +111,12 @@ def from_exception(exception: ApiException): return Status(StatusCode.INTERNAL_ERROR, exception) elif isinstance(exception, ProtocolError): return Status(StatusCode.INTERNAL_ERROR, exception) + elif isinstance(exception, ValueError): + return Status(StatusCode.INTERNAL_ERROR, exception) + elif isinstance(exception, AttributeError): + return Status(StatusCode.INTERNAL_ERROR, exception) + elif isinstance(exception, TypeError): + return Status(StatusCode.INTERNAL_ERROR, exception) return Status( StatusCode.UNKNOWN, "Unknown Error from exception " + exception.body ) diff --git a/flex/interactive/sdk/python/gs_interactive/tests/test_driver.py b/flex/interactive/sdk/python/gs_interactive/tests/test_driver.py index 1a097d797dd9..c4c48a15d82d 100644 --- a/flex/interactive/sdk/python/gs_interactive/tests/test_driver.py +++ b/flex/interactive/sdk/python/gs_interactive/tests/test_driver.py @@ -146,25 +146,25 @@ def tearDown(self): def test_example(self): self.createGraphFromDict() self._graph_id = self.createGraph() - self.bulkLoading() - self.bulkLoadingFailure() - self.bulkLoadingUploading() + # self.bulkLoading() + # self.bulkLoadingFailure() + # self.bulkLoadingUploading() self.list_graph() self.get_graph_meta() - self.runCypherQuery() - self.runGremlinQuery() + # self.runCypherQuery() + # self.runGremlinQuery() self.createCypherProcedure() self.createCppProcedure() - self.restart() - self.callVertexEdgeQuery() - self.restartOnNewGraph() - self.getStatistics() - self.callProcedure() - self.callProcedureWithHttp() - self.callProcedureWithHttpCurrent() - # test stop the service, and submit queries - self.queryWithServiceStop() - 
self.createDriver() + # self.restart() + # self.callVertexEdgeQuery() + # self.restartOnNewGraph() + # self.getStatistics() + # self.callProcedure() + # self.callProcedureWithHttp() + # self.callProcedureWithHttpCurrent() + # # test stop the service, and submit queries + # self.queryWithServiceStop() + # self.createDriver() @unittest.skipIf( os.getenv("ENGINE_TYPE") == "interactive", @@ -319,6 +319,7 @@ def waitJobFinish(self, job_id: str): status = resp.get_value().status print("job status: ", status) if status == "SUCCESS": + print(resp.get_value()) return True elif status == "FAILED": return False diff --git a/flex/openapi/openapi_interactive.yaml b/flex/openapi/openapi_interactive.yaml index 6528ea52fd42..3c8ef4c5c7f7 100644 --- a/flex/openapi/openapi_interactive.yaml +++ b/flex/openapi/openapi_interactive.yaml @@ -18,7 +18,7 @@ info: email: graphscope@alibaba-inc.com license: name: Apache 2.0 - url: 'http://www.apache.org/licenses/LICENSE-2.0.html' + url: "http://www.apache.org/licenses/LICENSE-2.0.html" externalDocs: description: Find out More about GraphScope url: http://graphscope.io diff --git a/flex/storages/rt_mutable_graph/schema.cc b/flex/storages/rt_mutable_graph/schema.cc index 4c162a206dd8..d3644cd4de18 100644 --- a/flex/storages/rt_mutable_graph/schema.cc +++ b/flex/storages/rt_mutable_graph/schema.cc @@ -1210,6 +1210,11 @@ static Status parse_schema_from_yaml_node(const YAML::Node& graph_node, schema.SetGraphId(graph_node["id"].as()); } else { VLOG(1) << "id is not set"; + if (schema.GetGraphName().empty()) { + LOG(ERROR) << "Graph name is not set"; + return Status(StatusCode::INVALID_SCHEMA, "Graph name is not set"); + } + schema.SetGraphId(schema.GetGraphName()); } if (graph_node["description"]) { diff --git a/flex/tests/hqps/CMakeLists.txt b/flex/tests/hqps/CMakeLists.txt index 2aa5ac05bf61..6595ba6989a7 100644 --- a/flex/tests/hqps/CMakeLists.txt +++ b/flex/tests/hqps/CMakeLists.txt @@ -5,12 +5,19 @@ if (NOT BUILD_WITH_OSS) list(REMOVE_ITEM 
GS_TEST_FILES "oss_test.cc") endif() +if (NOT BUILD_ETCD_METASTORE) + list(REMOVE_ITEM GS_TEST_FILES "etcd_meta_test.cc") +endif() + foreach(f ${GS_TEST_FILES}) string(REGEX MATCH "^(.*)\\.[^.]*$" dummy ${f}) set(T_NAME ${CMAKE_MATCH_1}) message(STATUS "Found graphscope test - " ${T_NAME}) add_executable(${T_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/${T_NAME}.cc) target_link_libraries(${T_NAME} flex_plan_proto flex_graph_db flex_utils) + if (BUILD_ETCD_METASTORE AND T_NAME STREQUAL "etcd_meta_test") + target_link_libraries(${T_NAME} ${ETCD_CPP_LIBRARIES} flex_metadata_store) + endif() if (BUILD_WITH_OSS) target_link_libraries(${T_NAME} cpp-sdk) endif() diff --git a/flex/tests/hqps/hqps_adhoc_test.sh b/flex/tests/hqps/hqps_adhoc_test.sh index eaa3aa212720..a94d4b09ee8b 100644 --- a/flex/tests/hqps/hqps_adhoc_test.sh +++ b/flex/tests/hqps/hqps_adhoc_test.sh @@ -30,9 +30,9 @@ INTERACTIVE_WORKSPACE=$1 GRAPH_NAME=$2 COMPILER_PLANNER_OPT=$3 if [ "${COMPILER_PLANNER_OPT}" == "CBO" ]; then - ENGINE_CONFIG_PATH=${FLEX_HOME}/tests/hqps/interactive_config_test_cbo.yaml + ENGINE_CONFIG_PATH=${FLEX_HOME}/tests/hqps/interactive_config_standalone_cbo.yaml elif [ "${COMPILER_PLANNER_OPT}" == "RBO" ]; then - ENGINE_CONFIG_PATH=${FLEX_HOME}/tests/hqps/interactive_config_test.yaml + ENGINE_CONFIG_PATH=${FLEX_HOME}/tests/hqps/interactive_config_standalone.yaml else echo "COMPILER_PLANNER_OPT: ${COMPILER_PLANNER_OPT} not supported, use CBO or RBO" exit 1 diff --git a/flex/tests/hqps/interactive_config_standalone.yaml b/flex/tests/hqps/interactive_config_standalone.yaml new file mode 100644 index 000000000000..11fa86e9715b --- /dev/null +++ b/flex/tests/hqps/interactive_config_standalone.yaml @@ -0,0 +1,60 @@ +log_level: INFO +verbose_level: 10 +default_graph: modern_graph +compute_engine: + type: hiactor + workers: + - localhost:10000 + thread_num_per_worker: 1 + memory_per_worker: 4Gi + store: + type: cpp-mcsr + metadata_store: + uri: file://{WORKSPACE}/METADATA # Could be 
file://{WORKSPACE}/METADATA or other supported storage class. WORKSPACE is the placeholder for the workspace directory. + wal_uri: file://{GRAPH_DATA_DIR}/wal # Could be file://{GRAPH_DATA_DIR}/wal or other supported storage class. GRAPH_DATA_DIR is the placeholder for the graph data directory. +compiler: + planner: + is_on: true + opt: RBO + rules: + - FilterIntoJoinRule + - FilterMatchRule + - NotMatchToAntiJoinRule + meta: + reader: + schema: + uri: http://localhost:7777/v1/service/status + interval: 1000 # ms + statistics: + uri: http://localhost:7777/v1/graph/%s/statistics + interval: 86400000 # ms + timeout: 1000 # ms + endpoint: + default_listen_address: localhost + bolt_connector: + disabled: false + port: 7687 + gremlin_connector: + disabled: false + port: 8182 + query_timeout: 40000 + gremlin_script_language_name: antlr_gremlin_calcite +http_service: + default_listen_address: localhost + admin_port: 7777 + query_port: 10000 + max_content_length: 1GB +master: + port: 7776 + instance_name: default + service_registry: + type: etcd + endpoint: http://localhost:2379 + ttl: 5 + launcher_type: k8s # or host. Only works when launching via master. + k8s_launcher_config: + namespace: interactive + instance_prefix: gs-interactive # The prefix of the instance name. The full instance name will be instance_name_prefix + instance_name. + config_file: ~/.kube/config # The config file used to connect to the k8s cluster. 
+ image_pull_policy: Always + default_replicas: 1 diff --git a/flex/tests/hqps/interactive_config_test_cbo.yaml b/flex/tests/hqps/interactive_config_standalone_cbo.yaml similarity index 100% rename from flex/tests/hqps/interactive_config_test_cbo.yaml rename to flex/tests/hqps/interactive_config_standalone_cbo.yaml diff --git a/flex/tests/hqps/interactive_config_test.yaml b/flex/tests/hqps/interactive_config_test.yaml index b27f35e45e13..adb2a08af9a6 100644 --- a/flex/tests/hqps/interactive_config_test.yaml +++ b/flex/tests/hqps/interactive_config_test.yaml @@ -5,11 +5,12 @@ compute_engine: type: hiactor workers: - localhost:10000 - thread_num_per_worker: 4 + thread_num_per_worker: 1 + memory_per_worker: 4Gi store: type: cpp-mcsr metadata_store: - type: file # file/sqlite/etcd + uri: http://localhost:2379 # Could be file://{WORKSPACE}/METADATA or other supported storage class. WORKSPACE is the placeholder for the workspace directory. wal_uri: file://{GRAPH_DATA_DIR}/wal # Could be file://{GRAPH_DATA_DIR}/wal or other supported storage class. GRAPH_DATA_DIR is the placeholder for the graph data directory. compiler: planner: @@ -22,10 +23,10 @@ compiler: meta: reader: schema: - uri: http://localhost:7777/v1/service/status + uri: http://localhost:7776/v1/graph/1 interval: 1000 # ms statistics: - uri: http://localhost:7777/v1/graph/%s/statistics + uri: http://localhost:7776/v1/graph/%s/statistics interval: 86400000 # ms timeout: 1000 # ms endpoint: @@ -34,7 +35,7 @@ compiler: disabled: false port: 7687 gremlin_connector: - disabled: false + disabled: true port: 8182 query_timeout: 40000 gremlin_script_language_name: antlr_gremlin_calcite @@ -43,3 +44,17 @@ http_service: admin_port: 7777 query_port: 10000 max_content_length: 1GB +master: + port: 7776 + instance_name: default + service_registry: + type: etcd + endpoint: http://localhost:2379 + ttl: 5 + launcher_type: k8s # or host. Only works when launching via master. 
+ k8s_launcher_config: + namespace: interactive + instance_prefix: gs-interactive # The prefix of the instance name. The full instance name will be instance_name_prefix + instance_name. + config_file: ~/.kube/config # The config file used to connect to the k8s cluster. + image_pull_policy: Always + default_replicas: 1 diff --git a/flex/tests/hqps/interactive_config_test_2.yaml b/flex/tests/hqps/interactive_config_test_2.yaml new file mode 100644 index 000000000000..90df89a51d51 --- /dev/null +++ b/flex/tests/hqps/interactive_config_test_2.yaml @@ -0,0 +1,60 @@ +log_level: INFO +verbose_level: 10 +default_graph: modern_graph +compute_engine: + type: hiactor + workers: + - localhost:10000 + thread_num_per_worker: 1 + memory_per_worker: 4Gi + store: + type: cpp-mcsr + metadata_store: + uri: http://localhost:2379 # Could be file://{WORKSPACE}/METADATA or other supported storage class. WORKSPACE is the placeholder for the workspace directory. + wal_uri: file://{GRAPH_DATA_DIR}/wal # Could be file://{GRAPH_DATA_DIR}/wal or other supported storage class. GRAPH_DATA_DIR is the placeholder for the graph data directory. +compiler: + planner: + is_on: true + opt: RBO + rules: + - FilterIntoJoinRule + - FilterMatchRule + - NotMatchToAntiJoinRule + meta: + reader: + schema: + uri: http://localhost:7776/v1/graph/1 + interval: 1000 # ms + statistics: + uri: http://localhost:7776/v1/graph/%s/statistics + interval: 86400000 # ms + timeout: 1000 # ms + endpoint: + default_listen_address: localhost + bolt_connector: + disabled: false + port: 7688 + gremlin_connector: + disabled: true + port: 8183 + query_timeout: 40000 + gremlin_script_language_name: antlr_gremlin_calcite +http_service: + default_listen_address: localhost + admin_port: 7778 + query_port: 10001 + max_content_length: 1GB +master: + port: 7776 + instance_name: default + service_registry: + type: etcd + endpoint: http://localhost:2379 + ttl: 5 + launcher_type: k8s # or host. Only works when launching via master. 
+ k8s_launcher_config: + namespace: interactive + instance_prefix: gs-interactive # The prefix of the instance name. The full instance name will be instance_name_prefix + instance_name. + config_file: ~/.kube/config # The config file used to connect to the k8s cluster. + image_pull_policy: Always + default_replicas: 1 diff --git a/flex/third_party/cpprestsdk b/flex/third_party/cpprestsdk new file mode 160000 index 000000000000..0b1ce318a757 --- /dev/null +++ b/flex/third_party/cpprestsdk @@ -0,0 +1 @@ +Subproject commit 0b1ce318a757bbfb89bdb0fffb61ca4e38dc3b33 diff --git a/flex/third_party/etcd-cpp-apiv3 b/flex/third_party/etcd-cpp-apiv3 new file mode 160000 index 000000000000..216b86f8d763 --- /dev/null +++ b/flex/third_party/etcd-cpp-apiv3 @@ -0,0 +1 @@ +Subproject commit 216b86f8d763acf88e4ed7265f983b57c12da2df diff --git a/flex/utils/result.cc b/flex/utils/result.cc index 80dddf525879..676b257e36f0 100644 --- a/flex/utils/result.cc +++ b/flex/utils/result.cc @@ -16,6 +16,7 @@ #include "flex/utils/result.h" namespace gs { + Status::Status() noexcept : error_code_(StatusCode::OK) {} Status::Status(StatusCode error_code) noexcept : error_code_(error_code) {} diff --git a/flex/utils/service_utils.cc b/flex/utils/service_utils.cc index 955a7bfa0c88..8b9fd65731da 100644 --- a/flex/utils/service_utils.cc +++ b/flex/utils/service_utils.cc @@ -14,9 +14,39 @@ */ #include "flex/utils/service_utils.h" +#include +#include +#include +#include +#include namespace gs { +std::string get_uri_scheme(const std::string& uri) { + std::string scheme; + auto pos = uri.find("://"); + if (pos != std::string::npos) { + scheme = uri.substr(0, pos); + } + if (scheme.empty()) { + LOG(INFO) << "No scheme found in wal uri: " << uri + << ", using default scheme: file"; + scheme = "file"; + } + return scheme; +} + +std::string get_uri_path(const std::string& uri) { + std::string path; + auto pos = uri.find("://"); + if (pos != std::string::npos) { + path = uri.substr(pos + 3); + } else { + path 
= uri; + } + return path; +} + static unsigned long long lastTotalUser, lastTotalUserLow, lastTotalSys, lastTotalIdle; @@ -195,4 +225,37 @@ size_t human_readable_to_bytes(const std::string& human_readable_bytes) { return static_cast(number * multipliers[unit]); } +std::string get_local_ip() { + // First check the environment variable. + if (const char* env_ip = std::getenv("FLEX_INTERACTIVE_ENGINE_IP")) { + return env_ip; + } + + char hostname[256]; + if (gethostname(hostname, sizeof(hostname)) == -1) { + std::cerr << "Error obtaining hostname" << std::endl; + return ""; + } + + struct addrinfo hints, *info, *p; + std::memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; // Use AF_INET to only get IPv4 addresses + hints.ai_socktype = SOCK_STREAM; + + if (getaddrinfo(hostname, nullptr, &hints, &info) != 0) { + std::cerr << "Error obtaining local IP address" << std::endl; + return ""; + } + + std::string ip; + for (p = info; p != nullptr; p = p->ai_next) { + struct sockaddr_in* address = (struct sockaddr_in*) p->ai_addr; + ip = inet_ntoa(address->sin_addr); + break; // Take the first valid IP + } + + freeaddrinfo(info); + return ip; +} + } // namespace gs diff --git a/flex/utils/service_utils.h b/flex/utils/service_utils.h index feff3a7b9053..b07457f4cb74 100644 --- a/flex/utils/service_utils.h +++ b/flex/utils/service_utils.h @@ -45,6 +45,9 @@ namespace gs { static constexpr const char* CODEGEN_BIN = "load_plan_and_gen.sh"; +std::string get_uri_scheme(const std::string& uri); +std::string get_uri_path(const std::string& uri); + /// Util functions. 
inline void blockSignal(int sig) { @@ -208,6 +211,8 @@ std::string memory_to_mb_str(uint64_t mem_bytes); size_t human_readable_to_bytes(const std::string& human_readable); +std::string get_local_ip(); + } // namespace gs #endif // SERVICE_UTILS_H \ No newline at end of file diff --git a/k8s/Makefile b/k8s/Makefile index 63b006e7ac64..feaf58ca80c1 100644 --- a/k8s/Makefile +++ b/k8s/Makefile @@ -176,6 +176,17 @@ flex-interactive: -t graphscope/interactive:${VERSION} \ -f ${DOCKERFILES_DIR}/flex-interactive.Dockerfile . +flex-interactive-master: + cd $(WORKING_DIR)/.. && \ + docker build \ + --target master \ + --build-arg REGISTRY=$(REGISTRY) \ + --build-arg VINEYARD_VERSION=$(VINEYARD_VERSION) \ + --build-arg PLATFORM=${PLATFORM} \ + --build-arg ARCH=${ARCH} \ + -t graphscope/interactive-master:${VERSION} \ + -f ${DOCKERFILES_DIR}/flex-interactive-master.Dockerfile --no-cache . + learning: cd $(WORKING_DIR)/.. && \ docker build \ diff --git a/k8s/dockerfiles/flex-interactive-master.Dockerfile b/k8s/dockerfiles/flex-interactive-master.Dockerfile new file mode 100644 index 000000000000..b50585f7b136 --- /dev/null +++ b/k8s/dockerfiles/flex-interactive-master.Dockerfile @@ -0,0 +1,137 @@ +ARG PLATFORM=x86_64 +ARG ARCH=amd64 +ARG REGISTRY=registry.cn-hongkong.aliyuncs.com +ARG VINEYARD_VERSION=latest +FROM $REGISTRY/graphscope/graphscope-dev:$VINEYARD_VERSION-$ARCH AS builder +ARG PARALLEL=8 + +RUN sudo mkdir -p /opt/flex/wheel && sudo chown -R graphscope:graphscope /opt/flex/ +USER graphscope +WORKDIR /home/graphscope + +# change bash as default +SHELL ["/bin/bash", "-c"] + +COPY --chown=graphscope:graphscope . 
/home/graphscope/GraphScope + +RUN cd ${HOME}/GraphScope && \ + git submodule update --init && cd flex/interactive/sdk && bash generate_sdk.sh -g python -t server && \ + cd master && pip3 install -r requirements.txt && python3 setup.py bdist_wheel && \ + cp dist/*.whl /opt/flex/wheel/ + +# install flex +RUN cd ${HOME}/GraphScope/flex && \ + mkdir build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX=/opt/flex -DBUILD_DOC=OFF -DBUILD_TEST=OFF -DBUILD_FOR_MASTER=ON \ + -DOPTIMIZE_FOR_HOST=${OPTIMIZE_FOR_HOST} -DUSE_STATIC_ARROW=ON -DBUILD_WITH_OSS=ON -DENABLE_SERVICE_REGISTER=ON && \ + make -j ${PARALLEL} && make install + +# strip all .so in /opt/flex/lib +RUN sudo find /opt/flex/lib/ -name "*.so" -type f -exec strip {} \; +# strip all binary in /opt/flex/bin +RUN sudo strip /opt/flex/bin/bulk_loader + +########################### Compiler Builder ########################### +FROM $REGISTRY/graphscope/graphscope-dev:v0.24.2-amd64 AS compiler_builder + +RUN sudo mkdir -p /opt/flex && sudo chown -R graphscope:graphscope /opt/flex/ && mkdir /opt/flex/lib +USER graphscope +WORKDIR /home/graphscope + +COPY --chown=graphscope:graphscope . /home/graphscope/GraphScope + +RUN . 
${HOME}/.cargo/env && cd ${HOME}/GraphScope/flex && git submodule update --init && \ + cd ~/GraphScope/interactive_engine/ && mvn clean package -Pexperimental -DskipTests && \ + cd ~/GraphScope/interactive_engine/compiler && cp target/compiler-0.0.1-SNAPSHOT.jar /opt/flex/lib/ && \ + cp target/libs/*.jar /opt/flex/lib/ && \ + ls ~/GraphScope/interactive_engine/executor/ir && \ + cp ~/GraphScope/interactive_engine/executor/ir/target/release/libir_core.so /opt/flex/lib/ + + + +########################### RUNTIME IMAGE ########################### + +FROM ubuntu:22.04 AS master +ARG PLATFORM=x86_64 + +ENV DEBIAN_FRONTEND=noninteractive + +# shanghai zoneinfo +ENV TZ=Asia/Shanghai +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + +RUN apt-get update && apt-get -y install sudo locales tzdata python3 python3-pip zip unzip curl cmake && \ + locale-gen en_US.UTF-8 && apt-get clean -y && rm -rf /var/lib/apt/lists/* && \ + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +COPY --from=builder /opt/flex/wheel/ /opt/flex/wheel/ +COPY --from=builder /opt/flex/include/ /opt/graphscope/include/ /opt/vineyard/include/ /opt/flex/include/ +COPY --from=builder /usr/include/arrow /usr/include/arrow +COPY --from=builder /usr/include/yaml-cpp /usr/include/yaml-cpp +COPY --from=builder /usr/include/boost/filesystem* /usr/include/boost +COPY --from=builder /usr/include/boost/format* /usr/include/boost +COPY --from=builder /usr/include/google /usr/include/google +COPY --from=builder /usr/include/glog /usr/include/glog +COPY --from=builder /usr/include/gflags /usr/include/gflags +COPY --from=builder /usr/include/rapidjson /usr/include/rapidjson +COPY --from=builder /usr/lib/$PLATFORM-linux-gnu/openmpi/include/ /opt/flex/include + +RUN apt-get update && apt-get install -y git && pip3 install --upgrade pip && \ + pip3 install git+https://github.com/kragniz/python-etcd3.git@e58a899579ba416449c4e225b61f039457c8072a && \ + pip3 install 
/opt/flex/wheel/*.whl && \ + apt-get clean -y && rm -rf /var/lib/apt/lists/* + +RUN mkdir /opt/vineyard/ + +#Copy compiler related libs +COPY --from=compiler_builder /opt/flex/lib/ /opt/flex/lib/ + + # copy builder's /opt/flex to final image +COPY --from=builder /opt/flex/bin/bulk_loader \ + /opt/flex/bin/gen_code_from_plan \ + /opt/flex/bin/load_plan_and_gen.sh /opt/flex/bin/ +COPY --from=builder /opt/flex/lib/ /opt/flex/lib/ +COPY --from=builder /opt/graphscope/lib/libgrape-lite.so /opt/flex/lib/ +COPY --from=builder /usr/lib/$PLATFORM-linux-gnu/libsnappy*.so* /usr/lib/$PLATFORM-linux-gnu/ +COPY --from=builder /usr/lib/$PLATFORM-linux-gnu/libprotobuf* /usr/lib/$PLATFORM-linux-gnu/libfmt*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libre2*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libutf8proc*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libevent*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libltdl*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libopen-pal*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libunwind*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libhwloc*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libopen-rte*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libcrypto*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libboost_thread*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libboost_filesystem*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libboost_program_options*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libmpi*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libyaml-cpp*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libglog*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libgflags*.so* \ + /usr/lib/$PLATFORM-linux-gnu/libicudata.so* \ + /usr/lib/$PLATFORM-linux-gnu/ + +RUN sudo rm -rf /usr/lib/$PLATFORM-linux-gnu/libLLVM*.so* && sudo rm -rf /opt/flex/lib/libseastar.a && \ + sudo rm -rf /usr/lib/$PLATFORM-linux-gnu/libcuda.so && \ + sudo rm -rf /usr/lib/$PLATFORM-linux-gnu/libcudart.so + +RUN sudo ln -sf /opt/flex/bin/* /usr/local/bin/ \ + && sudo ln -sfn /opt/flex/include/* /usr/local/include/ \ + && sudo ln -sf -r /opt/flex/lib/* /usr/local/lib \ + && sudo ln -sf 
/opt/flex/lib64/*so* /usr/local/lib64 \ + && chmod +x /opt/flex/bin/* + +RUN find /opt/flex/lib/ -name "*.a" -type f -delete + +# Add graphscope user with user id 1001 +RUN useradd -m graphscope -u 1001 && \ + echo 'graphscope ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \ + chown -R graphscope:graphscope /opt/flex + +# set home to graphscope user +ENV HOME=/home/graphscope +USER graphscope +WORKDIR /home/graphscope \ No newline at end of file diff --git a/k8s/dockerfiles/flex-interactive.Dockerfile b/k8s/dockerfiles/flex-interactive.Dockerfile index 72447d90fd69..179f555bb439 100644 --- a/k8s/dockerfiles/flex-interactive.Dockerfile +++ b/k8s/dockerfiles/flex-interactive.Dockerfile @@ -4,11 +4,12 @@ ARG PLATFORM=x86_64 ARG ARCH=amd64 ARG REGISTRY=registry.cn-hongkong.aliyuncs.com ARG VINEYARD_VERSION=latest -FROM $REGISTRY/graphscope/graphscope-dev:$VINEYARD_VERSION-$ARCH AS builder + +FROM $REGISTRY/graphscope/graphscope-dev:v0.24.2-amd64 AS builder ARG ENABLE_COORDINATOR="false" ARG OPTIMIZE_FOR_HOST=OFF ARG ENABLE_OPENTELMETRY=false -ARG PARALLEL=8 +ARG PARALLEL=16 RUN sudo mkdir -p /opt/flex && sudo chown -R graphscope:graphscope /opt/flex/ USER graphscope @@ -27,12 +28,9 @@ COPY --chown=graphscope:graphscope . /home/graphscope/GraphScope # install flex RUN . ${HOME}/.cargo/env && cd ${HOME}/GraphScope/flex && \ - git submodule update --init && mkdir build && cd build && cmake .. 
-DCMAKE_INSTALL_PREFIX=/opt/flex -DBUILD_DOC=OFF -DBUILD_TEST=OFF -DOPTIMIZE_FOR_HOST=${OPTIMIZE_FOR_HOST} -DUSE_STATIC_ARROW=ON && make -j ${PARALLEL} && make install && \ - cd ~/GraphScope/interactive_engine/ && mvn clean package -Pexperimental -DskipTests && \ - cd ~/GraphScope/interactive_engine/compiler && cp target/compiler-0.0.1-SNAPSHOT.jar /opt/flex/lib/ && \ - cp target/libs/*.jar /opt/flex/lib/ && \ - ls ~/GraphScope/interactive_engine/executor/ir && \ - cp ~/GraphScope/interactive_engine/executor/ir/target/release/libir_core.so /opt/flex/lib/ + git submodule update --init && mkdir build && cd build && \ + cmake .. -DCMAKE_INSTALL_PREFIX=/opt/flex -DBUILD_DOC=OFF -DBUILD_TEST=OFF -DOPTIMIZE_FOR_HOST=${OPTIMIZE_FOR_HOST} \ + -DUSE_STATIC_ARROW=ON -DBUILD_WITH_OSS=ON -DENABLE_SERVICE_REGISTER=ON && make -j ${PARALLEL} && make install # strip all .so in /opt/flex/lib RUN sudo find /opt/flex/lib/ -name "*.so" -type f -exec strip {} \; @@ -61,6 +59,22 @@ RUN if [ "${ENABLE_COORDINATOR}" = "true" ]; then \ cp dist/*.whl /opt/flex/wheel/; \ fi +########################### Compiler Builder ########################### +FROM $REGISTRY/graphscope/graphscope-dev:v0.24.2-amd64 AS compiler_builder + +RUN sudo mkdir -p /opt/flex && sudo chown -R graphscope:graphscope /opt/flex/ && mkdir /opt/flex/lib +USER graphscope +WORKDIR /home/graphscope + +COPY --chown=graphscope:graphscope . /home/graphscope/GraphScope + +RUN . 
${HOME}/.cargo/env && cd ${HOME}/GraphScope/flex && git submodule update --init && \ + cd ~/GraphScope/interactive_engine/ && mvn clean package -Pexperimental -DskipTests && \ + cd ~/GraphScope/interactive_engine/compiler && cp target/compiler-0.0.1-SNAPSHOT.jar /opt/flex/lib/ && \ + cp target/libs/*.jar /opt/flex/lib/ && \ + ls ~/GraphScope/interactive_engine/executor/ir && \ + cp ~/GraphScope/interactive_engine/executor/ir/target/release/libir_core.so /opt/flex/lib/ + ########################### RUNTIME IMAGE ########################### @@ -76,7 +90,7 @@ ENV LANG en_US.UTF-8 ENV LANGUAGE en_US:en ENV LC_ALL en_US.UTF-8 # g++ + jre 500MB -RUN apt-get update && apt-get -y install sudo locales g++ cmake openjdk-11-jre-headless tzdata && \ +RUN apt-get update && apt-get -y install sudo locales g++ cmake openjdk-11-jre-headless tzdata zip unzip && \ locale-gen en_US.UTF-8 && apt-get clean -y && rm -rf /var/lib/apt/lists/* && \ ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone @@ -97,17 +111,24 @@ COPY --from=builder /opt/flex/wheel/ /opt/flex/wheel/ # lib COPY --from=builder /opt/flex/lib/ /opt/flex/lib/ + +#Copy compiler related libs +COPY --from=compiler_builder /opt/flex/lib/ /opt/flex/lib/ + # remove .a files RUN find /opt/flex/lib/ -name "*.a" -type f -delete # include COPY --from=builder /opt/flex/include/ /opt/graphscope/include/ /opt/vineyard/include/ /opt/flex/include/ COPY --from=builder /opt/graphscope/lib/libgrape-lite.so /opt/flex/lib/ +COPY --from=builder /opt/graphscope/lib/libboost_thread*.so* /opt/flex/lib +COPY --from=builder /opt/graphscope/lib/libboost_filesystem*.so* /opt/flex/lib +COPY --from=builder /opt/graphscope/lib/libboost_program_options*.so* /opt/flex/lib # copy the builtin graph, modern_graph RUN mkdir -p /opt/flex/share/gs_interactive_default_graph/ COPY --from=builder /home/graphscope/GraphScope/flex/interactive/examples/modern_graph/* /opt/flex/share/gs_interactive_default_graph/ -COPY --from=builder 
/home/graphscope/GraphScope/flex/tests/hqps/interactive_config_test.yaml /opt/flex/share/interactive_config.yaml +COPY --from=builder /home/graphscope/GraphScope/flex/tests/hqps/interactive_config_standalone.yaml /opt/flex/share/interactive_config.yaml COPY --from=builder /home/graphscope/GraphScope/k8s/dockerfiles/interactive-entrypoint.sh /opt/flex/bin/entrypoint.sh RUN sed -i 's/name: modern_graph/name: gs_interactive_default_graph/g' /opt/flex/share/gs_interactive_default_graph/graph.yaml && \ sed -i 's/default_graph: modern_graph/default_graph: gs_interactive_default_graph/g' /opt/flex/share/interactive_config.yaml @@ -127,26 +148,27 @@ COPY --from=builder /usr/include/rapidjson /usr/include/rapidjson COPY --from=builder /usr/lib/$PLATFORM-linux-gnu/openmpi/include/ /opt/flex/include -COPY --from=builder /usr/lib/$PLATFORM-linux-gnu/libprotobuf* /usr/lib/$PLATFORM-linux-gnu/libfmt*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libre2*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libutf8proc*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libevent*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libltdl*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libltdl*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libopen-pal*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libunwind*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libhwloc*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libopen-rte*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libcrypto*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libboost_thread*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libboost_filesystem*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libboost_program_options*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libmpi*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libyaml-cpp*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libglog*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libgflags*.so* \ - /usr/lib/$PLATFORM-linux-gnu/libicudata.so* \ - /usr/lib/$PLATFORM-linux-gnu/ +COPY --from=builder /usr/lib/$PLATFORM-linux-gnu/ /usr/lib/$PLATFORM-linux-gnu/ +# COPY --from=builder /usr/lib/$PLATFORM-linux-gnu/libprotobuf* 
/usr/lib/$PLATFORM-linux-gnu/libfmt*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libre2*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libutf8proc*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libevent*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libltdl*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libltdl*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libopen-pal*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libunwind*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libhwloc*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libopen-rte*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libcrypto*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libboost_thread*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libboost_filesystem*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libboost_program_options*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libmpi*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libyaml-cpp*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libglog*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libgflags*.so* \ +# /usr/lib/$PLATFORM-linux-gnu/libicudata.so* \ +# /usr/lib/$PLATFORM-linux-gnu/ RUN sudo rm -rf /usr/lib/$PLATFORM-linux-gnu/libLLVM*.so* && sudo rm -rf /opt/flex/lib/libseastar.a && \ sudo rm -rf /usr/lib/$PLATFORM-linux-gnu/libcuda.so && \ diff --git a/proto/error/interactive.proto b/proto/error/interactive.proto index ccc5fd418fe4..60dfd4396c7e 100644 --- a/proto/error/interactive.proto +++ b/proto/error/interactive.proto @@ -72,6 +72,10 @@ enum Code { SQL_EXECUTION_ERROR = 109; SQL_BINDING_ERROR = 110; ALREADY_LOCKED = 111; + ERROR_CREATE_META = 112; + META_KEY_NOT_FOUND = 113; + META_KEY_ALREADY_EXIST = 114; + META_LOCK_FAILED = 115; UNKNOWN = 999; }