SymbioticLab · xuyehe · Aug 2, 2022 · Aug 4, 2022 · Aug 8, 2022 · Aug 9, 2022
diff --git a/.gitignore b/.gitignore
@@ -33,4 +33,6 @@ stackoverflow
 common_voice  
 puffer  
 landmark  
-coqa
+coqa
+dump.rdb
+*.pid
diff --git a/benchmark/configs/femnist/conf.yml b/benchmark/configs/femnist/conf.yml
@@ -2,13 +2,13 @@
 
 # ========== Cluster configuration ========== 
 # ip address of the parameter server (need 1 GPU process)
-ps_ip: 10.0.0.1
+ps_ip: 10.0.0.4
 
 # ip address of each worker:# of available gpus process on each gpu in this node
 # Note that if we collocate ps and worker on same GPU, then we need to decrease this number of available processes on that GPU by 1
 # E.g., master node has 4 available processes, then 1 for the ps, and worker should be set to: worker:3
 worker_ips:
-    - 10.0.0.1:[4]
+    - 10.0.0.4:[4]
 
 exp_path: $FEDSCALE_HOME/fedscale/core
 
@@ -49,3 +49,10 @@ job_conf:
     - test_bsz: 20
     - use_cuda: True
 
+# ========== Redis configuration ========== 
+redis_conf:
+    redis_executable: /usr/bin/redis-server
+    redis_host: 10.0.0.4
+    redis_port: 6379
+    redis_password: abc # Use ~ (YAML null) to skip the password
+    job_tag: _femnist_1
diff --git a/docker/driver.py b/docker/driver.py
@@ -15,6 +15,8 @@
 from kubernetes import client, config, utils
 from yaml_generator import generate_aggr_template, generate_exec_template
 
+from fedscale.core.storage.redis_utils import is_redis_server_online, start_redis_server
+
 
 def flatten(d):
     out = {}
@@ -40,6 +42,17 @@ def process_cmd(yaml_file, local=False):
 
     yaml_conf = load_yaml_conf(yaml_file)
 
+    # Start redis server
+    redis_conf = yaml_conf['redis_conf']
+    redis_exec = redis_conf['redis_executable']
+    redis_host = redis_conf['redis_host']
+    redis_port = redis_conf['redis_port']
+    redis_password = redis_conf['redis_password']
+    fedscale_home = os.environ['FEDSCALE_HOME']
+    while not is_redis_server_online(redis_host, redis_port, redis_password):
+        start_redis_server(redis_exec, fedscale_home, redis_host, redis_port, redis_password)
+        time.sleep(1) # give the server time to come online before re-checking
+
     if 'use_container' in yaml_conf:
         if yaml_conf['use_container'] == "docker":
             use_container = "docker"
@@ -78,6 +91,11 @@ def process_cmd(yaml_file, local=False):
 
     for conf in yaml_conf['job_conf']:
         job_conf.update(conf)
+
+    for conf in yaml_conf['redis_conf'].items():
+        if conf[1] is not None: # skip unset (None) values, e.g. a password given as ~
+            job_conf.update({conf[0]: conf[1]})
+
 
     conf_script = ''
     setup_cmd = ''

diff --git a/environment.yml b/environment.yml
@@ -6,10 +6,11 @@ dependencies:
   - tensorboard
   - numba=0.48.0
   - pip=20.0.2
+  - redis-py=3.5.3
   - pip:
     - torch_optimizer
     - torch
-    - tensorflow
+    - tensorflow==2.3.0
     - torchvision
     - transformers
     - scipy==1.4.1

diff --git a/fedscale/core/aggregation/aggregator.py b/fedscale/core/aggregation/aggregator.py
diff --git a/fedscale/core/config_parser.py b/fedscale/core/config_parser.py
@@ -218,6 +218,13 @@
 parser.add_argument('--no-bidirectional', dest='bidirectional', action='store_false', default=True,
                     help='Turn off bi-directional RNNs, introduces lookahead convolution')
 
+# Redis settings (host, port, password) and the job tag
+parser.add_argument('--redis_host', type=str, default='127.0.0.1')
+parser.add_argument('--redis_port', type=int, default=6379)
+parser.add_argument('--redis_password', type=str, default='')
+parser.add_argument('--job_tag', type=str, default='')
+
+
 args, unknown = parser.parse_known_args()
 args.use_cuda = eval(args.use_cuda)