Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,6 @@ stackoverflow
common_voice
puffer
landmark
coqa
coqa
dump.rdb
*.pid
11 changes: 9 additions & 2 deletions benchmark/configs/femnist/conf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

# ========== Cluster configuration ==========
# IP address of the parameter server (needs 1 GPU process)
ps_ip: 10.0.0.1
ps_ip: 10.0.0.4

# IP address of each worker, followed by the number of available processes on each GPU of that node (format: <ip>:[<processes per GPU>])
# Note: if the ps and a worker are collocated on the same GPU, decrease the number of available processes on that GPU by 1
# E.g., if the master node has 4 available processes, 1 is used by the ps, so the worker entry should be set to worker:3
worker_ips:
- 10.0.0.1:[4]
- 10.0.0.4:[4]

exp_path: $FEDSCALE_HOME/fedscale/core

Expand Down Expand Up @@ -49,3 +49,10 @@ job_conf:
- test_bsz: 20
- use_cuda: True

# ========== Redis configuration ==========
redis_conf:
redis_executable: /usr/bin/redis-server
redis_host: 10.0.0.4
redis_port: 6379
redis_password: abc # Use ~ to skip password
job_tag: _femnist_1
18 changes: 18 additions & 0 deletions docker/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
from kubernetes import client, config, utils
from yaml_generator import generate_aggr_template, generate_exec_template

from fedscale.core.storage.redis_utils import is_redis_server_online, start_redis_server


def flatten(d):
out = {}
Expand All @@ -40,6 +42,17 @@ def process_cmd(yaml_file, local=False):

yaml_conf = load_yaml_conf(yaml_file)

# Start redis server
redis_conf = yaml_conf['redis_conf']
redis_exec = redis_conf['redis_executable']
redis_host = redis_conf['redis_host']
redis_port = redis_conf['redis_port']
redis_password = redis_conf['redis_password']
fedscale_home = os.environ['FEDSCALE_HOME']
while not is_redis_server_online(redis_host, redis_port, redis_password):
start_redis_server(redis_exec, fedscale_home, redis_host, redis_port, redis_password)
time.sleep(1) # wait for server to go online

if 'use_container' in yaml_conf:
if yaml_conf['use_container'] == "docker":
use_container = "docker"
Expand Down Expand Up @@ -78,6 +91,11 @@ def process_cmd(yaml_file, local=False):

for conf in yaml_conf['job_conf']:
job_conf.update(conf)

for conf in yaml_conf['redis_conf'].items():
if conf[1] is not None: # skip unset (None) values, e.g. a password given as ~
job_conf.update({conf[0]: conf[1]})


conf_script = ''
setup_cmd = ''
Expand Down
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ dependencies:
- tensorboard
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember you mentioned that some TF versions are not compatible. Could you please update the TF version here too?

- numba=0.48.0
- pip=20.0.2
- redis-py=3.5.3
- pip:
- torch_optimizer
- torch
- tensorflow
- tensorflow==2.3.0
- torchvision
- transformers
- scipy==1.4.1
Expand Down
195 changes: 142 additions & 53 deletions fedscale/core/aggregation/aggregator.py

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions fedscale/core/config_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,13 @@
parser.add_argument('--no-bidirectional', dest='bidirectional', action='store_false', default=True,
help='Turn off bi-directional RNNs, introduces lookahead convolution')

# for redis conf
parser.add_argument('--redis_host', type=str, default='127.0.0.1')
parser.add_argument('--redis_port', type=int, default=6379)
parser.add_argument('--redis_password', type=str, default='')
parser.add_argument('--job_tag', type=str, default='')


args, unknown = parser.parse_known_args()
args.use_cuda = eval(args.use_cuda)

Expand Down
Loading