1
1
#!/usr/bin/env python3
2
2
3
- import os ,sys ,getpass ,warnings ,glob ,shlex ,subprocess ,argparse
3
+ """Returns a list of files from a dataset including only files that are hosted on disk."""
4
+
5
+ import os ,sys ,getpass ,warnings ,glob ,shlex ,subprocess ,argparse # pylint: disable=multiple-imports
4
6
from collections import defaultdict
5
7
8
+ """Gets OS version from shell (other methods return host OS when in container)"""
6
9
def getOS():
    """Return the OS release number reported by ``/etc/redhat-release``.

    The value is obtained by running ``sed`` in a subprocess rather than
    through a Python API; per the file's own note, other methods report the
    host OS when running inside a container.

    The ``sed`` expression prints, for each line that contains digits, only
    the first run of digits found on that line.

    Returns:
        str: The output of the ``sed`` command with trailing whitespace
        removed.

    Raises:
        subprocess.CalledProcessError: If ``sed`` exits with a non-zero
            status (``subprocess.check_output`` behaviour).
    """
    sed_command = r"sed -nr 's/[^0-9]*([0-9]+).*/\1/p' /etc/redhat-release"
    # Tokenise the command line so it can run without invoking a shell.
    argv = shlex.split(sed_command)
    raw_output = subprocess.check_output(argv, encoding="utf-8")
    return raw_output.rstrip()
10
13
14
+ """Gets list of files on disk for a dataset, and list of sites along with how many files each site has"""
11
15
def getHosted (dataset ):
12
16
osv = getOS ()
13
17
rucio_path = f'/cvmfs/cms.cern.ch/rucio/x86_64/rhel{ osv } /py3/current'
@@ -17,7 +21,7 @@ def getHosted(dataset):
17
21
sys .path .insert (0 ,full_rucio_path + '/site-packages/' )
18
22
19
23
warnings .filterwarnings ("ignore" , message = ".*cryptography.*" )
20
- from rucio .client .client import Client
24
+ from rucio .client .client import Client # pylint: disable=import-error,import-outside-toplevel
21
25
client = Client ()
22
26
23
27
# loop over blocks to avoid timeout error from too-large response
@@ -27,12 +31,13 @@ def getHosted(dataset):
27
31
nblocks = 10
28
32
block_groups = [all_blocks [i :i + nblocks ] for i in range (0 , len (all_blocks ), nblocks )]
29
33
30
- from rucio .client .replicaclient import ReplicaClient
34
+ from rucio .client .replicaclient import ReplicaClient # pylint: disable=import-error,import-outside-toplevel
31
35
rep_client = ReplicaClient ()
32
36
33
37
filelist = set ()
34
38
sitelist = defaultdict (int )
35
- sitecond = lambda site : "_Tape" not in site
39
+ def sitecond (site ):
40
+ return "_Tape" not in site
36
41
for block_group in block_groups :
37
42
reps = list (rep_client .list_replicas ([{'scope' : 'cms' , 'name' : block ['name' ]} for block in block_group ]))
38
43
for rep in reps :
@@ -44,16 +49,17 @@ def getHosted(dataset):
44
49
sys .path .pop (0 )
45
50
return filelist , sitelist
46
51
52
+ """Prints file list and site list"""
47
53
def main(dataset, outfile=None, verbose=False):
    """Print the file list (and optionally the site list) for a dataset.

    Args:
        dataset: Dataset identifier, passed unchanged to ``getHosted``.
        outfile: Path of a file to write the file list to. When ``None``
            (the default) the list is written to standard output.
        verbose: When true, first print a "Site list:" header followed by
            one entry per item of the site mapping returned by
            ``getHosted``.

    The file list is written in whatever order iterating over the collection
    returned by ``getHosted`` yields; no sorting is applied here.
    """
    filelist, sitelist = getHosted(dataset)

    if verbose:
        print("Site list:")
        print("\n ".join(f'{ k } : { v } ' for k, v in sitelist.items()))

    listing = "\n ".join(filelist)
    if outfile is None:
        print(listing)
        return

    # The context manager guarantees the handle is closed even if the write
    # fails; the explicit encoding avoids depending on the locale default.
    with open(outfile, 'w', encoding="utf-8") as handle:
        print(listing, file=handle)
57
63
58
64
if __name__ == "__main__" :
59
65
parser = argparse .ArgumentParser (
0 commit comments