Mercurial > hg > cc > azure
comparison master/src/wecu/wecu.py @ 60:5fdca5baa4e9
refactor a bit, add support for sac with bespoke mapper
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 28 May 2020 12:55:03 +0000 |
parents | a3edba8dab11 |
children | cfaf5223b071 |
comparison legend: equal, deleted, inserted, replaced
59:8332faef25e1 | 60:5fdca5baa4e9 |
---|---|
103 | 103 |
104 patterns_str = ' '.join(['"{}"'.format(x) for x in args.pattern]) | 104 patterns_str = ' '.join(['"{}"'.format(x) for x in args.pattern]) |
105 | 105 |
106 cores_per_worker = num_cores(args) | 106 cores_per_worker = num_cores(args) |
107 | 107 |
108 if args.by_file: | 108 os.system('run_sac.sh {} {} {} {} {} {} {}'.format( |
109 os.system('run_sac.sh {} {} {} by-file {} {}'.format(cores_per_worker, | 109 cores_per_worker, |
110 HOSTS_FILEPATH, | 110 HOSTS_FILEPATH, |
111 WORK_DIR, | 111 WORK_DIR, |
112 regex_str, | 112 ('sac_mapper.py' if args.mapper is None |
113 patterns_str)) | 113 else args.mapper), |
114 return | 114 ('by-file' if args.by_file |
115 | 115 else 'aggregate'), |
116 | 116 regex_str, |
117 os.system('run_sac.sh {} {} {} aggregate {} {}'.format(cores_per_worker, | 117 patterns_str)) |
118 HOSTS_FILEPATH, | |
119 WORK_DIR, | |
120 regex_str, | |
121 patterns_str)) | |
122 | 118 |
123 def generate_handler(args): | 119 def generate_handler(args): |
124 import generate_file_list | 120 import generate_file_list |
125 generate_file_list.main() | 121 generate_file_list.main() |
126 | 122 |
172 # Scan-and-count parser | 168 # Scan-and-count parser |
173 sac_list = subparsers.add_parser('sac', help='Execute scan-and-count (SAC) workloads directly from the command line') | 169 sac_list = subparsers.add_parser('sac', help='Execute scan-and-count (SAC) workloads directly from the command line') |
174 sac_list.add_argument('pattern', type=str, nargs='+') | 170 sac_list.add_argument('pattern', type=str, nargs='+') |
175 sac_list.add_argument('--regex', action="store_true", help="Provide this flag to indicate that the provided strings should be treated as regular expressions") | 171 sac_list.add_argument('--regex', action="store_true", help="Provide this flag to indicate that the provided strings should be treated as regular expressions") |
176 sac_list.add_argument('--by-file', action="store_true", help="Provide this flag to indicate that the output should not be aggregated and displayed per file instead") | 172 sac_list.add_argument('--by-file', action="store_true", help="Provide this flag to indicate that the output should not be aggregated and displayed per file instead") |
 | 173 sac_list.add_argument('--mapper', type=str, help="Supply a bespoke mapper for use in place of sac_mapper.py") |
177 sac_list.add_argument('--jobs-per-worker', type=int, help="By deafult the number of concurrent tasks is set to the number of available logical cores. Provide this flag to set a different number of concurrent tasks.") | 174 sac_list.add_argument('--jobs-per-worker', type=int, help="By deafult the number of concurrent tasks is set to the number of available logical cores. Provide this flag to set a different number of concurrent tasks.") |
178 sac_list.set_defaults(handler=sac_handler) | 175 sac_list.set_defaults(handler=sac_handler) |
179 | 176 |
180 # Generate sample parser | 177 # Generate sample parser |
181 generate_parser = subparsers.add_parser('generate-sample', help='Generate a sample of a chosen Common Crawl month') | 178 generate_parser = subparsers.add_parser('generate-sample', help='Generate a sample of a chosen Common Crawl month') |