comparison master/src/wecu/wecu.py @ 60:5fdca5baa4e9

refactor a bit, add support for sac with bespoke mapper
author Henry S. Thompson <ht@markup.co.uk>
date Thu, 28 May 2020 12:55:03 +0000
parents a3edba8dab11
children cfaf5223b071
comparison
equal deleted inserted replaced
59:8332faef25e1 60:5fdca5baa4e9
103 103
104 patterns_str = ' '.join(['"{}"'.format(x) for x in args.pattern]) 104 patterns_str = ' '.join(['"{}"'.format(x) for x in args.pattern])
105 105
106 cores_per_worker = num_cores(args) 106 cores_per_worker = num_cores(args)
107 107
108 if args.by_file: 108 os.system('run_sac.sh {} {} {} {} {} {} {}'.format(
109 os.system('run_sac.sh {} {} {} by-file {} {}'.format(cores_per_worker, 109 cores_per_worker,
110 HOSTS_FILEPATH, 110 HOSTS_FILEPATH,
111 WORK_DIR, 111 WORK_DIR,
112 regex_str, 112 ('sac_mapper.py' if args.mapper is None
113 patterns_str)) 113 else args.mapper),
114 return 114 ('by-file' if args.by_file
115 115 else 'aggregate'),
116 116 regex_str,
117 os.system('run_sac.sh {} {} {} aggregate {} {}'.format(cores_per_worker, 117 patterns_str))
118 HOSTS_FILEPATH,
119 WORK_DIR,
120 regex_str,
121 patterns_str))
122 118
123 def generate_handler(args): 119 def generate_handler(args):
124 import generate_file_list 120 import generate_file_list
125 generate_file_list.main() 121 generate_file_list.main()
126 122
172 # Scan-and-count parser 168 # Scan-and-count parser
173 sac_list = subparsers.add_parser('sac', help='Execute scan-and-count (SAC) workloads directly from the command line') 169 sac_list = subparsers.add_parser('sac', help='Execute scan-and-count (SAC) workloads directly from the command line')
174 sac_list.add_argument('pattern', type=str, nargs='+') 170 sac_list.add_argument('pattern', type=str, nargs='+')
175 sac_list.add_argument('--regex', action="store_true", help="Provide this flag to indicate that the provided strings should be treated as regular expressions") 171 sac_list.add_argument('--regex', action="store_true", help="Provide this flag to indicate that the provided strings should be treated as regular expressions")
176 sac_list.add_argument('--by-file', action="store_true", help="Provide this flag to indicate that the output should not be aggregated and displayed per file instead") 172 sac_list.add_argument('--by-file', action="store_true", help="Provide this flag to indicate that the output should not be aggregated and displayed per file instead")
173 sac_list.add_argument('--mapper', type=str, help="Supply a bespoke mapper for use in place of sac_mapper.py")
177 sac_list.add_argument('--jobs-per-worker', type=int, help="By deafult the number of concurrent tasks is set to the number of available logical cores. Provide this flag to set a different number of concurrent tasks.") 174 sac_list.add_argument('--jobs-per-worker', type=int, help="By deafult the number of concurrent tasks is set to the number of available logical cores. Provide this flag to set a different number of concurrent tasks.")
178 sac_list.set_defaults(handler=sac_handler) 175 sac_list.set_defaults(handler=sac_handler)
179 176
180 # Generate sample parser 177 # Generate sample parser
181 generate_parser = subparsers.add_parser('generate-sample', help='Generate a sample of a chosen Common Crawl month') 178 generate_parser = subparsers.add_parser('generate-sample', help='Generate a sample of a chosen Common Crawl month')