Mercurial > hg > cc > azure
diff master/src/wecu/wecu.py @ 61:cfaf5223b071
trying to get my own mapper working
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Sun, 31 May 2020 12:06:44 +0000 |
parents | 5fdca5baa4e9 |
children | 892e1c0240e1 |
line wrap: on
line diff
--- a/master/src/wecu/wecu.py Thu May 28 12:55:03 2020 +0000
+++ b/master/src/wecu/wecu.py Sun May 31 12:06:44 2020 +0000
@@ -105,12 +105,14 @@
 cores_per_worker = num_cores(args)
- os.system('run_sac.sh {} {} {} {} {} {} {}'.format(
+ os.system('run_sac.sh {} {} {} {} {} {} {} {}'.format(
 cores_per_worker,
 HOSTS_FILEPATH,
 WORK_DIR,
 ('sac_mapper.py' if args.mapper is None else args.mapper),
+ ('' if args.filter is None
+ else "-f '%s'"%args.filter),
 ('by-file' if args.by_file else 'aggregate'),
 regex_str,
@@ -171,6 +173,7 @@
 sac_list.add_argument('--regex', action="store_true", help="Provide this flag to indicate that the provided strings should be treated as regular expressions")
 sac_list.add_argument('--by-file', action="store_true", help="Provide this flag to indicate that the output should not be aggregated and displayed per file instead")
 sac_list.add_argument('--mapper', type=str, help="Supply a bespoke mapper for use in place of sac_mapper.py")
+sac_list.add_argument('--filter', type=str, help="Supply a filter on the unzipped warc file ahead of the mapper")
 sac_list.add_argument('--jobs-per-worker', type=int, help="By deafult the number of concurrent tasks is set to the number of available logical cores. Provide this flag to set a different number of concurrent tasks.")
 sac_list.set_defaults(handler=sac_handler)