diff master/src/wecu/wecu.py @ 62:892e1c0240e1

added more robust (I hope) error handling, got reducer working with support for choosing dict or tsv output
author Henry S. Thompson <ht@markup.co.uk>
date Tue, 02 Jun 2020 17:35:07 +0000
parents cfaf5223b071
children d46c8b12fc04
line wrap: on
line diff
--- a/master/src/wecu/wecu.py	Sun May 31 12:06:44 2020 +0000
+++ b/master/src/wecu/wecu.py	Tue Jun 02 17:35:07 2020 +0000
@@ -105,7 +105,7 @@
 
     cores_per_worker = num_cores(args)
 
-    os.system('run_sac.sh {} {} {} {} {} {} {} {}'.format(
+    os.system('run_sac.sh {} {} {} {} {} {} {} {} {}'.format(
         cores_per_worker,
         HOSTS_FILEPATH,
         WORK_DIR,
@@ -113,8 +113,10 @@
          else args.mapper),
         ('' if args.filter is None
          else "-f '%s'"%args.filter),
+        ('' if args.numKeys is None
+         else "-k %s"%args.numKeys),
         ('by-file' if args.by_file
-         else 'aggregate'),
+         else 'dict' if args.dict else 'aggregate'),
         regex_str,
         patterns_str))
 
@@ -172,8 +174,10 @@
 sac_list.add_argument('pattern', type=str, nargs='+')
 sac_list.add_argument('--regex', action="store_true", help="Provide this flag to indicate that the provided strings should be treated as regular expressions")
 sac_list.add_argument('--by-file', action="store_true", help="Provide this flag to indicate that the output should not be aggregated and displayed per file instead")
+sac_list.add_argument('--dict', action="store_true", help="Provide this flag to indicate that the output should be aggregated and displayed in the form 'res={dict}'")
 sac_list.add_argument('--mapper', type=str, help="Supply a bespoke mapper for use in place of sac_mapper.py")
 sac_list.add_argument('--filter', type=str, help="Supply a filter on the unzipped warc file ahead of the mapper")
+sac_list.add_argument('--numKeys', type=int, help="Depth of key list, default 1")
 sac_list.add_argument('--jobs-per-worker', type=int, help="By deafult the number of concurrent tasks is set to the number of available logical cores. Provide this flag to set a different number of concurrent tasks.")
 sac_list.set_defaults(handler=sac_handler)