# HG changeset patch # User Henry S. Thompson # Date 1618578923 0 # Node ID 1a4c5fdc29238af65c44b964d9ed798af6e47181 # Parent 2b880f2ce894703ac9088f7be1134c9774195aec help format hacking done diff -r 2b880f2ce894 -r 1a4c5fdc2923 bin/ix.py --- a/bin/ix.py Fri Apr 16 12:55:05 2021 +0000 +++ b/bin/ix.py Fri Apr 16 13:15:23 2021 +0000 @@ -5,18 +5,18 @@ triples from stdin as tab-delimited lines or complete cdx index lines. -Note that if no output flag(s) is/are given, the whole WARC record will be output, more efficiently than would be the case if all three flags were given.''' +Note that if no output flag(s) is/are given, the whole WARC record will be output, more efficiently than would be the case if -whb is given.''' import sys, argparse, regex -HACK=regex.compile('\[-x\]\n\s*\[length\] \[offset\] \[filename\]') +HACK_USAGE=regex.compile('\[-x\]\n\s*\[length\] \[offset\] \[filename\]') class HackFormat(argparse.RawDescriptionHelpFormatter): def format_help(self): global FOO FOO=argparse.RawDescriptionHelpFormatter.format_help(self) - return HACK.sub('\n [ ( -x | length offset filename ) ]', - FOO) + return HACK_USAGE.sub('\n [ ( -x | length offset filename ) ]', + FOO) parser = argparse.ArgumentParser( description='''Extract records from warc files given length, offset and file triples. @@ -53,7 +53,8 @@ parser.add_argument('filename', help='name of gzipped Common Crawl WARC-format file', nargs='?') - +# Hack the order of optional and positional in the help output +parser._action_groups.sort(key=lambda g:g.title) #parser.print_help() pa=parser.parse_args(sys.argv[1:]) # We have to enforce our own check..