comparison bin/ix.py @ 98:1a4c5fdc2923

help format hacking done
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Fri, 16 Apr 2021 13:15:23 +0000
parents 2b880f2ce894
children d48537c4cbae
comparison
equal deleted inserted replaced
97:2b880f2ce894 98:1a4c5fdc2923
3 given length, offset and file triples. 3 given length, offset and file triples.
4 Input one triple on command line, or 4 Input one triple on command line, or
5 triples from stdin as tab-delimited lines 5 triples from stdin as tab-delimited lines
6 or complete cdx index lines. 6 or complete cdx index lines.
7 7
8 Note that if no output flag(s) is/are given, the whole WARC record will be output, more efficiently than would be the case if all three flags were given.''' 8 Note that if no output flag(s) is/are given, the whole WARC record will be output, more efficiently than would be the case if -whb is given.'''
9 9
10 import sys, argparse, regex 10 import sys, argparse, regex
11 11
12 HACK=regex.compile('\[-x\]\n\s*\[length\] \[offset\] \[filename\]') 12 HACK_USAGE=regex.compile('\[-x\]\n\s*\[length\] \[offset\] \[filename\]')
13 13
14 class HackFormat(argparse.RawDescriptionHelpFormatter): 14 class HackFormat(argparse.RawDescriptionHelpFormatter):
15 def format_help(self): 15 def format_help(self):
16 global FOO 16 global FOO
17 FOO=argparse.RawDescriptionHelpFormatter.format_help(self) 17 FOO=argparse.RawDescriptionHelpFormatter.format_help(self)
18 return HACK.sub('\n [ ( -x | length offset filename ) ]', 18 return HACK_USAGE.sub('\n [ ( -x | length offset filename ) ]',
19 FOO) 19 FOO)
20 20
21 parser = argparse.ArgumentParser( 21 parser = argparse.ArgumentParser(
22 description='''Extract records from warc files given length, offset and file triples. 22 description='''Extract records from warc files given length, offset and file triples.
23 Input one triple on command line, or 23 Input one triple on command line, or
24 triples from stdin as tab-delimited lines 24 triples from stdin as tab-delimited lines
51 help='start position in bytes of gzipped record', 51 help='start position in bytes of gzipped record',
52 nargs='?') 52 nargs='?')
53 parser.add_argument('filename', 53 parser.add_argument('filename',
54 help='name of gzipped Common Crawl WARC-format file', 54 help='name of gzipped Common Crawl WARC-format file',
55 nargs='?') 55 nargs='?')
56 56 # Hack the order of optional and positional in the help output
57 parser._action_groups.sort(key=lambda g:g.title)
57 #parser.print_help() 58 #parser.print_help()
58 pa=parser.parse_args(sys.argv[1:]) 59 pa=parser.parse_args(sys.argv[1:])
59 # We have to enforce our own check.. 60 # We have to enforce our own check..
60 print(pa) 61 print(pa)
61 62