Mercurial > hg > cc > cirrus_home
comparison bin/ix.py @ 98:1a4c5fdc2923
help format hacking done
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Fri, 16 Apr 2021 13:15:23 +0000 |
parents | 2b880f2ce894 |
children | d48537c4cbae |
comparison
equal
deleted
inserted
replaced
97:2b880f2ce894 | 98:1a4c5fdc2923 |
---|---|
3 given length, offset and file triples. | 3 given length, offset and file triples. |
4 Input one triple on command line, or | 4 Input one triple on command line, or |
5 triples from stdin as tab-delimited lines | 5 triples from stdin as tab-delimited lines |
6 or complete cdx index lines. | 6 or complete cdx index lines. |
7 | 7 |
8 Note that if no output flag(s) is/are given, the whole WARC record will be output, more efficiently than would be the case if all three flags were given.''' | 8 Note that if no output flag(s) is/are given, the whole WARC record will be output, more efficiently than would be the case if -whb is given.''' |
9 | 9 |
10 import sys, argparse, regex | 10 import sys, argparse, regex |
11 | 11 |
12 HACK=regex.compile('\[-x\]\n\s*\[length\] \[offset\] \[filename\]') | 12 HACK_USAGE=regex.compile('\[-x\]\n\s*\[length\] \[offset\] \[filename\]') |
13 | 13 |
14 class HackFormat(argparse.RawDescriptionHelpFormatter): | 14 class HackFormat(argparse.RawDescriptionHelpFormatter): |
15 def format_help(self): | 15 def format_help(self): |
16 global FOO | 16 global FOO |
17 FOO=argparse.RawDescriptionHelpFormatter.format_help(self) | 17 FOO=argparse.RawDescriptionHelpFormatter.format_help(self) |
18 return HACK.sub('\n [ ( -x | length offset filename ) ]', | 18 return HACK_USAGE.sub('\n [ ( -x | length offset filename ) ]', |
19 FOO) | 19 FOO) |
20 | 20 |
21 parser = argparse.ArgumentParser( | 21 parser = argparse.ArgumentParser( |
22 description='''Extract records from warc files given length, offset and file triples. | 22 description='''Extract records from warc files given length, offset and file triples. |
23 Input one triple on command line, or | 23 Input one triple on command line, or |
24 triples from stdin as tab-delimited lines | 24 triples from stdin as tab-delimited lines |
51 help='start position in bytes of gzipped record', | 51 help='start position in bytes of gzipped record', |
52 nargs='?') | 52 nargs='?') |
53 parser.add_argument('filename', | 53 parser.add_argument('filename', |
54 help='name of gzipped Common Crawl WARC-format file', | 54 help='name of gzipped Common Crawl WARC-format file', |
55 nargs='?') | 55 nargs='?') |
56 | 56 # Hack the order of optional and positional in the help output |
 | 57 parser._action_groups.sort(key=lambda g:g.title) |
57 #parser.print_help() | 58 #parser.print_help() |
58 pa=parser.parse_args(sys.argv[1:]) | 59 pa=parser.parse_args(sys.argv[1:]) |
59 # We have to enforce our own check.. | 60 # We have to enforce our own check.. |
60 print(pa) | 61 print(pa) |
61 | 62 |