changeset 130:31abd509e365

importable just in case
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Thu, 28 Sep 2023 16:35:39 +0100
parents 83a574b570a6
children fd16e8fb9223
files lib/python/cc/lmh/lmh.py
diffstat 1 files changed, 13 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/lib/python/cc/lmh/lmh.py	Thu Sep 28 16:34:49 2023 +0100
+++ b/lib/python/cc/lmh/lmh.py	Thu Sep 28 16:35:39 2023 +0100
@@ -13,8 +13,6 @@
 DTAB=bytearray(range(256))
 DDEL=b'TZ-:'
 
-OUT=open(sys.stdout.fileno(),'wb')
-
 def showmeLMH(wtype,buf,part):
   global URI, DATE, SEGMENT, FILETYPE, FILENO
   if part==1:
@@ -42,13 +40,19 @@
       OUT.write(mm[1])
     OUT.write(b'\n')
 
-(CCdate, segment, filetype, fileno) = sys.argv[1:]
-fn='/beegfs/common_crawl/CC-MAIN-%s/*.%s/orig/%s/*%s.warc.gz'%(
-  CCdate, segment, filetype, fileno)
+def main(CCdate, segment, filetype, fileno):  # run showmeLMH over the first WARC file matching these path components
+  global OUT, SEGMENT, FILETYPE, FILENO  # OUT must be module-global: the record callback writes to it
+
+  OUT=open(sys.stdout.fileno(),'wb')  # binary view of stdout, opened only when main() runs so that import has no side effects
+
+  fn='/beegfs/common_crawl/CC-MAIN-%s/*.%s/orig/%s/*%s.warc.gz'%(
+    CCdate, segment, filetype, fileno)
 
-SEGMENT=codecs.encode(segment,'ascii')
-FILETYPE=codecs.encode(filetype,'ascii')
-FILENO=codecs.encode(fileno,'ascii')
+  SEGMENT=codecs.encode(segment,'ascii')  # bytes copies, published as globals (showmeLMH declares them global too)
+  FILETYPE=codecs.encode(filetype,'ascii')
+  FILENO=codecs.encode(fileno,'ascii')
 
-warc.warc(glob.glob(fn)[0],showmeLMH,[b'response'],parts=3)
+  warc.warc(glob.glob(fn)[0],showmeLMH,[b'response'],parts=3)  # first glob match only; IndexError if nothing matches
 
+if __name__ == '__main__':  # run only as a script, so the module stays importable
+  sys.exit(main(*sys.argv[1:]))  # positional unpack of the 4 CLI args; '**' needs a mapping and raises TypeError on a list