# HG changeset patch
# User Henry S. Thompson
# Date 1701772515 0
# Node ID 3406742894fcc2a567889c1954e6723579e46c54
# Parent  0dd36f071b1d8e97373b80a7acdb928cddc7a178
compute (component) uri lengths and a few other properties

diff -r 0dd36f071b1d -r 3406742894fc lib/python/cc/lmh/ulens.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/python/cc/lmh/ulens.py	Tue Dec 05 10:35:15 2023 +0000
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+'''extract URI length details from 4-column date-sorted data'''
+
+import sys
+from urllib.parse import urlparse
+
+with open(sys.argv[1],'rb') as f:
+    for l in f:
+        _,_,u,_ = l.split()
+        uu = urlparse(u)
+        print(len(u),
+              *(len(p) for p in uu),
+              1 if b'xn--' in uu.netloc else 0,
+              uu.path.count(b'%'),
+              uu.query.count(b'%'),
+              sep='\t')