comparison unicode-histogram.py @ 53:91d71e9760e8

Comment out the hard-coded test-data block, lines 401-420 (original commit message: "forgot what this is about")
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Mon, 29 May 2023 22:02:52 +0100
parents 99bfff1538c6
children
comparison
legend: equal | deleted | inserted | replaced
52:10f17205908f 53:91d71e9760e8
396 blockCatalogElements.append(" ".join([k for _ in xrange(v)])) 396 blockCatalogElements.append(" ".join([k for _ in xrange(v)]))
397 part["unicodeBlockCatalog"] = ", ".join(blockCatalogElements) 397 part["unicodeBlockCatalog"] = ", ".join(blockCatalogElements)
398 398
399 return part 399 return part
400 400
401 Test data 401 #Test data
402 HEART = u'\u2665' 402 # HEART = u'\u2665'
403 SMILY = u'\u263a' 403 # SMILY = u'\u263a'
404 TSU = u'\u30C4' 404 # TSU = u'\u30C4'
405 LEFT = u'\u27E8' 405 # LEFT = u'\u27E8'
406 RIGHT = u'\u27E9' 406 # RIGHT = u'\u27E9'
407 EURO = u'\u20AC' 407 # EURO = u'\u20AC'
408 408
409 if True: 409 # if True:
410 410
411 TESTUNICODE = LEFT + "h" + EURO + "llo " + HEART + HEART + SMILY + TSU + " goodby" + EURO + " " + SMILY + TSU + HEART + HEART + HEART + HEART + RIGHT 411 # TESTUNICODE = LEFT + "h" + EURO + "llo " + HEART + HEART + SMILY + TSU + " goodby" + EURO + " " + SMILY + TSU + HEART + HEART + HEART + HEART + RIGHT
412 412
413 print len(TESTUNICODE) 413 # print len(TESTUNICODE)
414 print json.dumps(TESTUNICODE) 414 # print json.dumps(TESTUNICODE)
415 415
416 TESTDOC = {"@context": "http://localhost:8080/publish/JSON/WSP1WS6-select unix_timestamp(a_importtime)*1000 as timestamp, a_* from ads a join sample s on a_id=s_id limit 50-context.json","schema:provider": {"a": "Organization", "uri": "http://memex.zapto.org/data/organization/1"}, "snapshotUri": "http://memex.zapto.org/data/page/850753E7323B188B93E6E28F730F2BFBFB1CE00B/1396493689000/raw","a": "WebPage","dateCreated": "2013-09-24T18:28:00","hasBodyPart": {"text": TESTUNICODE, "a": "WebPageElement"}, "hasTitlePart": {"text": "\u270b\u270b\u270bOnly Best \u270c\u270c\u270c Forget The Rest \u270b\u270b\u270b Outcall Specials TONIGHT \u270c\ud83d\udc8b\ud83d\udc45 Sexy Blonde is UP LATE \ud83d\udc9c\ud83d\udc9b\u270b\u270c - 25", "a": "WebPageElement"}, "uri": "http://memex.zapto.org/data/page/850753E7323B188B93E6E28F730F2BFBFB1CE00B/1396493689000/processed"} 416 # TESTDOC = {"@context": "http://localhost:8080/publish/JSON/WSP1WS6-select unix_timestamp(a_importtime)*1000 as timestamp, a_* from ads a join sample s on a_id=s_id limit 50-context.json","schema:provider": {"a": "Organization", "uri": "http://memex.zapto.org/data/organization/1"}, "snapshotUri": "http://memex.zapto.org/data/page/850753E7323B188B93E6E28F730F2BFBFB1CE00B/1396493689000/raw","a": "WebPage","dateCreated": "2013-09-24T18:28:00","hasBodyPart": {"text": TESTUNICODE, "a": "WebPageElement"}, "hasTitlePart": {"text": "\u270b\u270b\u270bOnly Best \u270c\u270c\u270c Forget The Rest \u270b\u270b\u270b Outcall Specials TONIGHT \u270c\ud83d\udc8b\ud83d\udc45 Sexy Blonde is UP LATE \ud83d\udc9c\ud83d\udc9b\u270b\u270c - 25", "a": "WebPageElement"}, "uri": "http://memex.zapto.org/data/page/850753E7323B188B93E6E28F730F2BFBFB1CE00B/1396493689000/processed"}
417 417
418 analyze(TESTDOC["hasBodyPart"]) 418 # analyze(TESTDOC["hasBodyPart"])
419 json.dump(TESTDOC, sys.stdout, indent=4); 419 # json.dump(TESTDOC, sys.stdout, indent=4);
420 exit(0) 420 # exit(0)
421 421
422 for line in sys.stdin: 422 for line in sys.stdin:
423 try: 423 try:
424 (url, jrep) = line.split('\t') 424 (url, jrep) = line.split('\t')
425 d = json.loads(jrep) 425 d = json.loads(jrep)