Mercurial > hg > python
comparison unicode-histogram.py @ 53:91d71e9760e8
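forgot what this is about [note: the comparison below shows this changeset commenting out the inline test-data block at lines 401–420 of unicode-histogram.py, so the script proceeds directly to reading stdin]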
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Mon, 29 May 2023 22:02:52 +0100 |
parents | 99bfff1538c6 |
children |
comparison
equal
deleted
inserted
replaced
52:10f17205908f | 53:91d71e9760e8 |
---|---|
396 blockCatalogElements.append(" ".join([k for _ in xrange(v)])) | 396 blockCatalogElements.append(" ".join([k for _ in xrange(v)])) |
397 part["unicodeBlockCatalog"] = ", ".join(blockCatalogElements) | 397 part["unicodeBlockCatalog"] = ", ".join(blockCatalogElements) |
398 | 398 |
399 return part | 399 return part |
400 | 400 |
401 Test data | 401 #Test data |
402 HEART = u'\u2665' | 402 # HEART = u'\u2665' |
403 SMILY = u'\u263a' | 403 # SMILY = u'\u263a' |
404 TSU = u'\u30C4' | 404 # TSU = u'\u30C4' |
405 LEFT = u'\u27E8' | 405 # LEFT = u'\u27E8' |
406 RIGHT = u'\u27E9' | 406 # RIGHT = u'\u27E9' |
407 EURO = u'\u20AC' | 407 # EURO = u'\u20AC' |
408 | 408 |
409 if True: | 409 # if True: |
410 | 410 |
411 TESTUNICODE = LEFT + "h" + EURO + "llo " + HEART + HEART + SMILY + TSU + " goodby" + EURO + " " + SMILY + TSU + HEART + HEART + HEART + HEART + RIGHT | 411 # TESTUNICODE = LEFT + "h" + EURO + "llo " + HEART + HEART + SMILY + TSU + " goodby" + EURO + " " + SMILY + TSU + HEART + HEART + HEART + HEART + RIGHT |
412 | 412 |
413 print len(TESTUNICODE) | 413 # print len(TESTUNICODE) |
414 print json.dumps(TESTUNICODE) | 414 # print json.dumps(TESTUNICODE) |
415 | 415 |
416 TESTDOC = {"@context": "http://localhost:8080/publish/JSON/WSP1WS6-select unix_timestamp(a_importtime)*1000 as timestamp, a_* from ads a join sample s on a_id=s_id limit 50-context.json","schema:provider": {"a": "Organization", "uri": "http://memex.zapto.org/data/organization/1"}, "snapshotUri": "http://memex.zapto.org/data/page/850753E7323B188B93E6E28F730F2BFBFB1CE00B/1396493689000/raw","a": "WebPage","dateCreated": "2013-09-24T18:28:00","hasBodyPart": {"text": TESTUNICODE, "a": "WebPageElement"}, "hasTitlePart": {"text": "\u270b\u270b\u270bOnly Best \u270c\u270c\u270c Forget The Rest \u270b\u270b\u270b Outcall Specials TONIGHT \u270c\ud83d\udc8b\ud83d\udc45 Sexy Blonde is UP LATE \ud83d\udc9c\ud83d\udc9b\u270b\u270c - 25", "a": "WebPageElement"}, "uri": "http://memex.zapto.org/data/page/850753E7323B188B93E6E28F730F2BFBFB1CE00B/1396493689000/processed"} | 416 # TESTDOC = {"@context": "http://localhost:8080/publish/JSON/WSP1WS6-select unix_timestamp(a_importtime)*1000 as timestamp, a_* from ads a join sample s on a_id=s_id limit 50-context.json","schema:provider": {"a": "Organization", "uri": "http://memex.zapto.org/data/organization/1"}, "snapshotUri": "http://memex.zapto.org/data/page/850753E7323B188B93E6E28F730F2BFBFB1CE00B/1396493689000/raw","a": "WebPage","dateCreated": "2013-09-24T18:28:00","hasBodyPart": {"text": TESTUNICODE, "a": "WebPageElement"}, "hasTitlePart": {"text": "\u270b\u270b\u270bOnly Best \u270c\u270c\u270c Forget The Rest \u270b\u270b\u270b Outcall Specials TONIGHT \u270c\ud83d\udc8b\ud83d\udc45 Sexy Blonde is UP LATE \ud83d\udc9c\ud83d\udc9b\u270b\u270c - 25", "a": "WebPageElement"}, "uri": "http://memex.zapto.org/data/page/850753E7323B188B93E6E28F730F2BFBFB1CE00B/1396493689000/processed"} |
417 | 417 |
418 analyze(TESTDOC["hasBodyPart"]) | 418 # analyze(TESTDOC["hasBodyPart"]) |
419 json.dump(TESTDOC, sys.stdout, indent=4); | 419 # json.dump(TESTDOC, sys.stdout, indent=4); |
420 exit(0) | 420 # exit(0) |
421 | 421 |
422 for line in sys.stdin: | 422 for line in sys.stdin: |
423 try: | 423 try: |
424 (url, jrep) = line.split('\t') | 424 (url, jrep) = line.split('\t') |
425 d = json.loads(jrep) | 425 d = json.loads(jrep) |