view master/src/wecu/sac_mapper.py @ 66:b04870ab3035

don't over-count duplicate URIs in multiple properties, produce composite keys instead
author Henry S. Thompson <ht@markup.co.uk>
date Thu, 04 Jun 2020 16:10:55 +0000
parents cfaf5223b071
children
line wrap: on
line source

#!/usr/bin/python3
import os
import sys
import re

def count_terms(lines, terms, is_regex):
    """Count how often each search term occurs across *lines*.

    Args:
        lines: Iterable of strings to scan (for example ``sys.stdin``).
        terms: Iterable of search terms. Duplicate terms collapse into a
            single entry, keeping the position of the first occurrence.
        is_regex: When true, every term is treated as a regular expression
            and its non-overlapping matches are counted per line; otherwise
            non-overlapping literal substring occurrences are counted.

    Returns:
        A dict mapping each distinct term to its total count, in the order
        the terms were first given.

    Raises:
        re.error: If ``is_regex`` is true and a term is not a valid pattern.
            The patterns are compiled before any line is read, so this is
            raised even when *lines* is empty.
    """
    counters = dict.fromkeys(terms, 0)

    if is_regex:
        # Compile each pattern once instead of re-resolving it for every
        # (line, term) pair inside the loop.
        finders = {term: re.compile(term).findall for term in counters}
        for line in lines:
            for term, findall in finders.items():
                counters[term] += len(findall(line))
    else:
        for line in lines:
            for term in counters:
                counters[term] += line.count(term)

    return counters


def main(argv):
    """Run the mapper: count terms on stdin and print tab-separated totals.

    Args:
        argv: Full argument vector. ``argv[1]`` selects regex mode when it is
            exactly ``'true'`` (any other value means literal matching);
            ``argv[2:]`` are the search terms.

    Exits with status 1 and a usage message on stderr when the mode argument
    is missing.
    """
    if len(argv) < 2:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # the same status the uncaught IndexError used to produce.
        sys.exit("usage: {} IS_REGEX [TERM ...]  "
                 "(IS_REGEX is 'true' for regular expressions)".format(argv[0]))

    is_regex = argv[1] == 'true'
    search_terms = argv[2:]

    search_terms_counters = count_terms(sys.stdin, search_terms, is_regex)

    # One "term<TAB>count" line per distinct term, in argument order.
    for term, count in search_terms_counters.items():
        print("{}\t{}".format(term, count))


if __name__ == "__main__":
    main(sys.argv)