# HG changeset patch # User Henry Thompson # Date 1583772489 0 # Node ID e07789816ca5048e6f948b315df81fc7fe1d1152 # Parent 0a3abe59e364c086270aa57ce7326815ecd5a6f0 adding more python files from lib/python on origen diff -r 0a3abe59e364 -r e07789816ca5 csvtotsv.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/csvtotsv.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,6 @@ +#!/usr/bin/python3 +import csv,sys +with open(sys.argv[1],newline='') as f: + r=csv.reader(f) + for l in r: + print('\t'.join(s.replace("\n","") for s in l)) diff -r 0a3abe59e364 -r e07789816ca5 hmm/semiSup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hmm/semiSup.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,223 @@ +'''Exploring the claim that a small dictionary can seed +an otherwise unsupervised HMM to learn a decent POS-tagger''' +import nltk, random, itertools +from nltk.corpus import brown +from nltk.tag.hmm import HiddenMarkovModelTagger, HiddenMarkovModelTrainer, logsumexp2 +from nltk.probability import FreqDist,ConditionalFreqDist +from nltk.probability import MLEProbDist, RandomProbDist, DictionaryConditionalProbDist + +def totLogProb(self,sequences): + N = len(self._states) + M = len(self._symbols) + logProb = 0 + for sequence in sequences: + T = len(sequence) + # compute forward and backward probabilities + alpha = self._forward_probability(sequence) + beta = self._backward_probability(sequence) + # find the log probability of the sequence + logProb += logsumexp2(alpha[T-1]) + return logProb + +HiddenMarkovModelTagger.totLogProb=totLogProb + +trainTagsPercent=1.0 +trainHMMPercent=0.9 +knownWordsPercent=1.0 + +SST=SSW='~~' +EST=ESW='~~' +SS=[(SSW,SST)] +ES=[(ESW,EST)] +TAGSETS={ + 'univ': + [u'ADJ', u'ADP', u'ADV', u'CONJ', u'DET', u'NOUN', u'NUM', + u'PRON', u'PRT', u'VERB', u'X', u'.',SST,EST], + 'brown': + [u"ABL", u"ABN", u"ABX", u"AP", u"AP$", u"AP+AP", u"AT", u"BE", + u"BED", u"BED*", u"BEDZ", u"BEDZ*", u"BEG", u"BEM", u"BEM*", + u"BEN", u"BER", u"BER*", u"BEZ", u"BEZ*", u"CC", u"CD", + u"CD$", u"CS", u"DO", u"DO*", u"DO+PPSS", u"DOD", u"DOD*", + u"DOZ", u"DOZ*", u"DT", u"DT$", u"DT+BEZ", u"DT+MD", u"DTI", + u"DTS", u"DTS+BEZ", u"DTX", u"EX", u"EX+BEZ", u"EX+HVD", u"EX+HVZ", + u"EX+MD", u"FW-*", u"FW-AT", u"FW-AT+NN", u"FW-AT+NP", u"FW-BE", u"FW-BER", + u"FW-BEZ", u"FW-CC", u"FW-CD", u"FW-CS", u"FW-DT", u"FW-DT+BEZ", u"FW-DTS", + u"FW-HV", u"FW-IN", u"FW-IN+AT", u"FW-IN+NN", u"FW-IN+NP", u"FW-JJ", + u"FW-JJR", u"FW-JJT", u"FW-NN", u"FW-NN$", u"FW-NNS", u"FW-NP", u"FW-NPS", + u"FW-NR", u"FW-OD", u"FW-PN", u"FW-PP$", u"FW-PPL", u"FW-PPL+VBZ", + u"FW-PPO", u"FW-PPO+IN", u"FW-PPS", u"FW-PPSS", u"FW-PPSS+HV", u"FW-QL", + u"FW-RB", u"FW-RB+CC", u"FW-TO+VB", u"FW-UH", u"FW-VB", u"FW-VBD", + u"FW-VBG", u"FW-VBN", u"FW-VBZ", u"FW-WDT", u"FW-WPO", u"FW-WPS", u"HV", + u"HV*", u"HV+TO", u"HVD", u"HVD*", u"HVG", u"HVN", u"HVZ", u"HVZ*", u"IN", + u"IN+IN", u"IN+PPO", u"JJ", u"JJ$", u"JJ+JJ", u"JJR", u"JJR+CS", u"JJS", + u"JJT", u"MD", u"MD*", u"MD+HV", u"MD+PPSS", u"MD+TO", u"NN", u"NN$", + u"NN+BEZ", u"NN+HVD", u"NN+HVZ", u"NN+IN", u"NN+MD", u"NN+NN", u"NNS", + u"NNS$", u"NNS+MD", u"NP", u"NP$", u"NP+BEZ", u"NP+HVZ", u"NP+MD", + u"NPS", u"NPS$", u"NR", u"NR$", u"NR+MD", u"NRS", u"OD", + u"PN", u"PN$", u"PN+BEZ", u"PN+HVD", u"PN+HVZ", u"PN+MD", u"PP$", + u"PP$$", u"PPL", u"PPLS", u"PPO", u"PPS", u"PPS+BEZ", u"PPS+HVD", + u"PPS+HVZ", u"PPS+MD", u"PPSS", u"PPSS+BEM", u"PPSS+BER", u"PPSS+BEZ", + u"PPSS+BEZ*", u"PPSS+HV", u"PPSS+HVD", u"PPSS+MD", u"PPSS+VB", u"QL", + u"QLP", u"RB", u"RB$", u"RB+BEZ", u"RB+CS", u"RBR", u"RBR+CS", u"RBT", + u"RN", u"RP", u"RP+IN", u"TO", u"TO+VB", u"UH", u"VB", u"VB+AT", + u"VB+IN", u"VB+JJ", u"VB+PPO", u"VB+RP", u"VB+TO", u"VB+VB", u"VBD", + u"VBG", u"VBG+TO", u"VBN", u"VBN+TO", u"VBZ", u"WDT", u"WDT+BER", + u"WDT+BER+PP", u"WDT+BEZ", u"WDT+DO+PPS", u"WDT+DOD", u"WDT+HVZ", u"WP$", + u"WPO", u"WPS", u"WPS+BEZ", u"WPS+HVD", u"WPS+HVZ", u"WPS+MD", u"WQL", + u"WRB", u"WRB+BER", u"WRB+BEZ", u"WRB+DO", u"WRB+DOD", u"WRB+DOD*", + u"WRB+DOZ", u"WRB+IN", u"WRB+MD", + u"(", u")", u"*", u",", u"--", u".", u":"], + 'upenn': + [u"CC", u"CD", u"DT", u"EX", u"FW", u"IN", u"JJ", u"JJR", u"JJS", u"LS", + u"MD", u"NN", u"NNP", u"NNPS", u"NNS", u"PDT", u"POS", u"PRP", u"PRP$", + u"RB", u"RBR", u"RBS", u"RP", u"SYM", u"TO", u"UH", u"VB", u"VBD", u"VBG", + u"VBN", u"VBP", u"VBZ", u"WDT", u"WP", u"WP$", u"WRB", + u"``", u"$", u"''", u"(", u")", u",", u"--", u".", u":"]} + +TAGSETS['universal']=TAGSETS['univ'] +TAGSETS['penn']=TAGSETS['upenn'] + +def setup(cat='news',tagset='brown',corpus=brown): + return ([list(itertools.chain(iter(SS), + ((word.lower(),tag) for (word,tag) in s) + ,iter(ES))) + for s in corpus.tagged_sents(categories=cat,tagset=tagset)], + list(itertools.chain(iter(SS), iter(ES), + ((word.lower(),tag) for (word,tag) in + corpus.tagged_words(categories=cat,tagset=tagset)))), + TAGSETS[tagset]) + +def notCurrent(s,missList): + global i,n,done + if done or (missList[i] is not s): + return True + else: + i+=1 + if i==n: + done=True + return False + +def splitData(words,wordPercent,sentences,sentPercent): + global i,n, done + trainWords=random.sample(words,int(wordPercent*len(words))) + # random.sample(sentences,int(sentPercent*len(sentences))) + trainSents=[s for s in sentences if random.random()','D','N','V',''] +symbols=['~~','the','sheep','run','~~'] +sents=[[('~~','~~'),('the','D'),('sheep','N'),('run','V'),('~~','~~')], + [('~~','~~'),('sheep','N'),('run','V'),('the','D'),('sheep','N'),('~~','~~')], + [('~~','~~'),('run','V'),('the','D'),('sheep','N'),('~~','~~')]] + +taglists=[('',[('~~',1),('the',0),('sheep',0),('run',0),('~~',0)]), + ('D',[('the',1),('sheep',0),('run',0),('~~',0),('~~',0)]), + ('N',[('the',0),('sheep',.5),('run',.5),('~~',0),('~~',0)]), + ('V',[('the',0),('sheep',.5),('run',.5),('~~',0),('~~',0)]), + ('',[('~~',0),('the',0),('sheep',0),('run',0),('~~',1)])] + +tagdict=dict((k,MLEProbDist(FreqDist(dict(v)))) for k,v in taglists) + +priors = MLEProbDist(FreqDist({'~~':1, + 'D':0, + 'N':0, + 'V':0, + '~~':0})) + +transitions = DictionaryConditionalProbDist( + dict((state, RandomProbDist(tagset)) + for state in tagset)) + +outputs = DictionaryConditionalProbDist(tagdict) + + +for tag in tagset: + cp=outputs[tag] + print tag,sum(cp.prob(s) for s in symbols) + +model = HiddenMarkovModelTagger(symbols, tagset, + transitions, outputs, priors) + +for tag in tagset: + cp=model._outputs[tag] + print tag,sum(cp.prob(s) for s in symbols) + +nm=HiddenMarkovModelTrainer(states=tagset,symbols=symbols) + +# Note that contrary to naive reading of the documentation, +# train_unsupervised expects a sequence of sequences of word/tag pairs, +# it just ignores the tags +nnm=nm.train_unsupervised(sents,model=model,max_iterations=10,updateOutputs=False) + +for tag in tagset: + if tag=='': + break + cp=nnm._transitions[tag] + print((" "+4*"%6s")%tuple(tagset[1:])) + print(("%3s: "+4*"%6.3f")%tuple([tag]+[cp.prob(s) for s in tagset[1:]])) + +for tag in tagset: + cp=nnm._outputs[tag] + print((" "+5*"%6s")%tuple(symbols)) + x=[cp.prob(s) for s in symbols] + print(("%3s: "+5*"%6.3f"+"%11.4e")%tuple([tag]+x+[sum(x)])) + +print nnm.evaluate(sents) diff -r 0a3abe59e364 -r e07789816ca5 mailer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mailer.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,415 @@ +#!/usr/bin/python +'''Attempt at flexible mailout functionality +Usage: mailer.py [-n] [-s] [-C cc string] [-c COLSPEC[,COLSPEC]*] [-B bcc string] [-b COLSPEC[,COLSPEC]*] [-S col[,col]*] [-a COLSPEC[,COLSPEC]*] [-p COLPAT]* -SA file[,file]* COLSPEC[,COLSPEC]* subject {addr-file|-} body-file + +Sends the body as a message from me with subject to destinations per +lines in the addr-file selected by COLSPECs (to:) or -c/-b COLSPECs (Cc:/Bcc:) + +-n for dry run, prints to stdout +-c for Cc column(s) +-C for static Cc +-b for Bcc columns(s) +-B for static Bcc +-a for attachment file column(s) +-A for attachment file pattern column(s) +-SA for static attachment files +-u Use unicode for attachments +-s for substitute into body +-S for columns to substitute as such +-p for augmentation pattern for a column + +COLSPEC is of the form a[:n[:f[:g]]] selects from addr-file, which must be tsv + a gives the column for an email address + n (optional) gives column for a name + f gives format for the name: FS, SF or S.F for + forenames surname (fornames space separated) + surname forenames (space separated) + surname, forenames (space separated) + default is FS + _ will be replaced by space in surnames + g gives column for gender (for pronouns), m or f +COLPAT takes the form i:template, where i selects an address column +and template is a string containing exactly 1 "%s", which is replaced with +the column value to give the string which will be used for COLSPEC +references to that column, e.g. 1:S%s@sms.ed.ac.uk +if column 1 contains bare student numbers + +-s enables body substitution. body may contain + %(fi)s first forename of column i + %(si)s surname + %(fsi)s all forenames + %(i)s the undivided original and/or -S col value + if there is a supplied gender + %(pni)s 'he'/'she' + %(pai)s 'him'/'her' + %(pgi)s 'his/her' + +All column indices are 1-origin, as for cut''' + +import smtplib, sys, re, os.path, codecs +from email.mime.text import MIMEText + +addrPat=re.compile("<([^>]*)>") + +def usage(hint=None): + if hint is None: + print __doc__ + exit() + else: + print >>sys.stderr,"Trouble with your commandline at %s\n %s"%(hint, + __doc__) + exit(1) + +def parseCols(specs,where): + return [Column(s,where) for s in specs.split(',')] + +def parsePat(spec): + (c,t)=spec.split(':') + c=int(c) + found=False + for colTab in (ccCols,bccCols,toCols,attCols): + if c in colTab: + colTab[c].addTemplate(t) + found=True + if not found: + print >>sys.stderr, "Warning, template supplied for column %s, but no use of the column found!"%c + +def addrList(addrFields,cols,att=False): + global someExpand + if att and someExpand: + # There were some file patterns + return itertools.chain(*(c.fullAddr(addrFields,True) for c in cols.values())) + else: + return [c.fullAddr(addrFields) for c in cols.values()] + +def addrLine(hdr,addrFields,cols): + return "%s: %s"%(hdr,", ".join(addrList(addrFields,cols))) + +def subDict(addrFields): + res={} + for c in names.values(): + c.subDo(addrFields,res) + for c in subs.values(): + if c not in names: + c.subDo(addrFields,res) + return res + +bccCols={} +ccCols={} +attCols={} +toCols={} +names={} +subs={} +CC=[] +BCC=[] +rawCols={} + +class Column: + _expand=False + def __init__(self,spec,where): + global names, subs + parts=spec.split(':') + if (len(parts)<1 or len(parts)>4): + print >>sys.stderr, "col spec. must have 1--4 :-separated parts: %s"%parts + usage('colspec') + self.a=int(parts[0]) + if len(parts)>1: + self.n=int(parts[1]) + if len(parts)>2: + self.f=parts[2] + else: + self.f='FS' + if len(parts)>3: + self.g=int(parts[3]) + else: + self.g=None + else: + self.n=None + if self.a<=0: + print >>sys.stderr, "addr column index %s not allowed -- 1-origin indexing"%self.a + exit(2) + if self.a in where: + print >>sys.stderr, "duplicate column %s"%self.a + exit(2) + if self.n is not None: + if self.n<=0: + print >>sys.stderr, "name column index %s not allowed -- 1-origin indexing"%self.n + exit(3) + if self.n in where: + print >>sys.stderr, "can't use column %s as both name and address"%self.n + exit(3) + if self.n in names: + print >>sys.stderr, "attempt to redefine %s from \"%s\" to \"%s\""%(self.n,names[self.n],self) + exit(3) + if self.f not in ('FS','SF','S.F'): + print >>sys.stderr, "name format %s not recognised"%self.f + exit(4) + where[self.a]=self + if self.n is not None: + if isinstance(self,RawColumn): + subs[self.n]=self + else: + names[self.n]=self + + def __str__(self): + if self.n is None: + return str(self.a) + else: + return "%s:%s"%(self.a,self.n) + + def __repr__(self): + return str(self) + + def addTemplate(self,template): + try: + print >>sys.stderr,"Attempt to overwrite existing template \"%s\" for %s with \"%s\""%(self.template, + self.a, + template) + except AttributeError: + self.template=template + + def name(self): + return self.n + + def expAddr(self,fields): + addr=fields[self.a-1] + try: + return self.template%addr + except AttributeError: + return addr + + def fullAddr(self,fields,att=False): + global someExpand + if self.n is None: + res=self.expAddr(fields) + if att and someExpand: + if self._expand: + return glob.iglob(res) + else: + return [res] + else: + return res + else: + return '"%s" <%s>'%(fields[self.n-1].replace('_',' '),self.expAddr(fields)) + + def subDo(self,addrFields,dict): + f=addrFields[self.n-1] + dict[str(self.n)]=f + nparts=f.split(' ') + if self.f=='FS': + sur=nparts.pop() + elif self.f=='SF': + sur=nparts.pop(0) + elif self.f=='S.F': + sur=nparts.pop(0)[:-1] + fores=nparts + dict['fs%s'%self.n]=' '.join(fores) + dict['f%s'%self.n]=fores[0] + dict['s%s'%self.n]=sur.replace('_',' ') + if self.g is not None: + gg=addrFields[self.g-1] + if gg=='m': + dict['pn%s'%self.n]='he' + dict['pa%s'%self.n]='him' + dict['pg%s'%self.n]='his' + elif gg=='f': + dict['pn%s'%self.n]='she' + dict['pa%s'%self.n]='her' + dict['pg%s'%self.n]='her' + else: + print >>sys.stderr,"Warning, unrecognised gender in column %s: %s"%(self.n,gg) + + def setExpand(self): + self._expand=True + +class RawColumn(Column): + '''Not for person names, just raw text''' + + def subDo(self,addrFields,dict): + f=addrFields[self.n-1] + dict[str(self.n)]=f + +def doAtt(msg,att,codec): + (mt,enc)=mimetypes.guess_type(att) + (tp,subtp)=mt.split('/',2) + if tp=='text': + attf=codecs.open(att,'r',codec) + atm=MIMEText(attf.read(),subtp,codec) + elif tp=='application': + from email.mime.application import MIMEApplication + attf=open(att,'r') + atm=MIMEApplication(attf.read(),subtp) + else: + print >>sys.stderr, "Help: Media type %s (for attachment %s) not supported"%(mt,att) + exit(5) + atm.add_header('Content-Disposition','attachment', + filename=os.path.basename(att)) + msg.attach(atm) + +dryrun=False +sys.argv.pop(0) +doSub=False +pats=[] +someExpand=False +codec='iso-8859-1' +staticAtts=[] +while sys.argv: + if sys.argv[0]=='-n': + dryrun=True + sys.argv.pop(0) + elif sys.argv[0]=='-c' and ccCols=={}: + sys.argv.pop(0) + if sys.argv: + parseCols(sys.argv.pop(0),ccCols) + else: + usage('cc') + elif sys.argv[0]=='-C' and CC==[]: + sys.argv.pop(0) + if sys.argv: + CC=sys.argv.pop(0).split(',') + else: + usage('CC') + elif sys.argv[0]=='-b' and bccCols=={}: + sys.argv.pop(0) + if sys.argv: + parseCols(sys.argv.pop(0),bccCols) + else: + usage('bcc') + elif sys.argv[0]=='-B' and BCC==[]: + sys.argv.pop(0) + if sys.argv: + BCC=sys.argv.pop(0).split(',') + else: + usage('BCC') + elif sys.argv[0] in ('-a','-A','-SA'): # and attCols=={} + expand=sys.argv[0]=='-A' + static=sys.argv[0]=='-SA' + sys.argv.pop(0) + if sys.argv: + if static: + staticAtts=sys.argv.pop(0).split(',') + else: + pc=parseCols(sys.argv.pop(0),attCols) + if expand: + import itertools, glob + someExpand=True + for c in pc: + c.setExpand() + from email.mime.multipart import MIMEMultipart + import mimetypes + else: + usage('attachment') + elif sys.argv[0]=='-u': + sys.argv.pop(0) + codec='utf-8' + elif sys.argv[0]=='-s': + sys.argv.pop(0) + doSub=True + elif sys.argv[0]=='-S' and rawCols=={}: + sys.argv.pop(0) + if sys.argv: + for c in sys.argv.pop(0).split(','): + RawColumn("%s:%s"%(c,c),rawCols) + else: + usage('raw subs') + elif sys.argv[0]=='-p': + sys.argv.pop(0) + if sys.argv: + pats.append(sys.argv.pop(0)) + else: + usage('pat') + elif sys.argv[0][0]=='-': + print sys.argv + usage() + else: + break + +if sys.argv: + parseCols(sys.argv.pop(0),toCols) +else: + usage('to') + +pats=[parsePat(p) for p in pats] + +if sys.argv: + subj=sys.argv.pop(0) +else: + usage('subj') + +if sys.argv: + af=sys.argv.pop(0) + if af=='-': + addrFile=sys.stdin + else: + try: + addrFile=open(af,'r') + except: + usage('addr: %s'%sys.exc_value) +else: + usage('addr') + +if sys.argv: + bf=sys.argv.pop(0) + try: + bodyFile=open(bf,'r') + except: + usage('body: %s'%sys.exc_value) +else: + usage('body') + +try: + sig=open("/home/ht/.signature","r") + signature=sig.read().rstrip() +except: + signature=None + +CS=', ' +body=bodyFile.read().rstrip() +if not dryrun: + mailer=smtplib.SMTP() + mailer.connect() +for l in addrFile: + addrFields=l.rstrip().split('\t') + if doSub: + bodyPlus=body%subDict(addrFields) + else: + bodyPlus=body + if signature is not None: + bodyPlus+="\n--\n" + bodyPlus+=signature + if attCols or staticAtts: + msg=MIMEMultipart() + msg.attach(MIMEText(bodyPlus)) + else: + msg=MIMEText(bodyPlus) + #to=addrLine("To",addrFields,toCols) + to=addrList(addrFields,toCols) + #msg=to + #recips=addrPat.findall(to) + msg['To']=CS.join(to) + recips=[]+list(to) + cc=CC + if ccCols: + cc+=addrList(addrFields,ccCols) + if cc!=[]: + msg["Cc"]=CS.join(cc) + recips+=list(cc) + bcc=BCC + if bccCols: + bcc+=addrList(addrFields,bccCols) + if bcc!=[]: + msg["Bcc"]=CS.join(bcc) + recips+=list(bcc) + msg["Subject"]=subj + for att in staticAtts: + doAtt(msg,att,codec) + if attCols: + for att in addrList(addrFields,attCols,True): + doAtt(msg,att,codec) + if dryrun: + print recips + print msg.as_string() + exit() + print "mailing to %s"%recips + mailer.sendmail("ht@inf.ed.ac.uk",recips,msg.as_string()) +mailer.quit() diff -r 0a3abe59e364 -r e07789816ca5 pdfComments.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pdfComments.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,30 @@ +import PyPDF2 as pyPdf, sys + +if sys.argv[1]=='-v': + verbose=True + sys.argv.pop(1) +else: + verbose=False + +f = open(sys.argv[1],'rb') + +pdf = pyPdf.PdfFileReader(f) +pgs = pdf.getNumPages() +key = '/Annots' +uri = '/URI' +ank = '/A' + +#print pdf.getNamedDestinations() + +for pg in range(pgs): + print '#',pg + p = pdf.getPage(pg) + o = p.getObject() + #print o.keys() + if o.has_key(key): + ann = o[key] + #print key,ann + for a in ann: + u = a.getObject() + if '/Contents' in u: + print "%s: %s"%(u['/Subtype'],u['/Contents']) diff -r 0a3abe59e364 -r e07789816ca5 req_dep.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/req_dep.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,48 @@ +#!/usr/bin/python2.7 +import sys +kids={} +parents={} +known=set(()) +l=sys.stdin.readline() +while l: + ff=l.strip().split() + if len(ff)!=0: + d=ff[0] + sys.stdin.readline() + pp=sys.stdin.readline().strip().replace(',','').split()[2:] + parents[d]=pp + known.add(d) + for p in pp: + known.add(p) + try: + c=kids[p] + c.add(d) + except KeyError: + c=set([d]) + kids[p]=c + l=sys.stdin.readline() +roots={} +still=set(()) +for k in known: + if k in parents: + still.add(k) + else: + roots[k]=set([k]) +#print len(parents),len(kids),len(roots),len(still),roots +print roots.keys() +while len(still)>0: + #print len(still),len(roots) + pend=still + still=set([]) + while len(pend)>0: + x=pend.pop() + #print x,len(pend) + for p in parents[x]: + try: + r=roots[p] + roots[x]=roots.get(x,set(())).union(r) + except KeyError: + if p in parents: + still.add(x) +for x,r in sorted(roots.items()): + print x,r diff -r 0a3abe59e364 -r e07789816ca5 simpleCloud.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/simpleCloud.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,31 @@ +#!/usr/bin/env python +""" +Minimal Example +=============== +Generating a square wordcloud from a text file using default arguments. +""" + +import sys +from os import path +from wordcloud import WordCloud +from matplotlib import use +use('pdf') + +# Read the whole text. +text = open(sys.argv[1]).read() + +# Generate a word cloud image +wordcloud = WordCloud().generate(text) + +# Display the generated image: +# the matplotlib way: +import matplotlib.pyplot as plt +plt.imshow(wordcloud, interpolation='bilinear') +plt.axis("off") + +# lower max_font_size +wordcloud = WordCloud(max_font_size=40).generate(text) +plt.figure() +plt.imshow(wordcloud, interpolation="bilinear") +plt.axis("off") +plt.savefig('/tmp/cloud.pdf') diff -r 0a3abe59e364 -r e07789816ca5 threaDemo.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/threaDemo.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,39 @@ +#!/usr/bin/python3 +import threading +from queue import Queue +import time + +# lock to serialize console output +lock = threading.Lock() + +def do_work(item): + time.sleep(.1) # pretend to do some lengthy work. + # Make sure the whole print completes or threads can mix up output in one line. + with lock: + print(threading.current_thread().name,item) + +# The worker thread pulls an item from the queue and processes it +def worker(): + while True: + item = q.get() + do_work(item) + q.task_done() + +# Create the queue and thread pool. +q = Queue() +for i in range(4): + t = threading.Thread(target=worker) + t.daemon = True # thread dies when main thread (only non-daemon thread) exits. + t.start() + +# stuff work items on the queue (in this case, just a number). +start = time.perf_counter() +for item in range(20): + q.put(item) + +q.join() # block until all tasks are done + +# "Work" took .1 seconds per task. +# 20 tasks serially would be 2 seconds. +# With 4 threads should be about .5 seconds (contrived because non-CPU intensive "work") +print('time:',time.perf_counter() - start) diff -r 0a3abe59e364 -r e07789816ca5 trip2xml.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trip2xml.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,93 @@ +#!/usr/bin/python +'''Convert a screen-scrape from Check My Trip to diary-style XML''' +import re, sys, datetime +year="2016" +leg=re.compile(year+"TO") +duration=re.compile("([0-9][0-9]) ([A-Za-z]*) %s .*duration"%year) +plusOne=re.compile(" [+]1 day$") +flight=re.compile("\\\\| *([^ ]*) *confirmed") +dep=re.compile("^Dep: (.*)") +arr=re.compile("^Arr: (.*)") +CS=", " + +cleg=None +class Leg: + def __init__(self): + self.flights=[] + self.p1=False + self.dd=None + + def addFlight(self,flight): + self.flights.append(flight) + + def setDD(self,m): + print 'sdd' + td="%s-%s-%s"%(year,m.group(2),m.group(1)) + self.ddate=datetime.datetime.strptime(td,"%Y-%B-%d").date() + self.dd=self.ddate.isoformat() + if self.p1: + self.ads=" -- %s"%(self.ddate+datetime.timedelta(1)).isoformat() + else: + self.ads="" + + def setPlusOne(self): + print 'p1' + self.p1=True + + def __str__(self): + fa=self.flights[0].fa + ta=self.flights[-1].ta + if len(self.flights)>1: + va=" via "+CS.join([f.fa for f in self.flights[1:]]) + else: + va="" + ff=CS.join([f.fn for f in self.flights]) + tt=CS.join(["%s--%s"%(f.dt,f.at) for f in self.flights]) + return "%s->%s%s; %s; %s"%(self.dd,self.ads,fa,ta,va,ff,tt) + +class Flight(): + def __init__(self,fn): + self.fn=fn # flight number + + def setDep(self,ds): + dss=ds.split('|') + self.dt=dss[0][:2]+dss[0][3:5] + self.fa=dss[1].split()[-1] + + def setArr(self,ax): + ass=ax.split('|') + self.at=ass[0][:2]+ass[0][3:5] + if (ass[0].find('(+1 day)')==6): + self.at+="+1" + self.ta=ass[1].split()[-1] + +print "" +for l in sys.stdin: + if leg.search(l): + if cleg is not None: + print cleg + cleg=Leg() + m = flight.search(l) + if m: + fl=Flight(m.group(1)) + cleg.addFlight(fl) + continue + if ((cleg is not None) and + (cleg.dd is None)): + m=duration.search(l) + if m: + cleg.setDD(m) + continue + m=plusOne.search(l) + if m: + cleg.setPlusOne() + continue + m=dep.search(l) + if m: + fl.setDep(m.group(1)) + continue + m=arr.search(l) + if m: + fl.setArr(m.group(1)) +print cleg +print "" diff -r 0a3abe59e364 -r e07789816ca5 trivSelenium.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trivSelenium.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,41 @@ +# To install the Python client library: +# pip install -U selenium + +# Import the Selenium 2 namespace (aka "webdriver") +from selenium import webdriver + +# iPhone +#driver = webdriver.Remote(browser_name="iphone", command_executor='http://172.24.101.36:3001/hub') + +# Android +#driver = webdriver.Remote(browser_name="android", command_executor='http://127.0.0.1:8080/hub') + +# Google Chrome +#driver = webdriver.Chrome() + +# Firefox +driver = webdriver.Firefox() + +# ------------------------------ +# The actual test scenario: Test the codepad.org code execution service. + +# Go to codepad.org +driver.get('http://codepad.org') + +# Select the Python language option +python_link = driver.find_elements_by_xpath("//input[@name='lang' and @value='Python']")[0] +python_link.click() + +# Enter some text! +text_area = driver.find_element_by_id('textarea') +text_area.send_keys("print 'Hello,' + ' World!'") + +# Submit the form! +submit_button = driver.find_element_by_name('submit') +submit_button.click() + +# Make this an actual test. Isn't Python beautiful? +assert "Hello, World!" in driver.get_page_source() + +# Close the browser! +driver.quit() diff -r 0a3abe59e364 -r e07789816ca5 wsgi_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wsgi_test.py Mon Mar 09 16:48:09 2020 +0000 @@ -0,0 +1,25 @@ +from wsgiref.simple_server import make_server + +# Every WSGI application must have an application object - a callable +# object that accepts two arguments. For that purpose, we're going to +# use a function (note that you're not limited to a function, you can +# use a class for example). The first argument passed to the function +# is a dictionary containing CGI-style envrironment variables and the +# second variable is the callable object (see PEP 333). +n = 0 +def hello_world_app(environ, start_response): + global n + status = '200 OK' # HTTP Status + headers = [('Content-type', 'text/plain')] # HTTP Headers + start_response(status, headers) + + # The returned object is going to be printed + n=n+1 + return ["Hello World %s"%n] + +httpd = make_server('', 8000, hello_world_app) +print "Serving on port 8000..." + +# Serve until process is killed +httpd.serve_forever() +f