# Provenance: Mercurial "annotate" view of trip2xml.py @ 2:e07789816ca5
# Changeset description: adding more python files from lib/python on origen
# Author: Henry Thompson <ht@markup.co.uk>
# Date: Mon, 09 Mar 2020 16:48:09 +0000
# Parents: (none listed)
# Children: 2d7c91f89f6b
#!/usr/bin/python
'''Convert a screen-scrape from Check My Trip to diary-style XML'''
import re
import sys
import datetime

# Year assumed for every date in the scrape.  It is baked into the `leg` and
# `duration` patterns below, so input from another year will not match.
year = "2016"

# Marks the start of a new leg: the year immediately followed by "TO".
leg = re.compile(year + "TO")

# "<two-digit day> <month name> <year> ... duration".
# group(1) is the day, group(2) the month name.
duration = re.compile("([0-9][0-9]) ([A-Za-z]*) %s .*duration" % year)

# A line that ends in " +1 day".
plusOne = re.compile(r" [+]1 day$")

# NOTE(review): as written this is an alternation: a literal backslash, OR
# optional spaces + a run of non-space characters (group 1) + optional spaces
# + "confirmed".  It looks like a literal "|" (r"\|") was intended as a prefix
# instead -- confirm against real input before changing; behaviour is kept
# exactly as found.  When the backslash branch matches, group(1) is None.
flight = re.compile(r"\\| *([^ ]*) *confirmed")

# Departure / arrival detail lines; group(1) is everything after the label.
dep = re.compile(r"^Dep: (.*)")
arr = re.compile(r"^Arr: (.*)")

# Separator used when joining several values into one output field.
CS = ", "

# The current Leg, or None while no leg has been started.
cleg = None
class Leg(object):
    """One leg of a trip: an ordered list of flights plus a departure date.

    Attributes:
        flights: flight objects in travel order.  Each must provide ``fn``,
            ``fa``, ``ta``, ``dt`` and ``at`` by the time the leg is
            rendered with ``str()``.
        p1: True once setPlusOne() has been called.
        ddate: departure date as a ``datetime.date``, or None until setDD().
        dd: ISO form of ``ddate``, or None until setDD().
        ads: arrival-date suffix for the item's ``term`` attribute:
            " -- <next day>" or "".
    """

    def __init__(self):
        self.flights = []
        self.p1 = False
        self.dd = None
        # Initialised here so that rendering a leg for which no date line was
        # ever seen does not fail with AttributeError on `ads`.
        self.ddate = None
        self.ads = ""

    def addFlight(self, flight):
        """Append *flight* as the next flight of this leg."""
        self.flights.append(flight)

    def setDD(self, m):
        """Set the departure date from regex match *m*.

        m.group(1) must be the day of the month and m.group(2) the full
        month name (parsed with ``%d`` and ``%B``); the year comes from the
        module-level ``year``.  The arrival suffix ``ads`` is fixed here
        from the current value of ``p1``, so setPlusOne() only affects it
        when called before this method.

        Raises:
            ValueError: if the day/month text does not form a valid date.
        """
        td = "%s-%s-%s" % (year, m.group(2), m.group(1))
        self.ddate = datetime.datetime.strptime(td, "%Y-%B-%d").date()
        self.dd = self.ddate.isoformat()
        if self.p1:
            next_day = self.ddate + datetime.timedelta(1)
            self.ads = " -- %s" % next_day.isoformat()
        else:
            self.ads = ""

    def setPlusOne(self):
        """Record that this leg is flagged "+1 day"."""
        self.p1 = True

    def __str__(self):
        """Render the leg as a single ``<item>`` element.

        Layout: ``<item term='DATE[ -- NEXTDAY]'>FROM->TO[ via X, Y];
        flight numbers; dep--arr times</item>``.  The "via" list is the
        ``fa`` of every flight after the first.  ``term`` shows ``None``
        if setDD() was never called.

        Raises:
            IndexError: if the leg has no flights.
        """
        origin = self.flights[0].fa
        destination = self.flights[-1].ta
        if len(self.flights) > 1:
            via = " via " + CS.join(f.fa for f in self.flights[1:])
        else:
            via = ""
        numbers = CS.join(f.fn for f in self.flights)
        times = CS.join("%s--%s" % (f.dt, f.at) for f in self.flights)
        return "<item term='%s%s'>%s->%s%s; %s; %s</item>" % (
            self.dd, self.ads, origin, destination, via, numbers, times)
class Flight(object):
    """A single flight: number, departure/arrival times and end points.

    Attributes set by the methods below:
        fn: flight number (constructor).
        dt, fa: departure time as "HHMM" and departure place (setDep).
        at, ta: arrival time as "HHMM", with "+1" appended for a next-day
            arrival, and arrival place (setArr).
    """

    def __init__(self, fn):
        self.fn = fn  # flight number

    @staticmethod
    def _hhmm(field):
        """Return characters 0-1 and 3-4 of *field*, i.e. "HH:MM" -> "HHMM"."""
        return field[:2] + field[3:5]

    def setDep(self, ds):
        """Record departure details from *ds*, the text after "Dep: ".

        *ds* is split on "|": the first field must start with the time as
        "HH:MM"; the last whitespace-separated word of the second field is
        stored as the departure place (presumably an airport code -- verify
        against real input).

        Raises:
            IndexError: if there is no "|" or the second field is blank.
        """
        fields = ds.split('|')
        self.dt = self._hhmm(fields[0])
        self.fa = fields[1].split()[-1]

    def setArr(self, ax):
        """Record arrival details from *ax*, the text after "Arr: ".

        Same layout as for setDep().  In addition, when "(+1 day)" first
        occurs at offset 6 of the first field (directly after "HH:MM "),
        "+1" is appended to the stored arrival time.

        Raises:
            IndexError: if there is no "|" or the second field is blank.
        """
        fields = ax.split('|')
        self.at = self._hhmm(fields[0])
        if fields[0].find('(+1 day)') == 6:
            self.at += "+1"
        self.ta = fields[1].split()[-1]
# Driver: read the screen-scrape from stdin and write one <item> per leg,
# wrapped in a <list>, to stdout.
#
# Each input line is tried against the patterns in a fixed order (flight,
# leg date, "+1 day", Dep:, Arr:); the first one that matches consumes the
# line.  A leg-header match does not consume the line, so the same line can
# still supply one of the other items.
#
# print(...) is called with a single argument throughout so the output is the
# same under Python 2 and Python 3.

# Flight most recently started in the current leg; Dep:/Arr: lines apply to it.
fl = None

print("<list type='defn'>")
for line in sys.stdin:
    if leg.search(line):
        # New leg header: emit the leg collected so far and start a fresh one.
        if cleg is not None:
            print(cleg)
        cleg = Leg()
        fl = None
    if cleg is None:
        # Nothing can be recorded before the first leg header; skipping here
        # avoids calling methods on None for stray matches in preamble text.
        continue
    m = flight.search(line)
    if m:
        fl = Flight(m.group(1))
        cleg.addFlight(fl)
        continue
    if cleg.dd is None:
        # Only the first date line after the leg header sets the leg's date.
        m = duration.search(line)
        if m:
            cleg.setDD(m)
            continue
    m = plusOne.search(line)
    if m:
        cleg.setPlusOne()
        continue
    if fl is None:
        # Dep:/Arr: details need a flight to attach to.
        continue
    m = dep.search(line)
    if m:
        fl.setDep(m.group(1))
        continue
    m = arr.search(line)
    if m:
        fl.setArr(m.group(1))
# Emit the final leg, if any; with no legs at all the list is simply empty.
if cleg is not None:
    print(cleg)
print("</list>")