Limbo
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
bibconvert.py
1 ##
2 # @package bibconvert
3 # @brief convert bibtex format of references to jemdoc and latex formats
4 # @author Yibo Lin
5 # @date Dec 2016
6 #
7 #!/bin/python
8 
9 import sys
10 import re
11 import datetime
12 ## requires the bibtexparser package
13 import bibtexparser
14 
## @brief read bibtex files with bibtexparser
# @param filenames array of bibtex files
# @param commentPrefix characters that start a comment; matched literally, either
#        at the very beginning of a line or right after a blank/tab
# @return bibtex database produced by bibtexparser.loads
def read(filenames, commentPrefix):
    # One pattern covers both comment forms.  The prefix is escaped so that
    # regex metacharacters in it (e.g. '.' or '*') are taken literally.
    # '.' never matches the newline, so the line terminator survives.
    commentPattern = re.compile(r"(?:^|[ \t])" + re.escape(commentPrefix) + r".*?$")

    # collect the pieces and join once instead of growing a string per line
    pieces = []
    for filename in filenames:
        with open(filename) as inFile:
            for line in inFile:
                pieces.append(commentPattern.sub("", line))
                # extra newline, as before: keeps the last line of one file
                # from running into the first line of the next
                pieces.append("\n")

    return bibtexparser.loads("".join(pieces))
33 
## @brief extract date time from entry
# @param entry bibentry; 'year' is required, 'month' (full month name as
#        understood by strptime's %B) and 'day' are optional
# @return datetime built from the year and, when present, the month and day
def getDatetime(entry):
    date = entry['year']
    timeFormat = "%Y"
    if entry.get('month'):
        date += "," + entry['month']
        timeFormat += ",%B"
        # A day is only used together with a month: "year,day" cannot be
        # parsed by any of the formats and used to raise ValueError.
        if entry.get('day'):
            # a range such as "12--15" is represented by its first day
            date += "," + entry['day'].split('-', 1)[0]
            timeFormat += ",%d"
    return datetime.datetime.strptime(date, timeFormat)
47 
## @brief build the "address, date" text of an entry
# @param entry bibentry; 'address', 'month', 'day' and 'year' are all optional,
#        empty values count as missing
# @return text such as "Austin, TX, Dec 12-15, 2016"; empty string when no
#         field is set
def getAddressAndDate(entry):
    def field(key):
        # missing and empty fields are treated alike
        return entry[key] if key in entry and entry[key] else ""

    # month (abbreviated) and day form one segment, glued by a single blank
    monthAndDay = []
    if field('month'):
        monthAndDay.append(datetime.datetime.strptime(field('month'), "%B").strftime("%b"))
    if field('day'):
        monthAndDay.append(field('day').replace("--", "-"))

    # all remaining segments are separated by ", "
    segments = [field('address'), " ".join(monthAndDay), field('year')]
    return ", ".join(segment for segment in segments if segment)
66 
## @brief switch from [last name, first name] to [first name last name]
# @param author author list in bibtex form, i.e. names separated by the word "and"
# @return formatted author list: names joined by ", " with " and " before the
#         last one
def switchToFirstLastNameStyle(author):
    names = []
    # Split on the stand-alone word "and" only; a plain split('and') would
    # also cut names such as "Alexander" or "Sandra" into pieces.
    for oneAuthor in re.split(r"\s+and\s+", author.strip()):
        if ',' in oneAuthor:
            nameArray = oneAuthor.split(',')
            # only the two-part "last, first" form is supported; raised
            # explicitly so the check is not stripped under -O
            if len(nameArray) != 2:
                raise AssertionError("len(nameArray) = %d" % len(nameArray))
            oneAuthor = nameArray[1].strip() + ' ' + nameArray[0].strip()
        # names already in "first last" form are only trimmed
        names.append(oneAuthor.strip())

    if len(names) == 1:
        return names[0]
    return ", ".join(names[:-1]) + " and " + names[-1]
84 
## @brief print bibtex database with target format
# @param bibDB bibtex database; its 'entries' and 'strings' attributes are read
# @param highlightAuthors authors need to be highlighted
# @param suffix target format, 'web' or 'cv' (case-insensitive)
def printBibDB(bibDB, highlightAuthors, suffix):
    # Validate the target first, so a missing or unknown suffix is reported
    # before any sorting or printing.  Raised explicitly (not via assert) so
    # the check also holds under -O.
    target = suffix.lower() if suffix else ""
    if target not in ('web', 'cv'):
        raise AssertionError("unknown suffix = %s" % suffix)

    # differentiate journal and conference:
    # entries with a 'journal' field are journal papers, all others are
    # handled as conference papers (expected to use 'booktitle')
    journalEntries = [entry for entry in bibDB.entries if 'journal' in entry]
    conferenceEntries = [entry for entry in bibDB.entries if 'journal' not in entry]
    # sort by date, newest first
    journalEntries.sort(key=getDatetime, reverse=True)
    conferenceEntries.sort(key=getDatetime, reverse=True)
    stringMap = dict(bibDB.strings)

    # call kernel print functions
    # (print is called with one parenthesized argument, which behaves the
    # same on Python 2 and Python 3)
    if target == 'web':
        print("""# jemdoc: menu{MENU}{publications.html}
# jemdoc: addcss{yibolin_homepage/jemdoc.css}
# jemdoc: title{Yibo Lin's Homepage}

= Publications

""")
        printWeb(bibDB, stringMap, highlightAuthors, journalEntries, 'journal', 'journal')
        printWeb(bibDB, stringMap, highlightAuthors, conferenceEntries, 'conference', 'booktitle')
    else:
        print("""\\begin{rSection}{Publications}

""")
        printCV(bibDB, stringMap, highlightAuthors, journalEntries, 'journal', 'journal')
        printCV(bibDB, stringMap, highlightAuthors, conferenceEntries, 'conference', 'booktitle')
        print("""
\\end{rSection}

""")
129 
## @brief print in Jemdoc format
# @param bibDB bibtex database (not used here, kept for the common signature)
# @param stringMap strings defined in bibtex database, which will be used to replace some references
# @param highlightAuthors authors to be highlighted
# @param entries list of bibentry to be printed, numbered from len(entries) down to 1
# @param publishType type of publications; 'journal' selects the journal heading, anything else the conference heading
# @param booktitleKey the keyword of entries need to search in the stringMap
def printWeb(bibDB, stringMap, highlightAuthors, entries, publishType, booktitleKey):
    # print is called with one parenthesized argument, which behaves the
    # same on Python 2 and Python 3
    if publishType == 'journal':
        print("=== Journal Papers\n")
        prefix = "J"
    else:
        print("=== Conference Papers\n")
        prefix = "C"

    currentYear = ''
    for count, entry in zip(range(len(entries), 0, -1), entries):
        # start a new sub-heading whenever the year changes
        if not currentYear or currentYear.lower() != entry['year'].lower():
            currentYear = entry['year']
            print("==== %s\n" % currentYear)
        # switch from [last name, first name] to [first name last name]
        author = switchToFirstLastNameStyle(entry['author'])
        for highlightAuthor in highlightAuthors or ():  # highlight some authors
            author = author.replace(highlightAuthor, "*"+highlightAuthor+"*")
        title = entry['title'].replace("{", "").replace("}", "")
        # resolve the venue through the bibtex @string definitions if possible
        venueKey = entry[booktitleKey]
        booktitle = stringMap[venueKey] if venueKey in stringMap else venueKey
        publishlink = entry['publishlink'] if 'publishlink' in entry else ""
        annotate = entry['annotateweb'] if 'annotateweb' in entry else ""
        if publishlink:  # create link if publishlink is set
            title = "[" + publishlink + " " + title + "]"
        addressAndDate = getAddressAndDate(entry)
        # the brackets are backslash-escaped in the emitted jemdoc text
        print("""
- \\[%s%d\\] %s,
 "%s",
 %s, %s.
 %s
 """ % (prefix, count, author, title, booktitle, addressAndDate, annotate))
172 
## @brief print in Latex format
# @param bibDB bibtex database (not used here, kept for the common signature)
# @param stringMap strings defined in bibtex database, which will be used to replace some references
# @param highlightAuthors authors to be highlighted
# @param entries list of bibentry to be printed, numbered from len(entries) down to 1
# @param publishType type of publications; 'journal' selects the journal heading, anything else the conference heading
# @param booktitleKey the keyword of entries need to search in the stringMap
def printCV(bibDB, stringMap, highlightAuthors, entries, publishType, booktitleKey):
    # print is called with one parenthesized argument, which behaves the
    # same on Python 2 and Python 3
    if publishType == 'journal':
        print("""
\\textbf{Journal Papers}
 """)
        prefix = "J"
    else:
        print("""
\\textbf{Conference Papers}
 """)
        prefix = "C"
    print("""
\\begin{description}[font=\\normalfont]
%{{{
 """)

    for count, entry in zip(range(len(entries), 0, -1), entries):
        # switch from [last name, first name] to [first name last name]
        author = switchToFirstLastNameStyle(entry['author'])
        for highlightAuthor in highlightAuthors or ():  # highlight some authors
            author = author.replace(highlightAuthor, "\\textbf{"+highlightAuthor+"}")
        title = entry['title']
        # resolve the venue through the bibtex @string definitions if possible
        venueKey = entry[booktitleKey]
        booktitle = stringMap[venueKey] if venueKey in stringMap else venueKey
        publishlink = entry['publishlink'] if 'publishlink' in entry else ""
        annotate = entry['annotatecv'] if 'annotatecv' in entry else ""
        if publishlink:  # create link if publishlink is set
            title = "\\href{" + publishlink + "}{" + title + "}"
        addressAndDate = getAddressAndDate(entry)
        print("""
\\item[{[%s%d]}]{
 %s,
 ``%s'',
 %s, %s.
 %s
}
 """ % (prefix, count, author, title, booktitle, addressAndDate, annotate))

    print("""
%}}}
\\end{description}
 """)
229 
## @brief print help message (command line usage) to standard output
def printHelp():
    # print is called with one parenthesized argument, which behaves the
    # same on Python 2 and Python 3
    print("""
usage: python bibconvert.py --suffix suffix --highlight author1 [--highlight author2] --input 1.bib [--input 2.bib]
suffix can be 'web' or 'cv'
 'web': jemdoc format for personal webpage
 'cv': latex format for resume
""")
238 
if __name__ == "__main__":
    ## target format
    suffix = None
    ## list of authors for highlight
    highlightAuthors = []
    ## list of bibtex files
    filenames = []

    if len(sys.argv) < 3 or sys.argv[1] in ('--help', '-h'):
        printHelp()
        raise SystemExit
    # the command line is a sequence of "--option value" pairs
    for i in range(1, len(sys.argv), 2):
        option = sys.argv[i]
        if option not in ('--suffix', '--highlight', '--input'):
            # report instead of silently dropping the rest of the command line
            raise RuntimeError("unknown option %s" % option)
        if i+1 >= len(sys.argv):
            # trailing option without its value
            raise RuntimeError("option %s requires a value" % option)
        value = sys.argv[i+1]
        if option == '--suffix':
            if suffix:
                raise RuntimeError("only one suffix can be accepted")
            suffix = value
        elif option == '--highlight':
            highlightAuthors.append(value)
        else:
            filenames.append(value)

    if not suffix:
        # without a target format nothing can be printed
        raise RuntimeError("--suffix is required")

    ## bibtex database
    bibDB = read(filenames, "%")

    # write
    printBibDB(bibDB, highlightAuthors, suffix)
def printWeb
print in Jemdoc format
Definition: bibconvert.py:137
def switchToFirstLastNameStyle
switch from [last name, first name] to [first name last name]
Definition: bibconvert.py:70
def printCV
print in Latex format
Definition: bibconvert.py:180
def printHelp
print help message
Definition: bibconvert.py:231
def getAddressAndDate
extract address and date time from entry
Definition: bibconvert.py:51
def read
read bibtex files with bibtexparser
Definition: bibconvert.py:19
def printBibDB
print bibtex database with target format
Definition: bibconvert.py:89
def getDatetime
extract date time from entry
Definition: bibconvert.py:37