# fetchdblpbibtex.py, Version 0.2
# Thomas Kesselheim, no rights reserved, use at own risk
# usage: "fetchdblpbibtex.py document.aux" generates dblp.bib for all references starting with DBLP:
# alternative usage: "fetchdblpbibtex.py document.aux --append" keeps dblp.bib and adds all missing references starting with DBLP:

import sys
import re
import urllib2

# Base URL of the DBLP BibTeX export; the key and '.bib' are appended to it.
DBLP_BIB_URL = 'http://dblp.uni-trier.de/rec/bib1/'
# Output file, always written to the current working directory.
BIB_FILENAME = 'dblp.bib'
# Prefix that marks a citation key as a DBLP key.
DBLP_PREFIX = 'DBLP:'


def _cited_dblp_keys(auxtext):
    """Return the DBLP keys cited in the text of a LaTeX .aux file.

    Keys are returned without the 'DBLP:' prefix, e.g.
    'conf/sigecom/DuttingKT15', in order of appearance (duplicates are
    kept; the caller de-duplicates).

    A single \\citation{...} command may carry several comma-separated
    keys (BibTeX accepts this form), so every argument is split on commas
    and each key is checked for the prefix individually.
    """
    keys = []
    for arguments in re.findall(r'\\citation\{(.*?)\}', auxtext):
        for citekey in arguments.split(','):
            citekey = citekey.strip()
            if citekey.startswith(DBLP_PREFIX):
                keys.append(citekey[len(DBLP_PREFIX):])
    return keys


def _existing_dblp_keys(bibtext):
    """Return the set of DBLP keys (without prefix) already in a .bib text."""
    return set(re.findall(r'@.*?{DBLP:(.*?),', bibtext))


def main(argv=None):
    """Fetch BibTeX entries from DBLP for all DBLP: citations of an .aux file.

    argv -- argument list in the layout of sys.argv (program name first);
            defaults to sys.argv. argv[1] is the .aux file. If '--append'
            occurs anywhere in argv, dblp.bib is kept and only entries
            that are not yet in it are fetched and appended; otherwise
            dblp.bib is overwritten.

    Exits with the message 'no file name given' if no .aux file is given.
    Errors from opening files or from the HTTP request are not caught.
    """
    import errno

    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit('no file name given')

    with open(argv[1], 'r') as auxfile:
        dblpkeys = _cited_dblp_keys(auxfile.read())

    seen = set()  # keys already present or already fetched in this run

    if '--append' in argv:
        try:
            with open(BIB_FILENAME, 'r') as bibfile:
                seen.update(_existing_dblp_keys(bibfile.read()))
        except IOError as err:
            # A missing dblp.bib simply means there is nothing to keep;
            # any other I/O problem is a real error.
            if err.errno != errno.ENOENT:
                raise
        mode = 'a'
    else:
        mode = 'w'

    with open(BIB_FILENAME, mode) as bibfile:
        for dblpkey in dblpkeys:
            if dblpkey in seen:
                continue
            seen.add(dblpkey)
            response = urllib2.urlopen(DBLP_BIB_URL + dblpkey + '.bib')
            try:
                bibfile.write(response.read())
            finally:
                response.close()


if __name__ == '__main__':
    main()