import requests
import os
import datetime
import time

def sdate_to_mtime(sdate, decalage):
	"""
	Convert a directory-listing timestamp to seconds since 1970-01-01.

	Two textual formats are recognised:

		29-Dec-2015 HH:MM
			- e.g. http://sprg.ssl.berkeley.edu/data/maven/data/sci/kp/insitu/sav/2017/01/

		2012-01-05 HH:MM
			- e.g. http://stereo.irap.omp.eu/CEF/MAG/ahead
			- e.g. https://sohowww.nascom.nasa.gov/solarsoft/stereo/gen/data/spice

	The string is parsed as a naive date (no time-zone conversion is done)
	and decalage hours are then added to the result; according to the
	original author's note this margin exists to work around time-zone
	problems.

	sdate    -- date string in one of the two formats above
	decalage -- offset, in hours, added to the computed value

	Returns the number of seconds as an int (the float is truncated).
	Raises ValueError (from strptime) when sdate matches neither format.
	"""

	DEBUG = 0

	if DEBUG:
		print(sdate)

	# The third character tells the formats apart: it is a digit in
	# "2012-01-05" and a dash or a letter in "29-Dec-2015".  The length
	# guard makes a too-short string fail in strptime with ValueError
	# instead of raising IndexError here.
	if len(sdate) > 2 and sdate[2].isdigit():
		epoch = datetime.datetime.strptime(sdate, '%Y-%m-%d %H:%M')	#	2012-01-05 HH:MM
	else:
		# NOTE: %b matches month abbreviations of the current locale
		epoch = datetime.datetime.strptime(sdate, '%d-%b-%Y %H:%M')	#	29-Dec-2015 HH:MM

	if DEBUG:
		print(epoch)

	# Naive reference date, equal to datetime.utcfromtimestamp(0)
	epoch0 = datetime.datetime(1970, 1, 1)

	valeur = (epoch - epoch0).total_seconds() + decalage * 3600

	return int(valeur)	# turn the float into an int


def extract_html_links(s, decalage):
	"""
	Generate (link, mtime) pairs for the anchors found in the HTML text s.

	The scanner looks for the pattern

		<a href="link">...</a> ... date time ... <

	For each anchor, the text running from the first digit after </a>
	(searched on the same line, within 1000 characters) up to the next "<"
	is split on whitespace.  Its first two tokens are taken as date and
	time and converted with sdate_to_mtime().  An anchor is skipped when no
	digit is found, when there are fewer than two tokens, or when the
	second token does not start with a digit.

	s        -- HTML text to scan
	decalage -- offset in hours, forwarded to sdate_to_mtime()

	Yields tuples (link, mtime): the stripped href value and the int
	returned by sdate_to_mtime().  Exceptions raised by sdate_to_mtime()
	propagate to the consumer of the generator.
	"""

	DEBUG = 0

	taille = len(s)
	pos = 0	# scanning cursor; avoids copying the tail of s after each anchor

	while 1:

		p0 = s.find('<a href="', pos)
		if p0 == -1:
			break
		p1 = s.find('">', p0 + 9)
		if p1 == -1:
			break
		nom = s[p0 + 9:p1].strip()

		p2 = s.find('</a>', p1 + 2)
		if p2 == -1:
			break
		apres = p2 + 4	# index of the first character after </a>

		# Look for the first digit after </a>; give up at the end of the
		# line, after 1000 characters, or at the end of the text (the
		# upper bound keeps the index inside s).
		debut = -1
		for i in range(apres, min(apres + 1000, taille)):
			c = s[i]
			if c == '\n':
				break	# occurs on JUNO
			if c.isdigit():
				debut = i
				break
		if debut == -1:
			# occurs on JUNO
			pos = apres
			continue
		p3 = s.find('<', debut)
		if p3 == -1:
			pos = apres
			continue

		champs = s[debut:p3].split()
		if len(champs) >= 2:
			sdate = champs[0]
			sheure = champs[1]
			if sheure[0].isdigit():
				mtime = sdate_to_mtime(sdate + ' ' + sheure, decalage)
				if DEBUG:
					print("link=[%s] date=[%s][%s]" % (nom, sdate, sheure))
				yield nom, mtime
		pos = p3	# resume scanning at the "<" that ended the date field


def get_http_dir(url, login, password, decalage):
	"""
	Fetch the page at url and return the (link, mtime) pairs it contains.

	The page is requested with requests.get(), passing (login, password)
	as credentials and a 30 second timeout.  When the request raises
	IOError (the base class of the exceptions raised by requests) it is
	tried again, up to nbtentatives attempts in total.

	url      -- address of the page to read
	login    -- user name sent with the request
	password -- password sent with the request
	decalage -- offset in hours, forwarded to extract_html_links()

	Returns the generator produced by extract_html_links() on the page
	text, or an empty tuple when every attempt failed.
	"""

	DEBUG = 0
	nbtentatives = 10

	if DEBUG:
		print("url=[%s]" % url)
		print('login=[%s]' % login)
		# the clear-text password is deliberately kept out of the trace
		print('password=[%s]' % '***')
	else:
		print(url)

	# "is None" is used below rather than truthiness because a Response
	# object evaluates to False for HTTP error statuses.
	r = None
	for tentatives in range(nbtentatives):
		try:
			if DEBUG: print("   avant requests.get")
			r = requests.get(url, auth=(login, password), timeout=30.0)
			if DEBUG: print("   apres requests.get")
			break
		except IOError as e:
			print("  tentative %d/%d %s" % (tentatives + 1, nbtentatives, e))

	if r is None:
		# every attempt failed, whatever the value of nbtentatives
		print("PYTHON_ERROR: GET %s" % (url))
		return ()

	if DEBUG:
		print("r=[ %s ]" % r.text)
	return extract_html_links(r.text, decalage)


def download(remote, local, login, password, slow):
	"""
	Download the file at URL remote into the path local.

	When slow is true and local already exists, a HEAD request is issued
	first; the download is skipped if the remote Content-Length equals the
	local size and the remote Last-Modified is not later than the local
	modification time.  If either header is missing or cannot be parsed
	the file is downloaded.  When slow is false the file is always
	downloaded.

	The data is streamed into local + '.tmp', which is renamed to local
	once the transfer is complete; the parent directory is created when
	needed.  Each request is attempted up to nbtentatives times on IOError
	(which covers the exceptions raised by requests, including the one
	raised here for an HTTP error status on the GET).  After the last
	failure a PYTHON_ERROR line is printed and the function returns
	without replacing local.

	remote   -- URL of the file
	local    -- destination path
	login    -- user name sent with the requests
	password -- password sent with the requests
	slow     -- if true, compare with the server before downloading

	Returns None.
	"""

	DEBUG = 0
	nbtentatives = 10

	telecharger = True
	if slow:
		top1 = time.time()
		if os.path.isfile(local):
			stat = os.stat(local)
			date_local = stat.st_mtime
			size_local = stat.st_size

			for tentatives in range(nbtentatives):
				try:
					if DEBUG: print("   avant requests.head")
					r = requests.head(remote, auth=(login, password), timeout=30.0)
					if DEBUG: print("   apres requests.head")
				except IOError as e:
					print("  tentative %d/%d %s" % (tentatives + 1, nbtentatives, e))
					if tentatives == nbtentatives - 1:
						print("PYTHON_ERROR: HEAD %s" % (remote))
						return
					continue

				try:
					size_remote = int(r.headers["content-length"])
					epoch = datetime.datetime.strptime(r.headers["last-modified"], '%a, %d %b %Y %H:%M:%S %Z')
				except (KeyError, ValueError):
					# Header missing or unparsable: no comparison is
					# possible, so telecharger stays True
					break

				epoch0 = datetime.datetime(1970, 1, 1)
				date_remote = int((epoch - epoch0).total_seconds())
				if DEBUG: print("%d %d remote=%s" % (size_remote, date_remote, remote))
				if DEBUG: print("%d %d local=%s\n" % (size_local, date_local, local))
				telecharger = (size_local != size_remote) or (date_remote > date_local)
				break

			if telecharger:
				print("slow=1 MODIFIED  %s" % (remote))
		else:
			print("slow=1 NEW       %s" % (remote))
		top2 = time.time()
		if not telecharger:
			print("slow=1 OK        %s in %.2f sec" % (remote, top2 - top1))

	if telecharger:

		top1 = time.time()
		# Create the destination directory if needed (dirname is empty
		# when local is a bare file name)
		repertoire = os.path.dirname(local)
		if repertoire and not os.path.exists(repertoire):
			os.makedirs(repertoire)

		temporaire = local + '.tmp'
		for tentatives in range(nbtentatives):
			try:
				if DEBUG: print("   avant requests.get")
				r = requests.get(remote, auth=(login, password), stream=True, timeout=30.0)
				if DEBUG: print("   apres requests.get")
				try:
					# An HTTP error status must not be saved as file content
					r.raise_for_status()
					nbchunk = 0
					# Write the body into local.tmp
					with open(temporaire, 'wb') as f:
						if DEBUG: print("   avant r.iter_content")
						for chunk in r.iter_content(chunk_size=1024*1024):
							nbchunk += 1
							if DEBUG: print("   len(chunk) %d" % len(chunk))
							f.write(chunk)
						if DEBUG: print("   apres r.iter_content")
				finally:
					# release the connection even when the transfer failed
					r.close()
				break
			except IOError as e:
				print("  tentative %d/%d %s" % (tentatives + 1, nbtentatives, e))
				if tentatives == nbtentatives - 1:
					print("PYTHON_ERROR: GET %s" % (remote))
					return

		# Rename local.tmp to local
		if os.path.isfile(local):
			os.remove(local)
		os.rename(temporaire, local)

		top2 = time.time()
		print("   %s -> %s in %.2f sec (%d chunks)" % (remote, local, top2 - top1, nbchunk))


def wget(httpdir, localdir, login, password, accept='', reject='', readonly=0, slow=0, decalage=12):
	"""
	Recursively fetch into localdir all the files found under httpdir.

	The entries of httpdir are obtained from get_http_dir().  A link ending
	with '/' is treated as a sub-directory and visited recursively; any
	other link is treated as a file.

	httpdir  -- URL of the directory to read
	localdir -- local directory receiving the files
	login    -- user name sent with the requests
	password -- password sent with the requests
	accept   -- if not empty, only files whose URL contains this
	            substring are considered
	reject   -- if not empty, files whose URL contains this substring and
	            directories whose name contains it are skipped
	readonly -- in the default (not slow) mode, a non-zero value only
	            reports NEW / MODIFIED files without downloading them
	slow     -- if true, every file is handed to download(), which checks
	            it against the server itself
	decalage -- offset in hours, forwarded to get_http_dir()

	Returns None.
	"""

	for link, date in get_http_dir(httpdir, login, password, decalage):

		if link.endswith('/'):

			# Directory
			repertoire = link[:-1]

			# '..' was found on JUNO.  '.' and '/' (empty name) would list
			# this same directory again and recurse without end.
			if repertoire in ('', '.', '..'):
				continue

			# this directory does not necessarily have to be read
			if reject != '' and repertoire.find(reject) != -1:
				continue

			wget(httpdir + '/' + repertoire, localdir + '/' + repertoire, login, password, accept, reject, readonly, slow, decalage)

		else:

			# File
			local = localdir + '/' + link
			remote = httpdir + '/' + link

			if accept and remote.find(accept) == -1:
				continue

			if reject and remote.find(reject) != -1:
				continue

			if slow:

				# NOTE(review): readonly is not consulted in slow mode, so
				# download() may fetch the file - confirm this is intended
				download(remote, local, login, password, slow)

			else:

				sdate_remote = datetime.datetime.utcfromtimestamp(date).strftime('%d/%b/%Y %H:%M:%S')
				if os.path.isfile(local):
					stat = os.stat(local)
					# the local copy is older than the (shifted) listing date
					if stat.st_mtime < date:
						sdate_local = datetime.datetime.utcfromtimestamp(stat.st_mtime).strftime('%d/%b/%Y %H:%M:%S')
						print("MODIFIED  REMOTE=%s  LOCAL=%s  %s" % (sdate_remote, sdate_local, remote))
						if readonly == 0:
							download(remote, local, login, password, slow)
				else:
					print("NEW       REMOTE=%s                              %s" % (sdate_remote, remote))
					if readonly == 0:
						download(remote, local, login, password, slow)
			
