#!C:\Programme\Python25\python.exe
# -*- coding: UTF-8 -*-
import os, sys, subprocess, signal, datetime, hashlib
from os.path import *
# ***** include the biocase.lib directory in the python sys path for importing *****
execfile( os.path.abspath( os.path.join( os.path.dirname( __file__ ), os.pardir, 'lib', 'biocase', 'adjustpath.py' ) ))
execfile( os.path.abspath( os.path.join( os.path.dirname( __file__ ), os.pardir, 'lib', 'biocase', 'appinit.py' ) ))
from biocase.configtool.general import *
from biocase.datasources import Datasource
print "Content-Type: text/plain\n"
# dsa is the only mandatory parameter; all others are optional
dsa = form.getvalue('dsa')
cfg = biocase.configuration.Cfg()
try:
dsaObj = Datasource(name=dsa)
psfObj = dsaObj.getPSFObj()
except:
print "Invalid Datasource given!"
sys.exit()
# Check authentication (ticket indicates we've been invoked from the UI)
# Ticket is the MD5-encrypted psf/biocase password
vticket = hashlib.md5(psfObj.adminpassword if psfObj.adminpassword else cfg.server.adminpassword).hexdigest()
if form.getvalue('ticket'):
from_ui = True
ticket = form.getvalue('ticket')
if ticket != vticket:
print "Authentication failed!"
sys.exit()
else:
from_ui = False
pw = form.getvalue('pw')
if pw is None or pw == "":
print "No password provided!"
sys.exit()
else:
if pw == (psfObj.adminpassword if psfObj.adminpassword else cfg.server.adminpassword):
ticket = vticket
else:
print "Authentication failed!"
sys.exit()
# Optional numeric parameters: anything missing or not purely made of digits
# is replaced by the default (kept as strings).
filesize = form.getvalue('filesize')
if not (filesize is not None and filesize.isdigit()):
    filesize = "1000"

errors = form.getvalue('errors')
if not (errors is not None and errors.isdigit()):
    errors = "1"
# Schema: If it's empty when called directly, find the most likely schema
schema = form.getvalue('schema')
if schema is None:
try:
sl = dsaObj.getSchemaListObj()
# try to find the ABCD2 CMF
cmf = [e for e in sl if e.NS =='http://www.tdwg.org/schemas/abcd/2.06']
# if not, try to find the HISPID5 CMF
if cmf == []:
cmf = [e for e in sl if e.NS =='http://www.chah.org.au/schemas/hispid/5']
# if not, use the first one we get
if cmf == []:
cmf = sl[:1]
# get name of schema mapping (remove trailing .xml)
schema = cmf[0].name[:-4]
except:
print "The specified datasource doesn't have any schemas mapped!"
sys.exit()
# If action is dwca_only, set transformer from parameter; otherwise, get NS and transformer form schema
if action == 'dwc_only':
transformer = form.getvalue('transformer')
schemaNs = None
else:
try:
cmf = dsaObj.getSchemaListObj().getSchemaByName(schema + '.xml')
schemaNs = cmf.NS
transformer = 'abcd2.ktr' if schemaNs == 'http://www.tdwg.org/schemas/abcd/2.06' else 'hispid5.ktr'
except:
print "The specified schema mapping doesn't exist for this datasource!"
sys.exit()
# Set/ValidateFilename
archivefilename = form.getvalue('archivefilename')
if archivefilename is None:
archivefilename = dsa + '_' + schema + '.zip'
archivefilename = archivefilename.replace(' ', '_')
issues = [c for c in archivefilename if c not in string.letters + string.digits + '_.-']
if issues != []:
print "Archive file name provided contains invalid characters. Only letters, digits, and the characters '_', '.' and '-' are allowed."
sys.exit()
# Last, check the action values and do some more validation
action = form.getvalue('action')
if action is None:
action = 'log'
else:
action = action.lower()
if action not in ['log', 'xml', 'dwc', 'dwc_only', 'cancel']:
print "Unrecognized value for parameter action. Only 'log', 'xml', 'dwc' and 'cancel' are allowed."
sys.exit()
if action == 'dwc' and schemaNs not in ['http://www.tdwg.org/schemas/abcd/2.06', 'http://www.chah.org.au/schemas/hispid/5']:
print "DarwinCore archives can only be created for the ABCD 2.06 and HISPID 5 schemas, so make sure the datasource supports one of these. If you don't specify a schema in the request, BioCASe will try to use ABCD 2.06, otherwise HIDPID 5."
sys.exit()
#print dsa
#print filesize
#print errors
#print schema
#print schemaNs
#print transformer
#print archivefilename
#print action
#print from_ui
#print ticket
# Locations and file names shared by the functions below
wrapperurl = dsaObj.getBioCASeAccessPoint()
archivePath = join(cfg.archiveWorkLocator, dsa)
archiveBin = join(cfg.archiveLibLocator, "archive.py")
# Process marker (.proc) and log (.log) files: one pair for the DwC archive,
# one pair named after the XML archive file
proc_dwca, log_dwca = [join(archivePath, dsa + '_DwCA_1.0' + ext) for ext in ('.proc', '.log')]
proc_xml, log_xml = [join(archivePath, archivefilename + ext) for ext in (".proc", '.log')]
dldPath = join(cfg.archiveDownloadLocator, dsa)
def is_running():
    """Return True if the archive work directory contains a ``.proc`` file.

    The directory checked is the module-level ``archivePath``. A directory
    that does not exist cannot contain a process marker, so False is
    returned instead of letting ``os.listdir`` raise.
    """
    if not os.path.isdir(archivePath):
        return False
    for fname in os.listdir(archivePath):
        if fname.endswith(".proc"):
            return True
    return False
def log():
print 'Processsing.' if is_running() else 'Idle.'
print "\nBelow you'll find the log of the latest archiving process.\n" + '*' * 120
# Append the latest log
l = [join(archivePath, s) for s in os.listdir(archivePath) if s.endswith('.log')]
l.sort(key = lambda s: getmtime(s))
if len(l) == 0:
print "-- No archiving process for this datasource so far --"
else:
for line in open(l[-1]).readlines():
print line,
# If the latest process was DwCA, also attach the latest XML log
if l[-1].endswith('DwCA_1.0.log') and len(l) > 1:
print "\nBelow you'll find the log of the corresponding XML archiving process.\n" + '*' * 120
for line in open(l[-2]).readlines():
print line,
def create(xml, dwca):
try:
if xml:
# Write config/query file
q = open(join(archivePath, archivefilename + ".query.xml"), "w")
q.write("\n")
q.write("\n")
q.write(" \n")
q.write(" \n")
q.write(" "+schemaNs+"\n")
q.write(" "+schemaNs+"\n")
q.write(" false\n")
q.write(" \n")
q.write("\n")
q.close()
c = open(join(archivePath, archivefilename + ".config.xml"), "w")
c.write("\n")
c.write("\n")
c.write("" + dsa + "\n")
c.write("" + archivefilename + "\n")
c.write("" + cfg.archiveDownloadLocator + "\n")
c.write("" + wrapperurl + "\n")
c.write(""+schema+"\n")
c.write(""+schemaNs+"\n")
c.write(""+errors+"\n")
c.write(""+str(from_ui)+"\n")
c.write(""+ticket+"\n")
c.write("\n")
c.close()
# Try to open (an empty) logfile
l = open(log_xml, 'w')
l.close()
if dwca:
# Try to open (an empty) logfile
l = open(log_dwca, 'w')
l.close()
# We need to set stdin and stdout because of a bug in Python:
# Leaving these as None will result in a "handle not valid" error on IIS/Windows
proc = subprocess.Popen([sys.executable, archiveBin, str(xml), str(dwca), dsa, archivefilename, archivePath, dldPath,
cfg.server.java, cfg.archiveLibLocator, cfg.kettleLibLocator, cfg.archiveWorkLocator, cfg.server.java_mem, cfg.server.sort_size,
str(transformer)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
for err in proc.stderr:
print err,
sys.exit()
except Exception, err:
print err
def cancel():
def cancel_xml(proc_file):
p = open(proc_file, "w")
p.write("cancel")
p.close()
if from_ui:
print "Cancellation requested. Waiting for the running statement to finish..."
def dwca_cancel(proc_file):
# Read process id
q = open(proc_file, 'r')
for line in q:
id = line
q.close()
# First, try the Linux version
try:
try:
os.kill(int(id), signal.SIGTERM)
if from_ui:
print "Process terminated by sending TERMINATE event."
except AttributeError:
# Wrong OS; raising error to get to get to Windows section
raise
except:
print "Sorry, couldn't terminate process. Please wait for it to finish or cancel it manually using ps -kill (look for java)."
# If that fails, try Windows taskkill with option f (forcefull)
except:
res = subprocess.call("taskkill /f /pid %s" % id)
if res == 0:
if from_ui:
print "Process terminated using Windows Taskkill."
else:
print "Sorry, couldn't terminate process. Please wait for it to finish or cancel it manually using the task manager (look for java.exe)."
# Try to find the proc file
for fname in os.listdir(archivePath):
if fname.endswith('.proc'):
proc_file = join(archivePath, fname)
if fname.endswith('DwCA_1.0.proc'):
dwca_cancel(proc_file)
else:
cancel_xml(proc_file)
# Before returning, wait until proc file disappears (only if not from UI)
if not from_ui:
while exists(proc_file):
time.sleep(5)
print "Process cancelled."
return
# No proc file found
print "No process running."
# Main
if action == 'log':
log()
elif action == 'cancel':
cancel()
else:
# Check if a process is already running
if is_running():
print "An archiving process is already running. Please wait for it to finish."
else:
if action == "xml":
create(True, False)
elif action == "dwc_only":
create(False, True)
elif action == 'dwc':
create(True, True)