#!C:\Programme\Python25\python.exe # -*- coding: UTF-8 -*- import os, sys, hashlib # ***** include the biocase.lib directory in the python sys path for importing ***** execfile(os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, 'lib', 'biocase', 'adjustpath.py'))) import subprocess, signal, cgi, time import simplejson as json import biocase.configuration from biocase.archive.general import transformers, ns_dataset_aware, NS_ABCD206, NS_ABCD21, NS_HISPID5, DWCA_SCHEMA_NAME, proc from biocase.datasources import Datasource print "Content-Type: text/plain\n" # First, check the action values form = cgi.FieldStorage() action = form.getvalue('action') if action is None: action = 'log' else: action = action.lower() if action not in ['log', 'xml', 'dwc', 'dwc_only', 'cancel']: print "Unrecognized value for parameter action. Only 'log', 'xml', 'dwc' and 'cancel' are allowed." sys.exit() # dsa is the only mandatory parameter; all others are optional dsa = form.getvalue('dsa') try: dsaObj = Datasource(name=dsa) psfObj = dsaObj.getPSFObj() except: print "Invalid Datasource given!" sys.exit() # Check authentication (ticket indicates we've been invoked from the UI) # Ticket is the MD5-encrypted psf/biocase password cfg = biocase.configuration.Cfg() vticket = hashlib.md5(psfObj.adminpassword if psfObj.adminpassword else cfg.server.adminpassword).hexdigest() if form.getvalue('ticket'): from_ui = True ticket = form.getvalue('ticket') if ticket != vticket: print "Authentication failed!" sys.exit() else: from_ui = False pw = form.getvalue('pw') if pw is None or pw == "": print "No password provided!" sys.exit() else: if pw == (psfObj.adminpassword if psfObj.adminpassword else cfg.server.adminpassword): ticket = vticket else: print "Authentication failed!" sys.exit() # set defaults for optional parameterss filesize = form.getvalue('filesize') if filesize is None or not filesize.isdigit(): filesize = "1000" # Schema: If it's empty when called directly, find the most likely schema schema = form.getvalue('schema') if schema is None: try: sl = dsaObj.getSchemaListObj() # try to find the ABCD2 CMF # as an alternative, sl.getSchemaByNS(NS_ABCD206) could be used here cmf = [e for e in sl if e.NS == NS_ABCD206] # if not, try to find the ABCD2.1 CMF if cmf == []: cmf = [e for e in sl if e.NS == NS_ABCD21] # if not, try to find the HISPID5 CMF if cmf == []: cmf = [e for e in sl if e.NS == NS_HISPID5] # if not, use the first one we get if cmf == []: cmf = sl[:1] # get name of schema mapping (remove trailing .xml) and set filename schema = cmf[0].name[:-4] except: print "The specified datasource doesn't have any schemas mapped!" sys.exit() # Get schema namespace try: cmf = dsaObj.getSchemaListObj().getSchemaByName(schema + '.xml') schemaNs = cmf.NS except: print "The specified schema mapping doesn't exist for this datasource!" sys.exit() # For DwC, check namespace and set transformer transformer = None if action in ('dwc', 'dwc_only'): if schemaNs in transformers.keys(): transformer = transformers[schemaNs] else: print "DarwinCore archives can only be created for the ABCD 2.06, ABCD 2.1 and HISPID 5 schemas, so make sure the datasource supports one of these. If you don't specify a schema in the request, BioCASe will try to use ABCD 2.06, ABCD 2.1 or HIDPID 5 (in this order)." sys.exit() # For DwC only, there can be an optional filename list (indicating the XML archives to be converted) # For XML, a list of datasets can be passed. Both parameters are mutually exclusive. # For the dataset names, convert non-breakable unicode spaces into regular spaces filenames = form.getlist('filename') datasets = [unicode(d, 'utf-8').replace(unichr(160), ' ') for d in form.getlist('dataset')] if filenames and datasets: print "The parameters filename and dataset are mutually exclusive. Use filename if you want to apply DarwinCore archive transformation to certain XML archive(s) _OR_ the parameter dataset if you want to restrict XML archiving to certain dataset(s)." sys.exit() if action in ('xml', 'dwc') and filenames: print "The parameter filename can only be used for action 'dwc_only'. If you want to narrow down the whole archiving process, use the dataset parameter." sys.exit() if datasets: if action not in ('xml', 'dwc'): print "The parameter dataset can only be used for actions 'xml' and 'dwc'. If you want to apply DarwinCore archive transformation to certain XML archive(s), use the filename parameter." sys.exit() if schemaNs not in ns_dataset_aware: print "The parameter dataset can only be used for dataset-aware schemas %s." % str(ns_dataset_aware) sys.exit() # Set some variables log = schema + '.log' archivePath = os.path.join(cfg.archiveWorkLocator, dsa) archiveBin = os.path.join(cfg.archiveLibLocator, "archive.py") log_dwca = os.path.join(archivePath, DWCA_SCHEMA_NAME + '.log') proc_dwca = os.path.join(archivePath, DWCA_SCHEMA_NAME + '.proc') log_xml = os.path.join(archivePath, schema + '.log') proc_xml = os.path.join(archivePath, schema + ".proc") def log(): print 'Processsing.' if proc(dsa) else 'Idle.' print "\nBelow you'll find the log of the latest archiving process.\n" + '*' * 120 # Append the latest log l = [os.path.join(archivePath, s) for s in os.listdir(archivePath) if s.endswith('.log')] l.sort(key=lambda s: os.path.getmtime(s)) if len(l) == 0: print "-- No archiving process for this datasource so far --" else: for line in open(l[-1]).readlines(): print line, # If the latest process was DwCA, also attach the latest XML log if l[-1].endswith(DWCA_SCHEMA_NAME + '.log') and len(l) > 1: print "\nBelow you'll find the log of the corresponding XML archiving process.\n" + '*' * 120 for line in open(l[-2]).readlines(): print line, def create(xml, dwca): try: if xml: # Try to open (an empty) log/procfile open(log_xml, 'w').close() open(proc_xml, 'w').close() # Write configuration file c = open(os.path.join(archivePath, schema + ".config.xml"), "w") c.write("\n") c.write("\n") c.write("" + dsa + "\n") c.write("" + schema + "\n") c.write("" + schemaNs + "\n") c.write("" + filesize + "\n") c.write("" + str(from_ui) + "\n") c.write("\n") c.close() if dwca: # Try to open (an empty) log/logfile open(log_dwca, 'w').close() if not xml: open(proc_dwca, 'w').close() # We need to set stdin and stdout because of a bug in Python: # Leaving these as None will result in a "handle not valid" error on IIS/Windows proc = subprocess.Popen([sys.executable, archiveBin, str(xml), str(dwca), dsa, schema, str(transformer), json.dumps(datasets), json.dumps(filenames)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) for err in proc.stderr: print err, sys.exit() except Exception, err: print err def cancel(): def cancel_xml(proc_file): p = open(proc_file, "w") p.write("cancel") p.close() if from_ui: print "Cancellation requested. Waiting for the running statement to finish..." def dwca_cancel(proc_file): # Read process id q = open(proc_file, 'r') for line in q: pid = line q.close() # First, try the Linux version try: try: os.kill(int(pid), signal.SIGTERM) if from_ui: print "Process terminated by sending TERMINATE event." except AttributeError: # Wrong OS; raising error to get to get to Windows section raise except: print "Sorry, couldn't terminate process. Please wait for it to finish or cancel it manually using ps -kill (look for java)." # If that fails, try Windows taskkill with option f (forcefull) except: res = subprocess.call("taskkill /f /pid %s" % pid) if res == 0: if from_ui: print "Process terminated using Windows Taskkill." else: print "Sorry, couldn't terminate process. Please wait for it to finish or cancel it manually using the task manager (look for java.exe)." # Try to find the proc file for fname in os.listdir(archivePath): if fname.endswith('.proc'): proc_file = os.path.join(archivePath, fname) if fname.endswith(DWCA_SCHEMA_NAME + '.proc'): dwca_cancel(proc_file) else: cancel_xml(proc_file) # Before returning, wait until proc file disappears (only if not from UI) if not from_ui: while os.path.exists(proc_file): time.sleep(5) print "Process cancelled." return # No proc file found print "No process running." # Main if action == 'log': log() elif action == 'cancel': cancel() else: # Check if a process is already running if proc(dsa): print "An archiving process is already running. Please wait for it to finish." else: if action == "xml": create(True, False) elif action == "dwc_only": create(False, True) elif action == 'dwc': create(True, True)