#!C:\Programme\Python25\python.exe # -*- coding: UTF-8 -*- import os, sys, subprocess, signal, datetime, hashlib from os.path import * # ***** include the biocase.lib directory in the python sys path for importing ***** execfile( os.path.abspath( os.path.join( os.path.dirname( __file__ ), os.pardir, 'lib', 'biocase', 'adjustpath.py' ) )) execfile( os.path.abspath( os.path.join( os.path.dirname( __file__ ), os.pardir, 'lib', 'biocase', 'appinit.py' ) )) from biocase.configtool.general import * from biocase.datasources import Datasource print "Content-Type: text/plain\n" # dsa is the only mandatory parameter; all others are optional dsa = form.getvalue('dsa') cfg = biocase.configuration.Cfg() try: dsaObj = Datasource(name=dsa) psfObj = dsaObj.getPSFObj() except: print "Invalid Datasource given!" sys.exit() # Check authentication (ticket indicates we've been invoked from the UI) # Ticket is the MD5-encrypted psf/biocase password vticket = hashlib.md5(psfObj.adminpassword if psfObj.adminpassword else cfg.server.adminpassword).hexdigest() if form.getvalue('ticket'): from_ui = True ticket = form.getvalue('ticket') if ticket != vticket: print "Authentication failed!" sys.exit() else: from_ui = False pw = form.getvalue('pw') if pw is None or pw == "": print "No password provided!" sys.exit() else: if pw == (psfObj.adminpassword if psfObj.adminpassword else cfg.server.adminpassword): ticket = vticket else: print "Authentication failed!" sys.exit() # set defaults for optional parameters filesize = form.getvalue('filesize') errors = form.getvalue('errors') if filesize is None or not filesize.isdigit(): filesize = "1000" if errors is None or not errors.isdigit(): errors = "1" # Schema: If it's empty when called directly, find the most likely schema schema = form.getvalue('schema') if schema is None: try: sl = dsaObj.getSchemaListObj() # try to find the ABCD2 CMF cmf = [e for e in sl if e.NS =='http://www.tdwg.org/schemas/abcd/2.06'] # if not, try to find the HISPID5 CMF if cmf == []: cmf = [e for e in sl if e.NS =='http://www.chah.org.au/schemas/hispid/5'] # if not, use the first one we get if cmf == []: cmf = sl[:1] # get name of schema mapping (remove trailing .xml) schema = cmf[0].name[:-4] except: print "The specified datasource doesn't have any schemas mapped!" sys.exit() # If action is dwca_only, set transformer from parameter; otherwise, get NS and transformer form schema if action == 'dwc_only': transformer = form.getvalue('transformer') schemaNs = None else: try: cmf = dsaObj.getSchemaListObj().getSchemaByName(schema + '.xml') schemaNs = cmf.NS transformer = 'abcd2.ktr' if schemaNs == 'http://www.tdwg.org/schemas/abcd/2.06' else 'hispid5.ktr' except: print "The specified schema mapping doesn't exist for this datasource!" sys.exit() # Set/ValidateFilename archivefilename = form.getvalue('archivefilename') if archivefilename is None: archivefilename = dsa + '_' + schema + '.zip' archivefilename = archivefilename.replace(' ', '_') issues = [c for c in archivefilename if c not in string.letters + string.digits + '_.-'] if issues != []: print "Archive file name provided contains invalid characters. Only letters, digits, and the characters '_', '.' and '-' are allowed." sys.exit() # Last, check the action values and do some more validation action = form.getvalue('action') if action is None: action = 'log' else: action = action.lower() if action not in ['log', 'xml', 'dwc', 'dwc_only', 'cancel']: print "Unrecognized value for parameter action. Only 'log', 'xml', 'dwc' and 'cancel' are allowed." sys.exit() if action == 'dwc' and schemaNs not in ['http://www.tdwg.org/schemas/abcd/2.06', 'http://www.chah.org.au/schemas/hispid/5']: print "DarwinCore archives can only be created for the ABCD 2.06 and HISPID 5 schemas, so make sure the datasource supports one of these. If you don't specify a schema in the request, BioCASe will try to use ABCD 2.06, otherwise HIDPID 5." sys.exit() #print dsa #print filesize #print errors #print schema #print schemaNs #print transformer #print archivefilename #print action #print from_ui #print ticket # Set some variables wrapperurl = dsaObj.getBioCASeAccessPoint() archivePath = join(cfg.archiveWorkLocator, dsa) archiveBin = join(cfg.archiveLibLocator, "archive.py") proc_dwca = join(archivePath, dsa + '_DwCA_1.0.proc') proc_xml = join(archivePath, archivefilename + ".proc") log_dwca = join(archivePath, dsa + '_DwCA_1.0.log') log_xml = join(archivePath, archivefilename + '.log') dldPath = join(cfg.archiveDownloadLocator, dsa) def is_running(): running = False for fname in os.listdir(archivePath): if fname.endswith(".proc"): running = True break return running def log(): print 'Processsing.' if is_running() else 'Idle.' print "\nBelow you'll find the log of the latest archiving process.\n" + '*' * 120 # Append the latest log l = [join(archivePath, s) for s in os.listdir(archivePath) if s.endswith('.log')] l.sort(key = lambda s: getmtime(s)) if len(l) == 0: print "-- No archiving process for this datasource so far --" else: for line in open(l[-1]).readlines(): print line, # If the latest process was DwCA, also attach the latest XML log if l[-1].endswith('DwCA_1.0.log') and len(l) > 1: print "\nBelow you'll find the log of the corresponding XML archiving process.\n" + '*' * 120 for line in open(l[-2]).readlines(): print line, def create(xml, dwca): try: if xml: # Write config/query file q = open(join(archivePath, archivefilename + ".query.xml"), "w") q.write("\n") q.write("\n") q.write("
search
\n") q.write(" \n") q.write(" "+schemaNs+"\n") q.write(" "+schemaNs+"\n") q.write(" false\n") q.write(" \n") q.write("
\n") q.close() c = open(join(archivePath, archivefilename + ".config.xml"), "w") c.write("\n") c.write("\n") c.write("" + dsa + "\n") c.write("" + archivefilename + "\n") c.write("" + cfg.archiveDownloadLocator + "\n") c.write("" + wrapperurl + "\n") c.write(""+schema+"\n") c.write(""+schemaNs+"\n") c.write(""+errors+"\n") c.write(""+str(from_ui)+"\n") c.write(""+ticket+"\n") c.write("\n") c.close() # Try to open (an empty) logfile l = open(log_xml, 'w') l.close() if dwca: # Try to open (an empty) logfile l = open(log_dwca, 'w') l.close() # We need to set stdin and stdout because of a bug in Python: # Leaving these as None will result in a "handle not valid" error on IIS/Windows proc = subprocess.Popen([sys.executable, archiveBin, str(xml), str(dwca), dsa, archivefilename, archivePath, dldPath, cfg.server.java, cfg.archiveLibLocator, cfg.kettleLibLocator, cfg.archiveWorkLocator, cfg.server.java_mem, cfg.server.sort_size, str(transformer)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) for err in proc.stderr: print err, sys.exit() except Exception, err: print err def cancel(): def cancel_xml(proc_file): p = open(proc_file, "w") p.write("cancel") p.close() if from_ui: print "Cancellation requested. Waiting for the running statement to finish..." def dwca_cancel(proc_file): # Read process id q = open(proc_file, 'r') for line in q: id = line q.close() # First, try the Linux version try: try: os.kill(int(id), signal.SIGTERM) if from_ui: print "Process terminated by sending TERMINATE event." except AttributeError: # Wrong OS; raising error to get to get to Windows section raise except: print "Sorry, couldn't terminate process. Please wait for it to finish or cancel it manually using ps -kill (look for java)." # If that fails, try Windows taskkill with option f (forcefull) except: res = subprocess.call("taskkill /f /pid %s" % id) if res == 0: if from_ui: print "Process terminated using Windows Taskkill." else: print "Sorry, couldn't terminate process. Please wait for it to finish or cancel it manually using the task manager (look for java.exe)." # Try to find the proc file for fname in os.listdir(archivePath): if fname.endswith('.proc'): proc_file = join(archivePath, fname) if fname.endswith('DwCA_1.0.proc'): dwca_cancel(proc_file) else: cancel_xml(proc_file) # Before returning, wait until proc file disappears (only if not from UI) if not from_ui: while exists(proc_file): time.sleep(5) print "Process cancelled." return # No proc file found print "No process running." # Main if action == 'log': log() elif action == 'cancel': cancel() else: # Check if a process is already running if is_running(): print "An archiving process is already running. Please wait for it to finish." else: if action == "xml": create(True, False) elif action == "dwc_only": create(False, True) elif action == 'dwc': create(True, True)