#!c:\Program Files\Python39\python.exe
# -*- coding: UTF-8 -*-
import os, sys, hashlib, json, subprocess, signal, cgi, time
# ***** include the biocase.lib directory in the python sys path for importing *****
exec(open(os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir, 'lib', 'biocase', 'adjustpath.py'))).read())
import biocase.configuration
from biocase.archive.general import transformers, ns_dataset_aware, NS_ABCD206, NS_ABCD21, NS_HISPID5, DWCA_SCHEMA_NAME, proc
from biocase.datasources import Datasource
print("Content-Type: text/plain\n")
# First, check the action value. A missing action defaults to 'log';
# anything else is compared case-insensitively against the supported actions.
form = biocase.configuration.sanitiseFieldStorage(cgi.FieldStorage())
action = form.getvalue('action')
action = 'log' if action is None else action.lower()
if action not in ('log', 'xml', 'dwc', 'dwc_only', 'cancel', 'reset'):
    # The message lists every accepted value, including 'dwc_only'
    print("Unrecognized value for parameter action. Only 'log', 'xml', 'dwc', 'dwc_only', 'cancel' and 'reset' are allowed.")
    sys.exit()
# dsa is the only mandatory parameter; all others are optional.
# Any failure to build the datasource or to load its PSF object is reported
# as an invalid datasource.
dsa = form.getvalue('dsa')
try:
    dsaObj = Datasource(name=dsa)
    psfObj = dsaObj.getPSFObj()
except Exception:
    # 'Exception' rather than a bare except, so that SystemExit and
    # KeyboardInterrupt are not mistaken for an invalid datasource
    print("Invalid Datasource given!")
    sys.exit()
# Check authentication (a ticket indicates we've been invoked from the UI).
# The ticket is the MD5 hex digest of the datasource (psf) admin password or,
# if the datasource has none, of the server-wide admin password.
# Without a ticket, the plain password must be passed in the 'pw' parameter.
import hmac  # only needed here: constant-time comparison of secrets

cfg = biocase.configuration.Cfg()
pw = psfObj.adminpassword if psfObj.adminpassword else cfg.server.adminpassword
vticket = hashlib.md5(pw.encode()).hexdigest()
ticket = form.getvalue('ticket')
if ticket:
    from_ui = True
    # A repeated 'ticket' parameter may presumably arrive as a list (as with
    # cgi.FieldStorage.getvalue); such values never authenticate.
    # compare_digest avoids leaking the position of the first mismatch.
    if not (isinstance(ticket, str)
            and hmac.compare_digest(ticket.encode(), vticket.encode())):
        print("Authentication failed!")
        sys.exit()
else:
    from_ui = False
    supplied_pw = form.getvalue('pw')
    if supplied_pw is None or supplied_pw == "":
        print("No password provided!")
        sys.exit()
    # Compare as bytes: compare_digest rejects non-ASCII str arguments
    if not (isinstance(supplied_pw, str)
            and hmac.compare_digest(supplied_pw.encode(), pw.encode())):
        print("Authentication failed!")
        sys.exit()
    # Password accepted: continue with the ticket the UI would have sent
    ticket = vticket
# Set defaults for optional parameters.
# filesize is kept as a string; anything missing or not purely made of digits
# falls back to "1000".
filesize = form.getvalue('filesize')
if not (filesize is not None and filesize.isdigit()):
    filesize = "1000"
# Schema: if the request doesn't name one, pick the most likely schema mapping
schema = form.getvalue('schema')
if schema is None:
    try:
        sl = dsaObj.getSchemaListObj()
        # Preference order: ABCD 2.06, then ABCD 2.1, then HISPID 5
        # (as an alternative, sl.getSchemaByNS(...) could be used here)
        for preferred_ns in (NS_ABCD206, NS_ABCD21, NS_HISPID5):
            cmf = [e for e in sl if e.NS == preferred_ns]
            if cmf != []:
                break
        else:
            # None of the preferred namespaces is mapped: use the first one we get
            cmf = sl[:1]
        # The schema name is the mapping's file name without the trailing '.xml'
        schema = cmf[0].name[:-4]
    except:
        print("The specified datasource doesn't have any schemas mapped!")
        sys.exit()
# Get the namespace of the (requested or chosen) schema mapping
try:
    cmf = dsaObj.getSchemaListObj().getSchemaByName(schema + '.xml')
    schemaNs = cmf.NS
except:
    print("The specified schema mapping doesn't exist for this datasource!")
    sys.exit()
# For DwC actions, the schema namespace must have a transformer registered in
# 'transformers'; for all other actions the transformer stays None.
transformer = None
if action in ('dwc', 'dwc_only'):
    if schemaNs in transformers:
        transformer = transformers[schemaNs]
    else:
        print("DarwinCore archives can only be created for the ABCD 2.06, ABCD 2.1 and HISPID 5 schemas, so make sure the datasource supports one of these. If you don't specify a schema in the request, BioCASe will try to use ABCD 2.06, ABCD 2.1 or HISPID 5 (in this order).")
        sys.exit()
# 'filename' (repeatable) names the XML archives to convert and is only valid
# for action 'dwc_only'. 'dataset' (repeatable) restricts XML archiving to
# certain datasets and is only valid for actions 'xml' and 'dwc' on a
# dataset-aware schema. The two parameters are mutually exclusive.
filenames = form.getlist('filename')
# In dataset names, non-breaking spaces (U+00A0) become regular spaces
datasets = [name.replace('\xa0', ' ') for name in form.getlist('dataset')]
if datasets and filenames:
    print("The parameters filename and dataset are mutually exclusive. Use filename if you want to apply DarwinCore archive transformation to certain XML archive(s) _OR_ the parameter dataset if you want to restrict XML archiving to certain dataset(s).")
    sys.exit()
if filenames and action in ('xml', 'dwc'):
    print("The parameter filename can only be used for action 'dwc_only'. If you want to narrow down the whole archiving process, use the dataset parameter.")
    sys.exit()
if datasets and action not in ('xml', 'dwc'):
    print("The parameter dataset can only be used for actions 'xml' and 'dwc'. If you want to apply DarwinCore archive transformation to certain XML archive(s), use the filename parameter.")
    sys.exit()
if datasets and schemaNs not in ns_dataset_aware:
    print("The parameter dataset can only be used for dataset-aware schemas %s." % str(ns_dataset_aware))
    sys.exit()
# Optional BioCASe filter for the xml and dwc actions; empty string if absent
requested_filter = form.getvalue('filter')
xml_filter = '' if requested_filter is None else requested_filter
# Download flag: true only for 'true', '1' or 'yes' (case-insensitive)
download_flag = form.getvalue('download')
download = download_flag is not None and download_flag.lower() in ('true', '1', 'yes')
# Paths used by the action handlers below.
# (The former module-level string 'log' was dropped: the name is rebound by
# the log() function defined right after this section, so it was never used.)
archivePath = os.path.join(cfg.archiveWorkLocator, dsa)           # work directory of this datasource
archiveBin = os.path.join(cfg.archiveLibLocator, "archive.py")    # script launched by create()
log_dwca = os.path.join(archivePath, DWCA_SCHEMA_NAME + '.log')
proc_dwca = os.path.join(archivePath, DWCA_SCHEMA_NAME + '.proc')
log_xml = os.path.join(archivePath, schema + '.log')
proc_xml = os.path.join(archivePath, schema + ".proc")
def log():
    """Print the archiving status followed by the most recent log file(s).

    The status line depends on ``proc(dsa)``. The newest ``*.log`` file (by
    modification time) in the datasource's work directory is printed; if that
    file is the DwCA log and another log exists, the second-newest log is
    printed as well, as the corresponding XML archiving log.
    """
    # NOTE: the spelling of 'Processsing.' is kept as is; it is runtime output
    # that clients may match on.
    print('Processsing.' if proc(dsa) else 'Idle.')
    print("\nBelow you'll find the log of the latest archiving process.\n" + '*' * 120)
    # Collect the log files, oldest first. A work directory that doesn't exist
    # is treated like one without any logs.
    try:
        entries = os.listdir(archivePath)
    except FileNotFoundError:
        entries = []
    logs = sorted((os.path.join(archivePath, s) for s in entries if s.endswith('.log')),
                  key=os.path.getmtime)
    if not logs:
        print("-- No archiving process for this datasource so far --")
        return
    # Lines keep their own newline, so nothing is appended; end=' ' would put
    # a stray space in front of every following line.
    with open(logs[-1]) as latest:
        for line in latest:
            print(line, end='')
    # If the latest process was DwCA, also attach the latest XML log
    if logs[-1].endswith(DWCA_SCHEMA_NAME + '.log') and len(logs) > 1:
        print("\nBelow you'll find the log of the corresponding XML archiving process.\n" + '*' * 120)
        with open(logs[-2]) as previous:
            for line in previous:
                print(line, end='')
def create(xml, dwca):
    """Prepare the work files and launch archive.py for the requested archives.

    Args:
        xml: truthy if XML archives are to be created. The XML log and proc
            files are emptied and the schema's ``.config.xml`` is written.
        dwca: truthy if a DarwinCore archive is to be created. The DwCA log is
            emptied; the DwCA proc file is only created when ``xml`` is falsy.

    archive.py is started with the current interpreter. Everything it writes
    to stderr is echoed to the response, then the script ends via
    ``sys.exit()``. Any ``Exception`` raised on the way is printed instead of
    propagating (``SystemExit`` is not an ``Exception`` and passes through).
    """
    try:
        if xml:
            # Try to open (an empty) log/procfile
            for path in (log_xml, proc_xml):
                with open(path, 'w'):
                    pass
            # Write configuration file
            # NOTE(review): the lines written here carry no XML element markup
            # around the values - verify against what archive.py expects.
            with open(os.path.join(archivePath, schema + ".config.xml"), "w") as c:
                c.write("\n")
                c.write("\n")
                c.write("" + dsa + "\n")
                c.write("" + schema + "\n")
                c.write("" + schemaNs + "\n")
                c.write("" + filesize + "\n")
                c.write("" + str(from_ui) + "\n")
                c.write("\n")
        if dwca:
            # Try to open (an empty) log/procfile
            with open(log_dwca, 'w'):
                pass
            if not xml:
                with open(proc_dwca, 'w'):
                    pass
        # We need to set stdin and stdout because of a bug in Python:
        # Leaving these as None will result in a "handle not valid" error on IIS/Windows.
        # NOTE(review): stdout is piped but never read; this presumes archive.py
        # writes next to nothing to stdout - confirm.
        # Text mode makes stderr yield str lines instead of bytes, whose repr
        # (b'...') would otherwise end up in the response.
        child = subprocess.Popen(
            [sys.executable, archiveBin, str(xml), str(dwca), dsa, schema, str(transformer),
             json.dumps(datasets), json.dumps(filenames), json.dumps(xml_filter), json.dumps(download)],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            universal_newlines=True, errors='replace')
        # Lines keep their own newline, so nothing is appended
        for line in child.stderr:
            print(line, end='')
        sys.exit()
    except Exception as err:
        print(err)
def cancel_xml(proc_file):
    """Request cancellation of an XML archiving run.

    Overwrites ``proc_file`` with the single word ``cancel`` (presumably
    picked up by the running archive process - confirm in archive.py) and,
    when invoked from the UI, prints a confirmation message.

    Args:
        proc_file: path of the XML archiving proc file.
    """
    # 'with' closes the file even if the write fails
    with open(proc_file, "w") as p:
        p.write("cancel")
    if from_ui:
        print("Cancellation requested. Waiting for the running statement to finish... (Reset)")
def dwca_cancel(proc_file):
    """Terminate the process whose id is stored in ``proc_file``.

    The process id is taken from the last line of the file. ``os.kill`` with
    SIGTERM is tried first; if the platform offers no ``os.kill`` (or no
    ``signal.SIGTERM``), Windows ``taskkill /f`` is used instead. Success
    messages are only printed when invoked from the UI; the failure message
    is suppressed for action 'reset'.

    Args:
        proc_file: path of the proc file holding the process id.
    """
    failure_msg = "Sorry, couldn't terminate java process. Please wait for it to finish or cancel it manually. (Reset)"

    # Read process id (last line of the proc file)
    with open(proc_file, 'r') as q:
        lines = q.read().splitlines()
    try:
        pid = int(lines[-1])
        if pid <= 0:
            # 0 and negative ids address process groups in os.kill, never a
            # single process, so they are refused
            raise ValueError("not a process id: %d" % pid)
    except (IndexError, ValueError):
        # Empty proc file or non-numeric content (e.g. 'cancel'): nothing to kill
        if action != 'reset':
            print(failure_msg)
        return

    # First, try the Linux version
    try:
        os.kill(pid, signal.SIGTERM)
    except AttributeError:
        # Wrong OS; try Windows taskkill with option f (forceful). The argument
        # list avoids building a command string from file content.
        try:
            res = subprocess.call(["taskkill", "/f", "/pid", str(pid)])
        except OSError:
            # taskkill could not be started at all
            res = 1
        if res == 0:
            if from_ui:
                print("Process terminated using Windows Taskkill.")
        elif action != 'reset':
            print(failure_msg)
    except Exception:
        # Deliberately broad (best effort): no such process, no permission, ...
        if action != 'reset':
            print(failure_msg)
    else:
        if from_ui:
            print("Process terminated by sending TERMINATE event.")
def cancel():
    """Cancel the running archiving process of the datasource, if there is one.

    The first ``.proc`` file found in the work directory decides how to
    cancel: the DwCA proc file is handed to dwca_cancel(), any other proc
    file to cancel_xml(). When not invoked from the UI, the call blocks until
    the proc file has disappeared and then reports the cancellation.
    """
    proc_names = [entry for entry in os.listdir(archivePath) if entry.endswith('.proc')]
    if not proc_names:
        # No proc file found
        print("No process running.")
        return

    # Only the first proc file is handled
    first = proc_names[0]
    proc_file = os.path.join(archivePath, first)
    canceller = dwca_cancel if first.endswith(DWCA_SCHEMA_NAME + '.proc') else cancel_xml
    canceller(proc_file)

    # Before returning, wait until the proc file disappears (only if not from UI)
    if not from_ui:
        while os.path.exists(proc_file):
            time.sleep(5)
        print("Process cancelled.")
def reset():
    """Forcefully reset archiving for the datasource.

    For the first ``.proc`` file found in the work directory, dwca_cancel()
    is called on it (whatever kind of proc file it is), then after five
    seconds the proc file is removed if it still exists. Prints whether
    anything was reset.
    """
    for fname in os.listdir(archivePath):
        if not fname.endswith('.proc'):
            continue
        proc_file = os.path.join(archivePath, fname)
        # First, terminate process
        dwca_cancel(proc_file)
        # Then wait 5 secs, then remove the proc file
        time.sleep(5)
        try:
            os.remove(proc_file)
        except FileNotFoundError:
            # Already gone; removing directly instead of checking for
            # existence first avoids failing if the file vanishes in between
            pass
        # Finally, return
        print("Archiving has been reset")
        return
    # No proc file found
    print("No process running.")
# Main: dispatch on the requested action.
# 'log', 'cancel' and 'reset' are always possible; the archive-creating
# actions are refused while proc(dsa) reports a running process.
simple_actions = {'log': log, 'cancel': cancel, 'reset': reset}
# action -> (xml, dwca) arguments for create()
create_arguments = {'xml': (True, False), 'dwc_only': (False, True), 'dwc': (True, True)}
if action in simple_actions:
    simple_actions[action]()
elif proc(dsa):
    print("An archiving process is already running. Please wait for it to finish.")
elif action in create_arguments:
    create(*create_arguments[action])