#!/usr/bin/python
# -*- coding: UTF-8 -*-

# Root directory on the server under which the harvested files are looked up.
HARVEST_DIR = '/var/lib/tomcat8/harvest/bhit'

# CGI response template for error pages: the first placeholder is the HTTP
# status line value (e.g. '404 Not Found'), the second is the page body.
HTML_RESPONSE = (
    'Content-Type: text/html\n'
    'Status: %s\n'
    '\n'
    '<html><body>%s</body></html>'
)

# CGI response template for successful plain-text answers: the single
# placeholder is the response body.
TEXT_RESPONSE = (
    'Content-Type: text/plain\n'
    'Status: 200 OK\n'
    '\n'
    '%s'
)
import cgi, sys, os, traceback

# First, get the request parameters.
# getfirst() is used instead of getvalue(): when a field is supplied more
# than once, getvalue() returns a list, which would break the string
# concatenation and path joining done further down. getfirst() always
# yields a single string, or None when the field is absent.
form = cgi.FieldStorage()
baseDirectory = form.getfirst('baseDirectory')
fileStore = form.getfirst('fileStore')
fileName = form.getfirst('fileName')
id = form.getfirst('id')
idIndex = form.getfirst('idIndex')
fieldsTerminatedBy = form.getfirst('fieldsTerminatedBy')
linesTerminatedBy = form.getfirst('linesTerminatedBy')
ignoreHeaderLines = form.getfirst('ignoreHeaderLines')
# NOTE(review): fieldsEnclosedBy is read here but not used anywhere in the
# visible part of this script.
fieldsEnclosedBy = form.getfirst('fieldsEnclosedBy')
encoding = form.getfirst('encoding')

# If no ID is given, return an empty document (to prevent exceptions in the
# GGBN portal).
if not id:
    print(TEXT_RESPONSE % '')
    sys.exit()

# Check for mandatory parameters.
if not (baseDirectory and fileStore and fileName):
    print(HTML_RESPONSE % ('400 Bad Request', 'Please specify at least the mandatory parameters baseDirectory, fileStore and fileName.'))
    sys.exit()

# Set defaults for optional text parameters.
encoding = encoding or 'UTF-8'
fieldsTerminatedBy = fieldsTerminatedBy or '\t'
linesTerminatedBy = linesTerminatedBy or '\n'

# Numeric parameters default to 0. A non-numeric value is a client error and
# is answered with 400 instead of crashing the script with an unhandled
# exception (which would produce no valid CGI response at all).
try:
    idIndex = int(idIndex) if idIndex else 0
    ignoreHeaderLines = int(ignoreHeaderLines) if ignoreHeaderLines else 0
except (TypeError, ValueError):
    print(HTML_RESPONSE % ('400 Bad Request', 'The parameters idIndex and ignoreHeaderLines must be integers.'))
    sys.exit()

# If idIndex=0, we can use the fast search: just see if the line starts with
# the ID followed by the field separator. Otherwise lineStart stays None.
lineStart = (id + fieldsTerminatedBy) if idIndex == 0 else None

# Open file.
# The path components come straight from the request, so the resolved path
# must be confirmed to lie inside HARVEST_DIR: os.path.join() discards all
# earlier components when a later one is absolute, and '..' segments could
# otherwise climb out of the harvest tree. The check is lexical (abspath),
# so symlinks that live inside the harvest tree keep working.
harvestRoot = os.path.abspath(HARVEST_DIR)
try:
    filePath = os.path.abspath(os.path.join(harvestRoot, fileStore, baseDirectory, fileName))
    if filePath.startswith(harvestRoot + os.sep):
        f = open(filePath)
    else:
        filePath = None
except Exception:
    # The traceback contains the request-supplied path, so it is HTML-escaped
    # before being embedded in the text/html error page.
    errorText = traceback.format_exc().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    print(HTML_RESPONSE % ('404 Not Found', errorText))
    sys.exit()
if filePath is None:
    print(HTML_RESPONSE % ('403 Forbidden', 'The requested file is outside of the harvest directory.'))
    sys.exit()

# And go! Collect every matching line and send the result as plain text.
try:
    try:
        if lineStart:
            # Fast path (idIndex == 0): the line must begin with
            # "<id><field separator>".
            matches = [line for line in f if line.startswith(lineStart)]
        else:
            # NOTE(review): for idIndex != 0 this is a plain substring test
            # over the whole line; it is not restricted to column idIndex.
            matches = [line for line in f if id in line]
    finally:
        # Close the file even when reading fails part-way through.
        f.close()
    # One empty line is emitted per header line the client intends to skip,
    # followed by the matching rows. A non-positive count adds nothing.
    res = linesTerminatedBy * ignoreHeaderLines + ''.join(matches)
    print(TEXT_RESPONSE % res)

except Exception:
    # HTML-escape the traceback before embedding it in the text/html page.
    errorText = traceback.format_exc().replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
    print(HTML_RESPONSE % ('500 Internal Server Error', errorText))