xquery version "3.0"; module namespace biocase-lib = "http://exist-db.org/xquery/rebind/biocase-lib"; import module namespace biocase-settings = "http://exist-db.org/xquery/rebind/biocase-settings" at "biocase_settings.xql"; declare namespace biocase = "http://www.biocase.org/schemas/protocol/1.3"; declare namespace functx = "http://www.functx.com"; declare function biocase-lib:index-of-string ( $arg as xs:string? , $substring as xs:string ) as xs:integer* { (: code taken from http://www.xqueryfunctions.com/xq/functx_index-of-string.html :) if (contains($arg, $substring)) then (string-length(substring-before($arg, $substring))+1, for $other in biocase-lib:index-of-string(substring-after($arg, $substring), $substring) return $other + string-length(substring-before($arg, $substring)) + string-length($substring)) else () } ; (: replaced all characters which could lead to XQuery injections with their entity counterparts :) declare function biocase-lib:sanitize-input($string as xs:string?) as xs:string? { let $output := replace($string, "&", "&") let $output := replace($output, '"', """) let $output := replace($output, "'", "'") return $output }; (: sanitize several strings by calling the individual sanitize function :) declare function biocase-lib:sanitize-inputs($strings as xs:string*) as xs:string* { for $string in $strings return biocase-lib:sanitize-input($string) }; (: lowercase several strings by calling the normal lowercase function on the individual strings :) declare function biocase-lib:lower-case-all($strings as xs:string*) as xs:string* { for $string in $strings return lower-case($string) }; (: runs the index-of function for all members of a set and returns the results :) declare function biocase-lib:index-of-all($source as xs:anyAtomicType*, $searches as xs:anyAtomicType*) as xs:integer*{ for $search in $searches return index-of($source,$search) }; (: implementation of the like operator in Xquery, matched * as universal wildcard (0-many characters) and _ as one character wildcard. The match is case-insensitive. :) declare function biocase-lib:like($needle as xs:string, $haystacks as xs:string*) as xs:boolean { let $needle := lower-case($needle) let $haystacks := biocase-lib:lower-case-all($haystacks) let $result := if(count($haystacks)>0)then( let $sub-results := for $haystack in $haystacks (: call subfunction for each haystack :) let $loop-result := biocase-lib:looped-like($needle, $haystack) return $loop-result (: returns true of any of the haystacks is matched to the needle:) return contains($sub-results, true()) )else(false()) return $result }; (: reverses a string "hello world" -> "dlrow olleh" :) declare function biocase-lib:reverse-string($string as xs:string) as xs:string { codepoints-to-string(reverse(string-to-codepoints($string))) }; (: matched a needle string to a haystack string, using the biocase like rules :) (: TODO maybe also support [] and [^] syntax :) declare function biocase-lib:looped-like($needle as xs:string, $haystack as xs:string) as xs:boolean { (: tokenize the needle, so each segment separated by a * wildcard is now an individual string element in the tokens set :) let $tokens := tokenize($needle, "[*%]") (: check the first token, it has to match with the beginning of the string :) let $result1 := if(contains($tokens[1],"_"))then( (: split the first token into subtokens for the underscore wildcard :) let $subtokens := tokenize($tokens[1], "_") (: get the first position where the underscore wildcards match the current haystack:) let $matched-position := biocase-lib:generate-underscore-token-candiates($haystack, $subtokens) (: since this is the first token, the position of the match must be 1, otherwise it is no match :) return if(exists($matched-position) and $matched-position eq 1)then(true())else(false()) )else( (: the first token doesn't contain an underscore, just check using the regular starts-with function:) starts-with($haystack,$tokens[1]) ) (: if the needle doesn't contain any wildcards with multiple length, then the needle needs to be the same length as the haystack for it to be a valid match:) let $result1 := if(count($tokens) eq 1 and $result1)then( string-length($tokens[1]) eq string-length($haystack) )else($result1) (: remove the first characters of the haystack, accoding to the lenght of first token. The plus one is necessary, to remove the correct number of letters :) let $haystack := substring($haystack,string-length($tokens[1])+1) (: chained check: only check the next step if the previous one was true :) let $result2 := if($result1)then( if(count($tokens) ge 2)then( (: no further checking is necessary if there is only one token, which means no tokenizer characters are used :) if(contains($tokens[last()],"_"))then( (: check just like the first token, only this time reverse the letters in the last token and in the haystack to reuse the existing functions:) let $subtokens := tokenize(biocase-lib:reverse-string($tokens[last()]), "_") (: get the first position (i.e. last without the reversal) where the underscore wildcards match the reversed haystack:) let $matched-position := biocase-lib:generate-underscore-token-candiates(biocase-lib:reverse-string($haystack), $subtokens) (: since this is the last token, the position of the match must be at the end, but since we checked with reversed order, we just need to check if it is 1, otherwise it is no match :) return if(exists($matched-position) and $matched-position eq 1)then(true())else(false()) )else( (: the last token doesn't contain an underscore, just check using the regular ends-with function:) ends-with($haystack,$tokens[last()]) ) )else( (: there is only one token, so no second token needs to be matched: tokens[1] is the same as tokens[last()]:) true() ) )else( (: the first result was false, so the second result will also be false: chained check :) false() ) (: again: remove the number of characters of the last token, but this time from the end :) let $haystack := substring($haystack,0,string-length($haystack) - string-length($tokens[last()]) +1) (: again: chained check, only $result2 needs to be check. If $result1 was false(), $result2 will also be false() :) let $result3 := if($result2)then( (: pass with middle tokens (the first and last one are removed) and the shortened haystack to the recursive function where the remaining tokens will be checked individually:) biocase-lib:recursive-like($haystack, $tokens[position() = 2 to last()-1]) )else(false()) return $result3 }; (: TODO continue commenting here :) declare function biocase-lib:recursive-like($haystack as xs:string, $tokens as xs:string*) as xs:boolean { let $result := if(count($tokens)>0)then( let $token := $tokens[1] let $token-pos := if(contains($token,'_'))then( let $subtokens := tokenize($token, "_") return biocase-lib:generate-underscore-token-candiates($haystack, $subtokens) )else( biocase-lib:index-of-string($haystack,$token)[1] ) let $localResult := if(exists($token-pos) and $token-pos gt 0)then(true())else(false()) let $haystack := if($localResult and string-length($token) gt 0)then(substring($haystack, $token-pos + string-length($token)))else($haystack) let $recursiveResult := if($localResult)then(biocase-lib:recursive-like($haystack,$tokens[position() = 2 to last()]))else(false()) return $recursiveResult )else(true()) return $result }; declare function biocase-lib:generate-underscore-token-candiates($haystack as xs:string, $subtokens as xs:string*) as xs:integer? { if(string-length($subtokens[1]) eq 0)then( let $result := biocase-lib:generate-underscore-token-candiates(substring($haystack,2), $subtokens[position() = 2 to last()]) return if(exists($result))then($result)else() )else( (: generate list of candidates:) let $positions := biocase-lib:index-of-string($haystack,$subtokens[1]) let $substrings := for $pos in $positions return substring($haystack,$pos+string-length($subtokens[1])) let $result := biocase-lib:process-underscore-token-candiates($substrings, $positions, $subtokens[position() = 2 to last()]) return if(count($result) ge 1)then($result[1])else() ) }; (: TODO rewrite to depth search for first candidate. :) declare function biocase-lib:process-underscore-token-candiates($candidates as xs:string*, $positions as xs:integer*, $subtokens as xs:string*) as xs:int* { (: remove the first letter of each candidate, because when this function is called, there was an underscore separator that needs to be compensated for:) let $candidates := for $candidate in $candidates return substring($candidate,2) return if(count($subtokens) = 0)then( (: recursion breakout: return all candidate positions :) for $candidate at $index in $candidates return $positions[$index] )else( let $candidates := for $candidate in $candidates return if(starts-with($candidate, $subtokens[1]))then(substring($candidate,string-length($subtokens[1])+1)) (: remove the matched part, the plus one is necessary, because $x = substring($x,1), therefore only the number of characters of the token are removed from the string :) else('') return biocase-lib:process-underscore-token-candiates($candidates, $positions, $subtokens[position() = 2 to last()]) ) }; declare function biocase-lib:print-element-parents($nodes as node()*){ for $node in $nodes let $parents := biocase-lib:print-parents($node) return $parents }; declare function biocase-lib:print-attribute-parents($nodes as node()*){ for $node in $nodes let $parents := biocase-lib:print-parents($node/parent::*) return concat($parents,'[@',$node/local-name(),']') }; declare function biocase-lib:print-parents($node as node()){ let $parent := $node/parent::* return if(fn:string-length($parent/name())>0) then ( concat(biocase-lib:print-parents($parent),'/',$node/local-name()) )else( concat('/',$node/local-name()) ) }; declare function biocase-lib:build-capabilities-report() as element()? { let $concepts := {for $namespace in $biocase-settings:namespaces let $concepts := biocase-lib:get-suppored-concepts($namespace) return if(count($concepts)>0)then( {for $concept in $concepts return {$concept}} )else()} let $separator := biocase-lib:index-of-string($biocase-settings:capabilities-file,"/")[last()] let $path := xmldb:store(substring($biocase-settings:capabilities-file,0,$separator),substring($biocase-settings:capabilities-file,$separator+1), {$concepts}) return $concepts }; declare function biocase-lib:get-suppored-concepts($namespace as xs:string) as xs:string* { let $elements := util:eval('xquery version "3.0"; declare namespace namespace="' || $namespace || '"; let $collection := collection("'|| $biocase-settings:collection ||'") return $collection//namespace:*[count(./namespace:*)=0 and (./ancestor::*/namespace-uri() != "'|| $biocase-settings:edits-namespace ||'")]') let $attributes := util:eval('xquery version "3.0"; declare namespace namespace="' || $namespace || '"; let $collection := collection("'|| $biocase-settings:collection ||'") return $collection//namespace:*[./parent::* and ./ancestor::*/namespace-uri() != "'|| $biocase-settings:edits-namespace ||'"]/attribute()') return distinct-values((biocase-lib:print-element-parents($elements),biocase-lib:print-attribute-parents($attributes))) };