(: abaev-basex/xq/abv-mod.xqm
   Last modified: 2025-03-22 23:43:48 +03:00

   Note: this file contains Unicode characters (combining accents, modifier
   letters, subscripts) that can be confused with other characters. They are
   significant in the string literals and regular expressions below, so do not
   normalise or "clean up" the encoding of this file. :)

module namespace abv-m = 'http://ossetic-studies.org/ns/abaevdict-mod';
declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace abv = "http://ossetic-studies.org/ns/abaevdict";
(:~
 : Cleans up a string in two steps: first deletes every occurrence of the
 : character(s) contained in the first string literal passed to translate()
 : (mapped to nothing, because the replacement string is empty), then applies
 : normalize-space() to trim and collapse whitespace.
 :
 : NOTE(review): the translate() literal spans a physical line break, so it
 : contains at least a newline character; whether it also holds an invisible
 : character cannot be told from the rendered source - confirm before editing.
 :
 : @param $str the string to clean; may be the empty sequence
 : @return the cleaned string
 :)
declare function abv-m:normalize-str($str as xs:string?)
as xs:string? {
$str => translate('
','') => normalize-space()
};
(:~
 : Converts a headword into a sort key in which every letter (or digraph) of
 : the dictionary alphabet is replaced by a single ASCII character. The key
 : characters used are '/', '1'..'9' and 'A'..'X', assigned in alphabet order,
 : so comparing keys character by character yields the dictionary ordering
 : (assuming a collation that orders these ASCII characters by code point).
 :
 : The order of the replace() steps is significant:
 :   - ignorable characters (hyphen, parentheses, combining acute, asterisk,
 :     'ᵆ') are stripped first;
 :   - multi-character patterns (ejectives with 'ʼ', labialised consonants with
 :     one of '˳₀ₒ') are replaced before the corresponding plain letter;
 :   - every key character that is itself a Latin capital is only produced
 :     after the rule that consumes that capital has already run, so keys are
 :     never re-translated by a later step.
 : Characters not covered by any rule are left in the key unchanged.
 :
 : Upper- and lower-case forms, and accented/long variants of a vowel, map to
 : the same key character.
 :
 : NOTE(review): 'ǧ' and 'h' both map to 'A', and the key characters 'C' and
 : 'G' are never produced - confirm that this is intended.
 :
 : @param $word the headword to convert
 : @return the sort key
 :)
declare function abv-m:sortKey($word as xs:string)
as xs:string {
  $word
  (: --- characters ignored for sorting --- :)
  => replace('-','')
  => replace('\(','')
  => replace('\)','')
  => replace('́','') (: combining acute :)
  => replace('\*','')
  => replace('ᵆ','')
  (: --- a --- :)
  => replace('a','/')
  => replace('A', '/')
  => replace('ā', '/')
  => replace('Ā', '/')
  => replace('á', '/')
  => replace('Á', '/')
  (: the next two rules presumably never match if their acute is the
     combining character already stripped above; kept unchanged :)
  => replace('ā́', '/')
  => replace('Ā́', '/')
  (: --- æ --- :)
  => replace('æ', '1')
  => replace('Æ', '1')
  => replace('ǽ', '1')
  => replace('Ǽ', '1')
  (: --- b --- :)
  => replace('b', '2')
  => replace('B', '2')
  (: --- cʼ before c --- :)
  => replace('cʼ', '4')
  => replace('Cʼ', '4')
  => replace('c', '3')
  => replace('C', '3')
  (: --- d --- :)
  => replace('d', '5')
  => replace('D', '5')
  (: --- ʒ / ǯ --- :)
  => replace('ʒ', '6')
  => replace('Ʒ', '6')
  => replace('ǯ', '6')
  (: upper-case counterpart of the rule above; it was missing, which left a
     capital 'Ǯ' untranslated in the key :)
  => replace('Ǯ', '6')
  (: --- e --- :)
  => replace('e', '7')
  => replace('E', '7')
  => replace('é', '7')
  => replace('É', '7')
  (: --- f --- :)
  => replace('f', '8')
  => replace('F', '8')
  (: --- g (labialised forms first) --- :)
  => replace('g[˳₀ₒ]', '9')
  => replace('G[˳₀ₒ]', '9')
  => replace('g', '9')
  => replace('G', '9')
  => replace('ǵ', '9')
  => replace('Ǵ', '9')
  (: --- ǧ (labialised forms first) --- :)
  => replace('ǧ[˳₀ₒ]', 'A')
  => replace('Ǧ[˳₀ₒ]', 'A')
  => replace('ǧ', 'A')
  => replace('Ǧ', 'A')
  (: --- h --- :)
  => replace('h', 'A')
  => replace('H', 'A')
  (: --- i --- :)
  => replace('i', 'B')
  => replace('I', 'B')
  => replace('í', 'B')
  => replace('Í', 'B')
  => replace('ī', 'B')
  => replace('Ī', 'B')
  => replace('ī́', 'B')
  => replace('Ī́', 'B')
  (: --- j --- :)
  => replace('j', 'D')
  => replace('J', 'D')
  (: --- kʼ and k (longest patterns first) --- :)
  => replace('kʼ[˳₀ₒ]', 'F')
  => replace('Kʼ[˳₀ₒ]', 'F')
  => replace('kʼ', 'F')
  => replace('Kʼ', 'F')
  => replace('k[˳₀ₒ]', 'E')
  => replace('K[˳₀ₒ]', 'E')
  => replace('k', 'E')
  => replace('K', 'E')
  => replace('ḱʼ', 'F')
  => replace('Ḱʼ', 'F')
  => replace('ḱ', 'E')
  => replace('Ḱ', 'E')
  (: --- l, m, n --- :)
  => replace('l', 'H')
  => replace('L', 'H')
  => replace('m', 'I')
  => replace('M', 'I')
  => replace('n', 'J')
  => replace('N', 'J')
  (: --- o --- :)
  => replace('o', 'K')
  => replace('O', 'K')
  => replace('ó', 'K')
  => replace('Ó', 'K')
  (: --- pʼ before p --- :)
  => replace('pʼ', 'M')
  => replace('Pʼ', 'M')
  => replace('p', 'L')
  => replace('P', 'L')
  (: --- q (labialised forms first) --- :)
  => replace('q[˳₀ₒ]', 'N')
  => replace('Q[˳₀ₒ]', 'N')
  => replace('q', 'N')
  => replace('Q', 'N')
  (: --- r, s --- :)
  => replace('r', 'O')
  => replace('R', 'O')
  => replace('s', 'P')
  => replace('S', 'P')
  (: --- tʼ before t --- :)
  => replace('tʼ', 'R')
  => replace('Tʼ', 'R')
  => replace('t', 'Q')
  => replace('T', 'Q')
  (: --- u --- :)
  => replace('u', 'S')
  => replace('U', 'S')
  => replace('ú', 'S')
  => replace('Ú', 'S')
  => replace('ū', 'S')
  => replace('Ū', 'S')
  => replace('ū́', 'S')
  => replace('Ū́', 'S')
  (: --- v, w --- :)
  => replace('v', 'T')
  => replace('V', 'T')
  => replace('w', 'U')
  => replace('W', 'U')
  (: --- x (labialised forms first) --- :)
  => replace('x[˳₀ₒ]', 'V')
  => replace('X[˳₀ₒ]', 'V')
  => replace('x', 'V')
  => replace('X', 'V')
  (: --- y --- :)
  => replace('y', 'W')
  => replace('Y', 'W')
  => replace('ý', 'W')
  => replace('Ý', 'W')
  (: --- z / ž --- :)
  => replace('z', 'X')
  => replace('Z', 'X')
  => replace('ž', 'X')
  => replace('Ž', 'X')
};
(:~
 : Sorts dictionary documents by headword, in dictionary order.
 :
 : The headword is the first text node of the first tei:orth inside the first
 : tei:form of the first tei:entry child of each document; it is converted to a
 : comparable key with abv-m:sortKey().
 :
 : @param $docs the documents to sort
 : @return the same documents, ordered by ascending sort key
 :)
declare function abv-m:sort-collection($docs as document-node()+) {
  sort($docs, (), function($d) {
    (: string() turns a missing headword text node into '' instead of passing
       an empty sequence to the xs:string parameter of abv-m:sortKey(), which
       would raise a type error and abort the whole sort. :)
    abv-m:sortKey(string($d/tei:entry[1]/tei:form[1]/tei:orth[1]/text()[1]))
  })
};
(:~
 : Sorts nodes in dictionary order, using the text directly contained in each
 : node (its child text nodes; text inside child elements is not considered) as
 : input to abv-m:sortKey().
 :
 : @param $docs the nodes to sort
 : @return the same nodes, ordered by ascending sort key
 :)
declare function abv-m:sort-nodes($docs as node()+) {
  sort($docs, (), function($d) {
    (: string-join() yields exactly one string whether the node has zero, one
       or several child text nodes; passing $d/text() directly raised a type
       error for anything but exactly one text node. :)
    abv-m:sortKey(string-join($d/text()))
  })
};
(:~
 : Applies a fixed pipeline of XSL transformations to each source document to
 : create a single-language, TEI-conformant entry, and returns the first
 : tei:entry found under tei:TEI/tei:text/tei:body of each result.
 :
 : Pipeline, in order: strip-space, delete-lang (removes the *other* language),
 : insert-refs (receives the serialized lookup table and bibliography),
 : insert-langs, standardize, change-default-lang.
 :
 : @param $src  the source documents
 : @param $lang the language to keep; 'en' causes 'ru' to be deleted, any other
 :              value causes 'en' to be deleted
 : @return one tei:entry element per transformed document
 :)
declare function abv-m:make-lng($src as document-node()+,
                                $lang as xs:string)
as node()+ {
  (: These values do not depend on the document being processed, so they are
     computed once instead of once per document. :)
  let $del_lang := if ($lang = 'en') then 'ru' else 'en'
  let $lookup-raw := serialize(doc('abaevdict_index/lookup.xml'))
  let $biblio-raw := serialize(doc('abaevdict_index/abaev_biblio.xml'))
  for $doc in $src
  return ($doc => xslt:transform(doc('../xsl/strip-space.xsl'))
               => xslt:transform(doc('../xsl/delete-lang.xsl'),
                                 {'lang': $del_lang})
               => xslt:transform(doc('../xsl/insert-refs.xsl'),
                                 {'lookup-raw': $lookup-raw,
                                  'biblio-raw': $biblio-raw,
                                  'bib-lang': $lang})
               => xslt:transform(doc('../xsl/insert-langs.xsl'),
                                 {'name-lang': $lang})
               => xslt:transform(doc('../xsl/standardize.xsl'),
                                 {'standardize-lang': $lang})
               => xslt:transform(doc('../xsl/change-default-lang.xsl'),
                                 {'default-lang': $lang})
         )/tei:TEI/tei:text/tei:body/tei:entry[1]
};
(:~
 : Returns a copy of $abbr carrying a title attribute that spells out the
 : language names referenced by its data-lang attribute.
 :
 : data-lang is split on ','; every record of langnames.xml whose code equals
 : one of the resulting tokens contributes the text of its child element named
 : $lang. The names are joined with ', '.
 :
 : @param $abbr the node to annotate
 : @param $lang name of the langnames.xml column holding the display names
 : @return the annotated copy of $abbr
 :)
declare function abv-m:insert-full-lang($abbr as node(), $lang as xs:string) {
  let $codes := tokenize($abbr/@data-lang,',')
  let $names := doc('abaevdict_index/langnames.xml')
                  /csv[1]/record[code/text() = $codes]/*[name()=$lang]/text()
  return
    copy $annotated := $abbr
    modify (
      insert node attribute title { string-join($names,', ') }
        as first into $annotated
    )
    return $annotated
};
(:~
 : Renders each source document with abaev2html.xsl and then replaces every
 : abbr element of the result (no namespace) by the copy produced by
 : abv-m:insert-full-lang(), i.e. with a title attribute added.
 :
 : @param $src  the documents to render
 : @param $lang passed to the stylesheet as parameter 'lang' and used to pick
 :              the language-name column for the abbr titles
 : @return one rendered result per source document
 :)
declare function abv-m:make-html($src as document-node()+,
                                 $lang as xs:string)
as node()+ {
  for $entry in $src
  return (
    xslt:transform($entry, doc('../xsl/abaev2html.xsl'), {'lang': $lang})
      transform with {
        (: the context item here is the copied transformation result :)
        for $plain in //abbr
        return replace node $plain
               with abv-m:insert-full-lang($plain,$lang)
      }
  )
};
(:~
 : Builds a map describing one 'scattergeo' marker trace for the languages
 : mentioned in a document, to be fed to a geographic map later.
 :
 : For every distinct xml:lang value found on innermost tei:mentioned elements
 : (those without a tei:mentioned child) one data point is produced, provided
 : that
 :   - the language is not 'os' and does not start with 'os-',
 :   - langnames.xml has a record for it whose long and lat are not '-99',
 :   - a selected tei:mentioned element has a tei:w child.
 : Each point supplies the word text, the language name taken from the record
 : child named $lang, and the coordinates. The points are turned into four
 : parallel arrays under the keys 'text', 'hovertext', 'lon' and 'lat'.
 :
 : NOTE(review): in $doc//tei:mentioned[...][1] the [1] applies per parent
 : element, so $ment may hold several nodes; the word actually used is the
 : first tei:w text node among them in document order.
 :
 : @param $doc  the entry document
 : @param $lang name of the langnames.xml column used for the hover text
 : @return a map with the fixed trace settings plus the four data arrays; the
 :         arrays are empty when no language qualifies
 :)
declare function abv-m:make-geomap($doc as document-node(), $lang as xs:string) {
  (: The language table does not depend on the loop variable: read it once. :)
  let $langnames := doc(`abaevdict_index/langnames.xml`)/csv[1]
  let $list := for $mlang in distinct-values($doc//tei:mentioned[not(tei:mentioned)]/@xml:lang)
               let $ment := $doc//tei:mentioned[not(tei:mentioned) and @xml:lang=$mlang][1]
               let $linfo := $langnames/record[code/text() = $mlang]
               where $mlang != 'os'
                 and not(starts-with($mlang,'os-'))
                 and $linfo/long[1] != '-99'
                 and $linfo/lat[1] != '-99'
                 and $ment[tei:w]
               return {
                 'text': ($ment/tei:w/text())[1],
                 'hovertext': string($linfo/*[name()=$lang][1]/text()),
                 'lon': xs:float($linfo/long[1]),
                 'lat': xs:float($linfo/lat[1])
               }
  let $settings := {'type': 'scattergeo',
                    'mode': 'markers+text',
                    'hoverinfo': 'text',
                    'textposition': 'top center',
                    'marker': {'size': 7,
                               'line': {'width': 1}
                              }
                   }
  (: Iterate over the known point keys rather than map:keys($list[1]): the
     latter raises a type error when $list is empty, i.e. whenever a document
     mentions no language that can be placed on the map. :)
  let $columns := for $k in ('text', 'hovertext', 'lon', 'lat')
                  let $seq := for $e in $list return $e($k)
                  return map:entry($k, array { $seq })
  return map:merge(($settings, $columns))
};
(:~
 : Looks up the display name of a language in langnames.xml.
 :
 : @param $id   language code, compared with the text of record/code
 : @param $lang name of the record child element holding the wanted name
 : @return the text node(s) of the matching element; empty if nothing matches
 :)
declare function abv-m:langname-by-id($id as xs:string, $lang as xs:string) {
  let $record := doc('abaevdict_index/langnames.xml')/csv[1]/record[code/text()=$id]
  return $record/*[name()=$lang]/text()
};
(:~
 : Looks up an entry in the lookup table (lookup.xml) by its xml:id.
 :
 : @param $id the xml:id of the tei:entry row to find
 : @return the child text node(s) of the matching tei:entry; empty if none
 :)
declare function abv-m:entry-form-by-id($id as xs:string) {
  let $rows := doc(`abaevdict_index/lookup.xml`)/tei:table[1]/tei:entry
  return $rows[@xml:id=$id]/text()
};
(:~
 : Returns a copy of $doc in which every node selected by $path is wrapped in
 : an abv:mark element.
 :
 : $path is evaluated as an XQuery expression with the copied document as its
 : context item.
 :
 : NOTE(review): $path is executed verbatim through xquery:eval(); it must
 : never be built from untrusted input.
 :
 : @param $doc  the document to copy and mark up
 : @param $path XQuery expression selecting the nodes to wrap
 : @return the modified copy of $doc
 :)
declare function abv-m:mark-element($doc as document-node(), $path as xs:string) {
  $doc transform with {
    for $target in xquery:eval($path, {'': .})
    return replace node $target with <abv:mark>{$target}</abv:mark>
  }
};
(:~
 : Full-text search over the dictionary; used in the API and elsewhere.
 :
 : Every document of the collection abaevdict_{$db-lang}/xml is scanned. The
 : nodes tested depend on $type:
 :   "full"        all text nodes of the document (also used for unknown types)
 :   "form"        tei:orth of the entry's forms
 :   "sense"       the entry's tei:sense children
 :   "example"     tei:quote inside tei:cit[@type='example']
 :   "translation" tei:cit[@type='translation']
 :   "mentioned"   tei:m, tei:w, tei:phr, tei:s inside tei:mentioned
 :   "gloss"       tei:gloss anywhere in the entry
 :   "etym"        text nodes inside the entry's first tei:etym
 : A node is a hit when it satisfies `contains text {$query}`.
 :
 : NOTE(review): $db-lang is interpolated into the collection path as is;
 : callers should restrict it to known database suffixes.
 :
 : @param $db-lang suffix of the database name to search
 : @param $type    the kind of content to search (see above)
 : @param $query   the full-text query
 : @return an array with one map per matching document, ordered by headword
 :         sort key; each map holds 'entry_id' (xml:id of the entry) and 'path'
 :         (array of path() strings of the matching nodes)
 :)
declare function abv-m:search($db-lang as xs:string,
                              $type as xs:string,
                              $query as xs:string) {
  (: Pick the node selector once. The paths are compiled statically instead of
     being handed to xquery:eval() as a string for every document; all of them
     are rooted at the document node. :)
  let $select :=
    switch($type)
      case "full" return function($d) { $d//text() }
      case "form" return function($d) { $d/tei:entry[1]/tei:form/tei:orth }
      case "sense" return function($d) { $d/tei:entry[1]/tei:sense }
      case "example" return function($d) { $d/tei:entry[1]//tei:cit[@type='example']/tei:quote }
      case "translation" return function($d) { $d/tei:entry[1]//tei:cit[@type='translation'] }
      case "mentioned" return function($d) { $d/tei:entry[1]//tei:mentioned/(tei:m|tei:w|tei:phr|tei:s) }
      case "gloss" return function($d) { $d/tei:entry[1]//tei:gloss }
      case "etym" return function($d) { $d/tei:entry[1]/tei:etym[1]//text() }
      default return function($d) { $d//text() }
  return array {
    for $doc in collection(`abaevdict_{$db-lang}/xml`)
    let $hits := for $node in $select($doc)
                 where $node contains text {$query}
                 return path($node)
    where exists($hits)
    (: string() keeps a document without a headword from raising a type error
       in abv-m:sortKey(); such documents sort first. :)
    order by abv-m:sortKey(string($doc/tei:entry[1]/tei:form[1]/tei:orth[1]))
    return {'entry_id': string($doc/tei:entry[1]/@xml:id),
            'path': array:build($hits)}
  }
};