(: abaev-basex/xq/abv-mod.xqm
   (306 lines, 9.5 KiB; header copied from the repository web view,
   revision dated 2025-03-21 14:14:03 +03:00) :)
module namespace abv-m = 'http://ossetic-studies.org/ns/abaevdict-mod';
declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace abv = "http://ossetic-studies.org/ns/abaevdict";
(:~
 : Cleans up a string in two steps: translate() deletes every occurrence of
 : the character held in its second argument, then normalize-space() trims
 : the result and collapses inner whitespace runs to single spaces.
 :
 : NOTE(review): the translate() literal spans a physical line break in this
 : copy of the file; confirm which character it holds in the repository.
 :
 : @param $str string to clean; may be the empty sequence
 : @return the cleaned string (an empty sequence yields the zero-length string)
 :)
declare function abv-m:normalize-str($str as xs:string?)
as xs:string? {
$str => translate('
','') => normalize-space()
};
(:~
 : Converts a headword into a collation key whose plain codepoint order
 : follows the dictionary's alphabetical order.
 :
 : Steps, applied strictly in the order written:
 :   1. hyphens, parentheses, the combining acute, asterisks and 'ᵆ' are removed;
 :   2. every letter (both cases, with or without accent) is replaced by one
 :      key symbol from the series '/', '1'..'9', 'A'..'X';
 :   3. digraphs (cʼ, kʼ, pʼ, tʼ) and labialised consonants (a consonant
 :      followed by one of ˳ ₀ ₒ) are replaced before their base letter, so the
 :      whole cluster yields a single key symbol.
 :
 : The order of the steps matters: every first argument is a regular
 : expression, and a key symbol such as 'B' (produced from 'i') is only
 : emitted after the step that consumes the input letter 'B' has already run.
 : Do not reorder the chain without re-checking this.
 :
 : NOTE(review): 'ǧ' and 'h' both map to 'A', while 'C' and 'G' are never
 : emitted; confirm that this is intended.
 :
 : @param $word headword to convert (exactly one string)
 : @return the collation key
 :)
declare function abv-m:sortKey($word as xs:string)
as xs:string {
$word => replace('-','')
=> replace('\(','')
=> replace('\)','')
=> replace('́','') (: combining acute :)
=> replace('\*','')
=> replace('ᵆ','')
(: vowels a / æ :)
=> replace('a','/')
=> replace('A', '/')
=> replace('ā', '/')
=> replace('Ā', '/')
=> replace('á', '/')
=> replace('Á', '/')
=> replace('ā́', '/')
=> replace('Ā́', '/')
=> replace('æ', '1')
=> replace('Æ', '1')
=> replace('ǽ', '1')
=> replace('Ǽ', '1')
=> replace('b', '2')
=> replace('B', '2')
(: the ejective digraph must be consumed before plain c :)
=> replace('cʼ', '4')
=> replace('Cʼ', '4')
=> replace('c', '3')
=> replace('C', '3')
=> replace('d', '5')
=> replace('D', '5')
=> replace('ʒ', '6')
=> replace('Ʒ', '6')
=> replace('ǯ', '6')
=> replace('Ǯ', '6') (: upper-case partner of ǯ, like every other letter here :)
=> replace('e', '7')
=> replace('E', '7')
=> replace('é', '7')
=> replace('É', '7')
=> replace('f', '8')
=> replace('F', '8')
(: labialised variants first, so the labialisation mark is absorbed :)
=> replace('g[˳₀ₒ]', '9')
=> replace('G[˳₀ₒ]', '9')
=> replace('g', '9')
=> replace('G', '9')
=> replace('ǵ', '9')
=> replace('Ǵ', '9')
=> replace('ǧ[˳₀ₒ]', 'A')
=> replace('Ǧ[˳₀ₒ]', 'A')
=> replace('ǧ', 'A')
=> replace('Ǧ', 'A')
=> replace('h', 'A')
=> replace('H', 'A')
=> replace('i', 'B')
=> replace('I', 'B')
=> replace('í', 'B')
=> replace('Í', 'B')
=> replace('ī', 'B')
=> replace('Ī', 'B')
=> replace('ī́', 'B')
=> replace('Ī́', 'B')
=> replace('j', 'D')
=> replace('J', 'D')
(: longest k-clusters first: kʼ + labialisation, kʼ, k + labialisation, k :)
=> replace('kʼ[˳₀ₒ]', 'F')
=> replace('Kʼ[˳₀ₒ]', 'F')
=> replace('kʼ', 'F')
=> replace('Kʼ', 'F')
=> replace('k[˳₀ₒ]', 'E')
=> replace('K[˳₀ₒ]', 'E')
=> replace('k', 'E')
=> replace('K', 'E')
=> replace('ḱʼ', 'F')
=> replace('Ḱʼ', 'F')
=> replace('ḱ', 'E')
=> replace('Ḱ', 'E')
=> replace('l', 'H')
=> replace('L', 'H')
=> replace('m', 'I')
=> replace('M', 'I')
=> replace('n', 'J')
=> replace('N', 'J')
=> replace('o', 'K')
=> replace('O', 'K')
=> replace('ó', 'K')
=> replace('Ó', 'K')
=> replace('pʼ', 'M')
=> replace('Pʼ', 'M')
=> replace('p', 'L')
=> replace('P', 'L')
=> replace('q[˳₀ₒ]', 'N')
=> replace('Q[˳₀ₒ]', 'N')
=> replace('q', 'N')
=> replace('Q', 'N')
=> replace('r', 'O')
=> replace('R', 'O')
=> replace('s', 'P')
=> replace('S', 'P')
=> replace('tʼ', 'R')
=> replace('Tʼ', 'R')
=> replace('t', 'Q')
=> replace('T', 'Q')
=> replace('u', 'S')
=> replace('U', 'S')
=> replace('ú', 'S')
=> replace('Ú', 'S')
=> replace('ū', 'S')
=> replace('Ū', 'S')
=> replace('ū́', 'S')
=> replace('Ū́', 'S')
=> replace('v', 'T')
=> replace('V', 'T')
=> replace('w', 'U')
=> replace('W', 'U')
=> replace('x[˳₀ₒ]', 'V')
=> replace('X[˳₀ₒ]', 'V')
=> replace('x', 'V')
=> replace('X', 'V')
=> replace('y', 'W')
=> replace('Y', 'W')
=> replace('ý', 'W')
=> replace('Ý', 'W')
=> replace('z', 'X')
=> replace('Z', 'X')
=> replace('ž', 'X')
=> replace('Ž', 'X')
};
(:~
 : Sorts entry documents by the collation key of their headword.
 : The headword is the first text node of the first tei:orth inside the
 : first tei:form of the document's first tei:entry child.
 :
 : @param $docs one or more entry documents
 : @return the same documents ordered by abv-m:sortKey() of their headword
 :)
declare function abv-m:sort-collection($docs as document-node()+) {
  let $headword-key := function($entry-doc) {
    abv-m:sortKey($entry-doc/tei:entry[1]/tei:form[1]/tei:orth[1]/text()[1])
  }
  return sort($docs, (), $headword-key)
};
(:~
 : Sorts arbitrary nodes by the collation key of their text-node children.
 :
 : @param $docs one or more nodes; each node's child text is passed to
 :        abv-m:sortKey(), which accepts exactly one string
 : @return the same nodes ordered by that key
 :)
declare function abv-m:sort-nodes($docs as node()+) {
  let $text-key := function($item) { abv-m:sortKey($item/text()) }
  return $docs => sort((), $text-key)
};
(:~
 : Runs each source document through the XSLT pipeline and returns the
 : resulting tei:entry element (one per input document).
 :
 : Pipeline, in order: strip-space, delete-lang, insert-refs, insert-langs,
 : standardize, change-default-lang. The language handed to delete-lang is
 : 'ru' when $lang is 'en', and 'en' for any other value of $lang.
 :
 : @param $src one or more source documents
 : @param $lang language passed to the stylesheets
 : @return the first tei:entry under tei:TEI/tei:text/tei:body of each result
 :)
declare function abv-m:make-lng($src as document-node()+,
                                $lang as xs:string)
as node()+ {
  let $del_lang := if ($lang = 'en') then 'ru' else 'en'
  for $doc in $src
  let $stripped := xslt:transform($doc, doc('../xsl/strip-space.xsl'))
  let $single-lang := xslt:transform($stripped, doc('../xsl/delete-lang.xsl'),
                                     {'lang': $del_lang})
  (: the lookup table and the bibliography are handed over as serialized XML strings :)
  let $with-refs := xslt:transform($single-lang, doc('../xsl/insert-refs.xsl'),
                                   {'lookup-raw':
                                      serialize(doc('abaevdict_index/lookup.xml')),
                                    'biblio-raw':
                                      serialize(doc('abaevdict_index/abaev_biblio.xml')),
                                    'bib-lang': $lang})
  let $with-langs := xslt:transform($with-refs, doc('../xsl/insert-langs.xsl'),
                                    {'name-lang': $lang})
  let $standardized := xslt:transform($with-langs, doc('../xsl/standardize.xsl'),
                                      {'standardize-lang': $lang})
  let $localized := xslt:transform($standardized, doc('../xsl/change-default-lang.xsl'),
                                   {'default-lang': $lang})
  return $localized/tei:TEI/tei:text/tei:body/tei:entry[1]
};
(:~
 : Returns a copy of $abbr carrying a title attribute with full language names.
 :
 : The codes are read from the comma-separated @data-lang attribute; the names
 : are the text of the child element named $lang in every record of
 : abaevdict_index/langnames.xml whose code matches, joined with ', '.
 :
 : @param $abbr node to annotate (the original is not modified)
 : @param $lang name of the record child element holding the language name
 : @return the annotated copy
 :)
declare function abv-m:insert-full-lang($abbr as node(), $lang as xs:string) {
  let $codes := tokenize($abbr/@data-lang, ',')
  let $names := doc('abaevdict_index/langnames.xml')/csv[1]/record[code/text() = $codes]/*[name() = $lang]/text()
  return
    copy $annotated := $abbr
    modify insert node attribute title { string-join($names, ', ') } as first into $annotated
    return $annotated
};
(:~
 : Renders each document with abaev2html.xsl, then replaces every abbr
 : element of the result with the copy produced by abv-m:insert-full-lang().
 :
 : @param $src one or more documents to render
 : @param $lang language passed to the stylesheet and used for the titles
 : @return one transformed result per input document
 :)
declare function abv-m:make-html($src as document-node()+,
                                 $lang as xs:string)
as node()+ {
  for $entry-doc in $src
  let $rendered := xslt:transform($entry-doc,
                                  doc('../xsl/abaev2html.xsl'),
                                  {'lang': $lang})
  return $rendered transform with {
    for $abbr in //abbr
    return replace node $abbr with abv-m:insert-full-lang($abbr, $lang)
  }
};
(:~
 : Builds one trace map ('type': 'scattergeo') from the tei:mentioned forms
 : of a document, to be fed to a geo map later.
 : (The keys look like the Plotly scattergeo format; confirm against the consumer.)
 :
 : For every distinct @xml:lang on an innermost tei:mentioned (one without a
 : nested tei:mentioned), the first such element of that language contributes
 : one point, provided that
 :   - the language is neither 'os' nor starts with 'os-',
 :   - its langnames.xml record has long and lat values other than '-99',
 :   - the element has a tei:w child.
 :
 : The per-point values are then gathered into the arrays 'text',
 : 'hovertext', 'lon' and 'lat'. The keys are listed explicitly, not read
 : from the first point, so a document without any qualifying form yields
 : empty arrays instead of a type error.
 :
 : NOTE(review): array { ... } flattens empty values, so a tei:w without a
 : direct text node would make 'text' shorter than the other arrays; confirm
 : that this cannot occur in the data.
 :
 : @param $doc entry document to scan
 : @param $lang name of the langnames.xml record child used as hover text
 : @return the trace map
 :)
declare function abv-m:make-geomap($doc as document-node(), $lang as xs:string) {
  let $point-keys := ('text', 'hovertext', 'lon', 'lat')
  let $points :=
    for $mlang in distinct-values($doc//tei:mentioned[not(tei:mentioned)]/@xml:lang)
    let $ment := $doc//tei:mentioned[not(tei:mentioned) and @xml:lang=$mlang][1]
    let $linfo := doc(`abaevdict_index/langnames.xml`)/csv[1]/record[code/text() = $mlang]
    where $mlang != 'os'
      and not(starts-with($mlang,'os-'))
      and $linfo/long[1] != '-99'
      and $linfo/lat[1] != '-99'
      and $ment[tei:w]
    return {
      'text': ($ment/tei:w/text())[1],
      'hovertext': string($linfo/*[name()=$lang][1]/text()),
      'lon': xs:float($linfo/long[1]),
      'lat': xs:float($linfo/lat[1])
    }
  (: one array per key, holding that key's value from every point :)
  let $columns :=
    for $k in $point-keys
    return map:entry($k, array { $points ! map:get(., $k) })
  return map:merge(({'type': 'scattergeo',
                     'mode': 'markers+text',
                     'hoverinfo': 'text',
                     'textposition': 'top center',
                     'marker': {'size': 7,
                                'line': {'width': 1}
                               }
                    },
                    $columns))
};
(:~
 : Looks up a language name in abaevdict_index/langnames.xml.
 :
 : @param $id language code, compared with the text of each record's code element
 : @param $lang name of the record child element holding the wanted name
 : @return the matching text node(s); empty if nothing matches
 :)
declare function abv-m:langname-by-id($id as xs:string, $lang as xs:string) {
  let $records := doc('abaevdict_index/langnames.xml')/csv[1]/record[code/text() = $id]
  return $records/*[name() = $lang]/text()
};
(:~
 : Looks up the form stored for an entry id in abaevdict_index/lookup.xml.
 :
 : @param $id value compared with @xml:id of the tei:entry elements in the first tei:table
 : @return the text node children of the matching tei:entry; empty if none matches
 :)
declare function abv-m:entry-form-by-id($id as xs:string) {
  doc(`abaevdict_index/lookup.xml`)/tei:table[1]/tei:entry[@xml:id=$id]/text()
};
(:~
 : Returns a copy of $doc in which every node selected by $path is wrapped
 : in an abv:mark element.
 :
 : NOTE(review): $path is evaluated with xquery:eval(), so it can contain any
 : XQuery expression; confirm that callers never pass unchecked external input.
 :
 : @param $doc document to copy and annotate (the original is not modified)
 : @param $path XQuery expression, evaluated with the copy as context item
 : @return the annotated copy
 :)
declare function abv-m:mark-element($doc as document-node(), $path as xs:string) {
  $doc transform with {
    for $target in xquery:eval($path, {'': .})
    return replace node $target with <abv:mark>{$target}</abv:mark>
  }
};
(:~
 : Full-text search over the entry collection; used in the API and elsewhere.
 :
 : $type selects the part of each entry that is searched. Every recognised
 : value maps to a fixed path expression and any other value falls back to
 : all text nodes, so only these fixed strings reach xquery:eval(); $query is
 : never evaluated as code. The "gloss" and "etym" paths have no leading
 : slash; they are evaluated with the document as context item.
 :
 : @param $db-lang suffix of the database name (abaevdict_{$db-lang})
 : @param $type one of "full", "form", "sense", "example", "translation",
 :        "mentioned", "gloss", "etym"
 : @param $query full-text query matched with 'contains text'
 : @return an array with one map per matching document, ordered by the
 :         collation key of the headword: 'entry_id' is the entry's @xml:id,
 :         'path' an array with the path() of every matching node
 :)
declare function abv-m:search($db-lang as xs:string,
                              $type as xs:string,
                              $query as xs:string) {
  let $prolog := 'declare namespace tei = "http://www.tei-c.org/ns/1.0";'
  let $target :=
    switch($type)
      case "full" return "//text()"
      case "form" return "/tei:entry[1]/tei:form/tei:orth"
      case "sense" return "/tei:entry[1]/tei:sense"
      case "example" return "/tei:entry[1]//tei:cit[@type='example']/tei:quote"
      case "translation" return "/tei:entry[1]//tei:cit[@type='translation']"
      case "mentioned" return "/tei:entry[1]//tei:mentioned/(tei:m|tei:w|tei:phr|tei:s)"
      case "gloss" return "tei:entry[1]//tei:gloss"
      case "etym" return "tei:entry[1]/tei:etym[1]//text()"
      default return "//text()"
  let $pexpr := $prolog || $target
  return array {
    for $doc in collection(`abaevdict_{$db-lang}/xml`)
    let $hits :=
      for $candidate in xquery:eval($pexpr, {'': $doc})
      where $candidate contains text {$query}
      return path($candidate)
    where exists($hits)
    order by abv-m:sortKey($doc/tei:entry[1]/tei:form[1]/tei:orth[1])
    return {'entry_id': string($doc/tei:entry[1]/@xml:id),
            'path': array { $hits }}
  }
};