(: File: abaev-basex/xq/gen-mentioned-index.xq
   Timestamp: 2025-03-22 18:32:23 +03:00
   File metadata: 115 lines, no EOL, 3.9 KiB, text :)

declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace abv = "http://ossetic-studies.org/ns/abaevdict";
import module namespace abv-m = 'http://ossetic-studies.org/ns/abaevdict-mod' at './abv-mod.xqm';
(:~
 : Returns the gloss text that applies to a mentioned form.
 :
 : Starting at $ment, the function climbs through directly nested
 : tei:mentioned elements and returns the text nodes found under
 : tei:gloss/tei:q of the nearest one that has a tei:gloss child.
 : The climb stops (yielding the empty sequence) as soon as the current
 : node is not a tei:mentioned element.
 :
 : The element test is namespace-aware, so it does not depend on which
 : prefix (if any) the source document uses for the TEI namespace.
 :
 : @param $ment node to start the lookup from
 : @return text of the tei:gloss/tei:q descendants, coerced to strings;
 :         empty if no enclosing tei:mentioned carries a gloss
 :)
declare function local:get-gloss($ment as node())
as xs:string* {
  if ($ment instance of element(tei:mentioned)) then (
    if (exists($ment/tei:gloss))
    then $ment/tei:gloss/tei:q/text()
    (: The simple-map operator skips the call when there is no parent,
       so a detached element yields () instead of a type error. :)
    else $ment/.. ! local:get-gloss(.)
  )
  else ()
};
(:~
 : Builds one index fragment for every text node found directly inside the
 : tei:w children of $ment.
 :
 : Each fragment is a three-level map:
 :   word text → $id → { 'gloss': gloss strings, 'node': path($ment) }
 :
 : @param $ment node whose tei:w children supply the words
 : @param $id   identifier used as the second-level key
 : @return one map per tei:w text node (possibly none)
 :)
declare function local:get-ment($ment as node(), $id as xs:string)
as map(xs:string, map(xs:string, map(xs:string, item()*)))* {
  (: The occurrence record is the same for every word of this node. :)
  let $occurrence := map {
    'gloss': local:get-gloss($ment),
    'node': path($ment)
  }
  let $by-entry := map { $id : $occurrence }
  return $ment/tei:w/text() ! map { . : $by-entry }
};
(: Metalanguage code; it selects the collection to read and is passed to
   abv-m:langname-by-id when ordering the languages. :)
declare variable $metalang external := 'en';

(: Merge option shared by all steps: keep every value of a colliding key. :)
let $combine := {'duplicates': 'combine'}
let $entries := collection(`abaevdict_{$metalang}/xml`)
(: To try the query on a single document, bind $entries to
   doc(`abaevdict_{$metalang}/xml/entry_mad.xml`) instead. :)

(: Step 1: one isolated map per tei:mentioned having a tei:w child,
   keyed by its @xml:lang and holding the fragments from local:get-ment. :)
let $fragments :=
  for $entry-doc in $entries
  let $entry-id := $entry-doc/tei:entry[1]/@xml:id
  for $mention in $entry-doc//tei:mentioned[tei:w]
  return map:build(
    $mention,
    keys := fn { ./@xml:lang },
    value := fn { local:get-ment(., $entry-id) }
  )

(: Step 2: language → sequence of all fragments (duplicates kept). :)
let $by-lang := map:merge($fragments, $combine)

(: Step 3: language → word → entry id → sequence of occurrence records. :)
let $index := map:merge(
  map:for-each($by-lang, function($lang, $frags) {
    let $by-word := map:merge($frags, $combine)
    return map:entry(
      $lang,
      map:merge(
        map:for-each($by-word, function($word, $per-entry) {
          map:entry($word, map:merge($per-entry, $combine))
        })
      )
    )
  })
)

(: Step 4: serialize the index; languages are ordered by their name in the
   metalanguage, words by their normalized form. :)
return
  <lang-index>{
    for $lang in map:keys($index)
    let $words := $index($lang)
    order by abv-m:langname-by-id($lang, $metalang)
    return
      <lang id="{$lang}">{
        for $raw in map:keys($words)
        let $shown := abv-m:normalize-str($raw)
        order by $shown
        return
          <word text="{$shown}">{
            for $eid in map:keys($words($raw))
            return
              <entry id="{$eid}">{
                for $occ in $words($raw)($eid)
                return
                  <ref path="{$occ('node')}">{
                    (: Empty gloss strings produce no element. :)
                    $occ('gloss')[. != ''] ! <gloss text="{.}"/>
                  }</ref>
              }</entry>
          }</word>
      }</lang>
  }</lang-index>