115 lines
No EOL
3.9 KiB
Text
115 lines
No EOL
3.9 KiB
Text
declare namespace tei = "http://www.tei-c.org/ns/1.0";
|
|
declare namespace abv = "http://ossetic-studies.org/ns/abaevdict";
|
|
|
|
import module namespace abv-m = 'http://ossetic-studies.org/ns/abaevdict-mod' at './abv-mod.xqm';
|
|
|
|
(:~
 : Returns the gloss text that applies to a tei:mentioned element.
 :
 : If $ment is a tei:mentioned element with at least one tei:gloss child, the
 : text nodes of the tei:q children of those glosses are returned. If it has
 : no tei:gloss child, the lookup continues on its parent, so a nested
 : tei:mentioned takes the gloss of a directly enclosing tei:mentioned.
 : The climb stops, yielding the empty sequence, as soon as a node that is
 : not a tei:mentioned element is reached or there is no parent at all.
 :
 : @param $ment node to inspect (normally a tei:mentioned element)
 : @return gloss strings, or the empty sequence if none apply
 :)
declare function local:get-gloss($ment as node())
as xs:string* {
  (: Match by expanded name rather than name(): name() yields the lexical
     QName, which would be 'tei:mentioned' in a prefixed document. :)
  if ($ment instance of element(tei:mentioned)) then (
    if (exists($ment/tei:gloss))
    then $ment/tei:gloss/tei:q/text()
    (: '!' evaluates the call only when a parent exists, so a parentless
       element gives () instead of a type error on the node() parameter. :)
    else $ment/.. ! local:get-gloss(.)
  )
  else ()
};
|
|
|
|
(:~
 : Builds the index records for one tei:mentioned element.
 :
 : One map is produced per text node found directly inside a tei:w child of
 : $ment. Each map has the shape
 :   word text -> entry id -> { 'gloss': ..., 'node': ... }
 : where 'gloss' is the result of local:get-gloss($ment) and 'node' is the
 : path($ment) string locating the element.
 :
 : @param $ment the mentioned element being indexed
 : @param $id   identifier of the entry the element occurs in
 : @return one nested map per word text node (possibly none)
 :)
declare function local:get-ment($ment as node(), $id as xs:string)
as map(xs:string, map(xs:string, map(xs:string, item()*)))* {
  for $form in $ment/tei:w/text()
  let $occurrence := map {
    'gloss' : local:get-gloss($ment),
    'node' : path($ment)
  }
  let $by-entry := map { $id : $occurrence }
  return map { $form : $by-entry }
};
|
|
|
|
(: Metalanguage code. It selects the collection abaevdict_{$metalang}/xml and
   is passed to abv-m:langname-by-id when ordering languages. :)
declare variable $metalang external := 'en';

(: Main query: builds a <lang-index> document that lists, per language, every
   word found in a tei:mentioned element, the entries it occurs in, and for
   each occurrence its node path and glosses. :)
let $entry-docs := collection(`abaevdict_{$metalang}/xml`)
(: let $docs := doc(`abaevdict_{$metalang}/xml/entry_mad.xml`) :)

(: Options shared by every merge below: keep all values of a duplicated key. :)
let $combine := { 'duplicates': 'combine' }

(: Create sequence of isolated indices, one map per mentioned element:
   language -> records produced by local:get-ment :)
let $per-mention :=
  for $entry-doc in $entry-docs
  let $entry-id := $entry-doc/tei:entry[1]/@xml:id
  for $m in $entry-doc//tei:mentioned[tei:w]
  return map:build(
    $m,
    keys := fn { ./@xml:lang },
    value := fn { local:get-ment(., $entry-id) }
  )

(: Merge these maps into one; each language now holds a sequence of word
   maps that may still contain duplicate words :)
let $by-lang := map:merge($per-mention, $combine)

(: For each language merge the word maps, then for each word merge the
   per-entry occurrence information :)
let $langs-index := map:merge(
  for $lang in map:keys($by-lang)
  let $by-word := map:merge($by-lang($lang), $combine)
  let $by-word-and-entry := map:merge(
    for $form in map:keys($by-word)
    return { $form : map:merge($by-word($form), $combine) }
  )
  return { $lang : $by-word-and-entry }
)

(: Now we have to go through these languages and merge duplicates
   for each of them :)
(: let $lang-index := map:merge(
     for $lang in map:keys($langs-merged)
     let $linfo := map:merge($langs-merged($lang),
                             {'duplicates': 'combine'})
     let $linfo-m := map:merge(
       for $w in map:keys($linfo)
       return map{$w: {'entries':
                         distinct-values($winfo('entries')),
                       'glosses':
                         distinct-values($winfo('glosses'))}}
     )
     return {$lang: $linfo-m}
   ) :)

(: return $langs-index :)

(: for $lang in map:keys($lang-index)
   for $w in map:keys($lang-index($lang))
   where count($lang-index($lang)($w)('entries')) > 1
   return ($lang,$w, $lang-index($lang)($w)) :)

return
<lang-index>
{
  (: Languages are ordered by the value of abv-m:langname-by-id. :)
  for $lang in map:keys($langs-index)
  let $words := $langs-index($lang)
  order by abv-m:langname-by-id($lang, $metalang)
  return
  <lang id='{$lang}'>
  {
    (: Words are ordered and labelled by their abv-m:normalize-str form. :)
    for $form in map:keys($words)
    let $label := abv-m:normalize-str($form)
    let $entries-of-word := $words($form)
    order by $label
    return
    <word text="{$label}">
    {
      for $entry-id in map:keys($entries-of-word)
      return
      <entry id="{$entry-id}">
      {
        for $occurrence in $entries-of-word($entry-id)
        return
        <ref path="{$occurrence?node}">
        {
          (: Empty gloss strings are skipped. :)
          for $gloss in $occurrence?gloss
          where $gloss != ''
          return <gloss text="{$gloss}"/>
        }
        </ref>
      }
      </entry>
    }
    </word>
  }
  </lang>
}
</lang-index>