(:
  Builds an index of the words cited inside tei:mentioned elements across the
  dictionary documents of one metalanguage edition. The intermediate result
  ($langs-index) is a nested map:

      language id -> word -> entry id -> ( {'gloss': ..., 'node': ...}, ... )

  The final return clause then walks that map language by language.
:)
declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace abv = "http://ossetic-studies.org/ns/abaevdict";
import module namespace abv-m = 'http://ossetic-studies.org/ns/abaevdict-mod' at './abv-mod.xqm';

(:~
  Finds the gloss text that applies to a "mentioned" element.

  If $ment is named 'mentioned' and has tei:gloss children, the text nodes of
  their tei:q children are returned. If it has no tei:gloss child, the lookup
  is repeated on the parent node, so a nested "mentioned" picks up the gloss of
  an enclosing one. As soon as a node whose name is not 'mentioned' is reached,
  the result is the empty sequence.

  NOTE(review): name() yields the lexical QName, so the comparison only matches
  unprefixed elements; a document that writes tei:mentioned with a prefix would
  never match here. Confirm that the data always uses a default namespace.

  @param $ment  node to start the lookup from
  @return       gloss strings, or the empty sequence when none is found
:)
declare function local:get-gloss($ment as node()) as xs:string* {
  if (name($ment) = 'mentioned') then (
    if (count($ment/tei:gloss) > 0) then $ment/tei:gloss/tei:q/text()
    else local:get-gloss($ment/..)
  ) else ()
};

(:~
  Produces one index fragment per word form found in $ment.

  For every text node directly inside a tei:w child of $ment, a map of the form
  { word : { $id : { 'gloss': ..., 'node': ... } } } is returned, where 'gloss'
  is the result of local:get-gloss($ment) and 'node' is the result of
  path($ment).

  @param $ment  element whose tei:w children are indexed
  @param $id    identifier under which the occurrence is recorded
  @return       zero or more single-word maps, one per tei:w text node
:)
declare function local:get-ment($ment as node(), $id as xs:string) as map(xs:string, map(xs:string, map(xs:string, item()*)))* {
  for $w in $ment/tei:w/text()
  return map{$w : map{$id : map {'gloss': local:get-gloss($ment), 'node': path($ment) } } }
};

(: Metalanguage code; may be supplied externally and defaults to 'en'. It
   selects the collection that is read below and is also passed on to
   abv-m:langname-by-id when the languages are ordered. :)
declare variable $metalang external := 'en';

let $docs := collection(`abaevdict_{$metalang}/xml`)
(: Disabled alternative that reads a single entry file instead of the whole
   collection: :)
(: let $docs := doc(`abaevdict_{$metalang}/xml/entry_mad.xml`) :)

(: Create a sequence of isolated indices, one map per tei:mentioned element
   that has a tei:w child. Each map is keyed by the element's @xml:lang and
   holds the fragments produced by local:get-ment.
   NOTE(review): $id is read from the first tei:entry child of the document;
   local:get-ment declares it as a single xs:string, so a document without
   that @xml:id would presumably raise a type error. Verify against the data.
   NOTE(review): the keyword argument is spelled "keys"; check it against the
   parameter name of map:build in the target processor version. :)
let $langs-seq := for $doc in $docs
  let $ments := $doc//tei:mentioned[tei:w]
  let $id := $doc/tei:entry[1]/@xml:id
  for $ment in $ments
  return map:build($ment, keys := fn {./@xml:lang }, value := fn { local:get-ment(., $id) } )

(: Merge these per-element maps into one map keyed by language. With
   'duplicates': 'combine' the values of a repeated key are kept side by side
   as a sequence, so each language still holds many small word maps. :)
let $langs-merged := map:merge($langs-seq, {'duplicates': 'combine'})

(: For each language, merge the word maps into one map per language, then for
   each word merge the entry-id maps, so that all occurrences of a word within
   the same entry end up under a single entry-id key. :)
let $langs-index := map:merge(
  for $lang in map:keys($langs-merged)
  let $words := map:merge($langs-merged($lang), {'duplicates': 'combine'} )
  let $words-m := map:merge(for $w in map:keys($words) return {$w: map:merge($words($w), {'duplicates': 'combine'} )})
  return {$lang: $words-m}
)

(: Now we have to go through these languages and merge duplicates for each of them :)
(: Disabled earlier draft of that merge step. Note that it refers to $winfo,
   which is not bound anywhere in the draft. :)
(: let $lang-index := map:merge( for $lang in map:keys($langs-merged) let $linfo := map:merge($langs-merged($lang), {'duplicates': 'combine'}) let $linfo-m := map:merge( for $w in map:keys($linfo) return map{$w: {'entries': distinct-values($winfo('entries')), 'glosses':
distinct-values($winfo('glosses'))}} ) return {$lang: $linfo-m} ) :)
(: Disabled debugging output: return the raw index as is. :)
(: return $langs-index :)
(: Disabled debugging query: list the words that occur in more than one entry. :)
(: for $lang in map:keys($lang-index) for $w in map:keys($lang-index($lang)) where count($lang-index($lang)($w)('entries')) > 1 return ($lang,$w, $lang-index($lang)($w)) :)

(: Output: iterate over the languages ordered by abv-m:langname-by-id, within
   each language over the words ordered by their abv-m:normalize-str form,
   then over the entry ids of each word, the occurrences recorded for that
   entry, and finally the glosses of each occurrence, skipping empty glosses.
   NOTE(review): as it stands the nested braces contain no key/value pairs and
   the "then" branch has no expression. This looks like the element
   constructors that wrapped each level were lost in this copy of the file.
   Restore them from the original before running the query. :)
return {
  for $lang in map:keys($langs-index)
  let $words := $langs-index($lang)
  order by abv-m:langname-by-id($lang, $metalang)
  return {
    for $w in map:keys($words)
    let $wn := abv-m:normalize-str($w)
    order by $wn
    return {
      for $e in map:keys($words($w))
      return {
        for $ref in $words($w)($e)
        return {
          for $g in $ref('gloss')
          return if ($g != '') then else ()
        }
      }
    }
  }
}