initial commit
This commit is contained in:
parent
cee3efab31
commit
71219733bd
31 changed files with 9242 additions and 0 deletions
115
xq/gen-mentioned-index.xq
Normal file
115
xq/gen-mentioned-index.xq
Normal file
|
@ -0,0 +1,115 @@
|
|||
declare namespace tei = "http://www.tei-c.org/ns/1.0";
|
||||
declare namespace abv = "http://ossetic-studies.org/ns/abaevdict";
|
||||
|
||||
import module namespace abv-m = 'http://ossetic-studies.org/ns/abaevdict-mod' at './abv-mod.xqm';
|
||||
|
||||
(:~
 : Collects the gloss strings that apply to a mentioned form.
 :
 : If $ment is a tei:mentioned element with tei:gloss children of its own,
 : the text nodes of their tei:q children are returned. Otherwise the lookup
 : continues on the parent node, so a form nested inside another
 : tei:mentioned element inherits the gloss of the enclosing one. The walk
 : ends with the empty sequence at the first node that is not a
 : tei:mentioned element.
 :
 : @param $ment node to inspect
 : @return the gloss texts, or the empty sequence when none apply
 :)
declare function local:get-gloss($ment as node())
as xs:string* {
  (: Test the expanded element name instead of comparing fn:name with a
     literal. fn:name yields the lexical QName, which carries the prefix
     whenever the source document binds one, so a literal comparison with
     'mentioned' would silently fail for prefixed documents. :)
  if ($ment instance of element(tei:mentioned)) then (
    if (exists($ment/tei:gloss))
    then $ment/tei:gloss/tei:q/text()
    else
      (: A parentless element has no parent step result. Mapping over the
         possibly empty parent yields the empty sequence in that case
         instead of a type error on the node parameter. :)
      $ment/.. ! local:get-gloss(.)
  )
  else ()
};
|
||||
|
||||
(:~
 : Builds one index fragment for every text node found directly inside a
 : tei:w child of the given node.
 :
 : Each fragment is a three-level map:
 :   word text -> entry id -> occurrence record
 : where the occurrence record holds the glosses found by local:get-gloss
 : under 'gloss' and the value of db:node-id for $ment under 'node'.
 :
 : @param $ment node whose tei:w children supply the word forms
 : @param $id   identifier of the entry the node belongs to
 : @return one map per word text node, in document order
 :)
declare function local:get-ment($ment as node(), $id as xs:string)
as map(xs:string, map(xs:string, map(xs:string, item()*)))* {
  (: The context item inside the mapping is the word text node; it is
     atomized when used as the outer map key. :)
  $ment/tei:w/text() ! map {
    . : map {
      $id : map {
        'gloss': local:get-gloss($ment),
        'node': db:node-id($ment)
      }
    }
  }
};
|
||||
|
||||
(: Metalanguage code. It selects the database named abaevdict_ plus this
   code and is also handed to the language-name lookup used for sorting.
   Defaults to 'en' when not bound externally. :)
declare variable $metalang external := 'en';

(: Source documents: everything under the xml path of the selected database.
   To debug against a single entry, bind this instead to
   doc(`abaevdict_{$metalang}/xml/entry_mad.xml`) :)
let $entries := collection(`abaevdict_{$metalang}/xml`)

(: Stage 1: for every mentioned element that has a tei:w child, build an
   isolated map keyed by its xml:lang, holding the fragments produced by
   local:get-ment for that element. :)
let $per-mention :=
  for $entry-doc in $entries
  let $entry-id := $entry-doc/tei:entry[1]/@xml:id
  for $m in $entry-doc//tei:mentioned[tei:w]
  return map:build($m, keys := fn { ./@xml:lang },
                       value := fn { local:get-ment(., $entry-id) })

(: Stage 2: fold the isolated maps into one map per language. Values under
   the same language are concatenated, so duplicates are still present. :)
let $by-lang := map:merge($per-mention, map { 'duplicates': 'combine' })

(: Stage 3: inside each language, combine the fragments by word, and inside
   each word combine the occurrence records by entry id. :)
let $index := map:merge(
  for $lang-code in map:keys($by-lang)
  let $by-word := map:merge($by-lang($lang-code),
                            map { 'duplicates': 'combine' })
  return map:entry(
    $lang-code,
    map:merge(
      map:keys($by-word) ! map:entry(
        .,
        map:merge($by-word(.), map { 'duplicates': 'combine' })
      )
    )
  )
)

(: Serialize the index: languages ordered by the name returned from
   abv-m:langname-by-id, words ordered by their normalized text. :)
return
  <lang-index>{
    for $lang-code in map:keys($index)
    let $forms := $index($lang-code)
    order by abv-m:langname-by-id($lang-code, $metalang)
    return
      <lang id='{$lang-code}'>{
        for $form in map:keys($forms)
        let $label := abv-m:normalize-str($form)
        order by $label
        return
          <word text="{$label}">{
            for $eid in map:keys($forms($form))
            return
              <entry id="{$eid}">{
                for $occ in $forms($form)($eid)
                return
                  <ref node-id="{$occ('node')}">{
                    (: Empty gloss strings produce no gloss element. :)
                    $occ('gloss')[. != ''] ! <gloss text="{.}"/>
                  }</ref>
              }</entry>
          }</word>
      }</lang>
  }</lang-index>
|
Loading…
Add table
Add a link
Reference in a new issue