(: abaev-basex/xq/abv-mod.xqm :)

module namespace abv-m = 'http://ossetic-studies.org/ns/abaevdict-mod';

declare namespace tei = "http://www.tei-c.org/ns/1.0";
declare namespace abv = "http://ossetic-studies.org/ns/abaevdict";

(:~
 : Deletes every line-feed character from a string and then normalizes the
 : remaining whitespace.
 :
 : Line feeds are removed outright rather than turned into spaces, so the text
 : on either side of a line break is joined directly. fn:normalize-space then
 : trims both ends and collapses inner whitespace runs to a single space.
 :
 : @param $str the string to clean; may be the empty sequence
 : @return the cleaned string (zero-length for an empty sequence, because
 :         fn:translate maps the empty sequence to "")
 :)
declare function abv-m:normalize-str($str as xs:string?)
as xs:string? {
  normalize-space(translate($str, '&#xA;', ''))
};

(:~
 : Turns a headword into a string whose plain codepoint order follows the
 : dictionary's alphabet.
 :
 : The word is pushed through an ordered table of [regex, replacement] rules:
 : first punctuation and stress marks are dropped, then every letter (or
 : letter + modifier digraph) is replaced by a single ASCII code character.
 :
 : The ORDER of the table is significant and must not be changed:
 :   - digraphs with the ejective mark or a labialisation mark are listed
 :     before the bare letter, otherwise the bare-letter rule would eat them;
 :   - several codes are capital ASCII letters ('A'..'X'). This is safe only
 :     because each capital letter is consumed as an input pattern by an
 :     earlier rule than the one that emits it as a code.
 :
 : @param $word the headword to encode
 : @return the encoded sort key
 :)
declare function abv-m:sortKey($word as xs:string)
as xs:string {
  let $rules := (
    (: characters ignored for sorting :)
    ['-', ''], ['\(', ''], ['\)', ''],
    ['́', ''], (: combining acute :)
    ['\*', ''], ['ᵆ', ''],

    ['a', '/'], ['A', '/'], ['ā', '/'], ['Ā', '/'],
    ['á', '/'], ['Á', '/'], ['ā́', '/'], ['Ā́', '/'],

    ['æ', '1'], ['Æ', '1'], ['ǽ', '1'], ['Ǽ', '1'],

    ['b', '2'], ['B', '2'],

    (: ejective before plain :)
    ['cʼ', '4'], ['Cʼ', '4'],
    ['c', '3'], ['C', '3'],

    ['d', '5'], ['D', '5'],

    ['ʒ', '6'], ['Ʒ', '6'], ['ǯ', '6'],

    ['e', '7'], ['E', '7'], ['é', '7'], ['É', '7'],

    ['f', '8'], ['F', '8'],

    (: labialised before plain :)
    ['g[˳₀ₒ]', '9'], ['G[˳₀ₒ]', '9'],
    ['g', '9'], ['G', '9'],
    ['ǵ', '9'], ['Ǵ', '9'],

    ['ǧ[˳₀ₒ]', 'A'], ['Ǧ[˳₀ₒ]', 'A'],
    ['ǧ', 'A'], ['Ǧ', 'A'], ['h', 'A'], ['H', 'A'],

    ['i', 'B'], ['I', 'B'], ['í', 'B'], ['Í', 'B'],
    ['ī', 'B'], ['Ī', 'B'], ['ī́', 'B'], ['Ī́', 'B'],

    ['j', 'D'], ['J', 'D'],

    (: labialised ejective, then ejective, then labialised, then plain :)
    ['kʼ[˳₀ₒ]', 'F'], ['Kʼ[˳₀ₒ]', 'F'],
    ['kʼ', 'F'], ['Kʼ', 'F'],
    ['k[˳₀ₒ]', 'E'], ['K[˳₀ₒ]', 'E'],
    ['k', 'E'], ['K', 'E'],
    ['ḱʼ', 'F'], ['Ḱʼ', 'F'],
    ['ḱ', 'E'], ['Ḱ', 'E'],

    ['l', 'H'], ['L', 'H'],

    ['m', 'I'], ['M', 'I'],

    ['n', 'J'], ['N', 'J'],

    ['o', 'K'], ['O', 'K'], ['ó', 'K'], ['Ó', 'K'],

    ['pʼ', 'M'], ['Pʼ', 'M'],
    ['p', 'L'], ['P', 'L'],

    ['q[˳₀ₒ]', 'N'], ['Q[˳₀ₒ]', 'N'],
    ['q', 'N'], ['Q', 'N'],

    ['r', 'O'], ['R', 'O'],

    ['s', 'P'], ['S', 'P'],

    ['tʼ', 'R'], ['Tʼ', 'R'],
    ['t', 'Q'], ['T', 'Q'],

    ['u', 'S'], ['U', 'S'], ['ú', 'S'], ['Ú', 'S'],
    ['ū', 'S'], ['Ū', 'S'], ['ū́', 'S'], ['Ū́', 'S'],

    ['v', 'T'], ['V', 'T'],

    ['w', 'U'], ['W', 'U'],

    ['x[˳₀ₒ]', 'V'], ['X[˳₀ₒ]', 'V'],
    ['x', 'V'], ['X', 'V'],

    ['y', 'W'], ['Y', 'W'], ['ý', 'W'], ['Ý', 'W'],

    ['z', 'X'], ['Z', 'X'], ['ž', 'X'], ['Ž', 'X']
  )
  (: apply the rules one after another, each on the result of the previous :)
  return fold-left($rules, $word,
    function($key, $rule) { replace($key, $rule(1), $rule(2)) })
};

(:~
 : Orders entry documents by the sort key of their headword.
 :
 : The headword is the first text node of the first tei:orth in the first
 : tei:form of the document's first tei:entry child. Because abv-m:sortKey
 : requires exactly one string, a document without such a text node makes the
 : call fail with a type error.
 :
 : @param $docs the entry documents to sort
 : @return the same documents in headword order
 :)
declare function abv-m:sort-collection($docs as document-node()+) {
  let $headword-key := function($entry-doc) {
    abv-m:sortKey($entry-doc/tei:entry[1]/tei:form[1]/tei:orth[1]/text()[1])
  }
  return $docs => sort((), $headword-key)
};

(:~
 : Orders nodes by the sort key of their own text content.
 :
 : The key is built from the node's direct text-node children, concatenated
 : in document order. Joining them (instead of passing `text()` straight
 : through) keeps the call valid for nodes with no text child (key "") and
 : for mixed-content nodes with several text children; abv-m:sortKey accepts
 : exactly one string and would otherwise abort the whole sort with a type
 : error. For a node with a single text child the key is unchanged.
 :
 : @param $docs the nodes to sort
 : @return the same nodes ordered by sort key
 :)
declare function abv-m:sort-nodes($docs as node()+) {
  sort($docs, (), function($d) { 
      abv-m:sortKey(string-join($d/text())) })  
};

(:~
 : Applies the chain of XSL transformations that turns each source document
 : into a single-language TEI <entry/> element, and returns one entry per
 : input document (in input order).
 :
 : Pipeline, in order: strip-space, delete-lang, insert-refs, insert-langs,
 : standardize, change-default-lang. From the final result the first
 : tei:entry under /tei:TEI/tei:text/tei:body is returned.
 :
 : @param $src  the source documents
 : @param $lang the language to keep; 'en' causes 'ru' to be deleted, any
 :              other value causes 'en' to be deleted
 : @return one tei:entry element per source document
 :)
declare function abv-m:make-lng($src as document-node()+, 
                                    $lang as xs:string) 
  as node()+ {
    (: Nothing below depends on the individual document, so it is bound once
       here instead of being re-evaluated for every item of $src. This matters
       most for the two serialized index documents. :)
    let $del_lang := if ($lang = 'en') then 'ru' else 'en'
    let $xsl-strip-space  := doc('../xsl/strip-space.xsl')
    let $xsl-delete-lang  := doc('../xsl/delete-lang.xsl')
    let $xsl-insert-refs  := doc('../xsl/insert-refs.xsl')
    let $xsl-insert-langs := doc('../xsl/insert-langs.xsl')
    let $xsl-standardize  := doc('../xsl/standardize.xsl')
    let $xsl-default-lang := doc('../xsl/change-default-lang.xsl')
    let $refs-params := {'lookup-raw': 
                             serialize(doc('abaevdict_index/lookup.xml')),
                         'biblio-raw': 
                             serialize(doc('abaevdict_index/abaev_biblio.xml')),
                         'bib-lang': $lang}
    for $doc in $src
    return ($doc => xslt:transform($xsl-strip-space) 
       => xslt:transform($xsl-delete-lang, {'lang': $del_lang})
       => xslt:transform($xsl-insert-refs, $refs-params)
       => xslt:transform($xsl-insert-langs, {'name-lang': $lang})
       => xslt:transform($xsl-standardize, {'standardize-lang': $lang})
       => xslt:transform($xsl-default-lang,
                         {'default-lang': $lang}))/tei:TEI/tei:text/tei:body/tei:entry[1]
};

(:~
 : Returns a copy of an abbreviation node with a `title` attribute added.
 :
 : The node's @data-lang value is split on commas into language codes. For
 : every record in langnames.xml whose code matches one of them, the text of
 : the child element named $lang is collected, and the collected names are
 : joined with ", " to form the title. If nothing matches, the title is the
 : empty string.
 :
 : @param $abbr the node to annotate (the original is not modified)
 : @param $lang name of the langnames.xml column holding the display names
 : @return the annotated copy
 :)
declare function abv-m:insert-full-lang($abbr as node(), $lang as xs:string) {
  let $codes := tokenize($abbr/@data-lang,',')
  let $names := doc('abaevdict_index/langnames.xml')/csv[1]/record[code/text() = $codes]/*[name()=$lang]/text()
  let $title := string-join($names,', ')
  return $abbr transform with {
    insert node attribute title { $title } as first into .
  }
};

(:~
 : Renders each source document with abaev2html.xsl and then replaces every
 : `abbr` element of the result with the copy produced by
 : abv-m:insert-full-lang, i.e. one carrying a `title` attribute.
 :
 : @param $src  the documents to render
 : @param $lang passed to the stylesheet as its 'lang' parameter and to
 :              abv-m:insert-full-lang
 : @return one rendered node per source document, in input order
 :)
declare function abv-m:make-html($src as document-node()+,
                                 $lang as xs:string)
  as node()+ {
    for $entry in $src
    let $page := xslt:transform($entry,
                                doc('../xsl/abaev2html.xsl'), {'lang': $lang})
    return $page transform with {
      (: inside the update the context item is the copy of $page :)
      for $a in //abbr
      return replace node $a with abv-m:insert-full-lang($a,$lang)
    }
  };

(:~
 : Builds one map describing, for every language mentioned in a document, a
 : labelled point to be fed to a geographic map.
 :
 : The key names ('scattergeo', 'markers+text', ...) look like a Plotly
 : trace; NOTE(review): confirm against the consumer.
 :
 : For each distinct @xml:lang found on innermost tei:mentioned elements
 : (those with no tei:mentioned child) a point is produced unless
 :   - the language is 'os' or starts with 'os-',
 :   - langnames.xml has no record for it, or its long/lat is '-99',
 :   - none of the selected tei:mentioned elements has a tei:w child.
 : Note that `$doc//tei:mentioned[...][1]` selects the first match under
 : EACH parent, so $ment may hold several elements; the label is the first
 : direct text node of their tei:w children.
 :
 : The result holds the fixed presentation settings plus four parallel
 : arrays: 'text', 'hovertext', 'lon', 'lat'.
 :
 : @param $doc  the entry document
 : @param $lang name of the langnames.xml column used for the hover text
 : @return the map described above; the four arrays are empty when no
 :         language qualifies
 :)
declare function abv-m:make-geomap($doc as document-node(), $lang as xs:string) {
  let $list := for $mlang in distinct-values($doc//tei:mentioned[not(tei:mentioned)]/@xml:lang)
               let $ment := $doc//tei:mentioned[not(tei:mentioned) and @xml:lang=$mlang][1]
               let $linfo := doc(`abaevdict_index/langnames.xml`)/csv[1]/record[code/text() = $mlang]
               where $mlang != 'os' 
                     and not(starts-with($mlang,'os-')) 
                     and $linfo/long[1] != '-99' 
                     and $linfo/lat[1] != '-99'
                     and $ment[tei:w]
               return {
                 (: string() guarantees exactly one value per point. A tei:w
                    without a direct text node would otherwise contribute
                    nothing and shift every later label against lon/lat. :)
                 'text': string(($ment/tei:w/text())[1]),
                 'hovertext': string($linfo/*[name()=$lang][1]/text()),
                 'lon': xs:float($linfo/long[1]),
                 'lat': xs:float($linfo/lat[1])
               }
  (: Fixed key list rather than map:keys($list[1]): with no qualifying
     language $list is empty and map:keys(()) is a type error. :)
  let $keys := ('text', 'hovertext', 'lon', 'lat')
  let $map := map:merge(({'type': 'scattergeo',
                          'mode': 'markers+text',
                          'hoverinfo': 'text',
                          'textposition': 'top center',
                          'marker': {'size': 7,
                                     'line': {'width': 1}
                                    }
                         },
                        for $k in $keys
                        let $seq := for $e in $list return $e($k)
                        return map:entry($k, array { $seq })))
              
  return $map
};

(:~
 : Looks up the display name of a language in langnames.xml.
 :
 : @param $id   language code, compared with the text of each record's `code`
 : @param $lang name of the record child element that holds the wanted name
 : @return the matching text node(s); empty if the code or column is unknown
 :)
declare function abv-m:langname-by-id($id as xs:string, $lang as xs:string) {
  let $record := doc('abaevdict_index/langnames.xml')/csv[1]/record[code/text()=$id]
  return $record/*[name()=$lang]/text()
};

(:~
 : Looks up an entry in the lookup table by its xml:id.
 :
 : @param $id the xml:id of the wanted tei:entry in lookup.xml
 : @return the direct text node(s) of that entry; empty if there is none
 :)
declare function abv-m:entry-form-by-id($id as xs:string) {
  let $table := doc(`abaevdict_index/lookup.xml`)/tei:table[1]
  return $table/tei:entry[@xml:id=$id]/text()
};

(:~
 : Returns a copy of a document in which every node selected by a path
 : expression is wrapped in an <abv:mark> element.
 :
 : The path is evaluated with xquery:eval against the copy, so the selected
 : nodes belong to the copy and the input document is left untouched.
 :
 : NOTE(review): $path is executed as XQuery. If it can come from an external
 : request rather than from this module's own search results, it should be
 : validated before it reaches this function. Confirm against the callers.
 :
 : @param $doc  the document to mark up
 : @param $path XQuery/XPath expression, evaluated with the copy as context
 : @return the marked-up copy
 :)
declare function abv-m:mark-element($doc as document-node(), $path as xs:string) {
  $doc transform with {
    for $target in xquery:eval($path, {'': .})
    return replace node $target
           with <abv:mark>{$target}</abv:mark>
  }
};

(:~
 : Full-text search over one language's entry collection; used by the API
 : and elsewhere.
 :
 : $type selects which nodes of each document are tested; an unrecognised
 : type falls back to all text nodes, the same as "full". Each candidate node
 : is matched with `contains text` against $query. Documents with at least
 : one hit are returned, ordered by the sort key of their headword.
 :
 : @param $db-lang language suffix of the database: the documents are read
 :                 from collection `abaevdict_{$db-lang}/xml`
 : @param $type    one of "full", "form", "sense", "example", "translation",
 :                 "mentioned", "gloss", "etym"
 : @param $query   the search string
 : @return an array of maps, one per matching document, each with
 :         'entry_id' (xml:id of the first tei:entry) and 'path' (array of
 :         fn:path strings of the matching nodes)
 :)
declare function abv-m:search($db-lang as xs:string, 
                            $type as xs:string, 
                            $query as xs:string) {
  let $prolog := 'declare namespace tei = "http://www.tei-c.org/ns/1.0";'
  let $targets := {
    "full": "//text()",
    "form": "/tei:entry[1]/tei:form/tei:orth",
    "sense": "/tei:entry[1]/tei:sense",
    "example": "/tei:entry[1]//tei:cit[@type='example']/tei:quote",
    "translation": "/tei:entry[1]//tei:cit[@type='translation']",
    "mentioned": "/tei:entry[1]//tei:mentioned/(tei:m|tei:w|tei:phr|tei:s)",
    "gloss": "tei:entry[1]//tei:gloss",
    "etym": "tei:entry[1]/tei:etym[1]//text()"
  }
  (: an unknown $type yields no map entry, so the fallback is taken :)
  let $pexpr := $prolog || ($targets($type), "//text()")[1]
  return array {
    for $doc in collection(`abaevdict_{$db-lang}/xml`)
    let $hits := for $candidate in xquery:eval($pexpr, {'': $doc})
                 where $candidate contains text {$query}
                 return path($candidate)
    where exists($hits)
    order by abv-m:sortKey($doc/tei:entry[1]/tei:form[1]/tei:orth[1])
    return {'entry_id': string($doc/tei:entry[1]/@xml:id), 
            'path': array:build($hits)}
  }
};