# encoding: utf-8
=begin
 * Name: SiSU
 * Description: a framework for document structuring, publishing and search
 * Author: Ralph Amissah
 * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.
 * License: GPL 3 or later:
   SiSU, a framework for document structuring, publishing and search
   Copyright (C) Ralph Amissah
   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.
   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.
   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.
   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   
   
   
   
   
 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system
 * Homepages:
   
   
 * Download:
   
 * Ralph Amissah
   
   
 ** Description: common file for xml generation
=end
module SiSU_XML_Munge
  class Trans
    require_relative 'defaults'                         # defaults.rb
    # Binds the translator to one document's metadata object.
    # md must respond to #fns and #sem_tag (both are read here).
    # The semantic tag table is loaded into @ab only when md.sem_tag is set.
    def initialize(md)
      @md=md
      @sys=SiSU_Env::SystemCall.new
      @dir=SiSU_Env::InfoEnv.new(@md.fns)
      @brace_url=SiSU_Viz::Skin.new.url_decoration
      @ab ||=semantic_tags.default if @md.sem_tag
    end
    # Accessor for the semantic tag vocabulary, used as `semantic_tags.default`.
    # Calling it (re)defines #default on the enclosing class and returns self,
    # so the nested method is reached through the returned object.
    def semantic_tags
      # Returns a fresh Hash mapping each short tag (Symbol) to its long name
      # (String). Each key appears once: a repeated key in a Hash literal is
      # silently overwritten and makes Ruby emit a "duplicated key" warning.
      def default
        {
          pub:   'publication',
          conv:  'convention',
          vol:   'volume',
          pg:    'page',
          cty:   'city',
          org:   'organization',
          uni:   'university',
          dept:  'department',
          fac:   'faculty',
          inst:  'institute',
          co:    'company',
          com:   'company',
          dt:    'date',
          y:     'year',
          m:     'month',
          d:     'day',
          ti:    'title',
          au:    'author',
          ed:    'editor', #editor?
          v:     'version', #edition
          n:     'name',
          fn:    'firstname',
          mn:    'middlename',
          ln:    'lastname',
          in:    'initials',
          qt:    'quote',
          ct:    'cite',
          ref:   'reference',
          ab:    'abreviation',
          def:   'define',
          desc:  'description',
          trans: 'translate',
        }
      end
      self
    end
    # Code point => entity name for every character char_enc rewrites.
    # Keys are given as code points (converted to one-character UTF-8 strings
    # below) so the table does not depend on how this file's bytes are stored.
    # A name starting with '#' is emitted as a numeric reference.
    CHAR_ENC_ENTITY_NAMES={
      160=>'nbsp',
      162=>'cent',   163=>'pound',  165=>'yen',    167=>'sect',   169=>'copy',
      170=>'ordf',   171=>'laquo',  174=>'reg',    176=>'deg',    177=>'plusmn',
      178=>'sup2',   179=>'sup3',   181=>'micro',  182=>'para',   185=>'sup1',
      186=>'ordm',   187=>'raquo',  188=>'frac14', 189=>'frac12', 190=>'frac34',
      191=>'iquest', 215=>'times',  247=>'divide',
      192=>'Agrave', 193=>'Aacute', 194=>'Acirc',  195=>'Atilde', 196=>'Auml',
      197=>'Aring',  198=>'AElig',  199=>'Ccedil', 200=>'Egrave', 201=>'Eacute',
      202=>'Ecirc',  203=>'Euml',   204=>'Igrave', 205=>'Iacute', 206=>'Icirc',
      207=>'Iuml',   208=>'ETH',    209=>'Ntilde', 210=>'Ograve', 211=>'Oacute',
      212=>'Ocirc',  213=>'Otilde', 214=>'Ouml',   216=>'Oslash', 217=>'Ugrave',
      218=>'Uacute', 219=>'Ucirc',  220=>'Uuml',   221=>'Yacute', 222=>'THORN',
      223=>'szlig',
      224=>'agrave', 225=>'aacute', 226=>'acirc',  227=>'atilde', 228=>'auml',
      229=>'aring',  230=>'aelig',  231=>'ccedil', 232=>'egrave', 233=>'eacute',
      234=>'ecirc',  235=>'euml',   236=>'igrave', 237=>'iacute', 238=>'icirc',
      239=>'iuml',   240=>'eth',    241=>'ntilde', 242=>'ograve', 243=>'oacute',
      244=>'ocirc',  245=>'otilde', 246=>'ouml',   248=>'oslash', 249=>'ugrave',
      250=>'uacute', 251=>'ucirc',  252=>'uuml',   254=>'thorn',  255=>'yuml',
      8216=>'lsquo', 8217=>'rsquo', 8220=>'ldquo', 8221=>'rdquo',
      8211=>'ndash', 8212=>'mdash', 8733=>'prop',  8734=>'infin', 8482=>'trade',
      10016=>'#10016', # no named entity is used for this cross
    }.each_with_object({}) {|(codepoint,name),table| table[[codepoint].pack('U')]=name }.freeze
    # Matches any character that char_enc.html spells as an entity.
    CHAR_ENC_NAMED=Regexp.union(CHAR_ENC_ENTITY_NAMES.keys)
    # Same set plus the angle brackets, used by char_enc.utf8.
    CHAR_ENC_NUMERIC=Regexp.union(['<','>']+CHAR_ENC_ENTITY_NAMES.keys)
    # Character encoding helpers, used as `char_enc.utf8(x)` / `char_enc.html(x)`.
    # Calling it (re)defines #utf8 and #html on the enclosing class and returns self.
    def char_enc #character encode
      # Replaces '<', '>' and every character in CHAR_ENC_ENTITY_NAMES with its
      # decimal character reference (&#NNN;), derived from the character's own
      # code point so a reference can never point at a neighbouring character.
      # The substitution is a single pass, so one replacement cannot feed another.
      #
      # dob may be a String or an object that responds to #obj / #obj=.
      # Returns the encoded String for String input, dob itself (with #obj
      # rewritten) for object input, and nil when @sys.locale is not UTF-8 or
      # dob is neither kind of value.
      # NOTE(review): only U+00A0 is treated as the "space identify" case;
      # confirm whether a second space variant was also meant to be encoded.
      def utf8(dob='')
        if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode
          str=if defined? dob.obj; dob.obj
          elsif dob.class==String; dob
          end
          if str
            str=str.gsub(CHAR_ENC_NUMERIC) {|char| "&##{char.ord};" }
          end
          dob=if defined? dob.obj
            dob.obj=str
            dob
          elsif dob.class==String
            str
          end
          dob
        end
      end
      # Rewrites dob.obj for HTML output and returns the new String.
      # Under a UTF-8 locale only the non-breaking space is spelled out
      # (&nbsp;); otherwise every character in CHAR_ENC_ENTITY_NAMES becomes
      # its named entity. dob must respond to #obj and #obj= (the '' default
      # does not, and raises NoMethodError).
      def html(dob='')
        dob.obj=if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode
          dob.obj.gsub([160].pack('U'),'&nbsp;')           # space identify
        else
          dob.obj.gsub(CHAR_ENC_NAMED) {|char| "&#{CHAR_ENC_ENTITY_NAMES[char]};" }
        end
      end
      self
    end
    # Escapes bare ampersands in a list of words and returns a new Array;
    # neither the input Array nor its Strings are modified.
    #
    # A word that already contains something shaped like an entity (&...;) is
    # skipped by the first pass so the entity is not double-escaped. The second
    # pass still escapes an ampersand directly followed by a capital letter
    # (e.g. "AT&T;"), which the first pass would have mistaken for an entity.
    # Imperfect by design: it will not catch all possible cases, and an
    # entity whose name starts with a capital letter is escaped as well.
    def tidywords(wordlist)
      wordlist.map do |word|
        word=word.gsub(/&/,'&amp;') unless word =~/&\S+;/
        word.gsub(/&([A-Z])/,'&amp;\1')
      end
    end
    # Rewrites dob.obj from internal markup towards XML text and returns dob.
    # dob must respond to #obj, #obj=, #is and #of. Mx, Dx and Xx are marker
    # tables defined outside this block.
    #
    # Steps, in order:
    #  1. split the text into entities, words and newlines, pass them through
    #     tidywords and rejoin with single spaces;
    #  2. for non-table objects, replace line/paragraph break markers;
    #  3. drop name markers, turn #name / #number markers into &name; / &#n;
    #     and strip the inline font-attribute marker pairs, keeping their text;
    #  4. for non-code objects, resolve page breaks, links, images and urls;
    #     for code objects a single substitution is applied instead;
    #  5. replace bullets in 'block' objects, decorate e-mail style urls and
    #     map the Dx url markers to their xml counterparts.
    #
    # NOTE(review): as this block stands many replacements are a bare capture
    # ('\1'), a raw line break, or the matched character itself, and one
    # pattern is empty. That looks like markup missing from the literals -
    # confirm the intended replacement text before relying on the output.
    def markup(dob='')
      wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
      dob.obj=tidywords(wordlist).join(' ').strip
      # tables keep their break markers; everything else has them replaced
      unless dob.is=='table'
        dob.obj=dob.obj.gsub(/#{Mx[:br_line]}/u,'
').
          gsub(/#{Mx[:br_paragraph]}/u,'
').
          gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
')
      end
      # name markers are removed; #name and #number markers become entities;
      # each font-attribute pair is replaced by the text it encloses
      dob.obj=dob.obj.gsub(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'').
        gsub(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;').
        gsub(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;').
        gsub(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< ').gsub(/\s+>(\s+|$)/,' >\1').
        #gsub(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'\1'). #reinstate
        gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/m,'\1').
        gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/m,'\1').
        gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
        gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1').
        gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1').
        gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1').
        gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1').
        gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1').
        gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1').
        gsub(/<:pb>\s*/,''). #Fix
        gsub(/<+[-~]#>+/,'')
      if dob.is !~/^code/
        #embeds a red-bullet image -->
        # bold/italics/underscore/strike were already handled above, so these
        # four only act on pairs the first pass left behind
        dob.obj=dob.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1').
          gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1').
          gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
          gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
        dob.obj=dob.obj.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') unless dob.is=='table'
        # link patterns run from most specific (relative ../ and :internal
        # targets, sized images) to most general (any link + url, bare url).
        # NOTE(review): the final "_"-escaped url pattern can only match what
        # the general url pattern before it left untouched - confirm the order.
        dob.obj=dob.obj.gsub(/#{Mx[:br_page]}\s*/,'').
          gsub(/#{Mx[:br_page_new]}\s*/,'').
          gsub(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,'').
          gsub(/<[-~]#>/,'').
          gsub(/href="#{Xx[:segment]}/m,'href="').
          gsub(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\.\.\/\S+?)#{Mx[:rel_c]}/,
            '\1').
          gsub(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}:(\S+?)#{Mx[:rel_c]}/,
            '\1').
          gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\S+?)#{Mx[:rel_c]}/,
            '\1').
          gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
            %{[\\1] \\4}).
          gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
            %{\\1}).
          gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/,
            %{[\\1] \\4}).
          gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}image/,
            %{\\1}).
          gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
            '\1'). #watch, compare html_tune
          gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
            %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}).
          gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,
            '\1') #escaped urls not linked, deal with later
      else
        # NOTE(review): an empty pattern matches at every position, so this
        # inserts '>' between all characters - confirm the intended pattern.
        dob.obj=dob.obj.gsub(//m,'>')
      end
      if dob.of=='block'
        dob.obj=dob.obj.gsub(/#{Mx[:gl_bullet]}/,'● ')
      end
      # e-mail style urls (name@host) get the skin's url decoration
      dob.obj=dob.obj.gsub(/#{Mx[:url_o]}([a-zA-Z0-9._-]+\@\S+?\.[a-zA-Z0-9._-]+)#{Mx[:url_c]}/,
          %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}).
        gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}").
        gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}").
        gsub(/ |#{Mx[:nbsp]}/m,' ')
      dob
    end
    # Lighter-weight counterpart of markup: strips the brace-style inline
    # markers (/{..}/, *{..}* or !{..}!, _{..}_, -{..}-) keeping their text,
    # removes <:pb> and <-#> / <~#> markers, points image links at
    # @md.file.output_path.xml.rel_image, then normalises whitespace through
    # tidywords. Rewrites dob.obj in place and returns dob.
    # dob must respond to #obj and #obj=.
    #
    # NOTE(review): as this block stands the line-break substitution matches a
    # raw line break and replaces it with a raw line break, and both '&'
    # substitutions put back the '&' they matched - confirm the intended
    # replacement text.
    # NOTE(review): the image pattern starts with a non-capturing
    # (?:^|[^_\\]) group, so the character before the link marker is consumed
    # and not restored - confirm that is intended.
    def markup_light(dob='')
      dob.obj=dob.obj.gsub(/\/\{(.+?)\}\//,'\1').
        gsub(/[*!]\{(.+?)\}[*!]/,'\1').
        gsub(/_\{(.+?)\}_/,'\1').
        gsub(/-\{(.+?)\}-/,'\1').
        gsub(/
/,'
').
        gsub(/<:pb>\s*/,'').
        gsub(/<[-~]#>/,'').
        gsub(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& '). #sort
        gsub(/&([^;]{1,5})/,'&\1'). #sort, rough estimate, revisit #WATCH found in node not sax
        gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
          "#{@md.file.output_path.xml.rel_image}\/\\1").
        gsub(/ |#{Mx[:nbsp]}/,' ')
      # split into entities, words and newlines, then rejoin with single spaces
      wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
      dob.obj=tidywords(wordlist).join(' ').strip
      dob
    end
    # FictionBook variant of markup_light. Rewrites dob.obj in place and
    # returns dob; dob must respond to #obj and #obj=.
    #
    # In order: footnotes ~{n text}~ collapse to [n]; the brace-style inline
    # markers (/{..}/, *{..}* or !{..}!, _{..}_, -{..}-) are stripped keeping
    # their text; <:pb>, <-#> / <~#> and any remaining <:...> markers are
    # removed; image links are pointed at
    # @md.file.output_path.xml.rel_image; whitespace is normalised through
    # tidywords.
    #
    # The substitutions form one chain whose result is stored back into
    # dob.obj. Every link must end in '.', otherwise the next line becomes a
    # separate call on self and the image group must stay balanced so the
    # interpolated pattern compiles.
    #
    # NOTE(review): the line-break substitution below matches and emits a raw
    # line break, and both '&' substitutions put back the '&' they matched;
    # they are kept as found - confirm the intended replacement text.
    def markup_fictionbook(dob='')
      dob.obj=dob.obj.gsub(/~\{([\d*+]+).+?\}~/,'[\1]').
        gsub(/\/\{(.+?)\}\//,'\1').
        gsub(/[*!]\{(.+?)\}[*!]/,'\1').
        gsub(/_\{(.+?)\}_/,'\1').
        gsub(/-\{(.+?)\}-/,'\1').
        gsub(/
/,'
').
        gsub(/<:pb>\s*/,'').
        gsub(/<[-~]#>/,'').
        #temporary -->
        gsub(/<:\S+?>/,'').
        #<-- temporary
        gsub(/<[-~]#>/,'').
        gsub(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& '). #sort
        gsub(/&([^;]{1,5})/,'&\1'). #sort, rough estimate, revisit #WATCH found in node not sax
        gsub(/#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
          "#{@md.file.output_path.xml.rel_image}\/\\1").
        gsub(/ |#{Mx[:nbsp]}/,' ')
      # split into entities, words and newlines, then rejoin with single spaces
      wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
      dob.obj=tidywords(wordlist).join(' ').strip
      dob
    end
    # Makes the text of a grouped object safe for XML while keeping the few
    # tags that are meant to survive. Rewrites dob.obj in place and returns
    # dob; dob must respond to #obj and #obj=.
    #
    # Every angle bracket is first escaped (&lt; / &gt;); the escaped forms of
    # line breaks (<br>, <br />, <:br>), xlink <link ...> / </link> and
    # <en> / </en> are then turned back into real tags. Any other tag stays
    # escaped and is shown as text.
    def markup_group(dob='')
      dob.obj=dob.obj.gsub(/</,'&lt;').gsub(/>/,'&gt;').
        gsub(/&lt;:?br(?:\s+\/)?&gt;/,'<br />').
        gsub(/&lt;(link xmlns:xlink=".+?")&gt;/,'<\1>').
        gsub(/&lt;(\/link)&gt;/,'<\1>').
        gsub(/&lt;(\/?en)&gt;/,'<\1>')
      dob
    end
    # Makes the text of a block object safe for XML while keeping the few tags
    # that are meant to survive. Rewrites dob.obj in place and returns dob;
    # dob must respond to #obj and #obj=.
    #
    # Every angle bracket is first escaped (&lt; / &gt;); the escaped forms of
    # line breaks (<br>, <br />, <:br>), xlink <link ...> / </link> and
    # <en> / </en> are then turned back into real tags. Any other tag stays
    # escaped and is shown as text.
    def markup_block(dob='')
      dob.obj=dob.obj.gsub(/</,'&lt;').gsub(/>/,'&gt;').
        gsub(/&lt;:?br(?:\s+\/)?&gt;/,'<br />').
        gsub(/&lt;(link xmlns:xlink=".+?")&gt;/,'<\1>').
        gsub(/&lt;(\/link)&gt;/,'<\1>').
        gsub(/&lt;(\/?en)&gt;/,'<\1>')
      dob
    end
    # Rewrites paired semantic blocks of the form  tag:{ ... }:tag  inside
    # matched and returns the resulting String (matched itself is not
    # modified; gsub returns a copy).
    #
    # The first eleven substitutions each handle one specific tag (au, vol,
    # pub, ref, desc, conv, ct, cty, org, dt, n) on word boundaries; the last
    # one handles any remaining lower-case tag, including compound names
    # joined by '_', ':' or '.'. \1 is the tag and \2 the enclosed text; the
    # closing tag must repeat the opening one. /m lets the enclosed text span
    # lines.
    #
    # NOTE(review): as this block stands every replacement is the enclosed
    # text alone, which makes the per-tag substitutions indistinguishable from
    # the generic one - confirm whether wrapper markup is missing from the
    # replacement strings.
    def xml_sem_block_paired(matched) # colon depth: many, recurs
      matched=matched.gsub(/\b(au):\{(.+?)\}:\1\b/m,  %{\\2}).
        gsub(/\b(vol):\{(.+?)\}:\1\b/m, %{\\2}).
        gsub(/\b(pub):\{(.+?)\}:\1\b/m, %{\\2}).
        gsub(/\b(ref):\{(.+?)\}:\1\b/m, %{\\2}).
        gsub(/\b(desc):\{(.+?)\}:\1\b/m,%{\\2}).
        gsub(/\b(conv):\{(.+?)\}:\1\b/m,%{\\2}).
        gsub(/\b(ct):\{(.+?)\}:\1\b/m,  %{\\2}).
        gsub(/\b(cty):\{(.+?)\}:\1\b/m, %{\\2}).
        gsub(/\b(org):\{(.+?)\}:\1\b/m, %{\\2}).
        gsub(/\b(dt):\{(.+?)\}:\1\b/m,  %{\\2}).
        gsub(/\b(n):\{(.+?)\}:\1\b/m,   %{\\2}).
        gsub(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'\2')
    end
    # Rewrites semantic markup in dob.obj when @md.sem_tag is set; otherwise
    # leaves dob untouched. Always returns dob. dob must respond to #obj and
    # #obj=.
    #
    # Three kinds of markup are handled, in this order:
    #  1. paired blocks  tag:{ ... }:tag  - each match is handed to
    #     xml_sem_block_paired. The same gsub! is run three times in a row;
    #     presumably so that blocks nested inside one another are reached -
    #     confirm. gsub! edits dob.obj in place.
    #  2. colon-suffixed spans  :{ ... }:tag  - specific tags first (au, n, ti,
    #     ref, desc, cty, org), then any lower-case tag.
    #  3. semicolon-suffixed spans  ;{ ... };tag  - specific tags first, then
    #     any lower-case tag. The enclosed text may not contain '}'. The
    #     patterns fn?, mn?, ln? and com? also accept the tag without its last
    #     letter (f, m, l, co).
    #
    # NOTE(review): as this block stands every replacement is the enclosed
    # text alone (\1) - confirm whether wrapper markup is missing from the
    # replacement strings.
    def xml_semantic_tags(dob)
      if @md.sem_tag
        dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) }
        dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) }
        dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) }
        dob.obj=dob.obj.gsub(/:\{(.+?)\}:au\b/m,             %{\\1}).
          gsub(/:\{(.+?)\}:n\b/m,              %{\\1}).
          gsub(/:\{(.+?)\}:ti\b/m,             %{\\1}).
          gsub(/:\{(.+?)\}:ref\b/m,            %{\\1}).
          gsub(/:\{(.+?)\}:desc\b/m,           %{\\1}).
          gsub(/:\{(.+?)\}:cty\b/m,            %{\\1}).
          gsub(/:\{(.+?)\}:org\b/m,            %{\\1}).
          gsub(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'\1').
          gsub(/;\{([^}]+(?![;]))\};ti\b/m,    %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};qt\b/m,    %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};ref\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};ed\b/m,    %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};v\b/m,     %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};desc\b/m,  %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};def\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};trans\b/m, %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};y\b/m,     %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};ab\b/m,    %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};pg\b/m,    %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};fn?\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};mn?\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};ln?\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};in\b/m,    %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};uni\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};fac\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};inst\b/m,  %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};dept\b/m,  %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};org\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};com?\b/m,  %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};cty\b/m,   %{\\1}).
          gsub(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'\1')
      end
      dob
    end
  end
end
module SiSU_XML_Tags #Format
  require_relative 'param'                              # param.rb
    include SiSU_Param
  include SiSU_Viz
  class RDF
    # Collects Dublin Core style metadata from md into instance variables.
    #
    # md       - document metadata object; the attributes read here are
    #            title, creator, classify, notes, publisher, date, type,
    #            original, rights and keywords.
    # seg_name - interpolated in front of the full title in @rdf_title.
    # tracker  - accepted but not used in this method.
    #
    # For each field that is defined and non-blank (or merely truthy, for
    # title, publisher, type and rights) two variables are set: @rdf_<field>
    # with a dc.<field>="..." attribute line and @<field> with a line of text.
    # Fields that are absent keep the empty string assigned on the first line.
    # That first line is one chained assignment, so all of those variables
    # start out referring to the same String object.
    #
    # NOTE(review): as this block stands every @<field> literal is two spaces
    # and a newline, and the `content` values returned by meta_content_clean
    # are never interpolated - confirm whether the meta-tag markup is missing
    # from these literals.
    def initialize(md='',seg_name=[],tracker=0)
      @full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords=''
      @md=md
      @rdfurl=%{  rdf:about="http://www.jus.uio.no/lm/toc"\n}
      if defined? @md.title.full \
      and @md.title.full                          # DublinCore 1 - title
        @rdf_title=%{    dc.title="#{seg_name}#{@md.title.full}"\n}
        @full_title=%{  \n}
      end
      if defined? @md.creator.author \
      and @md.creator.author=~/\S+/                                            # DublinCore 2 - creator/author (author)
        @rdf_author=%{    dc.author="#{@md.creator.author}"\n}
        content=meta_content_clean(@md.creator.author)
        @author=%{  \n}
      end
      if defined? @md.classify.subject \
      and @md.classify.subject=~/\S+/                                          # DublinCore 3 - subject (us library of congress, eric or udc, or schema???)
        @rdf_subject=%{    dc.subject="#{@md.classify.subject}"\n}
        content=meta_content_clean(@md.classify.subject)
        @subject=%{  \n}
      end
      if defined? @md.notes.description \
      and @md.notes.description=~/\S+/                                         # DublinCore 4 - description
        @rdf_description=%{    dc.description="#{@md.notes.description}"\n}
        content=meta_content_clean(@md.notes.description)
        @description=%{  \n}
      end
      if defined? @md.publisher \
      and @md.publisher                                                        # DublinCore 5 - publisher (current copy published by)
        @rdf_publisher=%{    dc.publisher="#{@md.publisher}"\n}
        content=meta_content_clean(@md.publisher)
        @publisher=%{  \n}
      end
      if defined? @md.creator.contributor \
      and @md.creator.contributor=~/\S+/                                      # DublinCore 6 - contributor
        @rdf_contributor=%{    dc.contributor="#{@md.creator.contributor}"\n}
        content=meta_content_clean(@md.creator.contributor)
        @contributor=%{  \n}
      end
      if defined? @md.date.published \
      and @md.date.published=~/\S+/                                           # DublinCore 7 - date year-mm-dd
        @rdf_date=%{    dc.date="#{@md.date.published}"\n}
        @date=%{  \n} # fix @md.date_scheme
      end
      if defined? @md.date.created \
      and @md.date.created=~/\S+/                                             # DublinCore 7 - date.created year-mm-dd
        @rdf_date_created=%{    dc.date.created="#{@md.date.created}"\n}
        @date_created=%{  \n}
      end
      if defined? @md.date.issued \
      and @md.date.issued=~/\S+/                                              # DublinCore 7 - date.issued year-mm-dd
        @rdf_date_issued=%{    dc.date.issued="#{@md.date.issued}"\n}
        @date_issued=%{  \n}
      end
      if defined? @md.date.available \
      and @md.date.available=~/\S+/                                           # DublinCore 7 - date.available year-mm-dd
        @rdf_date_available=%{    dc.date.available="#{@md.date.available}"\n}
        @date_available=%{  \n}
      end
      if defined? @md.date.valid \
      and @md.date.valid=~/\S+/                                               # DublinCore 7 - date.valid year-mm-dd
        @rdf_date_valid=%{    dc.date.valid="#{@md.date.valid}"\n}
        @date_valid=%{  \n}
      end
      if defined? @md.date.modified \
      and @md.date.modified=~/\S+/                                            # DublinCore 7 - date.modified year-mm-dd
        @rdf_date_modified=%{    dc.date.modified="#{@md.date.modified}"\n}
        @date_modified=%{  \n}
      end
      if defined? @md.type \
      and @md.type                                                            # DublinCore 8 - type (genre eg. report, convention etc)
        @rdf_type=%{    dc.type="#{@md.type}"\n}
        content=meta_content_clean(@md.type)
        @type=%{  \n}
      end
      if defined? @md.classify.format \
      and @md.classify.format=~/\S+/                                          # DublinCore 9 - format (use your mime type)
        @rdf_format=%{    dc.format="#{@md.classify.format}"\n}
        content=meta_content_clean(@md.classify.format)
        @format=%{  \n}
      end
      if defined? @md.classify.identifier \
      and @md.classify.identifier=~/\S+/                                       # DublinCore 10 - identifier (your identifier, could use urn which is free)
        @rdf_identifier=%{    dc.identifier="#{@md.classify.identifier}"\n}
        content=meta_content_clean(@md.classify.identifier)
        @identifier=%{  \n}
      end
      if defined? @md.original.source \
      and @md.original.source=~/\S+/                                           # DublinCore 11 - source (document source)
        @rdf_source=%{    dc.source="#{@md.original.source}"\n}
        content=meta_content_clean(@md.original.source)
        @source=%{  \n}
      end
      if defined? @md.title.language \
      and @md.title.language=~/\S+/                                            # DublinCore 12 - language (English)
        @rdf_language=%{    dc.language="#{@md.title.language}"\n}
        @language=%{  \n}
      end
      # language of the original work; emitted under the same dc.language key
      if defined? @md.original.language \
      and @md.original.language=~/\S+/
        @rdf_language_original=%{    dc.language="#{@md.original.language}"\n}
        @language_original=%{  \n}
      end
      if defined? @md.classify.relation \
      and @md.classify.relation=~/\S+/                                         # DublinCore 13 - relation
        @rdf_relation=%{    dc.relation="#{@md.classify.relation}"\n}
        content=meta_content_clean(@md.classify.relation)
        @relation=%{  \n}
      end
      if defined? @md.classify.coverage \
      and @md.classify.coverage=~/\S+/                                         # DublinCore 14 - coverage
        @rdf_coverage=%{    dc.coverage="#{@md.classify.coverage}"\n}
        content=meta_content_clean(@md.classify.coverage)
        @coverage=%{  \n}
      end
      if defined? @md.rights.all \
      and @md.rights.all                                                      # DublinCore 15 - rights
        @rdf_rights=%{    dc.rights="#{@md.rights.all}"\n}
        content=meta_content_clean(@md.rights.all)
        @rights=%{  \n}
      end
      # keywords are cleaned unconditionally; @keywords is only set when present
      content=meta_content_clean(@md.keywords)
      @keywords=%{  \n} if @md.keywords
      @vz=SiSU_Env::GetInit.instance.skin
    end
    # Prepares a metadata value for use inside a double-quoted attribute:
    # double quotes become single quotes and the result is passed through
    # SiSU_XML_Munge::Trans#char_enc.utf8. nil is passed back unchanged.
    def meta_content_clean(content='')
      return content if content.nil?
      single_quoted=content.tr('"',"'")
      SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(single_quoted)
    end
    # Segment-head variant of the RDF block: returns whatever rdftoc returns.
    # rdftoc is defined outside the part of the file reviewed here.
    def rdfseg #segHead
      rdftoc
    end
    # Builds the XML comment that records how a document was generated.
    #
    # extra - optional first line (e.g. the kind of XML representation);
    #         omitted when empty.
    #
    # Returns a String holding one XML comment, newline-terminated, with one
    # bulleted line per available detail: extra, generator (only when
    # @md.sisu_version[:version] is set), Ruby version, source file (only when
    # @md.sc_info is set) and generation time. Details that are nil or empty
    # are left out rather than printed as blank bullets.
    # NOTE(review): the values are inserted as given; a value containing "--"
    # would make the comment ill-formed XML.
    def comment_xml(extra='')
      generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})"  if @md.sisu_version[:version]
      lastdone="Last Generated on: #{Time.now}"
      rubyv="Ruby version: #{@md.ruby_version}"
      sc=if @md.sc_info
        "Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}"
      else ''
      end
      details=[extra,generator,rubyv,sc,lastdone].reject {|detail| detail.nil? || detail.empty? }
      bullets=details.map {|detail| "     * #{detail}" }.join("\n")
      "<!-- Document processing information:\n#{bullets}\n-->\n"
    end
    # Processing-information comment labelled for the SAX type output.
    def comment_xml_sax
      comment_xml('SiSU XML, SAX type representation')
    end
    # Processing-information comment labelled for the Node type output.
    def comment_xml_node
      comment_xml('SiSU XML, Node type representation')
    end
    # Processing-information comment labelled for the DOM type output.
    def comment_xml_dom
      comment_xml('SiSU XML, DOM type representation')
    end
    def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better
<