diff options
| author | Ralph Amissah <ralph.amissah@gmail.com> | 2021-04-02 19:37:00 -0400 | 
|---|---|---|
| committer | Ralph Amissah <ralph.amissah@gmail.com> | 2021-04-02 20:03:27 -0400 | 
| commit | 90051a7ea55acb043434b1c2483b878d602246ba (patch) | |
| tree | 9e803c11a0ac4e37023b3c79f19f5b372d4175ee /org/object_munge.org | |
| parent | nix ruby 3.0 (available) (diff) | |
org mode (ruby code within)
Diffstat (limited to 'org/object_munge.org')
| -rw-r--r-- | org/object_munge.org | 331 | 
1 files changed, 331 insertions, 0 deletions
# <<sisu_document_header>>

# Text munging helpers: strip or restore markup markers in document text.
#
# Every pattern is built by interpolating entries of +Mx+ into a regular
# expression. +Mx+ is not defined here; it must be a constant in scope that
# maps the symbols used below to marker strings.
#
# Several methods follow a "return self, then call a second method" chaining
# style, e.g. <tt>i_ao_o_src_markup_restore(txt).textface_marks</tt> and
# <tt>objects.textface_marks_po4a(dob)</tt>. The chained methods used to be
# defined by nested +def+s (and therefore re-defined on every call); they are
# now ordinary module-level methods with the same names and signatures.
module SiSU_Object_Munge
  # Strip source-form (+srcrgx_*+) text-face markers from +txtobj+, drop
  # endnotes, convert break markers to newlines and decode glyph escapes.
  #
  # @param txtobj [String] text to clean
  # @return [String] cleaned text with leading/trailing whitespace stripped
  def i_src_o_strip_markup(txtobj)
    stripped=txtobj.
      gsub(/#{Mx[:srcrgx_bold_o]}(.+?)#{Mx[:srcrgx_bold_c]}/m,'\1').
      gsub(/#{Mx[:srcrgx_italics_o]}(.+?)#{Mx[:srcrgx_italics_c]}/m,'\1').
      gsub(/#{Mx[:srcrgx_underscore_o]}(.+?)#{Mx[:srcrgx_underscore_c]}/m,'\1').
      gsub(/#{Mx[:srcrgx_cite_o]}(.+?)#{Mx[:srcrgx_cite_c]}/m,'\1').
      gsub(/#{Mx[:srcrgx_insert_o]}(.+?)#{Mx[:srcrgx_insert_c]}/m,'\1').
      gsub(/#{Mx[:srcrgx_strike_o]}(.+?)#{Mx[:srcrgx_strike_c]}/m,'\1').
      gsub(/#{Mx[:srcrgx_superscript_o]}(\d+)#{Mx[:srcrgx_superscript_c]}/m,'[\1]'). # numeric superscript kept as [n]
      gsub(/#{Mx[:srcrgx_superscript_o]}(.+?)#{Mx[:srcrgx_superscript_c]}/m,'\1').
      gsub(/#{Mx[:srcrgx_subscript_o]}(.+?)#{Mx[:srcrgx_subscript_c]}/m,'\1').
      gsub(/#{Mx[:srcrgx_hilite_o]}(.+?)#{Mx[:srcrgx_hilite_c]}/m,'\1').
      gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~').
      gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/m,''). # endnote removed
      gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/m,'')  # endnote removed
    # The debugging output that used to be printed for text containing
    # "Reading this" has been removed; this method no longer writes to stdout.
    munge_squeeze_(munge_glyphs_(munge_breaks_(stripped)))
  end

  # Strip abstract-object (+fa_*+) text-face markers from +txtobj+, drop
  # endnotes, convert break markers to newlines and decode glyph escapes.
  #
  # @param txtobj [String] text to clean
  # @return [String] cleaned text with leading/trailing whitespace stripped
  def i_ao_o_strip_markup(txtobj)
    stripped=txtobj.
      gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1').
      gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1').
      gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
      gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1').
      gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1').
      gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1').
      gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]'). # numeric superscript kept as [n]
      gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1').
      gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1').
      gsub(/#{Mx[:fa_hilite_o]}(.+?)#{Mx[:fa_hilite_c]}/,'\1').
      gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~').
      gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,''). # endnote removed
      gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'')  # endnote removed
    munge_squeeze_(munge_glyphs_(munge_breaks_(stripped)))
  end

  # Remember +txtobj+ so that #textface_marks or #object_marks can be chained
  # on the returned receiver.
  #
  # @param txtobj [String] text whose markup is to be restored
  # @return [self]
  def i_ao_o_src_markup_restore(txtobj)
    @txtobj=txtobj
    self
  end

  # Rewrite the +fa_*+ text-face markers in the text stored by
  # #i_ao_o_src_markup_restore as literal marks (<tt>*{..}*</tt>,
  # <tt>/{..}/</tt>, ...), then normalise breaks, glyphs and spacing.
  #
  # @return [String] restored text
  def textface_marks
    restored=@txtobj.
      gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*{\1}*').
      gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/{\1}/').
      gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_{\1}_').
      gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"{\1}"').
      gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+{\1}+').
      gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-{\1}-').
      gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'^{[\1]}^').
      gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^{\1}^').
      gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,',{\1},').
      gsub(/#{Mx[:fa_hilite_o]}(.+?)#{Mx[:fa_hilite_c]}/,'\1').
      gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~').
      # The note body must be a capturing group: the replacement uses \2, and
      # with a non-capturing group the note text was silently dropped.
      gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(.+?)#{Mx[:en_a_c]}/,'~{\1 \2}~').
      gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'') # endnote removed
      # NOTE(review): en_b notes are dropped rather than restored - confirm intended.
    munge_squeeze_(munge_glyphs_(munge_breaks_(restored)))
  end

  # @return [Object] the text last stored by #i_ao_o_src_markup_restore
  #   (nil if it has not been called on this receiver)
  def object_marks
    @txtobj
  end

  # Strip markup from a String, or from the +obj+ attribute of an object whose
  # class name starts with "SiSU_AO_DocumentStructure::".
  #
  # @param txtobj [String, Object] text, or document object exposing +obj+/+obj=+
  # @param markup [Symbol] +:ao+ (default) or +:src+; only consulted for Strings
  # @return [String, Object] the cleaned String, or the same object with its
  #   +obj+ cleaned in place. Input that cannot be handled is returned
  #   unchanged after a warning on stderr (previously an unknown +markup+
  #   made this method return the value of <tt>p __LINE__</tt>).
  def clean_text(txtobj,markup=:ao)
    if txtobj.is_a?(String)
      case markup
      when :ao  then i_ao_o_strip_markup(txtobj)
      when :src then i_src_o_strip_markup(txtobj)
      else
        warn "#{__FILE__}:#{__LINE__}: clean_text: unknown markup #{markup.inspect}, text left unchanged"
        txtobj
      end
    elsif txtobj.class.inspect=~/^SiSU_AO_DocumentStructure::/
      txtobj.obj=i_ao_o_strip_markup(txtobj.obj)
      txtobj
    else
      warn "#{__FILE__}:#{__LINE__}: clean_text: error, unsupported object #{txtobj.class}"
      txtobj
    end
  end

  # Placeholder: no implementation yet, always returns nil.
  def footnotes_inline(txtobj)
  end

  # Placeholder: no implementation yet, always returns nil.
  def footnotes_ref_and_note(txtobj)
  end

  # Placeholder: returns +txtobj+ unchanged.
  def src_markup(txtobj)
    txtobj
  end

  # Extract endnotes from a paragraph, or mark them up inline.
  #
  # With <tt>endnotes_ == :separate</tt>, numbered +en_a+ notes are collected
  # (each prefixed "^~N ", line breaks replaced) and replaced in the text by
  # the marker "~^". Otherwise notes are rewritten inline as <tt>~[ .. ]~</tt>
  # / <tt>~{ .. }~</tt> and no notes are collected.
  #
  # @param doc_obj_txt [String] paragraph text
  # @param endnotes_ [Symbol] +:separate+ to collect notes; anything else for inline
  # @return [Array(String, Array<String>)] text and notes; the second element
  #   is nil (not an empty Array) in inline mode, as before
  def extract_endnotes(doc_obj_txt,endnotes_)               #% used for extraction of endnotes from paragraphs
    notes_a=nil
    if endnotes_ ==:separate
      # only numbered en_a notes are collected; [*+] markers and en_b are not
      notes_a=doc_obj_txt.
        scan(/#{Mx[:en_a_o]}([\d]+\s+.+?)#{Mx[:en_a_c]}/).
        flatten.
        map do |note| #high cost to deal with <br> appropriately within plaintext, consider
          note.to_s.
            gsub(/^([\d]+)\s+/,'^~\1 ').
            gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,
              ' \\\\\\ ')
        end
      doc_obj_txt=doc_obj_txt.
        gsub(/#{Mx[:en_a_o]}([\d]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'~^')   # endnote marker marked up
    else
      # NOTE(review): numbered en_a notes are left untouched in this branch
      # (only numbered en_b and [*+]-marked notes are rewritten) - confirm.
      doc_obj_txt=doc_obj_txt.
        gsub(/#{Mx[:en_b_o]}[\d]+\s+(.+?)#{Mx[:en_b_c]}/,
          '~[ \1 ]~').     # inline endnote with marker marked up
        gsub(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/,
          '~{\1 \2 }~').   # inline endnote with marker marked up
        gsub(/#{Mx[:en_b_o]}([*+]+)\s+(.+?)#{Mx[:en_b_c]}/,
          '~[\1 \2 ]~')    # inline endnote with marker marked up
    end
    [doc_obj_txt,notes_a]
  end

  # Entry point for the document-object helpers (#code_, #block_,
  # #textface_marks_po4a, #object_marks); returns the receiver so that they
  # can be chained.
  #
  # @return [self]
  def objects
    self
  end

  # For code objects (<tt>dob.is == :code</tt>) drop the underscore in front
  # of "<" / ">" unless it is preceded by "}".
  #
  # @param dob [Object] document object responding to +is+, +obj+, +obj=+
  # @return [Object] the same +dob+
  def code_(dob)
    if dob.is==:code
      dob.obj=dob.obj.gsub(/(^|[^}])_([<>])/m,'\1\2'). # _> _<
        gsub(/(^|[^}])_([<>])/m,'\1\2') # _<_< (second pass catches adjacent pairs)
    end
    dob
  end

  # Convert break markers to newlines: a single newline (plus bullet glyph
  # rewritten to "* ") for <tt>dob.of == :block</tt>, a blank line otherwise.
  #
  # @param dob [Object] document object responding to +of+, +obj+, +obj=+
  # @return [Object] the same +dob+
  def block_(dob)
    dob.obj=if dob.of==:block                                   # watch
      dob.obj.gsub(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/,"* ").
        gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n")
    else
      dob.obj.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n")
    end
    dob
  end

  # Convert a document object's +fa_*+ face markers to their +src_*+
  # equivalents and, for non-code objects, simplify links, extract or inline
  # endnotes and decode glyph escapes; finally tidy breaks, page breaks, name
  # links and image references.
  #
  # @param dob [Object] document object responding to +obj+, +obj=+, +is+, +of+
  # @param endnotes_ [Symbol] passed to #extract_endnotes (default +:inline+)
  # @return [Array(Object, Object)] +dob+ and the notes: '' for code objects,
  #   otherwise whatever #extract_endnotes returned (nil in inline mode)
  def textface_marks_po4a(dob,endnotes_=:inline)
    notes=''
    dob.obj=dob.obj.
      gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,
        Mx[:src_bold_o] + '\1' + Mx[:src_bold_c]).
      gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,
        Mx[:src_italics_o] + '\1' + Mx[:src_italics_c]).
      gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,
        Mx[:src_underscore_o] + '\1' + Mx[:src_underscore_c]).
      gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,
        Mx[:src_subscript_o] + '\1' + Mx[:src_subscript_c]).
      gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,
        Mx[:src_superscript_o] + '\1' + Mx[:src_superscript_c]).
      gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,
        Mx[:src_insert_o] + '\1' + Mx[:src_insert_c]).
      gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,
        Mx[:src_cite_o] + '\1' + Mx[:src_cite_c]).
      gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,
        Mx[:src_strike_o] + '\1' + Mx[:src_strike_c]).
      gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,
        Mx[:src_monospace_o] + '\1' + Mx[:src_monospace_c])
    unless dob.is==:code
      dob.obj=dob.obj.
        gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1').
        gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1').
        gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
          '\1 [link: <\2>]').
        gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/,
          '\1 [link: local image]').
        gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1')
      dob.obj,notes=extract_endnotes(dob.obj,endnotes_)
      dob.obj=munge_glyphs_(dob.obj)
    end
    dob=block_(dob)
    dob=code_(dob)
    dob.obj=dob.obj.gsub(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,''). # remove page breaks
      gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1').
      gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'').                 # remove name links
      # NOTE(review): the first alternative below is reproduced as found; it
      # may originally have been a non-breaking-space entity - confirm.
      gsub(/ |#{Mx[:nbsp]}/,' ').                                 # decide on
      gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,
        '    [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]")
      gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,
        '    [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]")
      gsub(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,
        '[image: "\1"]')
    [dob,notes]
  end

  private

  # Replace runs of non-breaking-space markers with one space and runs of
  # break markers (+br_nl+, +br_paragraph+, +br_line+) with one newline.
  def munge_breaks_(txt)
    txt.
      gsub(/(?:#{Mx[:nbsp]})+/,' ').
      gsub(/(?:#{Mx[:br_nl]})+/,"\n").
      gsub(/(?:#{Mx[:br_paragraph]})+/,"\n").
      gsub(/(?:#{Mx[:br_line]})+/,"\n")
  end

  # Decode glyph escapes (+gl_o+ "#NNN" +gl_c+) into literal characters.
  def munge_glyphs_(txt)
    txt.
      gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<').
      gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>').
      gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&').
      gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!').
      gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#').
      gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*').
      gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-').
      gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/').
      gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_').
      gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{').
      gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}').
      gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~').
      gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©')
  end

  # Collapse runs of two or more spaces into one and strip the ends.
  # (The former pattern /[ ][ ]s+/ matched two spaces followed by literal
  # "s" characters, which deleted the leading "s" of words after a double
  # space and left other space runs untouched.)
  def munge_squeeze_(txt)
    txt.gsub(/[ ]{2,}/,' ').strip
  end
end
+ +    If you have Internet connection, the latest version of the GPL should be +    available at these locations: +    <http://www.fsf.org/licensing/licenses/gpl.html> +    <http://www.gnu.org/licenses/gpl.html> + +    <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html> + +  - SiSU uses: +    - Standard SiSU markup syntax, +    - Standard SiSU meta-markup syntax, and the +    - Standard SiSU object citation numbering and system + +  - Homepages: +    <http://www.sisudoc.org> + +  - Git +    <https://git.sisudoc.org/projects/> +    <https://git.sisudoc.org/projects/?p=software/sisu.git;a=summary> +    <https://git.sisudoc.org/projects/?p=markup/sisu-markup-samples.git;a=summary> +#+END_SRC | 
