diff options
Diffstat (limited to 'lib/sisu/txt_markdown.rb')
| -rw-r--r-- | lib/sisu/txt_markdown.rb | 389 | 
1 files changed, 389 insertions, 0 deletions
| diff --git a/lib/sisu/txt_markdown.rb b/lib/sisu/txt_markdown.rb new file mode 100644 index 00000000..8840dd18 --- /dev/null +++ b/lib/sisu/txt_markdown.rb @@ -0,0 +1,389 @@ +# encoding: utf-8 +=begin + +* Name: SiSU + +** Description: documents, structuring, processing, publishing, search +*** plaintext (smarttext) generation, markdown + +** Author: Ralph Amissah +  <ralph@amissah.com> +  <ralph.amissah@gmail.com> + +** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah, +  All Rights Reserved. + +** License: GPL 3 or later: + +  SiSU, a framework for document structuring, publishing and search + +  Copyright (C) Ralph Amissah + +  This program is free software: you can redistribute it and/or modify it +  under the terms of the GNU General Public License as published by the Free +  Software Foundation, either version 3 of the License, or (at your option) +  any later version. + +  This program is distributed in the hope that it will be useful, but WITHOUT +  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +  more details. + +  You should have received a copy of the GNU General Public License along with +  this program. If not, see <http://www.gnu.org/licenses/>. + +  If you have Internet connection, the latest version of the GPL should be +  available at these locations: +  <http://www.fsf.org/licensing/licenses/gpl.html> +  <http://www.gnu.org/licenses/gpl.html> + +  <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html> + +** SiSU uses: +  * Standard SiSU markup syntax, +  * Standard SiSU meta-markup syntax, and the +  * Standard SiSU object citation numbering and system + +** Hompages: +  <http://www.jus.uio.no/sisu> +  <http://www.sisudoc.org> + +** Git +  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary> +  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/txt_markdown.rb;hb=HEAD> + +=end +module SiSU_Txt_Markdown +  require_relative 'ao'                                 # ao.rb +  require_relative 'se'                                 # se.rb +    include SiSU_Env +  require_relative 'shared_metadata'                    # shared_metadata.rb +  require_relative 'generic_parts'                      # generic_parts.rb +  require_relative 'txt_read'                           # txt_read.rb +  require_relative 'txt_shared'                         # txt_shared.rb +  require_relative 'txt_markdown_decorate'              # txt_markdown_decorate.rb +  require_relative 'txt_output'                         # txt_output.rb +  include SiSU_Param +  @@alt_id_count,@@alt_id_count=0,0 +  @@tablefoot='' +  class Source +    include SiSU_Txt_Read +    def initialize(opt) +      @opt=opt +      unless @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/ +        puts "#{sf} not a processed file type" +      end +    end +    def read +      begin +        md=SiSU_Param::Parameters.new(@opt).get +        specific={ +          description:     'Markdown (plaintext utf-8)', +          output_path:     md.file.output_path.markdown.dir, +          output_file:     md.file.base_filename.markdown, +        } +        read_generic(@opt,specific) +        SiSU_Txt_Markdown::Source::Scroll.new(md,@ao_array,@wrap_width).songsheet +      rescue +        SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do +          __LINE__.to_s + ':' + __FILE__ +        end +      ensure +      end +    end +    private +    class Scroll <Source +      include SiSU_Parts_Generic +      include SiSU_TextUtils +      include SiSU_Decorate_Txt_Markdown +      @@endnotes={ para: [], end: [] } +      def initialize(md,data,wrap_width) +        @md,@data,@wrap_width=md,data,wrap_width +        @env=SiSU_Env::InfoEnv.new(@md.fns) +        @tab="\t" +        @@endnotes_=case md.opt.selections.str +        when /--footnote/ then false +        when /--endnote/  then true +        else                   true +        end +        @plaintext={ body: [], open: [], close: [], head: [], metadata: [], tail: [] } +      end +      def songsheet +        plaintext=markup(@data) +        publish(plaintext) +      end +      def break_line +        "\n" +      end +      # Used for extraction of endnotes from paragraphs +      def extract_endnotes(dob='') +        notes=dob.obj.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) +        @n=[] +        notes.flatten.each do |n| #high cost to deal with <br> appropriately within plaintext, consider +          n=n.dup.to_s +          if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/ +            fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added +            fix.each do |x| +              unless x.empty? then @n << x +              end +            end +          else                     @n << n +          end +        end +        notes=@n.flatten +        notes.each do |e| +          util=(e.to_s =~/^\[[\d*+]+\]:/) \ +          ? (SiSU_TextUtils::Wrap.new(e.to_s,@wrap_width,4,1)) +          : (SiSU_TextUtils::Wrap.new(e.to_s,@wrap_width,1,1)) +          wrap=util.line_wrap +          wrap=if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m +            wrap.gsub(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, <<-GSUB +\\1[\\2]: \\3 +              GSUB +            ) +          else +            wrap.gsub(/^(.+)\Z/m, <<-GSUB +\\1 +              GSUB +            ) +          end +          @@endnotes[:para] << "-#{wrap}" +          @@endnotes[:end] << '' << wrap +        end +        @@endnotes +      end +      def plaintext_metadata +        array=SiSU_Metadata::Summary.new(@md).plaintext.metadata +        array.each do |meta| +          tag,inf=meta.scan(/^.+?:\s|.+/) +          if tag and inf +            util=SiSU_TextUtils::Wrap.new(inf,@wrap_width,15,1) +            txt=util.line_wrap +            @plaintext[:metadata] <<<<WOK + +#{@tab}#{tag}#{txt} +WOK +          end +        end +      end +      def plaintext_tail +#       env=SiSU_Env::InfoEnv.new(@md.fns) +        generator="Generated by: #{@md.project_details.project} #{@md.project_details.version} of #{@md.project_details.date_stamp} (#{@md.project_details.date})"  if @md.project_details.version +        lastdone="Last Generated on: #{Time.now}" +        rubyv="Ruby version: #{@md.ruby_version}" +        sc=if @md.sc_info +          "Source file:    #{@md.sc_filename}#{break_line}Version number: #{@md.sc_number}#{break_line}Version date:   #{@md.sc_date}#{break_line}" +        else '' +        end +        @plaintext[:tail] <<<<WOK +#{break_line} +plaintext (plain text): +   #{@md.file.output_path.markdown.url}/#{@md.file.base_filename.markdown}#{break_line} +Other versions of this document: #{break_line} +manifest: +   #{@md.file.output_path.manifest.url}/#{@md.file.base_filename.manifest}#{break_line} +at: +   #{@md.file.output_path.base.url}#{break_line} + +#{sc} +* #{generator} +* #{rubyv} +* #{lastdone} +* SiSU #{the_url.sisu_txt} +WOK +      end +      def heading_decorated_inline(dob) +        if dob.is==:heading +          heading_inline = case dob.lc +          when 0 then decorate.heading.inline.l0 +          when 1 then decorate.heading.inline.l1 +          when 2 then decorate.heading.inline.l2 +          when 3 then decorate.heading.inline.l3 +          when 4 then decorate.heading.inline.l4 +          when 5 then decorate.heading.inline.l5 +          when 6 then decorate.heading.inline.l6 +          end +          heading_inline + ' ' +  dob.obj + ' ' + heading_inline +        end +      end +      def heading_decorated_underscore(dob,times,p_num) +        if dob.is==:heading +          #times=@wrap_width if times > @wrap_width +          case dob.lc +          when 0 then decorate.heading.underscore.l0*times + p_num << break_line*2 +          when 1 then decorate.heading.underscore.l1*times + p_num << break_line*2 +          when 2 then decorate.heading.underscore.l2*times + p_num << break_line*2 +          when 3 then decorate.heading.underscore.l3*times + p_num << break_line*2 +          when 4 then decorate.heading.underscore.l4*times + p_num << break_line*2 +          when 5 then decorate.heading.underscore.l5*times + p_num << break_line*2 +          when 6 then decorate.heading.underscore.l6*times + p_num << break_line*2 +          end +        end +      end +      def plaintext_structure(dob='',p_num='') #% Used to extract the structure of a document +        heading_decoration=:inline #(:inline|:underscore) #switch heading decoration between inline & underscore options +        util=nil +        wrapped=if dob.is==:para \ +        || dob.is==:heading +          if dob.is==:heading +            util=(heading_decoration== :inline) \ +            ? (SiSU_TextUtils::Wrap.new(heading_decorated_inline(dob),@wrap_width,0)) +            : (SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,0)) +          elsif dob.is==:para +            if dob.hang \ +            and dob.hang =~/[0-9]/ \ +            and dob.indent != dob.hang +              util=SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,dob.indent.to_i*2,dob.hang.to_i*2) +              #util=SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,dob.hang.to_i*2,0) +            elsif dob.indent =~/[1-9]/ +              util=if dob.bullet_ +                SiSU_TextUtils::Wrap.new("* #{dob.obj}",@wrap_width,dob.indent.to_i*2) +              else SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,dob.indent.to_i*2) +              end +            else +              util=if dob.bullet_ +                SiSU_TextUtils::Wrap.new("* #{dob.obj}",@wrap_width,0) +              else SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,0) +              end +            end +          else util=SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,0) +          end +          dob.is==:heading ? util.no_wrap_no_breaks : util.line_wrap +        end +        if heading_decoration== :underscore \ +        and dob.is==:heading +          @plaintext[:body] << wrapped + p_num # main text, contents, body KEEP +          @plaintext[:body] << heading_decorated_underscore(dob,wrapped.length,p_num) +        else +          @plaintext[:body] << wrapped + p_num << break_line # main text, contents, body KEEP +        end +        if @@endnotes[:para] \ +        and not @@endnotes_ +          @@endnotes[:para].each {|e| @plaintext[:body] << e << break_line} +        elsif @@endnotes[:para] \ +        and @@endnotes_ +        end +        @@endnotes[:para]=[] +      end +      def markup(data)                                                       # Used for major markup instructions +        SiSU_Env::InfoEnv.new(@md.fns) +        @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]} +        (0..6).each { |x| @cont[x]=@level[x]=false } +        (4..6).each { |x| @plaintext_contents_close[x]='' } +        plaintext_tail #($1,$2) +        plaintext_metadata +        table_message='[table conversion awaited, see other document formats]' +        data.each do |dob| +          dob.obj=dob.obj.gsub(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#{break_line}#{table_message}"). #fix +            gsub(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'').                              # remove dummy headings (used by html) #check also [~-]# +            gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/, +              "#{decorate.bold.open}\\1#{decorate.bold.close}"). +            gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/, +              "#{decorate.italics.open}\\1#{decorate.italics.close}"). +            gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/, +              "#{decorate.underscore.open}\\1#{decorate.underscore.close}"). +            gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/, +              "#{decorate.subscript.open}\\1#{decorate.subscript.close}"). +            gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/, +              "#{decorate.superscript.open}\\1#{decorate.superscript.close}"). +            gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/, +              "#{decorate.insert.open}\\1#{decorate.insert.close}"). +            gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/, +              "#{decorate.cite.open}\\1#{decorate.cite.close}"). +            gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/, +              "#{decorate.strike.open}\\1#{decorate.strike.close}"). +            gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/, +              "#{decorate.monospace.open}\\1#{decorate.monospace.close}") +          unless dob.is==:code +            dob.obj=dob.obj.gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1'). +              gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). +              gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1 [link: <\2>]'). +              gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/,'\1 [link: local image]'). +              gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,"#{the_text.url_open}\\1#{the_text.url_close}") +            extract_endnotes(dob) +            dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]'). # endnote marker marked up +              gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]'). # endnote marker marked up +              gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'). +              gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'). +              gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'). +              gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). +              gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). +              gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). +              gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). +              gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). +              gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). +              gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). +              gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). +              gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'). +              gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©'). +              gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\\') +          end +          dob.obj=if dob.of==:block                                   # watch +            dob.obj.gsub(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/m,"* "). +              gsub(/\n?#{Mx[:br_line]}\n?|\n?#{Mx[:br_nl]}\n?/m,break_line) +          else dob.obj.gsub(/\n?#{Mx[:br_line]}\n?|\n?#{Mx[:br_nl]}\n?/m,break_line*2) +          end +          if dob.is==:code +            dob.obj=dob.obj.gsub(/(^|[^}])_([<>])/m,'\1\2'). # _> _< +              gsub(/(^|[^}])_([<>])/m,'\1\2') # _<_< +          end +          dob.obj=dob.obj.gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). +            gsub(/<a href=".+?">(.+?)<\/a>/m,'\1'). +            gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'').                       # remove name links +            gsub(/ |#{Mx[:nbsp]}/,' ').                                       # decide on +            gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'    [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]") +            gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,'    [ \1 ]'). +            gsub(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') +          if dob.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ +            p_num='' +            #ocn +            if dob.is==:heading \ +            or dob.is==:para +              plaintext_structure(dob,p_num) +            elsif dob.is==:group \ +            or dob.is==:block \ +            or dob.is==:verse \ +            or dob.is==:code \ +            or dob.is==:table +              @plaintext[:body] << dob.obj + p_num << break_line +            elsif dob.is==:break +              sp=' ' +              ln='-' +              @plaintext[:body] <<=if dob.obj==Mx[:br_page] \ +              or dob.obj==Mx[:br_page_new] \ +              or dob.obj==Mx[:br_page_line] +                "#{break_line}#{ln*40}#{break_line*2}" +              elsif dob.obj ==Mx[:br_obj] +                "#{break_line}#{sp*20}*  *  *#{break_line*2}" +              end # following empty line (break_line) missing, fix +            end +            dob='' if (dob.obj =~/<a name="n\d+">/ \ +              and dob.obj =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote +            if dob ## Clean Prepared Text +              dob.obj=dob.obj.gsub(/<!.+!>/,' '). +                gsub(/<:\S+>/,' ') +            end +          end +        end +        @plaintext +      end +      def publish(plaintext) +        divider='=' +        content=[] +        content << plaintext[:open] +        content << plaintext[:head] +        content << plaintext[:body] +        content << @@endnotes[:end] if @@endnotes_ +        content << "#{break_line}#{divider*@wrap_width}#{break_line}" +        content << plaintext[:metadata] +        content << "#{break_line}#{divider*@wrap_width}#{break_line}" if @md.stmp =~/\w+/ #not used? +        content << plaintext[:tail] +        outputfile=SiSU_Env::FileOp.new(@md).write_file.markdown +        Txt_Output::Output.new.document(content,outputfile) +        @@endnotes={ para: [], end: [] } +      end +    end +  end +end +__END__ | 
