diff options
Diffstat (limited to 'lib/sisu/ao_idx.rb')
| -rw-r--r-- | lib/sisu/ao_idx.rb | 422 | 
1 files changed, 422 insertions, 0 deletions
# encoding: utf-8
=begin

* Name: SiSU

** Description: documents, structuring, processing, publishing, search
*** system environment, resource control and configuration details

** Author: Ralph Amissah
  <ralph@amissah.com>
  <ralph.amissah@gmail.com>

** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
  All Rights Reserved.

** License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see <http://www.gnu.org/licenses/>.

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
  <http://www.fsf.org/licensing/licenses/gpl.html>
  <http://www.gnu.org/licenses/gpl.html>

  <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

** SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

** Homepages:
  <http://www.jus.uio.no/sisu>
  <http://www.sisudoc.org>

** Git
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/ao_idx.rb;hb=HEAD>

=end
module SiSU_AO_BookIndex
  # Collects the per-object index terms found on document objects and renders
  # them as a book index in four parallel forms: two markup variants
  # (:sst_rel, :sst_rel_html_seg) and two markup-language variants (:html,
  # :xhtml).
  class BookIndex
    # Section keys of the letter navigation bar; '9' stands for the numeral
    # section that precedes 'A'.
    INDEX_LETTERS = %w[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z].freeze

    # md   - document metadata object (reads #fns, #book_idx, #opt, #fnb, #fn)
    # data - enumerable of document objects
    # env  - environment object; built from md.fns when not supplied
    def initialize(md,data,env=nil)
      @md, @data, @env = md, data, env
      @rgx_idx = /#{Mx[:idx_o]}(?:.+?)#{Mx[:idx_c]}\s*/
      @rgx_idx_ocn_seg = /(.+?)~(\d+)~(\S+)/
      @rgx_idx_ocn = /(.+?)~(\d+)/
      @env ||= SiSU_Env::InfoEnv.new(@md.fns)
    end

    # Extracts the book index from @data and splices the segmented markup
    # variant into the object stream after the endnotes break.
    #
    # Returns [data, sst_rel, sst_rel_html_seg, html_idx, xhtml_idx]; the four
    # index values are nil when no index was built.
    def indexing_song
      data, sst_rel, sst_rel_html_seg, html_idx, xhtml_idx =
        extract_book_index(@data)
      data = clean_and_insert_index(data, sst_rel_html_seg)
      [data, sst_rel, sst_rel_html_seg, html_idx, xhtml_idx]
    end

    # Walks the document objects, remembering the name of the most recent
    # level-4 heading as the current segment, and gathers every object whose
    # #idx is a Hash together with its ocn and that segment.
    #
    # Returns [objects, sst_rel, sst_rel_html_seg, html_idx, xhtml_idx]; the
    # index values are nil unless index terms were found and @md.book_idx is
    # set.
    def extract_book_index(data)
      tuned_file = []
      idx_array = []
      data.each do |dob|
        if (dob.is == :heading || dob.is == :heading_insert) && dob.ln == 4
          @seg = dob.name
        end
        if dob.respond_to?(:idx) && dob.idx.is_a?(Hash)
          idx_array << { idx: dob.idx, ocn: dob.ocn, seg: @seg }
        end
        tuned_file << dob if dob
      end
      sst_rel = sst_rel_html_seg = html_idx = xhtml_idx = nil
      # only build the index when it is going to be used
      if !idx_array.empty? && @md.book_idx
        idx = index(construct_book_index(idx_array))
        sst_rel = idx[:sst_rel]
        sst_rel_html_seg = idx[:sst_rel_html_seg]
        html_idx = idx[:html]
        xhtml_idx = idx[:xhtml]
      end
      [tuned_file, sst_rel, sst_rel_html_seg, html_idx, xhtml_idx]
    end

    # Merges the gathered index entries into one structure per term.
    #
    # idx_array - array of { idx: {term => info}, ocn:, seg: } where info may
    #             carry :plus (extent of a range) and :sub (array of
    #             {subterm => info} hashes).
    #
    # Returns an array of [term, nodes] pairs sorted by term, where nodes has
    # 'node_0_terms' (array of { ocn:, range:, seg: }) and, only when subterms
    # exist, 'node_1_subterms' ({subterm => array of { ocn:, range:, seg: }}).
    def construct_book_index(idx_array)
      the_idx = {}
      idx_array.each do |idx|
        idx[:idx].each_pair do |term, term_info|
          nodes = (the_idx[term] ||= {})
          (nodes['node_0_terms'] ||= []) << index_ref(idx, term_info)
          subs = term_info[:sub]
          next unless subs.is_a?(Array)
          subs.each do |sub|
            sub.each_pair do |subterm, subterm_info|
              subterms = (nodes['node_1_subterms'] ||= {})
              (subterms[subterm] ||= []) << index_ref(idx, subterm_info)
            end
          end
        end
      end
      the_idx.sort
    end

    # Escapes ampersands for XML output; an ampersand that already starts a
    # character or entity reference is left alone so that text is not
    # double-escaped.
    #
    # Returns a new String.
    def clean_xml(str)
      str.gsub(/&(?!(?:[a-zA-Z][a-zA-Z0-9]*|#[0-9]+|#x[0-9a-fA-F]+);)/, '&amp;')
    end

    # Renders the sorted index produced by #construct_book_index.
    #
    # Returns a Hash with the keys :sst_rel_html_seg, :sst_rel, :html and
    # :xhtml, each an Array. The two markup arrays start with a page-break
    # object and two heading objects; the two markup-language arrays start
    # with a letter navigation bar. Every array then carries one String per
    # term (with its references and subterms appended); :html and :xhtml also
    # carry a heading String for each letter section reached.
    def index(the_idx)
      verbose = @md.opt.act[:verbose_plus][:set] == :on
      idx = { sst_rel_html_seg: [], sst_rel: [], html: [], xhtml: [] }
      index_preamble.each do |obj|
        idx[:sst_rel_html_seg] << obj
        idx[:sst_rel] << obj
      end
      append_letter_navigation(idx)
      pending = INDEX_LETTERS.dup
      letter = pending.shift
      idx[:html] <<
        %{\n<p class="book_index_lev1"><a name="numeral"></a></p>}
      idx[:xhtml] <<
        %{\n<p class="letter" id="numeral">0 - 9</p>}
      the_idx.each do |term, nodes|
        term = term.to_s
        # open every letter section up to the one this term belongs to
        initial = term[/\S/].to_s
        while letter < initial && !pending.empty?
          letter = pending.shift
          idx[:html] <<
            %{\n<p class="letter"><a name="#{letter}">#{letter}</a></p><p class="book_index_lev1"><a name="#{letter.downcase}"> </a></p>}
          idx[:xhtml] <<
            %{\n<p class="letter" id="#{letter.downcase}">#{letter}</p>}
        end
        aname = term.gsub(/\s+/, '_')
        # one growing string per output form; references and subterms are
        # appended to these in place
        entry = {
          sst_rel_html_seg: %{\n\n#{Mx[:fa_bold_o]}#{term},#{Mx[:fa_bold_c]} },
          sst_rel: %{\n\n#{Mx[:fa_bold_o]}#{term},#{Mx[:fa_bold_c]} },
          html: %{\n<p class="book_index_lev1"><a name="#{aname}"><b>#{term}</b></a>, },
          xhtml: %{\n<p class="book_index_lev1"><b>#{clean_xml(term)}</b>, },
        }
        entry.each { |form, text| idx[form] << text }
        print "\n#{term}, " if verbose
        next unless nodes.is_a?(Hash)
        if nodes['node_0_terms'].is_a?(Array)
          append_refs(entry, nodes['node_0_terms'], verbose, true)
          entry[:html] << '</p>'
          entry[:xhtml] << '</p>'
        end
        next unless nodes['node_1_subterms']
        nodes['node_1_subterms'].sort.each do |subterm, refs|
          next if subterm =~ /node_0_terms/
          entry[:sst_rel_html_seg] << "#{subterm}, "
          entry[:sst_rel] << "#{subterm}, "
          entry[:html] << %{\n<p class="book_index_lev2">#{subterm}, }
          entry[:xhtml] << %{\n<p class="book_index_lev2">#{clean_xml(subterm)}, }
          print "\n\t#{subterm}, " if verbose
          append_refs(entry, refs, verbose, false)
          entry[:html] << '</p>'
          entry[:xhtml] << '</p>'
        end
      end
      print "\n" if verbose
      idx
    end

    # Prints the index to standard output: each term on its own line followed
    # by its locations, each subterm on a tab-indented line of its own.
    def screen_print(the_idx)
      the_idx.each do |term, nodes|
        print "\n#{term}, "
        next unless nodes.is_a?(Hash)
        print_refs(nodes['node_0_terms']) if nodes['node_0_terms'].is_a?(Array)
        next unless nodes['node_1_subterms']
        nodes['node_1_subterms'].sort.each do |subterm, refs|
          next if subterm =~ /node_0_terms/
          print "\n\t#{subterm}, "
          print_refs(refs)
        end
      end
    end

    # Writes idx[:html] to the book-index file inside the document's output
    # directory (created if absent); does nothing unless @md.book_idx is set.
    #
    # Returns nil.
    def output_idx(idx)
      return unless @md.book_idx
      path = "#{@env.path.output}/#{@md.fnb}"
      Dir.mkdir(path) unless FileTest.directory?(path)
      file = "#{path}/#{@md.fn[:book_idx_html]}"
      puts "#{file} #{__FILE__}::#{__LINE__}"
      # block form closes the file even if a write raises
      File.open(file, 'w') do |html_index_file|
        idx[:html].each { |x| html_index_file << x }
      end
      nil
    end

    # Returns a new array holding the objects of data with the elements of
    # sisu_markup_idx inserted after every object whose #obj matches the
    # endnotes break marker; data is returned as a plain copy when
    # sisu_markup_idx is nil.
    def clean_and_insert_index(data,sisu_markup_idx)
      endnotes_break = /#{Mx[:br_endnotes]}/
      data.flat_map do |dob|
        if sisu_markup_idx && dob.obj =~ endnotes_break
          [dob, *sisu_markup_idx]
        else
          [dob]
        end
      end
    end

    # Returns the strings of data with inline index markup (and any newlines
    # directly before it) removed.
    def clean_index(data)                                  #check on use of dob
      rgx = /\n*#{@rgx_idx}/m
      data.map { |para| para.gsub(rgx, '') }
    end

    private

    # Builds one location record; range is "ocn-(ocn+plus)" when info[:plus]
    # is positive, otherwise just the ocn as a String.
    def index_ref(idx, info)
      plus = info[:plus].to_i
      range = if plus > 0
        "#{idx[:ocn]}-#{idx[:ocn].to_i + plus}"
      else
        idx[:ocn].to_s
      end
      { ocn: idx[:ocn], range: range, seg: idx[:seg] }
    end

    # Objects that open both markup variants of the index: a page break, the
    # level-1 "Index" heading and the level-4 segment heading.
    def index_preamble
      [
        SiSU_AO_DocumentStructure::ObjectLayout.new.break(
          { obj: Mx[:br_page] }
        ),
        SiSU_AO_DocumentStructure::ObjectHeading.new.heading(
          { lv: '1', name: 'index', obj: "Index" }
        ),
        SiSU_AO_DocumentStructure::ObjectHeading.new.heading(
          { lv: '4', name: 'idx', obj: " [Index] #{Mx[:pa_non_object_dummy_heading]}" }
        ),
      ]
    end

    # Appends the A-Z navigation paragraph to idx[:html] and idx[:xhtml]; the
    # numeral key contributes an empty string.
    def append_letter_navigation(idx)
      idx[:html] << '<p>'
      idx[:xhtml] << '<p>'
      INDEX_LETTERS.each do |l|
        if l =~ /[0-9]/
          idx[:html] << ''
          idx[:xhtml] << ''
        else
          idx[:html] <<
            %{<a href="##{l}">#{l}</a>,#{$ep[:hsp]}}
          idx[:xhtml] <<
            %{<a href="##{l.downcase}">#{l}</a>,#{$ep[:hsp]}}
        end
      end
      idx[:html] << '</p>'
      idx[:xhtml] << '</p>'
    end

    # Appends one link per location record to the four strings of entry. The
    # link text is the record's range, falling back to its ocn.
    def append_refs(entry, refs, verbose, main_term)
      refs.each do |ref|
        label = ref[:range] || ref[:ocn]
        unless label
          p 'error'
          next
        end
        # NOTE(review): only main-term range links carry a leading '/' before
        # the segment name; subterm links do not. Kept as found - confirm
        # whether the difference is intended.
        seg_prefix = main_term && ref[:range] ? '/' : ''
        entry[:sst_rel_html_seg] <<
          %{#{Mx[:lnk_o]}#{label}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{seg_prefix}#{ref[:seg]}.html##{ref[:ocn]}#{Mx[:rel_c]}, }
        entry[:sst_rel] <<
          %{#{Mx[:lnk_o]}#{label}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{ref[:ocn]}#{Mx[:rel_c]}, }
        entry[:html] <<
          %{<a href="#{ref[:seg]}.html##{ref[:ocn]}">#{label}</a>, }
        entry[:xhtml] <<
          %{<a href="#{ref[:seg]}.xhtml#o#{ref[:ocn]}">#{label}</a>, }
        print "#{label}, " if verbose
      end
    end

    # Prints the range (or, failing that, the ocn) of each location record.
    def print_refs(refs)
      refs.each do |ref|
        label = ref[:range] || ref[:ocn]
        if label
          print "#{label}, "
        else
          p 'error'
        end
      end
    end
  end
end
