diff options
Diffstat (limited to 'lib/sisu/0.52/html_segments.rb')
| -rw-r--r-- | lib/sisu/0.52/html_segments.rb | 471 | 
1 files changed, 471 insertions, 0 deletions
# diff --git a/lib/sisu/0.52/html_segments.rb b/lib/sisu/0.52/html_segments.rb new file mode 100644 index 00000000..765c012e --- /dev/null +++ b/lib/sisu/0.52/html_segments.rb @@ -0,0 +1,471 @@
=begin
 * Name: SiSU information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
   * http://www.jus.uio.no/sisu
   * http://www.jus.uio.no/sisu/SiSU/download.html

 * Description: html segment generation, processing

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah

 * License: GPL 2 or later

  Summary of GPL 2

  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation; either version 2 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
    http://www.fsf.org/licenses/gpl.html
    http://www.gnu.org/copyleft/gpl.html
    http://www.jus.uio.no/sisu/gpl2.fsf

  SiSU was first released to the public on January 4th 2005

  SiSU uses:

  *  Standard SiSU markup syntax,
  *  Standard SiSU meta-markup syntax, and the
  *  Standard SiSU object citation numbering and system

  © Ralph Amissah 1997, current 2007.
  All Rights Reserved.

 * Ralph Amissah: ralph@amissah.com
                  ralph.amissah@gmail.com
=end
# Segmented html output: the document is cut at every level-4 heading
# ("4~name ...") and each segment is written to its own file.
module SiSU_HTML_seg
  require SiSU_lib + '/shared_html'
  require SiSU_lib + '/html'
  require SiSU_lib + '/html_promo'
  # Seg is instantiated many times while one document is processed (once per
  # paragraph for most steps), so everything that has to survive between those
  # short-lived instances is kept in class variables.
  class Seg
    @@seg,@@seg_subtoc,@@seg_endnotes,@@seg_ad={},{},{},{}
    @@seg_name,@@seg_name_html,@@seg_name_php,@@segtocband=[],[],[],[]
    @@filename_seg=@@filename_segphp=@@seg_url=@@fn=@@to_lev4=@@get_hash_to=@@get_hash_fn=''
    @@loop_count=@@seg_total=@@tracker=0
    @@is4=@@is3=@@is2=@@is1=0                # 0/1 flags: which heading levels are current
    @@header1=@@header2=@@header3=@@header4=0 # replaced by heading text once a heading is met
    @@seg[:dot_nav],@@seg[:tocband],@@seg[:title],@@seg[:headers],@@seg[:main],@@seg[:tail],@@seg[:credits],@@seg_subtoc_array,@@seg_endnotes_array,@@heading_endnotes_array,@@seg[:endnote_all]=Array.new(11){[]}
    @@seg[:header_endnotes]=''
    @@tablehead,@@number_of_cols=0,0
    @@flag_group=false                       # true while inside a <:code|alt|verse|group> block
    @@dp=nil                                 # digest pattern, fetched once and cached
    attr_reader :seg_name_html,:seg_name_html_tracker
    # data:: the paragraphs to work on (the whole document for #songsheet, a
    #        single paragraph for the per-paragraph steps)
    # md::   document metadata/parameter object; this class reads cmd, fns,
    #        dir_out, fnl, file_type, sfx, title, subtitle, dc_creator and
    #        several flag_* values from it
    def initialize(data='',md='')
      @data,@md=data,md
      @vz=SiSU_Env::Get_init.instance.skin
      @seg_name_html=@@seg_name_html || nil
      @seg_name_html_tracker=@@tracker || nil
      @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern
    end
    # Entry point: collects sub-tocs and endnotes, writes the segment files,
    # then resets the shared state. Any error is handed to
    # SiSU_Errors::Info_error; @@seg_name is emptied whatever happens.
    def songsheet
      begin
        Seg.new(@data,@md).get_subtoc_endnotes
        Seg.new(@data,@md).articles
        Seg.new.cleanup # (((( added ))))
        #### (((( END )))) ####
      rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error
      ensure
        @@seg_name=[]
      end
    end
  protected
    # Walks the document twice: first to collect the segment names from the
    # "4~name" headings, then to build each segment and write it out when the
    # next level-4 heading (or <ENDNOTES>/<EOF>) is reached.
    def articles
      data=@data
      tracking,newfile=0,0 # tracking: number of segment starts seen so far
      @@is4=@@is3=@@is2=@@is1=0
      printed_endnote_seg='n'
      @h_sfx='.php' if @md.file_type =~/php/
      @h_sfx=@md.sfx if @md.file_type =~/html/
      @h_sfx='.html' if @md.file_type =~/html/ #used in creating file, not to be omitted.
      data.each do |para|
        if para =~/^4~/
          @@seg_name << para[/^4~(\S+)/,1]
          seg_name=para[/^4~(\S+)/,1]
          @@seg_ad[seg_name]=para[/.+?<:\d\s+(.+)\s*?>/,1] #watch
        end
      end
      @@seg_name_html=@@seg_name
      @@seg_total=@@seg_name.length
      testforartnum=@@seg_name_html
      tell=SiSU_Screen::Ansi.new(@md.cmd,@@seg_name.length)
      tell.segmented unless @md.cmd =~/q/
      data.each do |para|
        if para =~/^4~.+/ #watch
          if para =~/<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
            @@header4=para.to_s[/^4~(?:\S+\s+)?(.+?)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,1]
          else @@header4=para.to_s[/^4~(?:\S+\s+)?(.+)/,1]
          end
          @@is4=newfile=1
        end
        if para =~/^3~.+/
          @@header3=para.to_s[/^3~(?:~\S+\s+)?(.+)/,1]
          @@is4,@@is3=0,1
        end
        if para =~/^2~.+/
          @@header2=para.to_s[/^2~(?:~\S+\s+)?(.+)/,1]
          @@is4,@@is3,@@is2=0,0,1
        end
        if para =~/^1~.+/
          @@header1=para.to_s[/^1~(?:~\S+\s+)?(.+)/,1]
          @@is4,@@is3,@@is2,@@is1=0,0,0,1
        end
        # (the @@isN flags are Integers, which are always truthy in Ruby, so
        # they must be compared with == 1 rather than tested with !)
        if @@is4 == 1 or para =~/^<ENDNOTES>|^<EOF>/
          if newfile == 1 or para =~/^<ENDNOTES>|^<EOF>/
            newfile=0
            if para =~/^4~\S+/ or para =~/^<ENDNOTES>|^<EOF>/ # @@level4
              if tracking != 0
                # a segment has been accumulated: finish it and write it out
                File.mkpath(@md.dir_out) unless FileTest.directory?(@md.dir_out) #bug - added specifically for nav! not needed by regular seg, check !!!
                Seg.new('',@md).tail
                segfilename="#{@md.dir_out}/#{@md.fnl[:pre]}#{@@seg_name_html[tracking-1]}#{@md.fnl[:mid]}#@h_sfx#{@md.fnl[:post]}"
                @@filename_seg=File.new(segfilename,'w') if @@seg_name_html[tracking-1]
                unless (@@seg_name_html[tracking-1] =~/endnotes/)
                  Seg.new.output
                else Seg.new.output('endnotes')
                end
                Seg.new.reinitialise
                # start collecting the next segment
                Seg.new(para,@md).header_art
                Seg.new(para,@md).head
                if @@seg_name_html[tracking] =~/metadata/ # this is for metadata
                  # the metadata segment file is only created (left empty) here
                  segfilename="#{@md.dir_out}/#{@md.fnl[:pre]}#{@@seg_name_html[tracking]}#{@md.fnl[:mid]}#@h_sfx#{@md.fnl[:post]}"
                  @@filename_seg=File.new(segfilename,'w')
                  Seg.new.reinitialise
                  @@filename_seg.close                                         #%(((( EOF )))) -->
                end
              end
              if tracking == 0
                # very first segment: nothing to flush yet
                Seg.new(para,@md).header_art
                Seg.new(para,@md).head
              end
            end
            tracking=tracking + 1
          end
          m=para[/.+?<a name="(\d+)">.*/]; @@get_hash_to=$1 if m              # changed 2002w42, again w44 ! & again 2003w16
          m=para[/^4~(\S+)/]; @@get_hash_fn=$1 if m
          Seg.new(para,@md).markup
          Seg.new(para,@md).txt
          # the endnote step runs once only, on the first paragraph of a
          # segment whose name contains "endnote"
          if testforartnum[tracking-1] =~/endnote/ and printed_endnote_seg == 'n'
            Seg.new(para,@md).endnote
            printed_endnote_seg='y'
          end
        end
      end
    end
    # Sets the segment title block (html head plus the opening promo div) and,
    # for a heading paragraph, appends the dot navigation control: "previous
    # and next" unless this is the last segment, otherwise "previous" only.
    # NOTE(review): @data is a single paragraph here and is iterated with
    # #each - presumably relies on Ruby 1.8 String#each; confirm.
    def header_art
      @data.each do |para|
        format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md)
        if para =~/^[0-6]~/ #2004w27/5
          if @@tracker < @@seg_total-1; @@seg[:dot_nav] << format_head_seg.dot_control_pre_next
          else                          @@seg[:dot_nav] << format_head_seg.dot_control_pre
          end
        end
        ads=SiSU_HTML_promo::Ad.new(@md)
        @@seg[:title]=format_head_seg.head << ads.div.major
      end
    end
    # Builds the navigation band and the heading lines (levels 1 to 4, as
    # flagged by @@is1..@@is4) that open a segment, prepares the heading used
    # on the endnotes page, and advances @@tracker to the next segment.
    def head
      data=@data
      # markup codes and the trailing object-number/digest tag are stripped
      # from headings before output
      clean=/<!.*?!>|<:.*?>|<~\d+;(?:[ohum]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
      format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md)
      unless @md.flag_pdf
        if @@tracker < @@seg_total-1
          if @@tracker == 0; @@segtocband << format_head_seg.toc_next3
          else               @@segtocband << format_head_seg.toc_pre_next3
          end
        # NOTE(review): the pdf branch below uses a plain else here, so with
        # this test no band is added when @@tracker == @@seg_total-1 - confirm
        # whether that difference is intended.
        elsif @@tracker == @@seg_total
          @@segtocband << format_head_seg.toc_pre3
        end
      else  # identical code without .pdf
        if @@tracker < @@seg_total-1
          if @@tracker == 0; @@segtocband << format_head_seg.toc_next2
          else               @@segtocband << format_head_seg.toc_pre_next2
          end
        else @@segtocband << format_head_seg.toc_pre2
        end
      end
      @p_num ||= ''
      if @@is1 == 1
        @dc_creator=%{<b><sup>©</sup> #{@md.dc_creator}</b>\n} if @md.dc_creator.to_s =~/\S/
        @@seg[:tocband] << format_head_seg.navigation_band(@@segtocband,@@seg[:dot_nav])
        @@seg[:headers] << format_head_seg.seg_head_escript if SiSU_HTML_Format_type::Head_seg.method_defined? :seg_head_escript #debug PHP move up in text #bug
        @@seg[:headers] << format_head_seg.title_banner(@md.title,@md.subtitle,@dc_creator).gsub(clean,'')
        paranum=if @@header1[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1
        else ''
        end
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum)
        format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,@@header1,@p_num.ocn_display)
        @@seg[:headers] << format_seg.title_header1.gsub(clean,'')
        # drop the endnote reference so it is not repeated in later segments
        @@header1.gsub!(/ <a name="-[\d*+]+" href="#_[\d*+]+"> <sup>[\d*+]+<\/sup> <\/a>/,'')
      end
      if @@is2 == 1
        header2=@@header2
        paranum=if header2[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1
        else ''
        end
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum)
        format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,header2,@p_num.ocn_display)
        @@seg[:headers] << format_seg.title_header2.gsub(clean,'')
        @@header2.gsub!(/ <a name="-[\d*+]+" href="#_[\d*+]+"> <sup>[\d*+]+<\/sup> <\/a>/,'')
      end
      if @@is3 == 1
        header3=@@header3
        paranum=if header3[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1
        else ''
        end
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum)
        format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,header3,@p_num.ocn_display)
        @@seg[:headers] << format_seg.title_header3.gsub(clean,'')
        @@header3.gsub!(/ <a name="-[\d*+]+" href="#_[\d*+]+"> <sup>[\d*+]+<\/sup> <\/a>/,'')
      end
      if @@is4 == 1
        header4=@@header4
        paranum=if header4[/.+?<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/]; $1
        else ''
        end
        @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum)
        format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,header4,@p_num.ocn_display)
        @@seg[:headers] << format_seg.title_header4.gsub(clean,'')
      end
      @@seg[:header_endnotes]=format_head_seg.title_endnote(@md.title,@md.subtitle,@dc_creator,@@seg[:dot_nav])
      @@tracker=@@tracker+1
    end
    # Converts a body paragraph to html and appends it to @@seg[:main].
    # Paragraphs belonging to a <:code|alt|verse|group> block are collected
    # (state in @@flag_group) and emitted as one paragraph when the matching
    # "-end" tag arrives. Paragraphs starting "0~" are skipped.
    # NOTE(review): @data is a single paragraph iterated with #each -
    # presumably relies on Ruby 1.8 String#each; confirm.
    def markup
      @debug=[]
      data=@data.dup #bugwatch tied
      @group_collect=[]
      data.each do |para|
        format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md)
        if para !~/^0~/
          # String#[regexp] returns the matched text, so the object number has
          # to be requested as capture 1 (indexing the match with [1] would
          # give a character of the paragraph instead)
          paranum=para[/.+?<~(\d+);(?:[ohm]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,1]
          if paranum
            @p_num=SiSU_HTML_Format_type::Paragraph_number.new(@md,paranum)
          end
          if para =~/<:(?:code|alt|verse|group)>/ or @@flag_group==true
            if para =~/<:(?:code|alt|verse|group)>/
              @group_collect << @vz.margin_txt_0 + para
              @@flag_group=true
            elsif @@flag_group==true
              unless para =~/<:(?:code|alt|verse|group)-end>/                  # neither ideal nor necessary sort later
                @group_collect << para
              else @group_collect << para.gsub(/<:(?:code|alt|verse|group)-end>/,'')
              end
            end
            if para =~/<:(?:code|alt|verse|group)-end>/
              para = @group_collect.join
              @@flag_group=false
              @group_collect=[]
            end
          end
          if para !~/^[0-9]~/
            if para =~/(.*)<~0;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/
              one,two=$1,$2
              format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
              para=format_seg.no_paranum
            end
          end
          if para[/<~(\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+><#@dp:#@dp)>$/]
            @sto=SiSU_HTML::Source::Split_text_object.new(@md,para).lev_segname_para_ocn
            format_txt_obj=SiSU_HTML_Format_type::Format_text_object.new(@md,@sto.text) if @sto.format =~/i[12]|_1?\*|<:i[12]>\s*_\*|null/
            para=case @sto.format # work area 2003w29 ||@|def lev_segname_para_ocn|
            when /^4~\S+/;       @sto.seg_lev_para_ocn.header4 # work on see Split_text_object
            when /^5~(?:~\S+)?/; @sto.seg_lev_para_ocn.header5
            when /^6~(?:~\S+)?/; @sto.seg_lev_para_ocn.header6
            when /^_\*$/;        @sto.seg_lev_para_ocn.bullet
            when /^_1\*$/
              format_txt_obj.gsub_body
              @sto.seg_lev_para_ocn.bullet_indent1
            when /^i1$/
              format_txt_obj.gsub_body
              @sto.seg_lev_para_ocn.indent1
            when /^i2$/
              format_txt_obj.gsub_body
              @sto.seg_lev_para_ocn.indent2
            when /^(?:verse|group|alt)$/
              @sto.seg_lev_para_ocn.para
            when /^code$/
              @sto.seg_lev_para_ocn.code
            when /null/
              if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ and para !~/^<!TZ!>/
                format_txt_obj.gsub_body
                @sto.seg_lev_para_ocn.para
              elsif para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ and para =~/^<!TZ!>/
                format_txt_obj.gsub_body
                @sto.seg_lev_para_ocn.table_end
              else para
              end
            else para
            end
          elsif para =~/¡|<!T[hZ]?/
            table=SiSU_HTML_shared::Table.new(para)
            para=table.table
          end
          if @md.flag_separate_endnotes
            # point endnote references at the separate endnotes file
            para.gsub!(/"\s+href="#_(\d+)">/,%{" href=\"endnotes#{@md.sfx}#_\\1">})       #endnote- twice #removed file type
          end
          if para !~/#{@vz.margin_txt_w1}|#{@vz.margin_txt_w2}/
            if para[/(.*)<~0;(?:u|[0-6]:)\d+;\w\d+><#@dp:#@dp>(.*)/] #% watch u & m?
              one,two=$1,$2
              format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
              para=format_seg.seg_no_paranum                                   #% undefined
            end
            para.gsub!(/\s*(-\{{2}~\d+|<:e[:_]\d+>).*/,'')                   #potentially dagerous - removes all paragraphs with <!e_!> #?? workpoint
            if para =~/<a name="_\d+" href="#-\d+"> <sup>/                #endnote- note-
              format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,para)
              para=format_seg.no_paranum
            end
          end
          if para =~/^4~\S+|4~!/
            para.gsub!(/4~\S+|<:[-_\w\d]?(-.+?-)?>|4~!.+/,'')              #sort seg headers
            @@seg[:main] << para
            @@seg[:main] << @@seg_subtoc[@@get_hash_fn]                       #% insertion of sub-toc
          else
            para.gsub!(/<:[-_\w\d]?(-.+?-)?>|4~!.+/,'')
            @@seg[:main] << para unless @@flag_group==true
          end
        end
      end
    end
    # Intentionally empty; called by #articles after #markup.
    def txt
    end
    # Intentionally empty; called once by #articles for the endnotes segment.
    def endnote
    end
    # Appends the closing parts of a segment: the segment's own endnotes (when
    # @md.flag_auto_endnotes is set), a whitespace table, and the credits with
    # the closing promo/html markup.
    def tail
      format_head_seg=SiSU_HTML_Format_type::Head_seg.new(@md)
      if @md.flag_auto_endnotes
        @@seg[:tail] << format_head_seg.endnote_mark
        @@seg[:tail] << @@seg_endnotes[@@get_hash_fn] #endnotes deposited at end of individual segments||@|EXTRACTION OF ENDNOTES|
      end
      @@seg[:tail] << '<table summary="whitespace"><tr><td> </td></tr></table>'
      ads = SiSU_HTML_promo::Ad.new(@md)
      @@seg[:credits] << format_head_seg.credit << ads.div.close << ads.display << format_head_seg.html_close
    end
    # Writes the accumulated segment to @@filename_seg and closes it.
    # type:: anything matching /endnote/ writes the endnotes page (endnote
    #        heading and all endnotes) instead of the segment headers and body
    def output(type='')
      if @@seg[:title] =~/\S/ #kludge (for exception file better.ways, how ironic) get a real ruby test, e.g. test that not array or...
        @@filename_seg << @@seg[:title]
        #@@filename_seg << @@seg[:dot_nav] #places dot control at very top of segment
        @@filename_seg << @@seg[:tocband]
        if type !~/endnote/
          @@filename_seg << @@seg[:headers]
          @@filename_seg << @@seg[:main]
        else
          @@filename_seg << @@seg[:header_endnotes]
          @@filename_seg << @@seg[:endnote_all]
        end
        @@filename_seg << @@seg[:tail]
        @@filename_seg << @@seg[:tocband]
        @@filename_seg << @@seg[:credits]
        @@filename_seg.close
      elsif @@filename_seg.respond_to?(:closed?) and not @@filename_seg.closed?
        # nothing to write, but the caller has already opened the file: do not
        # leave the handle open (@@filename_seg starts life as a String, hence
        # the respond_to? guard)
        @@filename_seg.close
      end
    end
    # Empties the per-segment accumulators, ready for the next segment.
    def reinitialise
      @@seg[:title],@@seg[:dot_nav],@@segtocband,@@seg[:tocband],@@seg[:headers],@@seg[:main],@@seg[:tail],@@seg[:credits]=Array.new(8){[]}
    end
    # Resets all per-document shared state once a document is finished.
    def cleanup
      reinitialise
      @@seg_total,@@tracker=0,0
      @@seg_endnotes,@@seg_subtoc={},{}
      @@seg_endnotes_array,@@seg_subtoc_array,@@heading_endnotes_array=[],[],[]
      @@seg[:endnote_all]=[]
    end
    # First pass over the document: for every segment (keyed by its "4~name")
    # gathers the sub-table of contents built from its level 5 and 6 headings
    # and, when @md.flag_auto_endnotes is set, its endnotes; also fills
    # @@seg[:endnote_all] for the separate page of all endnotes. Endnote
    # markup is removed from the paragraphs in place.
    def get_subtoc_endnotes #get endnotes & sub-table of contents subtoc
      @data.each do |para|
        para.gsub!(/<a name=\"h\d.*?\">(.+?)<\/a>/mi,'\1')
        if @md.flag_auto_endnotes
          if para =~/^[1234]~/ and not @@fn.empty?
            # a new heading closes the previous segment: store its endnotes
            @@seg_endnotes[@@fn] = []
            @@seg_endnotes[@@fn] << @@seg_endnotes_array
            @@seg_endnotes_array=[] if para=~/^4~/
          end
        end
        if para =~/^4~/                                              #% EXTRACTION OF SUB-TOCs
          @@seg_subtoc[@@fn]=@@seg_subtoc_array
          @@seg_subtoc_array=[]
        end
        if para =~/^4~/                                              #% SEGMENT NAME, after EXTRACTION OF ENDNOTES & SUB-TOCs
          if para[/^4~(\S+).+?<~(\d+);(?:[oh]|4:)\d+;\w\d+><#@dp:#@dp>$/]
            @@fn,@@to_lev4=$1,$2 # changed 2004w07          #endnotes and sub-tocs
          elsif para[/^4~(\S+)/]
            @@fn,@@to_lev4=$1,'nonum' # changed 2005w13
          end
        end
        if para =~/^[56]~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/
          para.gsub!(/ <\/a>/,' ')
          # from here on para is rebound to the formatted sub-toc entry; the
          # paragraph held in @data is not replaced by it
          case para # series changed 2002w42
          when /^5~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/ #remove [u]? req by pg texts, revist
            one,two=$1,$2
            format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
            para=format_seg.subtoc_lev5
          when /^6~\S*\s+(.+)?<~(\d+);(?:h|[56]:)\d+;\w\d+><#@dp:#@dp>$/
            one,two=$1,$2
            format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
            para=format_seg.subtoc_lev6
          end
          @@seg_subtoc_array << para
        end
        if @md.flag_auto_endnotes
          if para =~/~[{\[][\d*+]+ <a name="_[\d*+]+"/ # endnote-
            endnote_array=[]
            if para=~/~\{.+?\}\~/m
              endnote_array << para.scan(/~\{.+?\}\~/m)
            end
            if para=~/~\[[*]\d+\s.+?\]\~/m
              endnote_array << para.scan(/~\[[*]\d+\s.+?\]\~/m)
            end
            if para=~/~\[[+]\d+\s.+?\]\~/m
              endnote_array << para.scan(/~\[[+]\d+\s.+?\]\~/m)
            end
            endnote_array.flatten.each do |note|
              note_match_seg=note.dup
              e_n=note_match_seg[/(?:~\{[\d*+]+|~\[[*+]\d+)\s+(.+?)[}\]]~/m,1]
              # each <br />-separated part of a note becomes its own entry in
              # the segment's endnotes
              e_n.split(/<br \/>/).each do |e|
                format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,e)
                note_match=if e =~/<:i[12]>/
                  format_seg.endnote_body_seg_tail_indent
                else format_seg.endnote_body_seg_tail
                end
                @@seg_endnotes_array << note_match
              end
              #% creation of separate end segment/page of all endnotes referenced back to reference segment
              m=/(?:~\{[\d*+]+|~\[[*+]\d+)\s+(.+?href=")(#-[\d*+]+".+)[}\]]~/mi
              one=note_match_seg[m,1] #note~ [a name]
              two=note_match_seg[m,2] #note-
              format_seg=SiSU_HTML_Format_type::Format_seg.new(@md,one,two)
              note_match_all_seg=format_seg.endnote_seg_body(@@fn) #BUG WATCH 200408
              @@seg[:endnote_all] << note_match_all_seg
            end
            para.gsub!(/~[{\[].+?[}\]]~\s*/m,' ')
          end
        end
      end
    end
  end
end
__END__
