diff options
Diffstat (limited to 'lib/sisu/v2/dal_hash_digest.rb')
| -rw-r--r-- | lib/sisu/v2/dal_hash_digest.rb | 155 | 
1 files changed, 155 insertions, 0 deletions
# coding: utf-8
# Extracted from: diff --git a/lib/sisu/v2/dal_hash_digest.rb b/lib/sisu/v2/dal_hash_digest.rb
# (new file mode 100644, index 00000000..a78c54f1)
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/copyleft/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: system environment, resource control and configuration details

=end
# Hash-digest pass over the document object array: selects the digest
# algorithm from the environment and hands each numbered text object to the
# text-representation helpers that compute its digests.
module SiSU_hash
  require "#{SiSU_lib}/shared_markup_alt.rb"               #shared_markup_alt.rb
  class Object_digest
    # md   - document metadata; only +md.fns+ is read here (to build a default env)
    # data - array of document objects; entries respond to +obj+, +of+, +ocn+, +is+
    # env  - optional environment object; must respond to +digest.type+
    def initialize(md, data, env=nil)
      @md, @data = md, data
      @env = env || SiSU_Env::Info_env.new(@md.fns)
    end

    # Runs every object in the data array through the digest pass and returns
    # the resulting (flattened) array, which is also kept in @tuned_file.
    #
    # Side effects: nil entries are removed from the data array in place, and
    # every non-Array +obj+ is stripped in place.
    #
    # Original design notes:
    # 1. clean/stripped text without any markup, paragraph, headings etc. without endnotes
    # 2. endnotes clean/stripped text digest only (there may be several endnotes within a paragraph)
    # 3. whole object, text with markup and any endnotes, (question: with or without the
    #    endnote digests??? presumption better without, [however may be easier to check with?])
    # [digests should not include other digests]
    def object_digest
      data = @data
      @tuned_file = []
      data.compact!
      hash_class = digest_class
      data.each do |t_o|
        t_o.obj.strip! unless t_o.obj.is_a?(Array)
        @tuned_file << (numbered_text_object?(t_o) ? stamped(t_o, hash_class) : t_o)
      end
      @tuned_file = @tuned_file.flatten
      #use md5 or to create hash of each dal object including ocn, & add into to each dal object
    end

    # Takes an array of text fragments and returns them joined into one string,
    # with the digest of each endnote's markup-stripped text inserted (wrapped
    # in Mx[:id_o] .. Mx[:id_c]) just before the endnote's closing marker.
    #
    # Fragments without an endnote opening marker pass through unchanged.
    # A fragment with an opening marker but no matching close is reported on
    # stdout and then passed through unchanged, so its text is not lost.
    def endnote_digest(data)
      en_a_o, en_a_c = Mx[:en_a_o], Mx[:en_a_c]
      en_b_o, en_b_c = Mx[:en_b_o], Mx[:en_b_c]
      pieces = data.map do |en_plus|
        case en_plus
        when /#{en_a_o}|#{en_b_o}/
          # /m here matches the extraction regex below (and the scan in
          # #stamped), so an endnote containing a newline is still recognised.
          if en_plus =~ /#{en_a_o}.+?#{en_a_c}|#{en_b_o}.+?#{en_b_c}/m
            t_o_txt, en_open, en_txt, en_close =
              /(.*?)(#{en_a_o}|#{en_b_o})(.+?)(#{en_a_c}|#{en_b_c})/m.match(en_plus)[1..4]
            stripped_en = SiSU_text_representation::Alter.new(en_txt).strip_clean_of_markup
            digest_en_strip = digest_class.hexdigest(stripped_en)
            t_o_txt + en_open + en_txt + Mx[:id_o] + digest_en_strip + Mx[:id_c] + en_close
          else
            puts "Error Exception - problem encountered with:\n#{en_plus}" #arbitrary exception, tidy up
            en_plus
          end
        else en_plus
        end
      end
      pieces.join
    end

    # Normalises the object's text and triggers digest computation for it.
    # Returns the same t_o that was passed in.
    #
    # hash_class is accepted for interface compatibility; it is not used here.
    #
    # Side effects visible in this method: t_o.obj is reassigned to the
    # space-cleaned text and, for non-code objects containing endnotes, gets a
    # single space inserted before each endnote closing marker.
    def stamped(t_o, hash_class) #decide what hash information is most useful, is compromise necessary?
      t_o.obj = SiSU_text_representation::Alter.new(t_o).strip_clean_of_extra_spaces
      # NOTE(review): the return values of the next three calls are not used in
      # this method; the calls are kept because the helpers are defined
      # elsewhere and may act on t_o - confirm before removing.
      SiSU_text_representation::Alter.new(t_o).strip_clean_of_markup
      SiSU_text_representation::Alter.new(t_o).semi_revert_markup
      SiSU_text_representation::Modified_text_plus_Hash_digest.new(@md, t_o).composite.dgst
      return t_o if t_o.is == 'code'
      en_a_o, en_a_c = Mx[:en_a_o], Mx[:en_a_c]
      en_b_o, en_b_c = Mx[:en_b_o], Mx[:en_b_c]
      # case/when (Regexp#===) is deliberate: it is simply false when obj is not a String
      case t_o.obj
      when /#{en_a_o}[\d*+]+\s+.+?#{en_a_c}|#{en_b_o}[*+]\d+\s+.+?#{en_b_c}/m
        t_o.obj.gsub!(/\s*(#{en_a_c}|#{en_b_c})/m, ' \1') #watch
        t_o_plus_en = t_o.obj.scan(/.*?#{en_a_o}.+?#{en_a_c}|.*?#{en_b_o}.+?#{en_b_c}/m)
        # Text after the last endnote: read it from the capturing group
        # (the previous code indexed [1] on a regex with no group, giving nil).
        tail = /(?:.*?#{en_a_o}.+?#{en_a_c}|.*?#{en_b_o}.+?#{en_b_c})+([\s\S]+)/m.match(t_o.obj)
        t_o_plus_en << (tail ? tail[1] : '')
        # The digest-annotated text is computed but not yet attached to t_o
        # (the original assignment is commented out):
        #@tuned << t_o_new + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil?
        endnote_digest(t_o_plus_en)
      end
      t_o #KEEP intact
    end

    # Returns a copy of s with runs of spaces collapsed to one, spaces before
    # trailing punctuation removed (only when s has no endnote opening marker),
    # and the spaces between a closing bold/italics marker + apostrophe and a
    # following "s " removed.
    def strip_clean_extra_spaces(s)                                            # dal output tuned
      s = s.dup
      s = s.gsub(/[ ]+([,.;:?](?:$|\s))/, '\1') unless s =~ /#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
      s = s.gsub(/ [ ]+/, ' ')
      # NOTE(review): both anchored patterns below need two or more spaces, which
      # cannot remain after the collapse above, so single leading/trailing
      # spaces survive. Left as is because changing it would alter the output.
      s = s.gsub(/^ [ ]+/, '')
      s = s.gsub(/ [ ]+$/, '')
      s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/, '\1\2')
    end

    private

    # Loads and returns the digest class selected by the environment:
    # Digest::SHA256 when the configured type matches /sha256/, else Digest::MD5.
    def digest_class
      if @env.digest.type =~ /sha256/
        require 'digest/sha2'
        Digest::SHA256
      else
        require 'digest/md5'
        Digest::MD5
      end
    end

    # True for objects that get digests: not structure/comment/layout, and
    # carrying an integer ocn.
    def numbered_text_object?(t_o)
      t_o.of !~ /structure|comment|layout/ && t_o.ocn.is_a?(Integer)
    end
  end
end
__END__
