about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Ralph Amissah <ralph.amissah@gmail.com> 2026-04-22 14:02:51 -0400
committer: Ralph Amissah <ralph.amissah@gmail.com> 2026-04-22 20:42:31 -0400
commit: 0edd3f79f2ceaf74c009a1f4dc25aa95b5a1c673 (patch)
tree: 65c123a56aa8fcac15f2e0e37cea0a366bce00c8 /src
parent: .ssp serializer: omit identifier when it equals OCN (diff)
.ssp serializer: include all ObjGenericComposite fields
- Make the .ssp format a complete representation of the document
  abstraction by serializing all remaining fields from
  ObjGenericComposite (omitting only the ptr.* runtime indices, which
  are meaningless outside the in-memory context).
- New fields added:
  .ancestors_collapsed: - collapsed level ancestor chain
  .dom_status: - DOM structure markedup tags status[8]
  .dom_status_collapsed: - DOM structure collapsed status[8]
  .heading_lev_collapsed: - collapsed heading level
  .parent_lev: - parent heading level (markup)
  .o_n_type: - object numbering type (0=ocn, 1=non, 2=bkidx)
  .is_of_type: - para/block type classification
  .attrib: - general attributes string
  .meta_lang: - block language (group/block/quote)
  .meta_syntax: - codeblock syntax from metainfo
  .sha256: - hex-encoded SHA-256 digest of object content
  .has: images_no_dim - image without dimensions flag
  .table_aligns: - column alignment array
  .table_walls: - table walls/borders flag
  .stow_link: - extracted URLs (one per line)
  .heading_lev_anchor: - heading level anchor tag
  .segment_epub: - EPUB segment anchor tag
  .heading_ancestors_text: - pipe-separated ancestor headings
  .lev4_subtoc: - sub-table-of-contents entries (one per line)
  .anchor_tag: - additional anchor tags (one per line)
- Tested against all 35 sample documents - zero failures.

Co-Authored-By: Anthropic Claude Opus 4.6 (1M context)
Diffstat (limited to 'src')
-rw-r--r-- src/sisudoc/io_out/create_abstraction_txt.d 129
1 files changed, 120 insertions, 9 deletions
diff --git a/src/sisudoc/io_out/create_abstraction_txt.d b/src/sisudoc/io_out/create_abstraction_txt.d
index 140a27d..7d0425c 100644
--- a/src/sisudoc/io_out/create_abstraction_txt.d
+++ b/src/sisudoc/io_out/create_abstraction_txt.d
@@ -53,6 +53,7 @@ module sisudoc.io_out.create_abstraction_txt;
/+ ↓ write document abstraction as human-readable .ssp text file +/
template spineAbstractionTxt() {
import std.conv : to;
+ import std.digest : toHexString;
import std.file;
import std.path;
import std.stdio;
@@ -201,23 +202,94 @@ template spineAbstractionTxt() {
output ~= ".last_descendant: " ~ obj.metainfo.last_descendant_ocn.to!string;
/+ ↓ ancestors (only if non-zero) +/
- bool has_ancestors = false;
- foreach (a; obj.metainfo.markedup_ancestors) {
- if (a != 0) { has_ancestors = true; break; }
+ {
+ bool has_anc = false;
+ foreach (a; obj.metainfo.markedup_ancestors) {
+ if (a != 0) { has_anc = true; break; }
+ }
+ if (has_anc) {
+ string anc;
+ foreach (i, a; obj.metainfo.markedup_ancestors) {
+ if (i > 0) anc ~= " ";
+ anc ~= a.to!string;
+ }
+ output ~= ".ancestors: " ~ anc;
+ }
}
- if (has_ancestors) {
- string anc;
- foreach (i, a; obj.metainfo.markedup_ancestors) {
- if (i > 0) anc ~= " ";
- anc ~= a.to!string;
+ /+ ↓ collapsed ancestors (only if non-zero) +/
+ {
+ bool has_anc_c = false;
+ foreach (a; obj.metainfo.collapsed_ancestors) {
+ if (a != 0) { has_anc_c = true; break; }
+ }
+ if (has_anc_c) {
+ string anc;
+ foreach (i, a; obj.metainfo.collapsed_ancestors) {
+ if (i > 0) anc ~= " ";
+ anc ~= a.to!string;
+ }
+ output ~= ".ancestors_collapsed: " ~ anc;
+ }
+ }
+ /+ ↓ dom structure status (only if non-zero) +/
+ {
+ bool has_dom = false;
+ foreach (d; obj.metainfo.dom_structure_markedup_tags_status) {
+ if (d != 0) { has_dom = true; break; }
+ }
+ if (has_dom) {
+ string ds;
+ foreach (i, d; obj.metainfo.dom_structure_markedup_tags_status) {
+ if (i > 0) ds ~= " ";
+ ds ~= d.to!string;
+ }
+ output ~= ".dom_status: " ~ ds;
+ }
+ }
+ {
+ bool has_dom_c = false;
+ foreach (d; obj.metainfo.dom_structure_collapsed_tags_status) {
+ if (d != 0) { has_dom_c = true; break; }
+ }
+ if (has_dom_c) {
+ string ds;
+ foreach (i, d; obj.metainfo.dom_structure_collapsed_tags_status) {
+ if (i > 0) ds ~= " ";
+ ds ~= d.to!string;
+ }
+ output ~= ".dom_status_collapsed: " ~ ds;
}
- output ~= ".ancestors: " ~ anc;
}
+ if (obj.metainfo.heading_lev_collapsed < 9)
+ output ~= ".heading_lev_collapsed: " ~ obj.metainfo.heading_lev_collapsed.to!string;
+ if (obj.metainfo.parent_lev_markup != 0)
+ output ~= ".parent_lev: " ~ obj.metainfo.parent_lev_markup.to!string;
if (obj.metainfo.dummy_heading)
output ~= ".dummy: true";
if (obj.metainfo.object_number_off)
output ~= ".ocn_off: true";
+ if (obj.metainfo.o_n_type != 0)
+ output ~= ".o_n_type: " ~ obj.metainfo.o_n_type.to!string;
+ if (obj.metainfo.is_of_type.length > 0)
+ output ~= ".is_of_type: " ~ obj.metainfo.is_of_type;
+ if (obj.metainfo.attrib.length > 0)
+ output ~= ".attrib: " ~ obj.metainfo.attrib;
+ if (obj.metainfo.lang.length > 0)
+ output ~= ".meta_lang: " ~ obj.metainfo.lang;
+ if (obj.metainfo.syntax.length > 0)
+ output ~= ".meta_syntax: " ~ obj.metainfo.syntax;
+
+ /+ ↓ sha256 digest +/
+ {
+ bool has_sha = false;
+ foreach (b; obj.metainfo.sha256) {
+ if (b != 0) { has_sha = true; break; }
+ }
+ if (has_sha) {
+ output ~= ".sha256: " ~ obj.metainfo.sha256.toHexString.to!string;
+ }
+ }
/+ ↓ text attributes +/
if (obj.attrib.indent_base != 0 || obj.attrib.indent_hang != 0)
@@ -235,6 +307,7 @@ template spineAbstractionTxt() {
if (obj.has.inline_notes_reg) has_flags ~= "notes_reg";
if (obj.has.inline_notes_star) has_flags ~= "notes_star";
if (obj.has.images) has_flags ~= "images";
+ if (obj.has.image_without_dimensions) has_flags ~= "images_no_dim";
if (has_flags.length > 0)
output ~= ".has: " ~ has_flags.join(" ");
}
@@ -247,8 +320,13 @@ template spineAbstractionTxt() {
foreach (w; obj.table.column_widths) ws ~= w.to!string;
output ~= ".table_widths: " ~ ws.join(" ");
}
+ if (obj.table.column_aligns.length > 0) {
+ output ~= ".table_aligns: " ~ obj.table.column_aligns.join(" ");
+ }
if (obj.table.heading)
output ~= ".table_header: true";
+ if (obj.table.walls)
+ output ~= ".table_walls: true";
}
/+ ↓ code block properties +/
@@ -259,6 +337,13 @@ template spineAbstractionTxt() {
output ~= ".code_linenumbers: true";
}
+ /+ ↓ stow (extracted links) +/
+ if (obj.stow.link.length > 0) {
+ foreach (lnk; obj.stow.link) {
+ output ~= ".stow_link: " ~ lnk;
+ }
+ }
+
/+ ↓ tag properties +/
if (obj.tags.in_segment_html.length > 0)
output ~= ".segment: " ~ obj.tags.in_segment_html;
@@ -269,6 +354,32 @@ template spineAbstractionTxt() {
output ~= ".segment_prev: " ~ obj.tags.segname_prev;
if (obj.tags.segname_next.length > 0)
output ~= ".segment_next: " ~ obj.tags.segname_next;
+ if (obj.tags.heading_lev_anchor_tag.length > 0)
+ output ~= ".heading_lev_anchor: " ~ obj.tags.heading_lev_anchor_tag;
+ if (obj.tags.segment_anchor_tag_epub.length > 0)
+ output ~= ".segment_epub: " ~ obj.tags.segment_anchor_tag_epub;
+ /+ ↓ heading ancestors text +/
+ {
+ bool has_hat = false;
+ foreach (h; obj.tags.heading_ancestors_text) {
+ if (h.length > 0) { has_hat = true; break; }
+ }
+ if (has_hat) {
+ output ~= ".heading_ancestors_text: " ~ obj.tags.heading_ancestors_text.join("|");
+ }
+ }
+ /+ ↓ lev4 subtoc +/
+ if (obj.tags.lev4_subtoc.length > 0) {
+ foreach (st; obj.tags.lev4_subtoc) {
+ output ~= ".lev4_subtoc: " ~ st;
+ }
+ }
+ /+ ↓ anchor tags +/
+ if (obj.tags.anchor_tags.length > 0) {
+ foreach (at; obj.tags.anchor_tags) {
+ output ~= ".anchor_tag: " ~ at;
+ }
+ }
/+ ↓ text content +/
if (obj.text.length > 0) {