aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2026-04-22 13:52:21 -0400
committerRalph Amissah <ralph.amissah@gmail.com>2026-04-22 20:42:31 -0400
commit51549f11d60cd353564486b3598e69259fb01b66 (patch)
tree8dec193314d88ccfd76d80bc1cef951acf2b2204 /src
parent.ssp document abstraction as PEG parsable text (diff)
document abstraction as per document sqlite db
--show-abstraction-db flag to write per-document - SQLite database of document abstraction (Claude-Code primary assist) - Add a new output mode that serializes the in-memory document abstraction to a per-document SQLite database. This complements the .ssp text format (--show-abstraction) with a queryable database representation of the same data. - Schema: metadata table - key/value pairs for document metadata (title, creator, dates, rights, classify, identifiers, language, notes, make settings, doc_has counts) objects table - one row per document object with columns: section, seq (position within section), ocn, is_a, is_of_part, is_of_type, heading_level, identifier, parent_ocn, last_descendant_ocn, ancestors, indent/bullet/lang, has_* flags, segment/anchor tags, table/code properties, text content Indexed on: section, ocn, parent_ocn, is_a, heading_level - Uses prepared statements via d2sqlite3 (existing dependency) for safe and efficient insertion. Each document produces a standalone .abstraction.db file in the abstraction/ output directory. - New files: src/sisudoc/io_out/create_abstraction_db.d Follows the same pattern as create_abstraction_txt.d. Creates schema, populates metadata via key/value inserts, then iterates all sections writing objects with prepared statements within a single transaction. - Changes to spine.d: - Add "show-abstraction-db" to opts init, getopt, OptActions - Add to abstraction(), require_processing_files(), and meta_processing_general() gates - Insert call at both spineAbstraction sites - Tested against all 35 sample documents (including 9-language live-manual) - zero failures. Works standalone or combined with --show-abstraction and other output flags. - Example queries the database supports: SELECT ocn, heading_level, text FROM objects WHERE is_a = 'heading' AND section = 'body'; SELECT * FROM objects WHERE parent_ocn = 10; SELECT key, value FROM metadata WHERE key LIKE 'title.%'; Co-Authored-By: Anthropic Claude Opus 4.6 (1M context)
Diffstat (limited to 'src')
-rw-r--r--src/sisudoc/io_out/create_abstraction_db.d355
-rwxr-xr-xsrc/sisudoc/spine.d18
2 files changed, 373 insertions, 0 deletions
diff --git a/src/sisudoc/io_out/create_abstraction_db.d b/src/sisudoc/io_out/create_abstraction_db.d
new file mode 100644
index 0000000..20ca074
--- /dev/null
+++ b/src/sisudoc/io_out/create_abstraction_db.d
@@ -0,0 +1,355 @@
+/+
+- Name: SisuDoc Spine, Doc Reform [a part of]
+ - Description: documents, structuring, processing, publishing, search
+ - static content generator
+
+ - Author: Ralph Amissah
+ [ralph.amissah@gmail.com]
+
+ - Copyright: (C) 2015 (continuously updated, current 2026) Ralph Amissah, All Rights Reserved.
+
+ - License: AGPL 3 or later:
+
+ Spine (SiSU), a framework for document structuring, publishing and
+ search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Affero General Public License as published by the
+ Free Software Foundation, either version 3 of the License, or (at your
+ option) any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
+ more details.
+
+ You should have received a copy of the GNU Affero General Public License along with
+ this program. If not, see [https://www.gnu.org/licenses/].
+
+ If you have Internet connection, the latest version of the AGPL should be
+ available at these locations:
+ [https://www.fsf.org/licensing/licenses/agpl.html]
+ [https://www.gnu.org/licenses/agpl.html]
+
+ - Spine (by Doc Reform, related to SiSU) uses standard:
+ - docReform markup syntax
+ - standard SiSU markup syntax with modified headers and minor modifications
+ - docReform object numbering
+ - standard SiSU object citation numbering & system
+
+ - Homepages:
+ [https://www.sisudoc.org]
+ [https://www.doc-reform.org]
+
+ - Git
+ [https://git.sisudoc.org/]
+
++/
+module sisudoc.io_out.create_abstraction_db;
+
+/+ ↓ write document abstraction as per-document sqlite3 database +/
+template spineAbstractionDb() {
+  import std.conv : to;
+  import std.file;
+  import std.path;
+  import std.stdio;
+  import std.string;
+  import std.array;
+  import std.typecons : Nullable;
+  import d2sqlite3;
+  import sisudoc.io_out.paths_output;
+
+  /+
+    ↓ write the abstraction of one document to a per-document sqlite3 file
+    - doc: supplies doc.abstraction (looked up by section name, each entry
+      a sequence of objects) and doc.matters (output path, source language,
+      metadata, make settings, doc_has counts, options)
+    - output file: <output_base>/abstraction/<doc_uid_out>.abstraction.db;
+      a file already present under that name is removed first
+    - table metadata: key/value rows, keys with an empty value are skipped
+    - table objects: one row per object, sections visited in the fixed
+      order of section_order, seq is the position within the section
+    - filesystem problems while preparing the path are reported on stderr
+      and processing continues; d2sqlite3 errors are not caught here
+  +/
+  void spineAbstractionDb(D)(D doc) {
+    auto doc_abstraction = doc.abstraction;
+    auto doc_matters = doc.matters;
+
+    /+ ↓ determine output path +/
+    auto out_pth = spineOutPaths!()(doc_matters.output_path, doc_matters.src.language);
+    string base_dir = "abstraction";
+    string base_pth = ((out_pth.output_base.chainPath(base_dir)).asNormalizedPath).array;
+    try {
+      if (!exists(base_pth)) {
+        base_pth.mkdirRecurse;
+      }
+    } catch (Exception ex) {
+      /+ ↓ report the cause instead of hiding it; opening the database below is expected to fail if the directory is missing +/
+      stderr.writeln("WARNING: could not create directory ", base_pth, ": ", ex.msg);
+    }
+    string db_file = ((base_pth.chainPath(
+      doc_matters.src.doc_uid_out ~ ".abstraction.db")).asNormalizedPath).array;
+
+    /+ ↓ remove existing file to start fresh +/
+    try {
+      if (exists(db_file)) {
+        remove(db_file);
+      }
+    } catch (Exception ex) {
+      /+ ↓ report the cause instead of hiding it; if the old file remains, schema creation below is expected to fail on the existing tables +/
+      stderr.writeln("WARNING: could not remove existing ", db_file, ": ", ex.msg);
+    }
+
+    if (doc_matters.opt.action.vox_gt_1) {
+      writeln(" ", db_file);
+    }
+
+    /+ ↓ open database and create schema +/
+    /+ NOTE(review): journal_mode=WAL is recorded in the database file itself; confirm this is wanted for a write-once output file +/
+    auto db = Database(db_file);
+    db.run("PRAGMA journal_mode=WAL");
+    db.run("PRAGMA synchronous=NORMAL");
+
+    db.run("
+      CREATE TABLE metadata (
+        key TEXT PRIMARY KEY,
+        value TEXT NOT NULL
+      );
+
+      CREATE TABLE objects (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        section TEXT NOT NULL,
+        seq INTEGER NOT NULL,
+        ocn INTEGER DEFAULT 0,
+        is_a TEXT NOT NULL,
+        is_of_part TEXT,
+        is_of_type TEXT,
+        heading_level INTEGER,
+        identifier TEXT,
+        parent_ocn INTEGER DEFAULT 0,
+        last_descendant_ocn INTEGER DEFAULT 0,
+        ancestors TEXT,
+        dummy_heading INTEGER DEFAULT 0,
+        object_number_off INTEGER DEFAULT 0,
+        indent_base INTEGER DEFAULT 0,
+        indent_hang INTEGER DEFAULT 0,
+        bullet INTEGER DEFAULT 0,
+        lang TEXT,
+        has_links INTEGER DEFAULT 0,
+        has_notes_reg INTEGER DEFAULT 0,
+        has_notes_star INTEGER DEFAULT 0,
+        has_images INTEGER DEFAULT 0,
+        segment TEXT,
+        segment_prev TEXT,
+        segment_next TEXT,
+        anchor TEXT,
+        table_cols INTEGER,
+        table_widths TEXT,
+        table_header INTEGER,
+        code_syntax TEXT,
+        code_linenumbers INTEGER DEFAULT 0,
+        text TEXT
+      );
+
+      CREATE INDEX idx_objects_section ON objects(section);
+      CREATE INDEX idx_objects_ocn ON objects(ocn);
+      CREATE INDEX idx_objects_parent ON objects(parent_ocn);
+      CREATE INDEX idx_objects_is_a ON objects(is_a);
+      CREATE INDEX idx_objects_heading ON objects(heading_level)
+        WHERE heading_level IS NOT NULL;
+    ");
+
+    /+ ↓ populate metadata; metadata and object inserts share one transaction, committed at the end +/
+    db.run("BEGIN TRANSACTION");
+
+    auto meta_stmt = db.prepare(
+      "INSERT INTO metadata (key, value) VALUES (:key, :value)"
+    );
+    auto meta = doc_matters.conf_make_meta.meta;
+
+    /+ ↓ insert one key/value row; empty values are skipped so no row is written for them +/
+    void insertMeta(string key, string value) {
+      if (value.length > 0) {
+        meta_stmt.bind(":key", key);
+        meta_stmt.bind(":value", value);
+        meta_stmt.execute();
+        meta_stmt.reset();
+      }
+    }
+
+    insertMeta("title.main", meta.title_main);
+    insertMeta("title.subtitle", meta.title_subtitle);
+    insertMeta("title.full", meta.title_full);
+    insertMeta("title.language", meta.title_language);
+    insertMeta("creator.author", meta.creator_author);
+    insertMeta("creator.author_surname", meta.creator_author_surname);
+    insertMeta("creator.author_surname_fn", meta.creator_author_surname_fn);
+    insertMeta("creator.author_email", meta.creator_author_email);
+    insertMeta("creator.illustrator", meta.creator_illustrator);
+    insertMeta("creator.translator", meta.creator_translator);
+    insertMeta("date.published", meta.date_published);
+    insertMeta("date.created", meta.date_created);
+    insertMeta("date.issued", meta.date_issued);
+    insertMeta("date.available", meta.date_available);
+    insertMeta("date.modified", meta.date_modified);
+    insertMeta("date.valid", meta.date_valid);
+    insertMeta("rights.copyright", meta.rights_copyright);
+    insertMeta("rights.license", meta.rights_license);
+    insertMeta("classify.topic_register", meta.classify_topic_register);
+    insertMeta("classify.subject", meta.classify_subject);
+    insertMeta("classify.keywords", meta.classify_keywords);
+    insertMeta("classify.loc", meta.classify_loc);
+    insertMeta("classify.dewey", meta.classify_dewey);
+    insertMeta("identifier.isbn", meta.identifier_isbn);
+    insertMeta("identifier.oclc", meta.identifier_oclc);
+    insertMeta("language.document", meta.language_document);
+    insertMeta("notes.abstract", meta.notes_abstract);
+    insertMeta("notes.description", meta.notes_description);
+    insertMeta("notes.summary", meta.notes_summary);
+
+    /+ ↓ make settings +/
+    auto make = doc_matters.conf_make_meta.make;
+    insertMeta("make.doc_type", make.doc_type);
+    insertMeta("make.auto_num_top_at_level", make.auto_num_top_at_level);
+    insertMeta("make.auto_num_top_lv", make.auto_num_top_lv.to!string);
+    insertMeta("make.auto_num_depth", make.auto_num_depth.to!string);
+
+    /+ ↓ doc_has counts +/
+    insertMeta("doc_has.inline_links", doc_matters.has.inline_links.to!string);
+    insertMeta("doc_has.inline_notes_reg", doc_matters.has.inline_notes_reg.to!string);
+    insertMeta("doc_has.inline_notes_star", doc_matters.has.inline_notes_star.to!string);
+    insertMeta("doc_has.tables", doc_matters.has.tables.to!string);
+    insertMeta("doc_has.codeblocks", doc_matters.has.codeblocks.to!string);
+    insertMeta("doc_has.images", doc_matters.has.images.to!string);
+    insertMeta("doc_has.poems", doc_matters.has.poems.to!string);
+    insertMeta("doc_has.groups", doc_matters.has.groups.to!string);
+    insertMeta("doc_has.blocks", doc_matters.has.blocks.to!string);
+    insertMeta("doc_has.quotes", doc_matters.has.quotes.to!string);
+
+    meta_stmt.finalize();
+
+    /+ ↓ populate objects +/
+    auto obj_stmt = db.prepare(
+      "INSERT INTO objects ("
+      ~ "section, seq, ocn, is_a, is_of_part, is_of_type,"
+      ~ "heading_level, identifier, parent_ocn, last_descendant_ocn,"
+      ~ "ancestors, dummy_heading, object_number_off,"
+      ~ "indent_base, indent_hang, bullet, lang,"
+      ~ "has_links, has_notes_reg, has_notes_star, has_images,"
+      ~ "segment, segment_prev, segment_next, anchor,"
+      ~ "table_cols, table_widths, table_header,"
+      ~ "code_syntax, code_linenumbers, text"
+      ~ ") VALUES ("
+      ~ ":section, :seq, :ocn, :is_a, :is_of_part, :is_of_type,"
+      ~ ":heading_level, :identifier, :parent_ocn, :last_descendant_ocn,"
+      ~ ":ancestors, :dummy_heading, :object_number_off,"
+      ~ ":indent_base, :indent_hang, :bullet, :lang,"
+      ~ ":has_links, :has_notes_reg, :has_notes_star, :has_images,"
+      ~ ":segment, :segment_prev, :segment_next, :anchor,"
+      ~ ":table_cols, :table_widths, :table_header,"
+      ~ ":code_syntax, :code_linenumbers, :text"
+      ~ ")"
+    );
+
+    /+ ↓ bind a string parameter of obj_stmt, storing NULL in place of an empty string +/
+    void bindStr(string param, string val) {
+      if (val.length > 0) {
+        obj_stmt.bind(param, val);
+      } else {
+        obj_stmt.bind(param, Nullable!string());
+      }
+    }
+
+    string[] section_order = ["head", "toc", "body", "endnotes",
+      "glossary", "bibliography", "bookindex", "blurb"];
+
+    foreach (section; section_order) {
+      /+ ↓ sections that are absent or empty contribute no rows +/
+      if (section !in doc_abstraction) continue;
+      auto section_objs = doc_abstraction[section];
+      if (section_objs.length == 0) continue;
+
+      /+ ↓ every parameter is bound afresh for each object, so reset() alone is enough between rows +/
+      foreach (seq, obj; section_objs) {
+        obj_stmt.bind(":section", section);
+        obj_stmt.bind(":seq", cast(int) seq);
+        obj_stmt.bind(":ocn", obj.metainfo.ocn);
+        obj_stmt.bind(":is_a", obj.metainfo.is_a);
+
+        /+ ↓ nullable string fields +/
+        bindStr(":is_of_part", obj.metainfo.is_of_part);
+        bindStr(":is_of_type", obj.metainfo.is_of_type);
+
+        /+ ↓ heading level: only for headings with a markup level below 9, NULL otherwise +/
+        if (obj.metainfo.is_a == "heading" && obj.metainfo.heading_lev_markup < 9) {
+          obj_stmt.bind(":heading_level", obj.metainfo.heading_lev_markup);
+        } else {
+          obj_stmt.bind(":heading_level", Nullable!int());
+        }
+
+        bindStr(":identifier", obj.metainfo.identifier);
+        obj_stmt.bind(":parent_ocn", obj.metainfo.parent_ocn);
+        obj_stmt.bind(":last_descendant_ocn", obj.metainfo.last_descendant_ocn);
+
+        /+ ↓ ancestors as space-separated integers, NULL when every entry is 0 +/
+        {
+          bool has_ancestors = false;
+          foreach (a; obj.metainfo.markedup_ancestors) {
+            if (a != 0) { has_ancestors = true; break; }
+          }
+          if (has_ancestors) {
+            string anc;
+            foreach (i, a; obj.metainfo.markedup_ancestors) {
+              if (i > 0) anc ~= " ";
+              anc ~= a.to!string;
+            }
+            obj_stmt.bind(":ancestors", anc);
+          } else {
+            obj_stmt.bind(":ancestors", Nullable!string());
+          }
+        }
+
+        obj_stmt.bind(":dummy_heading", obj.metainfo.dummy_heading ? 1 : 0);
+        obj_stmt.bind(":object_number_off", obj.metainfo.object_number_off ? 1 : 0);
+        obj_stmt.bind(":indent_base", obj.attrib.indent_base);
+        obj_stmt.bind(":indent_hang", obj.attrib.indent_hang);
+        obj_stmt.bind(":bullet", obj.attrib.bullet ? 1 : 0);
+        bindStr(":lang", obj.attrib.language);
+        obj_stmt.bind(":has_links", obj.has.inline_links ? 1 : 0);
+        obj_stmt.bind(":has_notes_reg", obj.has.inline_notes_reg ? 1 : 0);
+        obj_stmt.bind(":has_notes_star", obj.has.inline_notes_star ? 1 : 0);
+        obj_stmt.bind(":has_images", obj.has.images ? 1 : 0);
+        bindStr(":segment", obj.tags.in_segment_html);
+        bindStr(":segment_prev", obj.tags.segname_prev);
+        bindStr(":segment_next", obj.tags.segname_next);
+        bindStr(":anchor", obj.tags.anchor_tag_html);
+
+        /+ ↓ table properties: set only for tables with at least one column, NULL otherwise +/
+        if (obj.metainfo.is_a == "table" && obj.table.number_of_columns > 0) {
+          obj_stmt.bind(":table_cols", obj.table.number_of_columns);
+          if (obj.table.column_widths.length > 0) {
+            string[] ws;
+            foreach (w; obj.table.column_widths) ws ~= w.to!string;
+            obj_stmt.bind(":table_widths", ws.join(" "));
+          } else {
+            obj_stmt.bind(":table_widths", Nullable!string());
+          }
+          obj_stmt.bind(":table_header", obj.table.heading ? 1 : 0);
+        } else {
+          obj_stmt.bind(":table_cols", Nullable!int());
+          obj_stmt.bind(":table_widths", Nullable!string());
+          obj_stmt.bind(":table_header", Nullable!int());
+        }
+
+        /+ ↓ code block properties: syntax is NULL and linenumbers 0 for anything that is not code +/
+        if (obj.metainfo.is_a == "code") {
+          bindStr(":code_syntax", obj.code_block.syntax);
+          obj_stmt.bind(":code_linenumbers", obj.code_block.linenumbers ? 1 : 0);
+        } else {
+          obj_stmt.bind(":code_syntax", Nullable!string());
+          obj_stmt.bind(":code_linenumbers", 0);
+        }
+
+        /+ ↓ text content +/
+        bindStr(":text", obj.text);
+
+        obj_stmt.execute();
+        obj_stmt.reset();
+      }
+    }
+
+    obj_stmt.finalize();
+    db.run("COMMIT TRANSACTION");
+  }
+}
diff --git a/src/sisudoc/spine.d b/src/sisudoc/spine.d
index ceb9d6c..f7b7d66 100755
--- a/src/sisudoc/spine.d
+++ b/src/sisudoc/spine.d
@@ -167,6 +167,7 @@ string program_name = "spine";
"pod" : false,
"serial" : false,
"show-abstraction" : false,
+ "show-abstraction-db" : false,
"show-config" : false,
"show-curate" : false,
"show-curate-authors" : false,
@@ -289,6 +290,7 @@ string program_name = "spine";
"serial", "serial processing", &opts["serial"],
"skip-output", "skip output", &opts["skip-output"],
"show-abstraction", "show document abstraction (write .ssp file)", &opts["show-abstraction"],
+ "show-abstraction-db", "show document abstraction (write .db sqlite file)", &opts["show-abstraction-db"],
"show-config", "show config", &opts["show-config"],
"show-curate", "show curate", &opts["show-curate"],
"show-curate-authors", "show curate authors", &opts["show-curate-authors"],
@@ -503,6 +505,9 @@ string program_name = "spine";
@trusted bool show_abstraction() {
return opts["show-abstraction"];
}
+ /+ ↓ value of opts["show-abstraction-db"], the flag registered for the --show-abstraction-db option (write .db sqlite file) +/
+ @trusted bool show_abstraction_db() {
+ return opts["show-abstraction-db"];
+ }
@trusted bool show_curate() {
return opts["show-curate"];
}
@@ -752,6 +757,7 @@ string program_name = "spine";
return (
opts["abstraction"]
|| show_abstraction
+ || show_abstraction_db
|| concordance
|| source_or_pod
|| curate
@@ -779,6 +785,7 @@ string program_name = "spine";
|| odt
|| manifest
|| show_abstraction
+ || show_abstraction_db
|| show_make
|| show_metadata
|| show_summary
@@ -794,6 +801,7 @@ string program_name = "spine";
return (
opts["abstraction"]
|| show_abstraction
+ || show_abstraction_db
|| curate
|| html
|| epub
@@ -1316,6 +1324,11 @@ string program_name = "spine";
import sisudoc.io_out.create_abstraction_txt;
spineAbstractionTxt!()(doc);
}
+ /+ ↓ document abstraction sqlite database +/
+ if (doc.matters.opt.action.show_abstraction_db) {
+ import sisudoc.io_out.create_abstraction_db;
+ spineAbstractionDb!()(doc);
+ }
if (doc.matters.opt.action.curate) {
auto _hvst = spineMetaDocCurate!()(doc.matters, hvst);
if (
@@ -1420,6 +1433,11 @@ string program_name = "spine";
import sisudoc.io_out.create_abstraction_txt;
spineAbstractionTxt!()(doc);
}
+ /+ ↓ document abstraction sqlite database +/
+ if (doc.matters.opt.action.show_abstraction_db) {
+ import sisudoc.io_out.create_abstraction_db;
+ spineAbstractionDb!()(doc);
+ }
if (doc.matters.opt.action.curate) {
auto _hvst = spineMetaDocCurate!()(doc.matters, hvst);
if (