diff options
| author | Ralph Amissah <ralph.amissah@gmail.com> | 2019-05-22 10:50:33 -0400 | 
|---|---|---|
| committer | Ralph Amissah <ralph.amissah@gmail.com> | 2019-10-17 19:07:20 -0400 | 
| commit | e973365c4b74be2b2cff9be970ccba5928dbe368 (patch) | |
| tree | f5af8c28ba939095b9c1310c5ea7b91816c12ddf /src | |
| parent | 0.7.2 latex (for pdf) (initial stub) (diff) | |
0.7.3 start to look at document harvest (initial stub)
Diffstat (limited to 'src')
| -rwxr-xr-x | src/doc_reform/doc_reform.d | 104 | ||||
| -rw-r--r-- | src/doc_reform/meta/conf_make_meta_json.d | 215 | ||||
| -rw-r--r-- | src/doc_reform/meta/conf_make_meta_structs.d | 3 | ||||
| -rw-r--r-- | src/doc_reform/meta/metadoc.d | 1 | ||||
| -rw-r--r-- | src/doc_reform/meta/metadoc_harvest.d | 30 | ||||
| -rw-r--r-- | src/doc_reform/meta/metadoc_summary.d | 4 | ||||
| -rw-r--r-- | src/doc_reform/meta/metadochead.d | 84 | ||||
| -rw-r--r-- | src/doc_reform/meta/rgx.d | 7 | 
8 files changed, 270 insertions, 178 deletions
| diff --git a/src/doc_reform/doc_reform.d b/src/doc_reform/doc_reform.d index 06866c2..908a3a6 100755 --- a/src/doc_reform/doc_reform.d +++ b/src/doc_reform/doc_reform.d @@ -53,8 +53,7 @@  module doc_reform.sisu_document_parser;  import    doc_reform.conf.compile_time_info, -  doc_reform.meta.metadoc, -  doc_reform.meta.metadochead; +  doc_reform.meta.metadoc;  import    std.datetime,    std.getopt, @@ -64,6 +63,7 @@ import  import    doc_reform.meta,    doc_reform.meta.metadoc_summary, +  doc_reform.meta.metadoc_harvest,    doc_reform.meta.metadoc_from_src,    doc_reform.meta.conf_make_meta_structs,    doc_reform.meta.conf_make_meta_toml, @@ -91,6 +91,18 @@ void main(string[] args) {    mixin DocReformBiblio;    mixin DocReformRgxInitFlags;    mixin outputHub; +  struct Harvest { +    string   title                = ""; +    string   author               = ""; +    string   author_date_title    = ""; +    string   date_published       = ""; +    string[] topic_register_arr   = [""]; +    string   html_seg_toc         = ""; +    string   html_scroll          = ""; +    string   epub                 = ""; +  } +  Harvest harvested; +  Harvest[] harvests;    string flag_action;    string arg_unrecognized;    enum dAM { abstraction, matters } @@ -119,6 +131,9 @@ void main(string[] args) {      "debug"              : false,      "digest"             : false,      "epub"               : false, +    "harvest"            : false, +    "harvest-authors"    : false, +    "harvest-topics"     : false,      "html"               : false,      "html-seg"           : false,      "html-scroll"        : false, @@ -174,6 +189,9 @@ void main(string[] args) {      "debug",              "--debug",                                                                  &opts["debug"],      "digest",             "--digest hash digest for each object",                                     &opts["digest"],      "epub",               "--epub process epub output",                                               &opts["epub"], +    "harvest",            "--harvest extract info on authors & topics from document header metadata", &opts["harvest"], +    "harvest-authors",    "--harvest-authors extract info on authors from document header metadata",  &opts["harvest-authors"], +    "harvest-topics",     "--harvest-topics extract info on topics from document header metadata",    &opts["harvest-topics"],      "html",               "--html process html output",                                               &opts["html"],      "html-seg",           "--html-seg process html output",                                           &opts["html-seg"],      "html-scroll",        "--html-seg process html output",                                           &opts["html-scroll"], @@ -250,6 +268,22 @@ void main(string[] args) {      bool epub() {        return opts["epub"];      } +    bool harvest() { +      bool _is = ( +        opts["harvest"] +        || opts["harvest-authors"] +        || opts["harvest-topics"] +      ) +      ? true +      : false; +      return _is; +    } +    bool harvest_authors() { +      return opts["harvest-authors"]; +    } +    bool harvest_topics() { +      return opts["harvest-topics"]; +    }      bool html() {        bool _is;        if ( opts["html"] || opts["html-seg"] || opts["html-scroll"]) @@ -637,7 +671,22 @@ void main(string[] args) {            }            /+ ↓ debugs +/            if (doc_matters.opt.action.verbose) { -            DocReformAbstractionSummary!()(doc_abstraction, doc_matters); +            DocReformMetaDocSummary!()(doc_abstraction, doc_matters); +          } +          if (doc_matters.opt.action.harvest) { +            if (doc_matters.opt.action.harvest_authors) { +            } +            if (doc_matters.opt.action.harvest_topics) { +            } +            Harvest[] DocReformMetaDocHarvests()( +              Harvest    harvested, +              Harvest[]  harvests, +            ) { +              harvests ~= harvested; +              return harvests; +            } +            harvested = DocReformMetaDocHarvest!()(doc_matters, harvested); +            harvests = DocReformMetaDocHarvests!()(harvested, harvests);            }            /+ ↓ debugs +/            if (doc_matters.opt.action.debug_do) { @@ -717,7 +766,22 @@ void main(string[] args) {            }            /+ ↓ debugs +/            if (doc_matters.opt.action.verbose) { -            DocReformAbstractionSummary!()(doc_abstraction, doc_matters); +            DocReformMetaDocSummary!()(doc_abstraction, doc_matters); +          } +          if (doc_matters.opt.action.harvest) { +            if (doc_matters.opt.action.harvest_authors) { +            } +            if (doc_matters.opt.action.harvest_topics) { +            } +            Harvest[] DocReformMetaDocHarvests()( +              Harvest    harvested, +              Harvest[]  harvests, +            ) { +              harvests ~= harvested; +              return harvests; +            } +            harvested = DocReformMetaDocHarvest!()(doc_matters, harvested); +            harvests = DocReformMetaDocHarvests!()(harvested, harvests);            }            /+ ↓ debugs +/            if (doc_matters.opt.action.debug_do) { @@ -755,4 +819,36 @@ void main(string[] args) {        }      }    } +  if (_opt_action.verbose +    && harvests.length > 1 +  ) { +    auto min_repeat_number = 42; +    foreach(doc_harvest; harvests) { +      auto char_repeat_number = (doc_harvest.title.length +        + doc_harvest.author.length + 16); +      char_repeat_number = (char_repeat_number > min_repeat_number) +      ? char_repeat_number +      : min_repeat_number; +      writefln( +        "%s\n\"%s\", %s%s", +        mkup.repeat_character_by_number_provided("-", char_repeat_number), +        doc_harvest.title, +        doc_harvest.author, +        (doc_harvest.date_published.length > 0) ? " (" ~ doc_harvest.date_published ~ ")" : "", +      ); +      string[] _topic_arr; +      foreach(topic; doc_harvest.topic_register_arr.sort) { +        foreach (i, _top; topic.split(mkup.sep)) { +          writeln("  ", ("  ".repeat(i).join), "- ", _top); +        } +      } +    } +    string[] _author_date_title; +    foreach(doc_harvest; harvests) { +      _author_date_title ~= doc_harvest.author_date_title; +    } +    foreach(_adt; _author_date_title.sort) { +      writeln(_adt); +    } +  }  } diff --git a/src/doc_reform/meta/conf_make_meta_json.d b/src/doc_reform/meta/conf_make_meta_json.d index fcd52c1..5fd4499 100644 --- a/src/doc_reform/meta/conf_make_meta_json.d +++ b/src/doc_reform/meta/conf_make_meta_json.d @@ -5,6 +5,8 @@  module doc_reform.meta.conf_make_meta_json;  static template contentJSONtoDocReformStruct() {    import +    std.algorithm, +    std.array,      std.exception,      std.regex,      std.stdio, @@ -16,6 +18,7 @@ static template contentJSONtoDocReformStruct() {    import      doc_reform.meta.conf_make_meta_structs,      doc_reform.meta.conf_make_meta_json, +    doc_reform.meta.defaults,      doc_reform.meta.rgx;    ConfCompositePlus _struct_composite;    auto contentJSONtoDocReformStruct(C,J)(C _struct_composite, J _json, string _identifier) { @@ -361,6 +364,108 @@ static template contentJSONtoDocReformStruct() {        }      }      /+ meta ------------------------------------------------------------------- +/ +    if (_struct_composite.meta.creator_author.empty) { +      if ("creator" in _json.object) { +        if ("author" in _json.object["creator"] +          && (_json.object["creator"]["author"].type().to!string == "string") +        ) { +          _struct_composite.meta.creator_author = _json.object["creator"]["author"].str; +        } +        if ("email" in _json.object["creator"] +          && (_json.object["creator"]["email"].type().to!string == "string") +        ) { +          _struct_composite.meta.creator_author_email = _json.object["creator"]["email"].str; +        } +        if ("illustrator" in _json.object["creator"] +          && (_json.object["creator"]["illustrator"].type().to!string == "string") +        ) { +          _struct_composite.meta.creator_illustrator = _json.object["creator"]["illustrator"].str; +        } +        if ("translator" in _json.object["creator"] +          && (_json.object["creator"]["translator"].type().to!string == "string") +        ) { +          _struct_composite.meta.creator_translator = _json.object["creator"]["translator"].str; +        } +      } +      string[] authors_arr; +      string[][string] authors_hash_arr = [ "first" : [], "last" : [], "full" : [], "last_first" : [], "as_input" : [] ]; +      string[] authors_raw_arr +        = _struct_composite.meta.creator_author.split(rgx.arr_delimiter); +      auto _lastname = appender!(char[])(); +      foreach (author_raw; authors_raw_arr) { +        authors_arr                  ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); +        authors_hash_arr["first"]    ~= author_raw.replace(rgx.raw_author_munge, "$2"); +        authors_hash_arr["last"]     ~= author_raw.replace(rgx.raw_author_munge, "$1"); +        authors_hash_arr["full"]     ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); +        authors_hash_arr["as_input"] ~= author_raw; +        if (auto m = author_raw.match(rgx.raw_author_munge)) { +          (m.captures[1]).map!toUpper.copy(_lastname); +          authors_hash_arr["last_first"] ~= _lastname.data.to!string ~ ", " ~ m.captures[2]; +          _lastname = appender!(char[])(); +        } +      } +      _struct_composite.meta.creator_author     = authors_arr.join(", ").chomp.chomp; +      string _author_name_last_first = authors_hash_arr["last_first"].join("; ").chomp.chomp; +      _struct_composite.meta.creator_author_surname_fn = (_author_name_last_first.length > 0) +      ? _author_name_last_first +      : authors_hash_arr["as_input"].join("; ").chomp.chomp; +    } +    if (_struct_composite.meta.title_main.empty) { +      if ("title" in _json.object) { +        if ((_json.object["title"].type().to!string) == "string") { +          _struct_composite.meta.title_main = _json.object["title"].str; +        } else { +          if ("edition" in _json.object["title"] +            && (_json.object["title"]["edition"].type().to!string == "string") +          ) { +            _struct_composite.meta.title_edition = _json.object["title"]["edition"].str; +          } +          if ("full" in _json.object["title"] +            && (_json.object["title"]["full"].type().to!string == "string") +          ) {} +          if ("language" in _json.object["title"] +            && (_json.object["title"]["language"].type().to!string == "string") +          ) { +            _struct_composite.meta.title_language = _json.object["title"]["language"].str; +          } +          if ("main" in _json.object["title"] +            && (_json.object["title"]["main"].type().to!string == "string") +          ) { +            _struct_composite.meta.title_main = _json.object["title"]["main"].str; +          } else if ("title" in _json.object["title"] +            && (_json.object["title"]["title"].type().to!string == "string") +          ) { +            _struct_composite.meta.title_main = _json.object["title"]["title"].str; +          } +          if ("note" in _json.object["title"] +            && (_json.object["title"]["note"].type().to!string == "string") +          ) { +            _struct_composite.meta.title_note = _json.object["title"]["note"].str; +          } +          if ("sub" in _json.object["title"] +            && (_json.object["title"]["sub"].type().to!string == "string") +          ) { +            _struct_composite.meta.title_sub = _json.object["title"]["sub"].str; +          } +          if ("subtitle" in _json.object["title"] +            && (_json.object["title"]["subtitle"].type().to!string == "string") +          ) { +            _struct_composite.meta.title_subtitle = _json.object["title"]["subtitle"].str; +          } +        } +      } +      if ((!(_struct_composite.meta.title_subtitle.empty)) +      && (_struct_composite.meta.title_sub.empty)) { +        _struct_composite.meta.title_sub = _struct_composite.meta.title_subtitle; +      } +      _struct_composite.meta.title_full = (_struct_composite.meta.title_sub.empty) +      ? _struct_composite.meta.title_main +      : format( +          "%s - %s", +          _struct_composite.meta.title_main, +          _struct_composite.meta.title_sub, +        ); +    }      if ("classify" in _json.object) {        if ("dewey" in _json.object["classify"]          && (_json.object["classify"]["dewey"].type().to!string == "string") @@ -386,6 +491,22 @@ static template contentJSONtoDocReformStruct() {          && (_json.object["classify"]["topic_register"].type().to!string == "string")        ) {          _struct_composite.meta.classify_topic_register = _json.object["classify"]["topic_register"].str; +        string[] main_topics_ = _struct_composite.meta.classify_topic_register.split(rgx.topic_register_main_terms_split); +        string[] topics; +        string   topics_tmp; +        string[] multiple_sub_terms; +        foreach (mt; main_topics_) { +          topics_tmp = mt.replaceAll(rgx.topic_register_main_term_plus_rest_split,    mkup.sep); +          if (auto m = topics_tmp.match(rgx.topic_register_multiple_sub_terms_split)) { +            multiple_sub_terms = m.captures[1].split(rgx.topic_register_sub_terms_split); +            foreach (subterm; multiple_sub_terms) { +              topics ~= m.captures.pre ~ mkup.sep ~ subterm; +            } +          } else { +            topics ~= topics_tmp; +          } +        } +        _struct_composite.meta.classify_topic_register_arr = topics;        }      }      if ("date" in _json.object) { @@ -424,6 +545,13 @@ static template contentJSONtoDocReformStruct() {        ) {          _struct_composite.meta.date_valid = _json.object["date"]["valid"].str;        } +      _struct_composite.meta.author_date_title = format( +        "%s %s \"%s\"", +        _struct_composite.meta.creator_author_surname_fn, +        (_struct_composite.meta.date_published.length > 0) +          ? "(" ~ _struct_composite.meta.date_published ~ ")" : "", +        _struct_composite.meta.title_full, +      );      }      if ("links" in _json.object) {}      if ("notes" in _json.object) { @@ -508,93 +636,6 @@ static template contentJSONtoDocReformStruct() {          _struct_composite.meta.rights_license = _json.object["rights"]["license"].str;        }      } -    if (_struct_composite.meta.creator_author.empty) { -      if ("creator" in _json.object) { -        if ("author" in _json.object["creator"] -          && (_json.object["creator"]["author"].type().to!string == "string") -        ) { -          _struct_composite.meta.creator_author = _json.object["creator"]["author"].str; -        } -        if ("email" in _json.object["creator"] -          && (_json.object["creator"]["email"].type().to!string == "string") -        ) { -          _struct_composite.meta.creator_author_email = _json.object["creator"]["email"].str; -        } -        if ("illustrator" in _json.object["creator"] -          && (_json.object["creator"]["illustrator"].type().to!string == "string") -        ) { -          _struct_composite.meta.creator_illustrator = _json.object["creator"]["illustrator"].str; -        } -        if ("translator" in _json.object["creator"] -          && (_json.object["creator"]["translator"].type().to!string == "string") -        ) { -          _struct_composite.meta.creator_translator = _json.object["creator"]["translator"].str; -        } -      } -      string[] authors_arr; -      string[] authors_raw_arr -        = _struct_composite.meta.creator_author.split(rgx.arr_delimiter); -      foreach (author_raw; authors_raw_arr) { -        authors_arr ~= author_raw.replace(rgx.raw_author_munge, "$2 $1"); -      } -      _struct_composite.meta.creator_author = join(authors_arr, ", ").chomp.chomp; -    } -    if (_struct_composite.meta.title_main.empty) { -      if ("title" in _json.object) { -        if ((_json.object["title"].type().to!string) == "string") { -          _struct_composite.meta.title_main = _json.object["title"].str; -        } else { -          if ("edition" in _json.object["title"] -            && (_json.object["title"]["edition"].type().to!string == "string") -          ) { -            _struct_composite.meta.title_edition = _json.object["title"]["edition"].str; -          } -          if ("full" in _json.object["title"] -            && (_json.object["title"]["full"].type().to!string == "string") -          ) {} -          if ("language" in _json.object["title"] -            && (_json.object["title"]["language"].type().to!string == "string") -          ) { -            _struct_composite.meta.title_language = _json.object["title"]["language"].str; -          } -          if ("main" in _json.object["title"] -            && (_json.object["title"]["main"].type().to!string == "string") -          ) { -            _struct_composite.meta.title_main = _json.object["title"]["main"].str; -          } else if ("title" in _json.object["title"] -            && (_json.object["title"]["title"].type().to!string == "string") -          ) { -            _struct_composite.meta.title_main = _json.object["title"]["title"].str; -          } -          if ("note" in _json.object["title"] -            && (_json.object["title"]["note"].type().to!string == "string") -          ) { -            _struct_composite.meta.title_note = _json.object["title"]["note"].str; -          } -          if ("sub" in _json.object["title"] -            && (_json.object["title"]["sub"].type().to!string == "string") -          ) { -            _struct_composite.meta.title_sub = _json.object["title"]["sub"].str; -          } -          if ("subtitle" in _json.object["title"] -            && (_json.object["title"]["subtitle"].type().to!string == "string") -          ) { -            _struct_composite.meta.title_subtitle = _json.object["title"]["subtitle"].str; -          } -        } -      } -      if ((!(_struct_composite.meta.title_subtitle.empty)) -      && (_struct_composite.meta.title_sub.empty)) { -        _struct_composite.meta.title_sub = _struct_composite.meta.title_subtitle; -      } -      _struct_composite.meta.title_full = (_struct_composite.meta.title_sub.empty) -      ? _struct_composite.meta.title_main -      : format( -          "%s - %s", -          _struct_composite.meta.title_main, -          _struct_composite.meta.title_sub, -        ); -    }      return _struct_composite;    }  } diff --git a/src/doc_reform/meta/conf_make_meta_structs.d b/src/doc_reform/meta/conf_make_meta_structs.d index 874e509..ff1ec76 100644 --- a/src/doc_reform/meta/conf_make_meta_structs.d +++ b/src/doc_reform/meta/conf_make_meta_structs.d @@ -181,7 +181,9 @@ struct MetaComposite {    string   classify_loc;    string   classify_subject;    string   classify_topic_register; +  string[] classify_topic_register_arr;    string   creator_author; +  string   creator_author_surname_fn;    string   creator_author_email;    string   creator_illustrator;    string   creator_translator; @@ -223,6 +225,7 @@ struct MetaComposite {    string   title_short;    string   title_sub;    string   title_subtitle; +  string   author_date_title;  }  struct ConfComposite {    MetaComposite               meta; diff --git a/src/doc_reform/meta/metadoc.d b/src/doc_reform/meta/metadoc.d index d8cc19f..a4b920b 100644 --- a/src/doc_reform/meta/metadoc.d +++ b/src/doc_reform/meta/metadoc.d @@ -9,6 +9,7 @@ template DocReformAbstraction() {    import      doc_reform.meta,      doc_reform.meta.metadoc_summary, +    doc_reform.meta.metadoc_harvest,      doc_reform.meta.metadoc_from_src,      doc_reform.meta.conf_make_meta_structs,      doc_reform.meta.conf_make_meta_toml, diff --git a/src/doc_reform/meta/metadoc_harvest.d b/src/doc_reform/meta/metadoc_harvest.d new file mode 100644 index 0000000..c3534f9 --- /dev/null +++ b/src/doc_reform/meta/metadoc_harvest.d @@ -0,0 +1,30 @@ +module doc_reform.meta.metadoc_harvest; +template DocReformMetaDocHarvest() { +  auto DocReformMetaDocHarvest(T,H)( +    T  doc_matters, +    H  harvest, +  ) { +    import +      doc_reform.meta.defaults, +      doc_reform.meta.rgx; +    import +      std.array, +      std.exception, +      std.regex, +      std.stdio, +      std.string, +      std.traits, +      std.typecons, +      std.uni, +      std.utf, +      std.conv : to; +    mixin InternalMarkup; +    auto markup = InlineMarkup(); +    harvest.title              = doc_matters.conf_make_meta.meta.title_full; +    harvest.author             = doc_matters.conf_make_meta.meta.creator_author; +    harvest.author_date_title  = doc_matters.conf_make_meta.meta.author_date_title; +    harvest.date_published     = doc_matters.conf_make_meta.meta.date_published; +    harvest.topic_register_arr = doc_matters.conf_make_meta.meta.classify_topic_register_arr; +    return harvest; +  } +} diff --git a/src/doc_reform/meta/metadoc_summary.d b/src/doc_reform/meta/metadoc_summary.d index 768cebd..4beada8 100644 --- a/src/doc_reform/meta/metadoc_summary.d +++ b/src/doc_reform/meta/metadoc_summary.d @@ -1,6 +1,6 @@  module doc_reform.meta.metadoc_summary; -template DocReformAbstractionSummary() { -  void DocReformAbstractionSummary(S,T)( +template DocReformMetaDocSummary() { +  void DocReformMetaDocSummary(S,T)(      const S  doc_abstraction,            T  doc_matters,    ) { diff --git a/src/doc_reform/meta/metadochead.d b/src/doc_reform/meta/metadochead.d deleted file mode 100644 index 05be0a8..0000000 --- a/src/doc_reform/meta/metadochead.d +++ /dev/null @@ -1,84 +0,0 @@ -module doc_reform.meta.metadochead; -template DocReformHarvestGetFromHead() { // TODO -  import -    std.datetime, -    std.getopt, -    std.file, -    std.path, -    std.process; -  import -    doc_reform.meta, -    doc_reform.meta.metadoc_summary, -    doc_reform.meta.metadoc_from_src, -    doc_reform.meta.conf_make_meta_structs, -    doc_reform.meta.conf_make_meta_toml, -    doc_reform.meta.conf_make_meta_json, -    doc_reform.meta.defaults, -    doc_reform.meta.doc_debugs, -    doc_reform.meta.rgx, -    doc_reform.source.paths_source, -    doc_reform.source.read_config_files, -    doc_reform.source.read_source_files, -    doc_reform.output.hub; -  mixin DocReformRgxInit; -  mixin contentJSONtoDocReformStruct; -  mixin DocReformBiblio; -  mixin DocReformRgxInitFlags; -  mixin outputHub; -  enum headBody { header, body_content, insert_file_list, image_list } -  enum makeMeta { make, meta } -  static auto rgx = Rgx(); -  auto DocReformHarvestGetFromHead(E,O,M)( // TODO -    E _env, -    O _opt_action, -    M _manifest -  ){ -    auto _config_document_struct = readConfigDoc!()(_manifest, _env);    // document config file -    auto _config_local_site_struct = readConfigSite!()(_manifest, _env); // local site config -    ConfCompositePlus _make_and_meta_struct; -    _make_and_meta_struct = configParseTOMLreturnDocReformStruct!()(_make_and_meta_struct, _config_document_struct); -    _make_and_meta_struct = configParseTOMLreturnDocReformStruct!()(_make_and_meta_struct, _config_local_site_struct); -    /+ ↓ read file (filename with path) +/ -    /+ ↓ file tuple of header and content +/ -    if ((_opt_action.debug_do) -    || (_opt_action.very_verbose) -    ) { -      writeln("step1 commence → (get document header & body & insert file list & if needed image list)" -      ); -    } -    auto _header_body_insertfilelist_imagelist -      = DocReformRawMarkupContent!()(_opt_action, _manifest.src.path_and_fn); -    static assert(!isTypeTuple!(_header_body_insertfilelist_imagelist)); -    static assert(_header_body_insertfilelist_imagelist.length==4); -    if ((_opt_action.debug_do) -    || (_opt_action.very_verbose) -    ) { -      writeln("- step1 complete"); -    } -    debug(header_and_body) { -      writeln(header); -      writeln(_header_body_insertfilelist_imagelist.length); -      writeln(_header_body_insertfilelist_imagelist.length[headBody.body_content][0]); -    } -    /+ ↓ split header into make and meta +/ -    if ((_opt_action.debug_do) -    || (_opt_action.very_verbose) -    ) { -      writeln("step2 commence → (read document header - toml, return struct)"); -    } -    _make_and_meta_struct = -    docHeaderMakeAndMetaTupTomlExtractAndConvertToStruct!()( -      _make_and_meta_struct, -      _header_body_insertfilelist_imagelist[headBody.header] -    ); -    if ((_opt_action.debug_do) -    || (_opt_action.very_verbose) -    ) { -      writeln("- step2 complete"); -    } -     -    auto t = tuple(doc_matters_shared, doc_matters_abridged_collected); -    static assert(t.length==2); -    return t; -  } -} diff --git a/src/doc_reform/meta/rgx.d b/src/doc_reform/meta/rgx.d index 373400f..544b432 100644 --- a/src/doc_reform/meta/rgx.d +++ b/src/doc_reform/meta/rgx.d @@ -7,6 +7,7 @@ static template DocReformRgxInit() {    static struct Rgx {      /+ misc +/      static true_dollar                                    = ctRegex!(`\$`, "gm"); +    static sep                                            = ctRegex!(`␣`, "gm");      static flag_action                                    = ctRegex!(`^(--[a-z][a-z0-9-]+)$`);      static flag_action_str                                = ctRegex!(` (--[a-z][a-z0-9-]+)`);      static within_quotes                                  = ctRegex!(`"(.+?)"`, "m"); @@ -43,7 +44,7 @@ static template DocReformRgxInit() {      /+ header +/      static variable_doc_title                             = ctRegex!(`@title`);      static variable_doc_author                            = ctRegex!(`@author|@creator`); -    static raw_author_munge                               = ctRegex!(`(\S.+?),\s+(.+)`,"i"); +    static raw_author_munge                               = ctRegex!(`(?P<last>\S.+?),\s+(?P<first>.+)`,"i");      static toml_header_meta_title                         = ctRegex!(`^\s*(title\s*=\s*"|\[title\])`, "m");      /+ heading & paragraph operators +/      static heading_a                                      = ctRegex!(`^:?[A][~] `, "m"); @@ -191,6 +192,10 @@ static template DocReformRgxInit() {      static bi_main_term_plus_rest_split                   = ctRegex!(`\s*:\s*`);      static bi_sub_terms_plus_object_number_offset_split   = ctRegex!(`\s*\|\s*`);      static bi_term_and_object_numbers_match               = ctRegex!(`^(.+?)\+(\d+)`); +    static topic_register_main_terms_split                = ctRegex!(`\s*;\s*`); +    static topic_register_main_term_plus_rest_split       = ctRegex!(`\s*:\s*`); +    static topic_register_sub_terms_split                 = ctRegex!(`\s*\|\s*`); +    static topic_register_multiple_sub_terms_split        = ctRegex!(`␣([^|␣]+(?:\|[^|␣]+)+)`);      /+ language codes +/      auto language_codes                                    =         ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)"); | 
