Literate Programming 4

@ @> @=
////////////
// WEB4.D
//
// This is a level 4 bootstrapping Literate Programming thing.
// It will insert indices and tables of contents. It may also allow appending or replacing sections.
//
module web3;
@> @=
private import std.algorithm; // Needed for countUntil and searching
private import std.ascii; // Character type checks.
private import std.file; // Needed for file input and output
private import std.stdio; // Needed for error reporting and my debugging
private import std.string; // These programs are all about string processing.
@>
@ I've converted the bool isCode/isIdentifier into a consistent enum and started tracking line numbers within each text block.
@=
// Kind of content held by a section or by one block inside a section.
enum ESectionType {
    CODE,
    HEADER,
    PARAGRAPH,
    IDENTIFIER,
    INDEX_TERM,
    PRE,
    // Terms? Literal/Emphasis?
    BOLD,
};

// A named section of the web file together with the blocks it is made of.
struct SSection {
    string name;
    ESectionType type;
    SBlock[] contents;
};

// One run of text inside a section, tagged with the line it started on.
struct SBlock {
    ESectionType type;
    int lineNumber;
    string content;
};
@ @> @=
@ @ @ @ @ @ @
@>
@ I'm inserting these hide blocks here. I wish I could insert bits of display inside a code block.
@=
// Finds the first occurrence of needle in haystack at or after startIndex.
// Returns the position of the match measured from the start of haystack,
// or a negative value when there is no match or startIndex is out of range.
ptrdiff_t countFromPosUntil(string haystack, ptrdiff_t startIndex, string needle) {
    if(startIndex < 0 || cast(size_t) startIndex > haystack.length) {
        return -1;
    }
    // indexOf reports a code unit offset, which is what slicing needs.
    // countUntil on a string counts decoded characters instead, so its
    // result is not a valid slice index once non-ASCII text is involved.
    ptrdiff_t offset = std.string.indexOf(haystack[startIndex..$], needle);
    if(offset < 0) {
        return offset;
    }
    return startIndex + offset;
}
@> @=
// Prepares a piece of code for display: escapes the HTML special characters,
// then walks the text wrapping line comments, quoted strings and a fixed list
// of special identifiers in markup.
// lineNumber is only used in the error message for an unterminated string.
// NOTE(review): every wrapping literal below is empty in this copy of the
// file, so no markup is actually emitted; presumably the tags were lost.
// TODO confirm against the original source.
string formatCodeForDisplay(string source, int lineNumber) {
    string output = "";
    string scanner = escapeHTMLCharacters(source);
    scanLoop: while(!scanner.empty) {
        if(scanner.startsWith("//")) {
            // Color comments.
            output ~= "";
            ptrdiff_t lineLength = std.string.indexOf(scanner, "\n");
            if(lineLength < 0) {
                lineLength = scanner.length;
            }
            // Emit the whole comment. The newline itself stays in scanner
            // and is copied by the single character branch below.
            output ~= scanner[0..lineLength];
            output ~= "";
            scanner = scanner[lineLength..$];
        } else if(scanner.startsWith("\"") || scanner.startsWith("\'")) {
            // Color strings.
            char stringType = scanner[0];
            output ~= "";
            size_t stringLength = 1;
            while(stringLength < scanner.length && scanner[stringLength] != stringType) {
                if(scanner[stringLength] == '\\') {
                    // Skip the escaped character so an escaped quote does not end the string.
                    stringLength += 1;
                }
                stringLength += 1;
            }
            if(stringLength >= scanner.length) {
                writefln("ERROR: Unable to find close quote for string %s near line %d in string %s\n", scanner[0..min(scanner.length, 20)], lineNumber, source);
                break scanLoop;
            }
            output ~= scanner[0..stringLength + 1];
            output ~= "";
            scanner = scanner[stringLength + 1..$];
        } else {
            if(isAlpha(scanner[0])) {
                bool isNotIdentifier(dchar ch) {
                    return !(isAlpha(ch) || isDigit(ch) || ch == '_');
                }
                // Everything before the first non identifier character is ASCII,
                // so the element count is also a valid slice index here.
                ptrdiff_t wordLength = countUntil!isNotIdentifier(scanner);
                if(wordLength < 0) {
                    wordLength = scanner.length;
                }
                const string[] identifiers = [ "const", "bool", "break", "char", "dchar", "else", "for", "if", "import", "int", "main", "module", "private", "return", "string", "std", "void", "while", ];
                if(wordLength > 0 && !findAmong(identifiers, [scanner[0..wordLength]]).empty) {
                    // Special identifiers
                    output ~= "";
                    output ~= scanner[0..wordLength];
                    output ~= "";
                } else {
                    output ~= scanner[0..wordLength];
                }
                scanner = scanner[wordLength..$];
            } else {
                output ~= scanner[0];
                scanner = scanner[1..$];
            }
        }
    }
    return output;
}
@> @=
// Returns a copy of source in which the three HTML special characters are
// replaced by their character entities. All other characters are copied as is.
string escapeHTMLCharacters(string source) {
    string output;
    foreach(dchar ch; source) {
        switch(ch) {
            case '<':
                output ~= "&lt;";
                break;
            case '>':
                output ~= "&gt;";
                break;
            case '&':
                output ~= "&amp;";
                break;
            default:
                output ~= ch;
                break;
        }
    }
    return output;
}
@=
// Collects text from contents starting at offset until an at sign code that
// this function does not handle, or the end of the input.
// offset is updated to the position where scanning stopped and lineNumber is
// advanced once per newline consumed.
// When recurse is set, an at sign followed by an opening angle bracket closes
// the current block and starts a new one.
// The bare at sign markers inside the body are references to chunks whose
// names are not visible in this copy of the file; they are kept in place.
SBlock[] slurp_section(string contents, ref int offset, ref int lineNumber, bool recurse, ESectionType sectionType) {
    SBlock[] results;
    string currentBlock = "";
    int startLineNumber = lineNumber;
    int index = offset;
    for(; index < contents.length; index++) {
        if(contents[index] == '@@') {
            // Peek at the following character. An at sign at the very end of
            // the input has none, which is treated like any unknown code.
            const char next = index + 1 < contents.length ? contents[index + 1] : '\0';
            if(recurse && next == '<') {
                results ~= SBlock(sectionType, startLineNumber, currentBlock);
                currentBlock = "";
                startLineNumber = lineNumber;
                @
                if(contents[index..$].startsWith("@@>")) {
                    index += 2;
                } else {
                    writefln("Identifier '%s' invoked without close tag. at %s", identifier, contents[index..min($, index + 10)]);
                    break;
                }
            } else if(next == '@@') {
                currentBlock ~= contents[index];
                // Skip the escaped at symbol.
                index++;
            @
            } else {
                break;
            }
        }
        @
        else {
            if(contents[index] == '\n') {
                lineNumber++;
            }
            currentBlock ~= contents[index];
        }
    }
    // NOTE(review): the final block is always tagged CODE while the blocks
    // closed earlier use sectionType. Left unchanged because the code that
    // consumes block types is not visible here. TODO confirm this is intended.
    results ~= SBlock(ESectionType.CODE, startLineNumber, currentBlock);
    offset = index;
    return results;
}
@> @=
// Builds the code text for identifier by concatenating every block of every
// section returned by find_matching_identifiers, expanding nested identifier
// blocks recursively. Each block is preceded by a line directive naming
// inputFilename and the line number recorded for the block.
// When nothing matches, an error is printed and an error text is returned in
// place of the code.
// expansionPath lists the identifiers currently being expanded; callers leave
// it at its default. It stops a chunk that refers back to itself from
// recursing without end.
string expand_code_identifier(SSection[] sections, string identifier, string inputFilename, string[] expansionPath = null) {
    if(expansionPath.canFind(identifier)) {
        writefln("ERROR: Identifier '%s' is defined in terms of itself.", identifier);
        return format("ERROR: %s is recursive", identifier);
    }
    string output;
    output ~= "/* from "~identifier~" */";
    SSection[] definitions = find_matching_identifiers(sections, identifier);
    if(definitions.empty) {
        writefln("ERROR: Unable to find identifier '%s'.", identifier);
        return format("ERROR: %s is undefined", identifier);
    }
    string[] nestedPath = expansionPath ~ identifier;
    foreach(section; definitions) {
        foreach(block; section.contents) {
            output ~= format("\n#line %d \"%s\"\n", block.lineNumber, inputFilename);
            if(block.type == ESectionType.IDENTIFIER) {
                output ~= expand_code_identifier(sections, block.content, inputFilename, nestedPath);
            } else {
                output ~= block.content;
            }
        }
    }
    return output;
}
@> @=
// Splits fileContents into sections by scanning for at sign codes, then
// appends the expanded main section to outputCodeContents.
// The two bare at sign markers after the parsing loop are references to chunks
// whose names are not visible in this copy of the file; mainSection is not
// declared in the visible code and presumably comes from one of them.
void parse_web_then_tangle_and_weave(ref string outputDisplayContents, ref string outputCodeContents, string fileContents, string inputFilename) {
    SSection[] fileSections;
    // NOTE(review): counting starts at zero while line directives are
    // conventionally one based. TODO confirm whether an offset is applied elsewhere.
    int lineNumber = 0;
    int charIndex = 0;
    while(charIndex < fileContents.length) {
        dchar ch = fileContents[charIndex];
        if(ch == '@@') {
            dchar chNext = charIndex < fileContents.length - 1 ? fileContents[charIndex + 1] : 0;
            charIndex += 2;
            if(chNext == '\n') {
                // The newline is skipped together with the at sign, so count it here.
                lineNumber++;
            }
            if(chNext == '@@') {
                // It's just an escaped at. Continue parsing.
            } else if(chNext == 'p') {
                fileSections ~= SSection("__main__", ESectionType.CODE, slurp_section(fileContents, charIndex, lineNumber, true, ESectionType.CODE));
            } else if(chNext == '>') {
                //End tag. This should be the end of this block.
            } else if(chNext == '<') {
                SBlock[] identifierBlocks = slurp_section(fileContents, charIndex, lineNumber, false, ESectionType.IDENTIFIER);
                assert(identifierBlocks.length == 1);
                string identifier = identifierBlocks[0].content;
                SBlock[] sectionContents;
                // startsWith cannot run past the end of the file the way a fixed width slice can.
                if(fileContents[charIndex..$].startsWith("@@>=")) {
                    charIndex += 3;
                    sectionContents = slurp_section(fileContents, charIndex, lineNumber, true, ESectionType.CODE);
                } else {
                    writefln("Identifier '%s' invoked outside program and not a definition.", identifier);
                }
                fileSections ~= SSection(identifier, ESectionType.CODE, sectionContents);
            } else if(chNext == '*') {
                ptrdiff_t titleEndingPeriod = countFromPosUntil(fileContents, charIndex, ".");
                string title = "";
                if(titleEndingPeriod > 0) {
                    title = fileContents[charIndex..titleEndingPeriod];
                    charIndex = cast(int) (titleEndingPeriod + 1);
                }
                fileSections ~= SSection(title, ESectionType.HEADER, slurp_section(fileContents, charIndex, lineNumber, false, ESectionType.HEADER));
            } else {
                // '@@ ' will be converted into a section.
                // NOTE(review): the literal below holds only line breaks in this copy.
                // TODO confirm whether paragraph markup was intended.
                fileSections ~= SSection("", ESectionType.PARAGRAPH, SBlock(ESectionType.PARAGRAPH, lineNumber, "

") ~ slurp_section(fileContents, charIndex, lineNumber, false, ESectionType.PARAGRAPH));
            }
        } else {
            fileSections ~= SSection("", ESectionType.PARAGRAPH, slurp_section(fileContents, charIndex, lineNumber, false, ESectionType.PARAGRAPH));
        }
    }
    @ @
    foreach(block; mainSection[0].contents) {
        if(block.type == ESectionType.IDENTIFIER) {
            outputCodeContents ~= expand_code_identifier(fileSections, block.content, inputFilename);
        } else {
            outputCodeContents ~= block.content;
        }
    }
}
@> @

=
// Program entry point. Expects exactly three arguments after the program
// name: the input file, the display output file and the code output file.
// Reads the input, runs parse_web_then_tangle_and_weave over it and writes
// the two resulting strings to the named output files.
void main(string[] args) {
    if(args.length != 4) {
        writefln("Usage: WEB3 inputFile outputHTMLFile outputCodeFile");
        // Stop here: indexing args below would go out of range.
        return;
    }
    const string inputFilename = args[1];
    string fileContents;
    try {
        fileContents = cast(string) std.file.read(inputFilename);
    } catch(FileException e) {
        // read throws on failure, so report it the same way as an empty file.
        writefln("Unable to read file '%s'.", inputFilename);
        return;
    }
    if(fileContents.length == 0) {
        writefln("Unable to read file '%s'.", inputFilename);
        return;
    }
    string outputDisplayContents = "";
    string outputCodeContents = "";
    parse_web_then_tangle_and_weave(
        outputDisplayContents,
        outputCodeContents,
        fileContents,
        inputFilename);
    string outputDisplayFilename = args[2];
    std.file.write(outputDisplayFilename, outputDisplayContents);
    string outputCodeFilename = args[3];
    std.file.write(outputCodeFilename, outputCodeContents);
}
@>

Literate Programming 4

" ~ section.name ~"

Table of Contents:

Index: