Sorg

1 Introduction

This is Sorg, the Simple Org parser, inspired by smu. Sorg takes in an input file or stream, converts it to HTML, and writes it to an output stream.

2 Main loop

<<main_loop>>=

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parg.h"
<<global_variables>>
<<enums>>
<<sorg_data>>
<<function_declarations>>
<<functions>>
<<parsing_top>>
<<getline>>
int main(int argc, char *argv[])
{
    FILE *in;
    FILE *out;
    char *line;
    size_t read;
    size_t n;
    sorg_d sorg;
    char *filename;
    <<local_variables>>
    filename = NULL;
    in = stdin;
    <<parse_command_flags>>
    if(filename != NULL) {
        in = fopen(filename, "r");
    }
    out = stdout;
    line = NULL;
    n = 0;
    sorg_init(&sorg);
    while((read = sorg_getline(&line, &n, in)) != -1) {
        parse(&sorg, out, line, read);
    }
    if(sorg.pmode == MODE_TEXT) fprintf(out, "</p>");
    if(mktoc && sorg.depth >= 0) fprintf(out, "</ul>");
    if(in != stdin) fclose(in);
    if(line != NULL) free(line);
    return 0;
}

3 Sorg Data

All necessary data for sorg is encapsulated in a struct called sorg_d.

<<sorg_data>>=

/* All state kept by the parser. The individual fields are contributed by
 * the sorg_data_contents chunks. */
typedef struct {
    <<sorg_data_contents>>
} sorg_d;

Sorg data is initialized with the function sorg_init.

<<functions>>=

/* Puts every field of *sorg into its starting state. The assignments
 * themselves are contributed by the initialize_stuff chunks. */
void sorg_init(sorg_d *sorg) {
    int i; /* loop index used by the initialize_stuff chunks */
    <<initialize_stuff>>
}

The current mode of the state machine is stored in a variable called mode.

<<sorg_data_contents>>=

/* Current state of the parser state machine (one of the MODE_* values). */
int mode;

By default, it is set to be none, which is the general markdown mode.

<<initialize_stuff>>=

/* Start outside of any construct. */
sorg->mode = MODE_NONE;

For logic for things such as paragraphs, sorg also keeps track of the previous mode, called pmode.

<<sorg_data_contents>>=

/* Mode the previous line ended in; consulted to decide when a paragraph
 * tag has to be opened or closed. */
int pmode;

Like mode, pmode is initialized to be MODE_NONE.

<<initialize_stuff>>=

/* No previous line yet, so no paragraph is open. */
sorg->pmode = MODE_NONE;

Sorg internally keeps track of the current position (pos) in the line.

TODO: better words here. off: starting offset blksize: text block size

<<sorg_data_contents>>=

/* Index of the character currently being examined in the line. */
size_t pos;
/* Index in the line where the pending, not yet written block starts. */
size_t off;
/* Size counter for the pending block that is flushed with fwrite. */
size_t blksize;

<<initialize_stuff>>=

/* Nothing scanned and nothing pending. */
sorg->pos = 0;
sorg->off = 0;
sorg->blksize = 0;

Section position is kept track of in an array of integers known as secpos.

<<sorg_data_contents>>=

/* Number of section levels that have a counter. */
#define SORG_MAXSEC 10
/* Section counters, one per level, indexed by section depth. */
int secpos[SORG_MAXSEC];

Section numbers get all initialized to be zero.

<<initialize_stuff>>=

/* Clear the counter of every section level. */
for(i = 0; i < SORG_MAXSEC; i++) sorg->secpos[i] = 0;

The current section depth is kept track in an integer called depth. The previous depth is also recorded. This is needed for generating the table of contents with indentation.

<<sorg_data_contents>>=

/* Depth of the most recent section header; doubles as an index into secpos. */
int depth;
/* Depth of the header seen before that one. */
int pdepth;

depth and pdepth are set to an initial value of -1. Functions aiming to read these values should do a bounds check before using them. Depth corresponds to the array position in secpos, so be sure that the depth is at least 0 and less than SORG_MAXSEC.

<<initialize_stuff>>=

/* -1 means that no section header has been seen yet. */
sorg->depth = -1;
sorg->pdepth = -1;

4 Parsing

4.1 Parsing Top

The top-level parsing function is twofold: it is designed to take in org markup, and then produce equivalent HTML output.

Each character of the string is fed through a state machine. The mode of the state machine determines what kind of formatting to add. HTML formatting is tag based. A change to a state indicates the start of a particular tag. A change from a particular state indicates the end of that particular tag.

Text is written a block at a time using fwrite. Blocks are written any time a state changes. Blocks are also written at the end.

Parsing behavior will change based on the state of the flag mktoc. If enabled, the parser will be set up to generate a table of contents. Otherwise, it is just the regular parser.

4.1.1 The Parse Function

<<parsing_top>>=

/* Parses one input line and writes the resulting HTML to out.
 *
 * sorg: parser state carried over from line to line
 * out:  stream that receives the HTML
 * buf:  the line to parse
 * size: number of bytes in buf
 *
 * Returns 1 when the end of the function is reached; the included chunks
 * may return earlier with their own value.
 */
static int parse(sorg_d *sorg, FILE *out, char *buf, size_t size)
{
    /* Every line starts with a fresh cursor and an empty pending block. */
    sorg->pos = 0;
    sorg->off = 0;
    sorg->blksize = 0;
    if(mktoc) {
        <<toc_parse_mode>>
    } else {
        <<normal_parse_mode>>
    }
    return 1;
}

4.1.2 Normal Parsing Mode

This is the regular parsing mode, which converts all org syntax into HTML.

<<normal_parse_mode>>=

if(buf[0] == '\n') {
    if(sorg->pmode == MODE_TEXT) {
        fprintf(out,"</p>\n");
        sorg->pmode = MODE_NONE;
    }
    return 1;
}
while(sorg->pos < size) {
    switch(sorg->mode) {
        case MODE_NONE:
            if(is_header(sorg, buf, size)) {
                if(sorg->pmode == MODE_TEXT) fprintf(out, "</p>\n");
                print_header(sorg, buf, size, out);
                sorg->mode = MODE_NONE;
                sorg->pmode = sorg->mode;
                return 1;
            } else if(is_name(sorg, buf, size)) {
                if(sorg->pmode == MODE_TEXT) fprintf(out, "</p>\n");
                print_name(sorg, buf, size, out);
                sorg->mode = MODE_BEGIN_CODE;
                sorg->pmode = sorg->mode;
                return 1;
            } else if(is_title(sorg, buf, size)) {
                if(sorg->pmode == MODE_TEXT) fprintf(out, "</p>\n");
                print_title(sorg, buf, size, out);
                sorg->mode = MODE_NONE;
                sorg->pmode = sorg->mode;
                return 1;
            } else {
                sorg->mode = MODE_TEXT;
                sorg->off = sorg->pos;
                sorg->blksize = 1;
                if(sorg->pmode != MODE_TEXT) fprintf(out, "<p>");
            }
            break;
        case MODE_TEXT:
            <<parse_formatted>>
            if(is_link(sorg, &buf[sorg->pos], size - sorg->pos)) {
                fwrite(buf + sorg->off, 1, sorg->blksize - 1, out);
                sorg->pos += print_link(sorg,
                                        &buf[sorg->pos],
                                        size - sorg->pos,
                                        out);
                sorg->blksize = 1;
                sorg->off = sorg->pos;
            } else {
                if(buf[sorg->pos] != '\n') sorg->blksize++;
                sorg->pos++;
            }
            break;
        case MODE_BEGIN_CODE:
            if(!is_begin(sorg, buf, size)) return 0;
            else {
                fprintf(out, "\n<p><code>");
                sorg->mode = MODE_CODE;
                return 1;
            }
            break;
        case MODE_CODE:
            if(is_end(sorg, buf, size)) {
                sorg->mode = MODE_NONE;
                fprintf(out, "\n</code></p>\n");
                return 1;
            }
            sorg->blksize++;
            switch(buf[sorg->pos]) {
                case '<':
                    fwrite(buf + sorg->off, 1, sorg->blksize - 1, out);
                    sorg->off = sorg->pos + 1;
                    sorg->blksize = 0;
                    fprintf(out, "<");
                    break;
                case '>':
                    fwrite(buf + sorg->off, 1, sorg->blksize - 1, out);
                    sorg->off = sorg->pos + 1;
                    sorg->blksize = 0;
                    fprintf(out, ">");
                    break;
                case '\n':
                    fwrite(buf + sorg->off, 1, sorg->blksize - 1, out);
                    sorg->off = sorg->pos + 1;
                    sorg->blksize = 0;
                    fprintf(out, "<br>\n");
                    break;
                case ' ':
                    fwrite(buf + sorg->off, 1, sorg->blksize - 1, out);
                    sorg->off = sorg->pos + 1;
                    sorg->blksize = 0;
                    fprintf(out, " ");
                    break;
            }
            sorg->pos++;
            break;
    }
}
fwrite(buf + sorg->off, 1, sorg->blksize, out);
sorg->pmode = sorg->mode;
if(sorg->mode != MODE_CODE) {
    sorg->mode = MODE_NONE;
}

4.1.3 Table of Contents Parsing Mode

This mode is turned on with a switch from the command line flags. When enabled, the parser is only interested in parsing and printing headers. The rest, it skips.

<<toc_parse_mode>>=

/* Table-of-contents pass over one line (this chunk is part of parse()):
 * blank lines are skipped, headers and the title are printed, and every
 * other line produces no output. */
if(buf[0] == '\n') return 1;

if(is_header(sorg, buf, size)) {
    print_header(sorg, buf, size, out);
} else if(is_title(sorg, buf, size)) {
    print_title(sorg, buf, size, out);
}

4.2 Type Enum

<<enums>>=

/* States of the parser. The types chunks contribute the remaining MODE_*
 * constants, which are placed ahead of MODE_NONE. */
enum {
    <<types>>
    MODE_NONE
};

4.3 None mode

By default, the state machine begins in NONE mode. In this mode, the parser will be parsing the line for new states.

4.4 Headings

4.4.1 Check for Header

Headers in org-mode always start at the beginning of the line, and must have a number of stars, followed by a space. This is checked for with the function is_header.

<<function_declarations>>=

static int is_header(sorg_d *sorg, char *buf, size_t size);

<<functions>>=

/* Reports whether the line is a header: a run of one or more '*' at the
 * very start of the line, directly followed by a space.
 * Returns 1 for a header and 0 otherwise. */
static int is_header(sorg_d *sorg, char *buf, size_t size)
{
    size_t stars;

    if(size == 0 || buf[0] != '*') return 0;

    /* Step over the rest of the asterisks. */
    stars = 1;
    while(stars < size && buf[stars] == '*') stars++;

    /* The character that ends the run decides. */
    return stars < size && buf[stars] == ' ';
}

4.4.2 Print Header

If the line is indeed a header, the parser will scan the line again, this time counting the number of stars before the space indicating the header level. (Note: Since the title is reserved for header 1, the largest heading size starts at level 2.)

The header will then treat the rest of the line as the header text. This action is done with the function print_header.

4.4.2.1 Print Header Function
<<function_declarations>>=

static void print_header(sorg_d *sorg, char *buf, size_t size, FILE *out);

<<functions>>=

/* Prints the header contained in the line buf of size bytes.
 *
 * The number of leading asterisks determines the level; counting starts at
 * 1, so a single asterisk yields level 2. The spaces that follow the
 * asterisks are skipped and the remainder of the line, without its trailing
 * line break, is the header text. The section counters are updated before
 * anything is printed.
 */
static void print_header(sorg_d *sorg, char *buf, size_t size, FILE *out)
{
    size_t s; /* loop counter used by the print_toc_header chunk */
    size_t off; /* index of the first character of the header text */
    int level;
    off = 0;
    level = 1;
    /* Each asterisk raises the level by one. */
    while(off < size && buf[off] == '*') {
        level++;
        off++;
    }
    /* Skip all of the spaces between the asterisks and the text, not just
     * the first one. */
    while(off < size && buf[off] == ' ') off++;
    if(size > 0 && buf[size - 1] == '\n') size--; /* chomp */
    <<update_section_depth>>
    if(mktoc) {
        <<print_toc_header>>
    } else {
        <<print_normal_header>>
    }
}

It is inside the function print_header that the section counter is updated. This overloads the functionality of print_header, but it is a quick solution to the problem.

Level in this parser starts at header level 2, so the value needs to be subtracted by 2 in order to work with the internal section counters.

<<update_section_depth>>=

/* Header levels start at 2, section depths at 0. */
update_section_depth(sorg, level - 2);

4.4.2.2 Normal Header Printing
<<print_normal_header>>=

fprintf(out, "<a id=\"");
spaces_to_underscores(buf + off, size - off, out);
fprintf(out, "\">\n");
if(level < 6) {
    fprintf(out, "<h%d>", level);
    if(use_secno) print_section(sorg, out);
    fwrite(buf + off, 1, size - off, out);
    fprintf(out, "</h%d>\n", level);
} else {
    fprintf(out, "<u><i>");
    if(use_secno) print_section(sorg, out);
    fwrite(buf + off, 1, size - off, out);
    fprintf(out, "</i></u>\n");
}
fprintf(out, "</a>\n");

4.4.2.3 TOC Header Printing

A TOC section heading in HTML is a list item in an unordered list. HTML supports nested lists with automatic indentation, which makes presenting the hierarchy a little more elegant (no need to compute indentations manually!). However, the program needs to be responsible when to start and end unordered lists. To do this, we need to keep track of the previous section depth as well as the current one. When the depth changes, one has to update the unordered lists.

A new unordered list will only begin when there is a new subsection created, so this only occurs when the current depth is greater than the previous one.

When the depth moves back up toward the top level (that is, the current depth is smaller than the previous one), the currently populated list needs to be closed with an end tag. However, sometimes the depth will go up more than one level, which means that each of those lists will need to be closed. For this reason, a variable number of end tags need to be printed. The number of end tags needed is the difference between the previous depth and the current depth. (Note: I missed this edge case on the first go.)

<<print_toc_header>>=

/* Table-of-contents entry for the header (part of print_header).
 * One list is opened or closed for every level between the previous and
 * the current depth, so that start and end tags stay balanced even when a
 * header skips a level. */
if(sorg->depth > sorg->pdepth) {
    for(s = 0; s < (size_t)(sorg->depth - sorg->pdepth); s++) {
        fprintf(out, "<ul>");
    }
} else if(sorg->depth < sorg->pdepth) {
    for(s = 0; s < (size_t)(sorg->pdepth - sorg->depth); s++) {
        fprintf(out, "</ul>");
    }
}
/* The entry links to the anchor of the header inside the index file. */
fprintf(out, "<li><a href=\"%s#", indxfile);
spaces_to_underscores(buf + off, size - off, out);
fprintf(out, "\">");
if(use_secno) print_section(sorg, out);
fwrite(buf + off, 1, size - off, out);
fprintf(out, "</a></li>\n");

4.5 Titles

Titles are declared using the TITLE tag in org-mode. They should only be used once per document, and it is expected that they are declared at the top of the file.

<<function_declarations>>=

static int is_title(sorg_d *sorg, const char *buf, size_t size);

<<functions>>=

/* Returns 1 when the line begins with the "#+TITLE:" tag, 0 otherwise. */
static int is_title(sorg_d *sorg, const char *buf, size_t size)
{
    static const char tag[] = "#+TITLE:";
    const size_t tag_len = sizeof(tag) - 1;

    return size >= tag_len && memcmp(buf, tag, tag_len) == 0;
}

<<function_declarations>>=

static void print_title(sorg_d *sorg, const char *buf, size_t size, FILE *out);

<<functions>>=

/* Prints the document title as an <h1> element.
 *
 * buf is a line that starts with the 8 character "#+TITLE:" tag. The title
 * is whatever follows the tag once leading spaces are skipped, up to the
 * line break or the end of the line. Nothing is printed when the title is
 * empty.
 */
static void print_title(sorg_d *sorg, const char *buf, size_t size, FILE *out)
{
    size_t start;
    size_t end;

    start = 8; /* length of "#+TITLE:" */
    while(start < size && buf[start] == ' ') start++;

    end = start;
    while(end < size && buf[end] != '\n') end++;

    /* A tag without any text must not produce an empty heading. */
    if(end == start) return;

    fprintf(out, "<h1>");
    fwrite(buf + start, 1, end - start, out);
    fprintf(out, "</h1>\n");
}

4.6 Plain Text

Plain text can be roughly defined as stuff in between headers that isn't a code block. In this mode, the parser looks for Formatted Text.

<<types>>=

/* Inside a line of paragraph text. */
MODE_TEXT,

4.7 Code Blocks

All code blocks are assumed to begin with the NAME tag with the code block immediately following it on the next line.

A NAME tag is found while the state machine is in NONE mode. It is done using the function is_name.

<<function_declarations>>=

static int is_name(sorg_d *sorg, char *buf, size_t size);

<<functions>>=

/* Returns 1 when the line begins with the "#+NAME:" tag, 0 otherwise. */
static int is_name(sorg_d *sorg, char *buf, size_t size)
{
    static const char tag[] = "#+NAME:";
    const size_t tag_len = sizeof(tag) - 1;

    if(size < tag_len) return 0;
    return memcmp(buf, tag, tag_len) == 0;
}

If a NAME tag is found, this changes the state machine to only look for the beginning of a code block.

<<types>>=

/* A NAME tag was seen; waiting for the BEGIN_SRC tag. */
MODE_BEGIN_CODE,

The NAME tag will also be printed to screen. This is a distinct difference between the regular emacs org-mode exporter and this one (and an important one!) Emacs will not export the name of the code block, which leads to more confusing readability!

<<function_declarations>>=

static void print_name(sorg_d *sorg, char *buf, size_t size, FILE *out);

<<functions>>=

/* Prints the name of a code block in the form <<name>>=.
 *
 * buf is a line that starts with the 7 character "#+NAME:" tag. The name is
 * the first word after the tag: leading spaces are skipped and the name
 * ends at the next space, at the line break, or at the end of the line.
 */
static void print_name(sorg_d *sorg, char *buf, size_t size, FILE *out)
{
    const size_t tag_len = 7; /* length of "#+NAME:" */
    size_t start;
    size_t end;

    /* Guards the index arithmetic below against lines shorter than the tag. */
    if(size < tag_len) return;

    start = tag_len;
    while(start < size && buf[start] == ' ') start++;

    end = start;
    while(end < size && buf[end] != ' ' && buf[end] != '\n') end++;

    /* The angle brackets are literal text, so they are written as character
     * references; the formatting tags are closed in reverse order of
     * opening. */
    fprintf(out, "<div><b><i>&lt;&lt;");
    fwrite(buf + start, 1, end - start, out);
    fprintf(out, "</i></b>&gt;&gt;=</div>");
}

Code blocks begin and end with BEGIN_SRC and END_SRC tags. When a new code block declaration begins, the mode is set to be in code mode.

<<types>>=

/* Inside a code block, until the END_SRC tag. */
MODE_CODE,

The BEGIN_SRC tag is checked with the function is_begin.

<<function_declarations>>=

static int is_begin(sorg_d *sorg, char *buf, size_t size);

<<functions>>=

/* Returns 1 when the line begins with the "#+BEGIN_SRC" tag, 0 otherwise. */
static int is_begin(sorg_d *sorg, char *buf, size_t size)
{
    static const char tag[] = "#+BEGIN_SRC";
    const size_t tag_len = sizeof(tag) - 1;

    if(size < tag_len) return 0;
    return memcmp(buf, tag, tag_len) == 0;
}

Lines will remain in code mode until the END_SRC tag is found.

The END_SRC tag is found using the function is_end.

<<function_declarations>>=

static int is_end(sorg_d *sorg, char *buf, size_t size);

<<functions>>=

/* Returns 1 when the line begins with the "#+END_SRC" tag, 0 otherwise. */
static int is_end(sorg_d *sorg, char *buf, size_t size)
{
    static const char tag[] = "#+END_SRC";
    const size_t tag_len = sizeof(tag) - 1;

    if(size < tag_len) return 0;
    return memcmp(buf, tag, tag_len) == 0;
}

4.8 Formatted Text

All formatted text can be generally described as encapsulated text. Each formatting is surrounded by a special character (or characters). Because they are so similar, this kind of parsing can be generalized.

4.8.1 The core mechanic

First, the parser checks to see if an item is encapsulated by a particular delimiter delim of size dsize. If there are matching delimiters on the same line, then the text between them is considered to be encapsulated.

NOTE: for now, dsize only works with a size of 1.

<<function_declarations>>=

static int is_encapsulated(sorg_d *sorg,
                    const char *delim,
                    int dsize,
                    char *buf,
                    size_t size);

<<functions>>=

/* Reports whether buf starts with text enclosed in a pair of delimiters.
 *
 * Only the first character of delim is used (dsize is ignored for now).
 * buf has to start with the delimiter, and a second delimiter has to occur
 * somewhere after it; the last character of buf is never considered.
 *
 * Returns 1 when both delimiters are present and 0 otherwise.
 */
static int is_encapsulated(sorg_d *sorg,
                    const char *delim,
                    int dsize,
                    char *buf,
                    size_t size)
{
    /* At least three characters are needed: the opening delimiter, a
     * closing delimiter, and the final character that is left out of the
     * search. Checking this first also keeps the length below from
     * wrapping around for an empty buffer. */
    if(size < 3) return 0;
    if(buf[0] != delim[0]) return 0;
    return memchr(buf + 1, delim[0], size - 2) != NULL;
}

Once encapsulated text is found, the encapsulated text is printed to screen. In HTML, this means they are between HTML tags of a certain label. This function returns how many characters it parsed, so the parser knows to skip over them.

<<function_declarations>>=

static size_t print_encapsulated(sorg_d *sorg,
                          const char *delim,
                          int dsize,
                          char *buf,
                          size_t size,
                          const char *tag,
                          FILE *out);

This function extracts the text in between the two delimiters and then prints it between the specified HTML tags. The trick with printing the text is factoring in offsets for both sets of delimiters. To avoid the leftmost delimiter, offset by the size of the delimiter. To avoid the rightmost delimiter, negate the size by two times the delimiter to compensate for both the rightmost and leftmost delimiter.

<<functions>>=

/* Prints the text found between a pair of delimiters, wrapped in the HTML
 * element named by tag.
 *
 * buf starts at the opening delimiter. The closing delimiter is searched
 * from the second character on, leaving out the last character of buf.
 * Only the first character of delim is compared.
 *
 * Returns the number of characters consumed from buf, closing delimiter
 * included.
 */
static size_t print_encapsulated(sorg_d *sorg,
                          const char *delim,
                          int dsize,
                          char *buf,
                          size_t size,
                          const char *tag,
                          FILE *out)
{
    const size_t limit = size - 1;
    size_t close;
    size_t consumed;

    /* Find the closing delimiter. */
    close = 1;
    while(close < limit && buf[close] != delim[0]) close++;

    /* A delimiter that was found is part of what gets consumed. */
    if(close < limit) {
        consumed = close + 1;
    } else {
        consumed = close;
    }

    fprintf(out, "<%s>", tag);
    /* Leave out the delimiter on either side. */
    fwrite(buf + dsize, 1, consumed - 2*dsize, out);
    fprintf(out, "</%s>", tag);
    return consumed;
}

In the parse loop, both is_encapsulated and print_encapsulated can be combined together in a function called chk_encapsulated. The chk is short for check, and the abbreviation is only used to make sure the function call fits on one line.

On success, the function will return "true", causing the parser to break out of the chain of if statements.

<<function_declarations>>=

static int chk_encapsulated(sorg_d *sorg,
                          const char *delim,
                          int dsize,
                          char *buf,
                          size_t size,
                          const char *tag,
                          FILE *out);

<<functions>>=

/* Tries to handle delimiter-enclosed text at the current position of the
 * line.
 *
 * When the text at sorg->pos is enclosed by delim, the pending block is
 * written out, the enclosed text is printed inside the element named by
 * tag, and the position moves behind the closing delimiter.
 *
 * Returns 1 when enclosed text was handled and 0 when nothing was done.
 */
static int chk_encapsulated(sorg_d *sorg,
                          const char *delim,
                          int dsize,
                          char *buf,
                          size_t size,
                          const char *tag,
                          FILE *out)
{
    char *cursor = &buf[sorg->pos];
    size_t remaining = size - sorg->pos;

    if(!is_encapsulated(sorg, delim, dsize, cursor, remaining)) return 0;

    /* Text collected so far goes out ahead of the formatted part. */
    fwrite(buf + sorg->off, 1, sorg->blksize - 1, out);
    sorg->pos += print_encapsulated(sorg, delim, dsize,
                                    cursor, remaining, tag, out);

    /* Start a new block at the new position. */
    sorg->blksize = 1;
    sorg->off = sorg->pos;
    return 1;
}

4.8.2 Format Styles

4.8.2.1 Bold
<<parse_formatted>>=

/* *text* becomes <b>text</b> */
if(chk_encapsulated(sorg, "*", 1, buf, size, "b", out)) break;

4.8.2.2 Italics
<<parse_formatted>>=

/* /text/ becomes <i>text</i> */
if(chk_encapsulated(sorg, "/", 1, buf, size, "i", out)) break;

4.8.2.3 Preformatted
<<parse_formatted>>=

/* =text= becomes <code>text</code> */
if(chk_encapsulated(sorg, "=", 1, buf, size, "code", out)) break;

4.8.2.4 Underline
<<parse_formatted>>=

/* _text_ becomes <u>text</u> */
if(chk_encapsulated(sorg, "_", 1, buf, size, "u", out)) break;

4.8.2.5 TeX math mode

This is not yet implemented.

4.9 Ordered Lists

This is not yet implemented.

4.10 Unordered Lists

This is not yet implemented.

4.11 TODO "TODO" headings

This is not yet implemented.

4.12 Links

A link can be found in Sorg using the function is_link.

<<function_declarations>>=

static int is_link(sorg_d *sorg, char *buf, size_t size);

<<functions>>=

/* Reports whether buf starts with a link: "[[" at the very start and a
 * closing "]]" further on. Returns 1 for a link and 0 otherwise. */
static int is_link(sorg_d *sorg, char *buf, size_t size)
{
    size_t i;

    if(size < 5) return 0;
    if(!(buf[0] == '[' && buf[1] == '[')) return 0;

    /* i is the index of the second bracket of a candidate "]]". */
    for(i = 3; i < size; i++) {
        if(buf[i - 1] == ']' && buf[i] == ']') return 1;
    }
    return 0;
}

<<function_declarations>>=

static size_t print_link(sorg_d *sorg, char *buf, size_t size, FILE *out);

<<functions>>=

/* Prints the link at the start of buf as an HTML anchor.
 *
 * Two forms are understood: [[target][description]] and [[target]]. In
 * the second form the target is also used as the visible text. Targets
 * that start with "http://" or "https://" are used as they are; any other
 * target is taken to be the name of a section in the same document and
 * becomes a "#" reference with underscores instead of spaces.
 *
 * Returns the number of characters of buf that make up the link, so the
 * caller can continue behind it.
 */
static size_t print_link(sorg_d *sorg, char *buf, size_t size, FILE *out)
{
    const char *link;
    const char *name;
    size_t link_size;
    size_t name_size;
    size_t n;
    size_t consumed;

    link = &buf[2];
    name = NULL;
    link_size = 0;
    consumed = size; /* used when no closing "]]" exists */

    /* n + 1 < size keeps the one character lookahead inside the buffer. */
    for(n = 2; n + 1 < size; n++) {
        if(buf[n] != ']') continue;
        if(buf[n + 1] == '[' && name == NULL) {
            /* "][" ends the target and starts the description. */
            link_size = n - 2;
            name = &buf[n + 2];
            n++; /* step over the '[' */
        } else if(buf[n + 1] == ']') {
            consumed = n + 2;
            break;
        }
    }
    /* n is the index where the last part of the link ends. */
    if(n > size) n = size;
    if(name == NULL) {
        /* No description: show the target itself. */
        link_size = n - 2;
        name = link;
        name_size = link_size;
    } else {
        name_size = (size_t)(&buf[n] - name);
    }

    if(!strncmp(link, "https://", 8) || !strncmp(link, "http://", 7)) {
        fprintf(out, "<a href=\"");
        fwrite(link, 1, link_size, out);
    } else {
        fprintf(out, "<a href=\"#");
        spaces_to_underscores(link, link_size, out);
    }
    fprintf(out, "\">");
    fwrite(name, 1, name_size, out);
    fprintf(out, "</a>");
    return consumed;
}

5 Sections

Org structure is very hierarchical. Sections in an Org document have an implicit section number, determined by where they are in the document, and what their hierarchical level is.

Section system is a decimal point like system. The introductory section would be section 1. A sub section would be 1.1, a second subsection 1.2, a sub section of the sub section would be 1.2.1. etc.

While parsing the file, Sorg keeps track of the current level. Every time a section heading is discovered, the level of the header is checked. If the level matches, the counter assigned to the current level is incremented. If it is a greater depth, the counter for the new depth is set to "1". If it is a lesser depth, the counter for that depth is incremented, and the counters of all depths following it are zeroed out.

Section number depth can be checked and updated with the function update_section_depth.

<<function_declarations>>=

static void update_section_depth(sorg_d *sorg, int depth);

<<functions>>=

/* Updates the section counters for a header found at the given depth.
 *
 * depth is zero based and is used as an index into sorg->secpos. A header
 * that is deeper than the previous one starts its counter at 1. A header
 * at the same depth increments its counter. A header at a lesser depth
 * increments its counter and clears the counters of all deeper levels.
 * The depth that was current so far is remembered in sorg->pdepth.
 *
 * A depth outside of 0 .. SORG_MAXSEC - 1 leaves the counters untouched
 * and prints a warning to stderr.
 */
static void update_section_depth(sorg_d *sorg, int depth)
{
    int i;
    if(depth < 0 || depth >= SORG_MAXSEC) {
        fprintf(stderr,
            "Warning: section depth of %d exceeds maximum depth of %d\n",
            depth + 1, SORG_MAXSEC);
        /* The header is treated as one more entry at the current depth,
         * so the depth has not changed from the previous header. */
        sorg->pdepth = sorg->depth;
        return;
    }
    if(depth > sorg->depth) {
        sorg->secpos[depth] = 1;
    } else if(depth < sorg->depth) {
        sorg->secpos[depth]++;
        for(i = depth + 1; i < SORG_MAXSEC; i++) sorg->secpos[i] = 0;
    } else {
        sorg->secpos[depth]++;
    }
    sorg->pdepth = sorg->depth;
    sorg->depth = depth;
}

To print the current section number in the nice X.Y.Z format, use the function print_section and supply a filehandle to write to.

<<function_declarations>>=

static void print_section(sorg_d *sorg, FILE *out);

It should be noted that this function will print a space, since a space is needed when printing the header.

<<functions>>=

/* Writes the current section number in X.Y.Z form, followed by a single
 * space. Nothing is written before the first section header was seen. */
static void print_section(sorg_d *sorg, FILE *out)
{
    int level;

    if(sorg->depth < 0) return;

    for(level = 0; level <= sorg->depth; level++) {
        /* A dot goes between the numbers, not in front of the first one. */
        if(level > 0) fputc('.', out);
        fprintf(out, "%d", sorg->secpos[level]);
    }
    fputc(' ', out);
}

6 Command line parsing

Command line argument parsing is done using the third-party library parg, included in this source distribution.

<<local_variables>>=

/* State of the parg option parser. */
struct parg_state ps;
/* Value returned by parg_getopt for the argument just processed. */
int c;

<<parse_command_flags>>=

/* Command line handling (part of main): -s turns on section numbers,
 * -t FILE generates a table of contents, and an argument that is not an
 * option is taken as the name of the input file. */
parg_init(&ps);
while((c = parg_getopt(&ps, argc, argv, "st:")) != -1) {
    switch(c) {
        case 1:
            /* parg reports an argument that is not an option as 1. */
            filename = (char *)ps.optarg;
            break;
        case 't':
            <<generate_toc>>
            break;
        case 's':
            <<turn_on_sections>>
            break;
        default:
            /* On an error c holds the error code of parg; the option that
             * caused it is found in ps.optopt. */
            fprintf(stderr,
                "Unknown option or missing argument: -%c\n",
                ps.optopt);
            return 1;
    }
}

6.1 Turn on section numbers (-s)

The "-s" flag will turn on section numbers. This will set a global variable use_secno to be 1.

<<turn_on_sections>>=

/* Headers are printed with their section number from now on. */
use_secno = 1;

By default, the use_secno global is 0.

<<global_variables>>=

/* Non-zero when headers are printed with their section number (-s). */
static int use_secno = 0;

6.2 Generate Table of Contents (-t)

This argument flag will generate a table of contents. The flag has an input parameter, which should be the name of the HTML filename containing the full documentation.

<<generate_toc>>=

/* Switch to table of contents output; the argument of the flag is the
 * file that the entries link to. */
mktoc = 1;
indxfile = (char *)ps.optarg;

By default, the filename is set to NULL, and the mktoc variable is set to be false.

<<global_variables>>=

/* File name placed in front of the anchor in table of contents links. */
static char *indxfile = NULL;
/* Non-zero when a table of contents is generated instead of the document. */
static int mktoc = 0;

7 Miscellaneous Utilities

7.1 Getline

The Sorg parser parses a file one line at a time.

Because the getline function is not part of the ANSI C standard, an implementation is used in its place.

<<getline>>=

/* Reads one line from stream into a buffer that grows as needed.
 *
 * lineptr: address of the line buffer. *lineptr may be NULL, in which case
 *          a buffer is allocated; otherwise it must point to memory
 *          obtained from malloc. The caller frees *lineptr when done.
 * n:       address of the capacity of *lineptr in bytes; updated whenever
 *          the buffer grows.
 * stream:  stream to read from.
 *
 * The stored line always ends with a line break followed by a terminating
 * '\0'. When the last line of the input has no line break, one is added,
 * so that every line looks the same to the caller.
 *
 * Returns the number of characters in the line, line break included, or
 * (size_t)-1 at the end of the input, on an invalid argument, or when
 * memory runs out. After a failed allocation *lineptr and *n still
 * describe a valid buffer.
 */
size_t sorg_getline(char **lineptr, size_t *n, FILE *stream) {
    const size_t chunk = 128; /* growth step of the buffer in bytes */
    char *buf;
    char *grown;
    size_t cap;
    size_t len;
    int c;

    if (lineptr == NULL || n == NULL || stream == NULL) {
        return (size_t)-1;
    }
    c = fgetc(stream);
    if (c == EOF) {
        return (size_t)-1;
    }
    buf = *lineptr;
    cap = (buf == NULL) ? 0 : *n;
    len = 0;
    for (;;) {
        /* Keep room for this character, for a line break that may have to
         * be added, and for the terminating '\0'. */
        if (len + 3 > cap) {
            grown = realloc(buf, cap + chunk);
            if (grown == NULL) {
                return (size_t)-1;
            }
            buf = grown;
            cap += chunk;
            /* Publish the new buffer right away: the old pointer is no
             * longer valid after a successful realloc. */
            *lineptr = buf;
            *n = cap;
        }
        if (c == EOF) {
            /* Some text editors do not insert a line break on the last
             * line. Supply the missing one. */
            buf[len++] = '\n';
            break;
        }
        buf[len++] = (char)c;
        if (c == '\n') {
            break;
        }
        c = fgetc(stream);
    }
    buf[len] = '\0';
    return len;
}

7.2 Spaces To Underscores

As it turns out, some browsers, such as netsurf, don't work well when references have spaces in them. This function will take in a string and write it to a file handle, replacing the spaces ( ) with underscores (_).

<<function_declarations>>=

static void spaces_to_underscores(const char *str, size_t size, FILE *out);

<<functions>>=

/* Writes the first size characters of str to out with every space
 * replaced by an underscore. The stretches between spaces are written in
 * one piece each. */
static void spaces_to_underscores(const char *str, size_t size, FILE *out)
{
    size_t run_start; /* first character that has not been written yet */
    size_t i;

    run_start = 0;
    for(i = 0; i < size; i++) {
        if(str[i] != ' ') continue;
        /* Write the text in front of the space, then its replacement. */
        fwrite(str + run_start, 1, i - run_start, out);
        fputc('_', out);
        run_start = i + 1;
    }
    /* Whatever follows the last space. */
    fwrite(str + run_start, 1, size - run_start, out);
}