Mastering S-Expression Parsing in C: Build a Lisp Interpreter Step‑by‑Step
This article explains the concept of S‑Expressions, defines their simple syntax, and provides a complete C implementation that parses, stores, evaluates, and prints S‑Expression based Lisp code, including detailed explanations of data structures, constructors, destructors, and evaluation logic.
S-Expression
S‑Expression (Symbolic Expression) is a method of representing mathematical expressions using symbols, known for its flexibility and often used in AI algebraic systems such as Python's SymPy library.
Unlike Polish notation, which directly processes numbers, S‑Expressions focus on storing symbols like variables and operators rather than evaluating numeric values.
Because of this property, S‑Expressions can represent any set of mathematical formulas, not just specific numeric results.
S-Expression Syntax Rules
S‑Expression syntax is extremely simple:
Enclose an S‑Expression in parentheses.
An S‑Expression consists of zero or more Symbols or other S‑Expressions.
Thus S‑Expressions, like Polish notation, are essentially tree structures suitable for iterative processing by computers.
S-Expression Parser Implementation
Implementing an S‑Expression parser requires handling two core parts: storing the expression and evaluating it. The process is split into three major sections:
Parse input into an abstract syntax tree (AST).
Convert AST nodes into Lispy Values (lval) structures.
Traverse Lispy Values to compute the final result.
1. Reading and Storing Input
S-Expression Syntax Parser
We adapt the existing Polish‑expression parser to handle S‑Expressions. To support future extensions, the Operator rule is redefined as a Symbol rule, allowing keywords, variables, operators, functions, and more.
mpc_parser_t *Number = mpc_new("number");
mpc_parser_t *Symbol = mpc_new("symbol");
mpc_parser_t *Sexpr = mpc_new("sexpr");
mpc_parser_t *Expr = mpc_new("expr");
mpc_parser_t *Lispy = mpc_new("lispy");
mpca_lang(MPCA_LANG_DEFAULT,
"number : /-?[0-9]+/ ;\
symbol : '+' | '-' | '*' | '/' ;\
sexpr : '(' <expr>* ')' ;\
expr : <number> | <symbol> | <sexpr> ;\
lispy : /^/ <expr>* $/ ;",
Number, Symbol, Sexpr, Expr, Lispy);S-Expression Storage Implementation
Storage Structure Definition
S‑Expression consists of two core components:
Symbol : a special value type representing keywords, variables, operators, functions, etc.
Expression : a list of Symbols and other Expressions enclosed in parentheses.
We extend the lval enumeration to include Symbol and S‑Expression types:
enum { LVAL_ERR, LVAL_NUM, LVAL_SYM, LVAL_SEXPR };The lval struct becomes a recursive tree structure:
typedef struct lval {
int type;
long num;
char *err; // error message string
char *sym; // symbol string
int count; // number of child nodes
struct lval **cell; // array of child pointers
} lval;err now stores a detailed error message.
sym stores the symbol text.
count records how many child expressions are nested.
cell points to a dynamically allocated array of child lval pointers.
The result is a parent‑child tree data structure suitable for representing nested S‑Expressions.
Storage Constructor Functions
Constructors return pointers to avoid copying large structures:
lval* lval_num(long x) {
lval *v = malloc(sizeof(lval));
v->type = LVAL_NUM;
v->num = x;
return v;
}
lval* lval_err(char *m) {
lval *v = malloc(sizeof(lval));
v->type = LVAL_ERR;
v->err = malloc(strlen(m) + 1);
strcpy(v->err, m);
return v;
}
lval* lval_sym(char *s) {
lval *v = malloc(sizeof(lval));
v->type = LVAL_SYM;
v->sym = malloc(strlen(s) + 1);
strcpy(v->sym, s);
return v;
}
lval* lval_sexpr(void) {
lval *v = malloc(sizeof(lval));
v->type = LVAL_SEXPR;
v->count = 0;
v->cell = NULL;
return v;
}Storage Destructor Function
All allocated memory, including child nodes, must be freed to avoid leaks:
void lval_del(lval *v) {
switch (v->type) {
case LVAL_ERR: free(v->err); break;
case LVAL_SYM: free(v->sym); break;
case LVAL_SEXPR:
for (int i = 0; i < v->count; i++) {
lval_del(v->cell[i]);
}
free(v->cell);
break;
default: break; // LVAL_NUM needs no extra cleanup
}
free(v);
}Adding Elements to Storage
lval_add increments the child count and reallocates the cell array to accommodate the new element:
lval* lval_add(lval *v, lval *x) {
v->count++;
v->cell = realloc(v->cell, sizeof(lval*) * v->count);
v->cell[v->count-1] = x;
return v;
}Reading and Storing S‑Expression
AST nodes are converted to lval objects. Numbers and symbols are handled directly; root or sexpr nodes create an empty list and recursively add valid children.
lval* lval_read_num(mpc_ast_t *t) {
errno = 0;
long x = strtol(t->contents, NULL, 10);
return errno != ERANGE ? lval_num(x) : lval_err("invalid number");
}
lval* lval_read(mpc_ast_t *t) {
if (strstr(t->tag, "number")) return lval_read_num(t);
if (strstr(t->tag, "symbol")) return lval_sym(t->contents);
lval *x = NULL;
if (strcmp(t->tag, ">") == 0) x = lval_sexpr();
if (strstr(t->tag, "sexpr")) x = lval_sexpr();
for (int i = 0; i < t->children_num; i++) {
if (strcmp(t->children[i]->contents, "(") == 0) continue;
if (strcmp(t->children[i]->contents, ")") == 0) continue;
if (strcmp(t->children[i]->tag, "regex") == 0) continue;
x = lval_add(x, lval_read(t->children[i]));
}
return x;
}2. Evaluating Input
Evaluation traverses the tree similarly to Polish notation:
Extract the first element as the operator.
Apply the operator to the remaining operands.
lval* lval_eval_sexpr(lval *v) {
for (int i = 0; i < v->count; i++)
v->cell[i] = lval_eval(v->cell[i]);
for (int i = 0; i < v->count; i++)
if (v->cell[i]->type == LVAL_ERR) return lval_take(v, i);
if (v->count == 0) return v;
if (v->count == 1) return lval_take(v, 0);
lval *f = lval_pop(v, 0);
if (f->type != LVAL_SYM) {
lval_del(f);
lval_del(v);
return lval_err("S-expression Does not start with symbol!");
}
lval *result = builtin_op(v, f->sym);
lval_del(f);
return result;
}Builtin Operator Implementation
lval* builtin_op(lval *a, char *op) {
for (int i = 0; i < a->count; i++)
if (a->cell[i]->type != LVAL_NUM)
return (lval_del(a), lval_err("Cannot operate on non-number!"));
lval *x = lval_pop(a, 0);
if (strcmp(op, "-") == 0 && a->count == 0) x->num = -x->num;
while (a->count > 0) {
lval *y = lval_pop(a, 0);
if (strcmp(op, "+") == 0) x->num += y->num;
if (strcmp(op, "-") == 0) x->num -= y->num;
if (strcmp(op, "*") == 0) x->num *= y->num;
if (strcmp(op, "/") == 0) {
if (y->num == 0) {
lval_del(x); lval_del(y);
x = lval_err("Division By Zero!");
break;
}
x->num /= y->num;
}
lval_del(y);
}
lval_del(a);
return x;
}3. Printing Results
Printing functions traverse the lval tree and output Lisp‑style notation:
void lval_expr_print(lval *v, char open, char close) {
putchar(open);
for (int i = 0; i < v->count; i++) {
lval_print(v->cell[i]);
if (i != v->count-1) putchar(' ');
}
putchar(close);
}
void lval_print(lval *v) {
switch (v->type) {
case LVAL_NUM: printf("%li", v->num); break;
case LVAL_ERR: printf("Error: %s", v->err); break;
case LVAL_SYM: printf("%s", v->sym); break;
case LVAL_SEXPR: lval_expr_print(v, '(', ')'); break;
}
}
void lval_println(lval *v) { lval_print(v); putchar('
'); }The main function creates parsers, reads user input, evaluates the expression, prints the result, and cleans up resources:
int main(int argc, char *argv[]) {
mpc_parser_t *Number = mpc_new("number");
mpc_parser_t *Symbol = mpc_new("symbol");
mpc_parser_t *Sexpr = mpc_new("sexpr");
mpc_parser_t *Expr = mpc_new("expr");
mpc_parser_t *Lispy = mpc_new("lispy");
mpca_lang(MPCA_LANG_DEFAULT,
"number : /-?[0-9]+/ ;\
symbol : '+' | '-' | '*' | '/' ;\
sexpr : '(' <expr>* ')' ;\
expr : <number> | <symbol> | <sexpr> ;\
lispy : /^/ <expr>* $/ ;",
Number, Symbol, Sexpr, Expr, Lispy);
puts("Lispy Version 0.1");
puts("Press Ctrl+c to Exit
");
while (1) {
char *input = readline("lispy> ");
add_history(input);
mpc_result_t r;
if (mpc_parse("<stdin>", input, Lispy, &r)) {
lval *x = lval_eval(lval_read(r.output));
lval_println(x);
lval_del(x);
mpc_ast_delete(r.output);
} else {
mpc_err_print(r.error);
mpc_err_delete(r.error);
}
free(input);
}
mpc_cleanup(5, Number, Symbol, Sexpr, Expr, Lispy);
return 0;
}Compile with:
gcc -g -std=c99 -Wall main.c mpc.c -lreadline -lm -o mainRunning the program demonstrates arithmetic with S‑Expressions, handling addition, subtraction, multiplication, division, and error cases such as non‑numeric operands or division by zero.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
