How to Build a Lisp Interpreter in C: Conditional Branching and Built‑in Functions
This tutorial walks through extending a C‑based Lisp interpreter by adding conditional branching, comparison built‑ins, an if keyword, and registering all built‑in functions, while providing the complete source code and explanations for each component.
Previous Articles
《C 语言速通(0)C 家族发展编年史》
《C 语言速通(1)HelloWorld》
《C 语言速通(2)基本数据类型》
《C 语言速通(3)指针类型》
《C 语言速通(4)数组与字符串类型》
《C 语言速通(5)结构体与位域》
《C 语言速通(6)枚举与共用体》
《C 语言速通(7)变量常量与作用域》
《C 语言速通(8)运算符与逻辑控制》
《C 语言速通(9)函数与宏定义》
《用 C 写一门编程语言(0)程序编译原理》
《用 C 写一门编程语言(1) Lispy 编程演示》
《用 C 写一门编程语言(2) 交互式语法解析器》
《用 C 写一门编程语言(3)波兰表达式解析器》
《用 C 写一门编程语言(4)数值存储器》
《用 C 写一门编程语言(5)符号表达式解析器》
《用 C 写一门编程语言(6)引用表达式解析器》
《用 C 写一门编程语言(7)变量元素的设计与实现》
《用 C 写一门编程语言(8)Lambda 函数表达式》
Conditional Branching
To support conditional syntax we implement several builtin functions:
Greater‑than / less‑than functions
Equality function
If function
Recursive function
Example output:
Lispy Version 0.1
Press Ctrl+c to Exit
lispy> > 10 5
1
lispy> <= 88 5
0
lispy> == 5 6
0
lispy> == 5 {}
0
lispy> == 1 1
1
lispy> != {} 56
1
lispy> == {1 2 3 {5 6}} {1 2 3 {5 6}}
1
lispy> def{x y} 100 200
()
lispy> if (== x y) {+ x y} {- x y}
-100Size Comparison Functions
These functions compare two Number values and return 0 (False) or 1 (True) using C's comparison operators.
Assert the arguments.
Perform the numeric comparison.
Return the result.
lval* builtin_gt(lenv* e, lval* a) { return builtin_ord(e, a, ">"); }
lval* builtin_lt(lenv* e, lval* a) { return builtin_ord(e, a, "<"); }
lval* builtin_ge(lenv* e, lval* a) { return builtin_ord(e, a, ">="); }
lval* builtin_le(lenv* e, lval* a) { return builtin_ord(e, a, "<="); }
lval* builtin_ord(lenv* e, lval* a, char* op) {
LASSERT_NUM(op, a, 2);
LASSERT_TYPE(op, a, 0, LVAL_NUM);
LASSERT_TYPE(op, a, 1, LVAL_NUM);
int r;
if (strcmp(op, ">") == 0) { r = (a->cell[0]->num > a->cell[1]->num); }
if (strcmp(op, "<") == 0) { r = (a->cell[0]->num < a->cell[1]->num); }
if (strcmp(op, ">=") == 0) { r = (a->cell[0]->num >= a->cell[1]->num); }
if (strcmp(op, "<=") == 0) { r = (a->cell[0]->num <= a->cell[1]->num); }
lval_del(a);
return lval_num(r);
}Equality Comparison Function
Unlike size comparisons, the equality function works for Numbers, empty lists, and even function objects.
It compares all fields of two lval structures and returns a new lval containing the boolean result.
int lval_eq(lval* x, lval* y) {
if (x->type != y->type) { return 0; }
switch (x->type) {
case LVAL_NUM: return (x->num == y->num);
case LVAL_ERR: return (strcmp(x->err, y->err) == 0);
case LVAL_SYM: return (strcmp(x->sym, y->sym) == 0);
case LVAL_FUN:
if (x->builtin || y->builtin) { return x->builtin == y->builtin; }
else { return lval_eq(x->formals, y->formals) && lval_eq(x->body, y->body); }
case LVAL_QEXPR:
case LVAL_SEXPR:
if (x->count != y->count) { return 0; }
for (int i = 0; i < x->count; i++) {
if (!lval_eq(x->cell[i], y->cell[i])) { return 0; }
}
return 1;
}
return 0;
}
lval* builtin_cmp(lenv* e, lval* a, char* op) {
LASSERT_NUM(op, a, 2);
int r;
if (strcmp(op, "==") == 0) { r = lval_eq(a->cell[0], a->cell[1]); }
if (strcmp(op, "!=") == 0) { r = !lval_eq(a->cell[0], a->cell[1]); }
lval_del(a);
return lval_num(r);
}
lval* builtin_eq(lenv* e, lval* a) { return builtin_cmp(e, a, "=="); }
lval* builtin_ne(lenv* e, lval* a) { return builtin_cmp(e, a, "!="); }If Keyword Function
We add an if keyword whose syntax mirrors C's ternary operator.
The conditional body uses two Q‑Expressions that are evaluated depending on the condition.
lval* builtin_if(lenv* e, lval* a) {
LASSERT_NUM("if", a, 3);
LASSERT_TYPE("if", a, 0, LVAL_NUM);
LASSERT_TYPE("if", a, 1, LVAL_QEXPR);
LASSERT_TYPE("if", a, 2, LVAL_QEXPR);
a->cell[1]->type = LVAL_SEXPR;
a->cell[2]->type = LVAL_SEXPR;
lval* x;
if (a->cell[0]->num) { x = lval_eval(e, lval_pop(a, 1)); }
else { x = lval_eval(e, lval_pop(a, 2)); }
lval_del(a);
return x;
}Function Registration
Finally we register all builtin functions with the environment.
/* Comparison Functions */
lenv_add_builtin(e, "if", builtin_if);
lenv_add_builtin(e, "==", builtin_eq);
lenv_add_builtin(e, "!=", builtin_ne);
lenv_add_builtin(e, ">", builtin_gt);
lenv_add_builtin(e, "<", builtin_lt);
lenv_add_builtin(e, ">=", builtin_ge);
lenv_add_builtin(e, "<=", builtin_le);Source Code
#include <stdio.h>
#include <stdlib.h>
#include "mpc.h"
#define LASSERT(args, cond, fmt, ...) \
if (!(cond)) { lval* err = lval_err(fmt, ##__VA_ARGS__); lval_del(args); return err; }
#define LASSERT_TYPE(func, args, index, expect) \
LASSERT(args, args->cell[index]->type == expect, \
"Function '%s' passed incorrect type for argument %i. Got %s, Expected %s.", \
func, index, ltype_name(args->cell[index]->type), ltype_name(expect))
#define LASSERT_NUM(func, args, num) \
LASSERT(args, args->count == num, \
"Function '%s' passed incorrect number of arguments. Got %i, Expected %i.", \
func, args->count, num)
#define LASSERT_NOT_EMPTY(func, args, index) \
LASSERT(args, args->cell[index]->count != 0, \
"Function '%s' passed {} for argument %i.", func, index);
#ifdef _WIN32
#include <string.h>
static char buffer[2048];
char* readline(char* prompt) {
fputs(prompt, stdout);
fgets(buffer, 2048, stdin);
char* cpy = malloc(strlen(buffer)+1);
strcpy(cpy, buffer);
cpy[strlen(cpy)-1] = '\0';
return cpy;
}
void add_history(char* unused) {}
#else
#ifdef __linux__
#include <readline/readline.h>
#include <readline/history.h>
#endif
#ifdef __MACH__
#include <readline/readline.h>
#endif
#endif
/* Forward Declarations */
struct lval; struct lenv;
typedef struct lval lval;
typedef struct lenv lenv;
enum { LVAL_NUM, LVAL_ERR, LVAL_SYM, LVAL_FUN, LVAL_SEXPR, LVAL_QEXPR };
typedef lval* (*lbuiltin)(lenv*, lval*);
struct lval {
int type;
long num;
char* err;
char* sym;
lbuiltin builtin;
lenv* env;
lval* formals;
lval* body;
int count;
lval** cell;
};
struct lenv {
lenv* par;
int count;
char** syms;
lval** vals;
};
/* Constructors */
lval* lval_num(long x) { lval* v = malloc(sizeof(lval)); v->type = LVAL_NUM; v->num = x; return v; }
char* ltype_name(int t) { switch(t){case LVAL_FUN:return "Function";case LVAL_NUM:return "Number";case LVAL_ERR:return "Error";case LVAL_SYM:return "Symbol";case LVAL_SEXPR:return "S-Expression";case LVAL_QEXPR:return "Q-Expression";default:return "Unknown";}}
lval* lval_err(char* fmt, ...) { lval* v = malloc(sizeof(lval)); v->type = LVAL_ERR; va_list va; va_start(va, fmt); v->err = malloc(512); vsnprintf(v->err, 511, fmt, va); v->err = realloc(v->err, strlen(v->err)+1); va_end(va); return v; }
lval* lval_sym(char* sym) { lval* v = malloc(sizeof(lval)); v->type = LVAL_SYM; v->sym = malloc(strlen(sym)+1); strcpy(v->sym, sym); return v; }
lval* lval_sexpr(void) { lval* v = malloc(sizeof(lval)); v->type = LVAL_SEXPR; v->count = 0; v->cell = NULL; return v; }
lval* lval_qexpr(void) { lval* v = malloc(sizeof(lval)); v->type = LVAL_QEXPR; v->count = 0; v->cell = NULL; return v; }
lval* lval_builtin(lbuiltin func) { lval* v = malloc(sizeof(lval)); v->type = LVAL_FUN; v->builtin = func; return v; }
lenv* lenv_new(void) { lenv* e = malloc(sizeof(lenv)); e->par = NULL; e->count = 0; e->syms = NULL; e->vals = NULL; return e; }
lval* lval_lambda(lval* formals, lval* body) { lval* v = malloc(sizeof(lval)); v->type = LVAL_FUN; v->builtin = NULL; v->env = lenv_new(); v->formals = formals; v->body = body; return v; }
void lenv_del(lenv* e);
void lval_del(lval* v) {
switch(v->type) {
case LVAL_NUM: break;
case LVAL_ERR: free(v->err); break;
case LVAL_SYM: free(v->sym); break;
case LVAL_FUN:
if (!v->builtin) { lenv_del(v->env); lval_del(v->formals); lval_del(v->body); }
break;
case LVAL_QEXPR:
case LVAL_SEXPR:
for (int i=0;i<v->count;i++) lval_del(v->cell[i]);
free(v->cell);
break;
}
free(v);
}
void lenv_del(lenv* e) {
for (int i=0;i<e->count;i++) { free(e->syms[i]); lval_del(e->vals[i]); }
free(e->syms); free(e->vals); free(e);
}
lval* lval_copy(lval* v);
lenv* lenv_copy(lenv* e) {
lenv* n = malloc(sizeof(lenv)); n->par = e->par; n->count = e->count; n->syms = malloc(sizeof(char*)*n->count); n->vals = malloc(sizeof(lval*)*n->count);
for (int i=0;i<e->count;i++) { n->syms[i]=malloc(strlen(e->syms[i])+1); strcpy(n->syms[i], e->syms[i]); n->vals[i]=lval_copy(e->vals[i]); }
return n;
}
lval* lval_copy(lval* v) {
lval* x = malloc(sizeof(lval)); x->type = v->type;
switch(v->type) {
case LVAL_FUN:
if (v->builtin) { x->builtin = v->builtin; }
else { x->builtin = NULL; x->env = lenv_copy(v->env); x->formals = lval_copy(v->formals); x->body = lval_copy(v->body); }
break;
case LVAL_NUM: x->num = v->num; break;
case LVAL_ERR:
x->err = malloc(strlen(v->err)+1); strcpy(x->err, v->err); break;
case LVAL_SYM:
x->sym = malloc(strlen(v->sym)+1); strcpy(x->sym, v->sym); break;
case LVAL_SEXPR:
case LVAL_QEXPR:
x->count = v->count; x->cell = malloc(sizeof(lval*)*x->count);
for (int i=0;i<x->count;i++) x->cell[i]=lval_copy(v->cell[i]);
break;
}
return x;
}
lval* lenv_get(lenv* e, lval* k) {
for (int i=0;i<e->count;i++) if (strcmp(e->syms[i], k->sym)==0) return lval_copy(e->vals[i]);
if (e->par) return lenv_get(e->par, k);
return lval_err("Unbound Symbol '%s'", k->sym);
}
void lenv_put(lenv* e, lval* k, lval* v) {
for (int i=0;i<e->count;i++) if (strcmp(e->syms[i], k->sym)==0) { lval_del(e->vals[i]); e->vals[i]=lval_copy(v); return; }
e->count++; e->vals = realloc(e->vals, sizeof(lval*)*e->count); e->syms = realloc(e->syms, sizeof(char*)*e->count);
e->vals[e->count-1]=lval_copy(v);
e->syms[e->count-1]=malloc(strlen(k->sym)+1); strcpy(e->syms[e->count-1], k->sym);
}
void lenv_def(lenv* e, lval* k, lval* v) { while (e->par) e=e->par; lenv_put(e, k, v); }
lval* lval_add(lval* v, lval* x) { v->count++; v->cell = realloc(v->cell, sizeof(lval*)*v->count); v->cell[v->count-1]=x; return v; }
lval* lval_pop(lval* v, int i) {
lval* x = v->cell[i];
memmove(&v->cell[i], &v->cell[i+1], sizeof(lval*)*(v->count-i-1));
v->count--; v->cell = realloc(v->cell, sizeof(lval*)*v->count);
return x;
}
lval* lval_take(lval* v, int i) { lval* x = lval_pop(v, i); lval_del(v); return x; }
lval* builtin_eval(lenv* e, lval* a);
lval* builtin_list(lenv* e, lval* a);
lval* lval_call(lenv* e, lval* f, lval* a) {
if (f->builtin) return f->builtin(e, a);
int given = a->count; int total = f->formals->count;
while (a->count) {
if (f->formals->count == 0) { lval_del(a); return lval_err("Function passed too many arguments. Got %i, Expected %i.", given, total); }
lval* sym = lval_pop(f->formals, 0);
if (strcmp(sym->sym, "&") == 0) {
if (f->formals->count != 1) { lval_del(a); return lval_err("Function format invalid. Symbol '&' not followed by single symbol."); }
lval* nsym = lval_pop(f->formals, 0);
lenv_put(f->env, nsym, builtin_list(e, a));
lval_del(sym); lval_del(nsym);
break;
}
lval* val = lval_pop(a, 0);
lenv_put(f->env, sym, val);
lval_del(sym); lval_del(val);
}
lval_del(a);
if (f->formals->count > 0 && strcmp(f->formals->cell[0]->sym, "&") == 0) {
if (f->formals->count != 2) return lval_err("Function format invalid. Symbol '&' not followed by single symbol.");
lval_del(lval_pop(f->formals, 0));
lval* sym = lval_pop(f->formals, 0);
lval* val = lval_qexpr();
lenv_put(f->env, sym, val);
lval_del(sym); lval_del(val);
}
if (f->formals->count == 0) {
f->env->par = e;
return builtin_eval(f->env, lval_add(lval_sexpr(), lval_copy(f->body)));
} else {
return lval_copy(f);
}
}
lval* lval_eval(lenv* e, lval* v);
lval* lval_eval_sexpr(lenv* e, lval* v) {
for (int i=0;i<v->count;i++) v->cell[i]=lval_eval(e, v->cell[i]);
for (int i=0;i<v->count;i++) if (v->cell[i]->type == LVAL_ERR) return lval_take(v, i);
if (v->count == 0) return v;
if (v->count == 1) return lval_take(v, 0);
lval* f = lval_pop(v, 0);
if (f->type != LVAL_FUN) {
lval* err = lval_err("S-Expression starts with incorrect type. Got %s, Expected %s.", ltype_name(f->type), ltype_name(LVAL_FUN));
lval_del(f); lval_del(v); return err;
}
lval* result = lval_call(e, f, v);
lval_del(f);
return result;
}
lval* lval_eval(lenv* e, lval* v) {
if (v->type == LVAL_SYM) { lval* x = lenv_get(e, v); lval_del(v); return x; }
if (v->type == LVAL_SEXPR) return lval_eval_sexpr(e, v);
return v;
}
lval* builtin_op(lenv* e, lval* a, char* op) {
for (int i=0;i<a->count;i++) LASSERT_TYPE(op, a, i, LVAL_NUM);
lval* x = lval_pop(a, 0);
if (strcmp(op, "-") == 0 && a->count == 0) x->num = -x->num;
while (a->count) {
lval* y = lval_pop(a, 0);
if (strcmp(op, "+") == 0) x->num += y->num;
if (strcmp(op, "-") == 0) x->num -= y->num;
if (strcmp(op, "*") == 0) x->num *= y->num;
if (strcmp(op, "/") == 0) {
if (y->num == 0) { lval_del(x); lval_del(y); x = lval_err("Division By Zero!"); break; }
x->num /= y->num;
}
lval_del(y);
}
lval_del(a);
return x;
}
lval* builtin_add(lenv* e, lval* a) { return builtin_op(e, a, "+"); }
lval* builtin_sub(lenv* e, lval* a) { return builtin_op(e, a, "-"); }
lval* builtin_mul(lenv* e, lval* a) { return builtin_op(e, a, "*"); }
lval* builtin_div(lenv* e, lval* a) { return builtin_op(e, a, "/"); }
lval* builtin_head(lenv* e, lval* a) {
LASSERT_NUM("head", a, 1);
LASSERT_TYPE("head", a, 0, LVAL_QEXPR);
LASSERT_NOT_EMPTY("head", a, 0);
lval* v = lval_take(a, 0);
while (v->count > 1) lval_del(lval_pop(v, 1));
return v;
}
lval* builtin_tail(lenv* e, lval* a) {
LASSERT_NUM("tail", a, 1);
LASSERT_TYPE("tail", a, 0, LVAL_QEXPR);
LASSERT_NOT_EMPTY("tail", a, 0);
lval* v = lval_take(a, 0);
lval_del(lval_pop(v, 0));
return v;
}
lval* builtin_list(lenv* e, lval* a) { a->type = LVAL_QEXPR; return a; }
lval* builtin_eval(lenv* e, lval* a) {
LASSERT_NUM("eval", a, 1);
LASSERT_TYPE("eval", a, 0, LVAL_QEXPR);
lval* x = lval_take(a, 0);
x->type = LVAL_SEXPR;
return lval_eval(e, x);
}
lval* builtin_join(lenv* e, lval* a) {
for (int i=0;i<a->count;i++) LASSERT_TYPE("join", a, i, LVAL_QEXPR);
lval* x = lval_pop(a, 0);
while (a->count) x = lval_join(x, lval_pop(a, 0));
lval_del(a);
return x;
}
lval* builtin_lambda(lenv* e, lval* a) {
LASSERT_NUM("\\", a, 2);
LASSERT_TYPE("\\", a, 0, LVAL_QEXPR);
LASSERT_TYPE("\\", a, 1, LVAL_QEXPR);
for (int i=0;i<a->cell[0]->count;i++)
LASSERT(a, a->cell[0]->cell[i]->type == LVAL_SYM, "Cannot define non-symbol. Got %s, Expected %s.", ltype_name(a->cell[0]->cell[i]->type), ltype_name(LVAL_SYM));
lval* formals = lval_pop(a, 0);
lval* body = lval_pop(a, 0);
lval_del(a);
return lval_lambda(formals, body);
}
void lenv_add_builtin(lenv* e, char* name, lbuiltin func) {
lval* k = lval_sym(name);
lval* v = lval_builtin(func);
lenv_put(e, k, v);
lval_del(k); lval_del(v);
}
void lenv_add_builtins(lenv* e) {
lenv_add_builtin(e, "def", builtin_def);
lenv_add_builtin(e, "\\", builtin_lambda);
lenv_add_builtin(e, "=", builtin_put);
lenv_add_builtin(e, "list", builtin_list);
lenv_add_builtin(e, "head", builtin_head);
lenv_add_builtin(e, "tail", builtin_tail);
lenv_add_builtin(e, "eval", builtin_eval);
lenv_add_builtin(e, "join", builtin_join);
lenv_add_builtin(e, "+", builtin_add);
lenv_add_builtin(e, "-", builtin_sub);
lenv_add_builtin(e, "*", builtin_mul);
lenv_add_builtin(e, "/", builtin_div);
lenv_add_builtin(e, "if", builtin_if);
lenv_add_builtin(e, "==", builtin_eq);
lenv_add_builtin(e, "!=", builtin_ne);
lenv_add_builtin(e, ">", builtin_gt);
lenv_add_builtin(e, "<", builtin_lt);
lenv_add_builtin(e, ">=", builtin_ge);
lenv_add_builtin(e, "<=", builtin_le);
}
int main(int argc, char* argv[]) {
mpc_parser_t *Number = mpc_new("number");
mpc_parser_t *Symbol = mpc_new("symbol");
mpc_parser_t *Sexpr = mpc_new("sexpr");
mpc_parser_t *Qexpr = mpc_new("qexpr");
mpc_parser_t *Expr = mpc_new("expr");
mpc_parser_t *Lispy = mpc_new("lispy");
mpca_lang(MPCA_LANG_DEFAULT,
"number : /-?[0-9]+/ ;
"
"symbol : /[a-zA-Z0-9_+\-*\\/\\=<>!&]+/ ;
"
"sexpr : '(' <expr>* ')' ;
"
"qexpr : '{' <expr>* '}' ;
"
"expr : <number> | <symbol> | <sexpr> | <qexpr> ;
"
"lispy : /^/ <expr>* $/ ;",
Number, Symbol, Sexpr, Qexpr, Expr, Lispy);
puts("Lispy Version 0.1");
puts("Press Ctrl+c to Exit
");
lenv* e = lenv_new();
lenv_add_builtins(e);
while (1) {
char* input = readline("lispy> ");
add_history(input);
mpc_result_t r;
if (mpc_parse("<stdin>", input, Lispy, &r)) {
lval* x = lval_eval(e, lval_read(r.output));
lval_println(x);
lval_del(x);
mpc_ast_delete(r.output);
} else {
mpc_err_print(r.error);
mpc_err_delete(r.error);
}
free(input);
}
lenv_del(e);
mpc_cleanup(6, Number, Symbol, Sexpr, Qexpr, Expr, Lispy);
return 0;
}How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
