Inside Python’s List: C‑Level Implementations of Core Methods
This article walks through the CPython source code that defines list’s built‑in methods—append, insert, pop, index, count, remove, reverse, clear, and copy—explaining how each method is wired into the type object, the underlying C logic, time‑complexity characteristics, and common pitfalls such as reference‑count handling and shallow versus deep copying.
Python’s list type is backed by a C struct ( PyListObject) whose methods are registered in the tp_methods array of the type object ( PyTypeObject). Each entry in this array is a PyMethodDef that maps a Python name to a C function and specifies the calling convention.
Method registration
typedef struct _typeobject PyTypeObject;
struct typeobject {
/* ... */
struct PyMethodDef *tp_methods; /* array of method definitions */
/* ... */
};
struct PyMethodDef {
const char *ml_name; /* method name exposed to Python */
PyCFunction ml_meth; /* pointer to the C implementation */
int ml_flags; /* calling convention flags */
const char *ml_doc; /* docstring */
};The list type ( PyList_Type) sets tp_methods to list_method, an array that contains definitions for all list operations.
append – add an element at the end
#define LIST_APPEND_METHODDEF \
{"append", (PyCFunction)list_append, METH_O, list_append__doc__}
static PyObject *list_append(PyListObject *self, PyObject *object) {
if (_PyList_AppendTakeRef(self, Py_NewRef(object)) < 0) {
return NULL;
}
Py_RETURN_NONE;
}
static inline int _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem) {
Py_ssize_t len = PyList_GET_SIZE(self);
Py_ssize_t allocated = self->allocated;
if (allocated > len) {
PyList_SET_ITEM(self, len, newitem);
Py_SET_SIZE(self, len + 1);
return 0;
}
return _PyList_AppendTakeRefListResize(self, newitem);
}When the list has spare capacity, the new item is placed directly at index len and the size is incremented (O(1)). If the list is full, a resize is triggered, which may temporarily cost O(n) but happens infrequently.
insert – add an element at an arbitrary position
#define LIST_INSERT_METHODDEF \
{"insert", (PyCFunction)list_insert, METH_FASTCALL, list_insert__doc__}
static PyObject *list_insert(PyListObject *self, PyObject *const *args, Py_ssize_t nargs) {
if (!_PyArg_CheckPositional("insert", nargs, 2, 2))
return NULL;
Py_ssize_t index;
PyObject *object;
// parse arguments (index, object)
// ... (argument conversion omitted for brevity)
return list_insert_impl(self, index, object);
}
static PyObject *list_insert_impl(PyListObject *self, Py_ssize_t index, PyObject *object) {
if (ins1(self, index, object) == 0)
Py_RETURN_NONE;
return NULL;
}
static int ins1(PyListObject *self, Py_ssize_t where, PyObject *v) {
Py_ssize_t n = Py_SIZE(self);
if (v == NULL) {
PyErr_BadInternalCall();
return -1;
}
if (list_resize(self, n + 1) < 0)
return -1;
if (where < 0) {
where += n;
if (where < 0) where = 0;
}
if (where > n) where = n;
PyObject **items = self->ob_item;
for (Py_ssize_t i = n; i > where; --i)
items[i] = items[i-1];
items[where] = Py_NewRef(v);
return 0;
}The algorithm first ensures enough capacity (calling list_resize), then shifts elements rightward from the insertion point and stores the new reference. The operation is O(n) because of the shift.
pop – remove and return an element
#define LIST_POP_METHODDEF \
{"pop", (PyCFunction)list_pop, METH_FASTCALL, list_pop__doc__}
static PyObject *list_pop(PyListObject *self, PyObject *const *args, Py_ssize_t nargs) {
Py_ssize_t index = -1;
if (!_PyArg_CheckPositional("pop", nargs, 0, 1))
return NULL;
if (nargs == 1) {
// convert args[0] to integer index
// ... (conversion omitted)
}
return list_pop_impl(self, index);
}
static PyObject *list_pop_impl(PyListObject *self, Py_ssize_t index) {
if (Py_SIZE(self) == 0) {
PyErr_SetString(PyExc_IndexError, "pop from empty list");
return NULL;
}
if (index < 0)
index += Py_SIZE(self);
if (!valid_index(index, Py_SIZE(self))) {
PyErr_SetString(PyExc_IndexError, "pop index out of range");
return NULL;
}
PyObject **items = self->ob_item;
PyObject *v = items[index];
Py_ssize_t newsize = Py_SIZE(self) - 1;
if (newsize == 0) {
Py_INCREF(v);
_list_clear(self);
} else {
if ((newsize - index) > 0)
memmove(&items[index], &items[index+1], (newsize - index) * sizeof(PyObject *));
if (list_resize(self, newsize) < 0) {
// restore on failure
memmove(&items[index+1], &items[index], (newsize - index) * sizeof(PyObject *));
items[index] = v;
return NULL;
}
}
return v;
}When the popped element is the last one, the list size is simply decremented; otherwise the tail segment is shifted left. The operation is O(1) for the last element and O(n) for arbitrary positions.
index – locate the first occurrence of a value
#define LIST_INDEX_METHODDEF \
{"index", (PyCFunction)list_index, METH_FASTCALL, list_index__doc__}
static PyObject *list_index(PyListObject *self, PyObject *const *args, Py_ssize_t nargs) {
PyObject *value;
Py_ssize_t start = 0, stop = PY_SSIZE_T_MAX;
// parse up to three arguments: value, start, stop
// ... (parsing omitted)
return list_index_impl(self, value, start, stop);
}
static PyObject *list_index_impl(PyListObject *self, PyObject *value, Py_ssize_t start, Py_ssize_t stop) {
if (start < 0) {
start += Py_SIZE(self);
if (start < 0) start = 0;
}
if (stop < 0) {
stop += Py_SIZE(self);
if (stop < 0) stop = 0;
}
for (Py_ssize_t i = start; i < stop && i < Py_SIZE(self); ++i) {
PyObject *obj = self->ob_item[i];
Py_INCREF(obj);
int cmp = PyObject_RichCompareBool(obj, value, Py_EQ);
Py_DECREF(obj);
if (cmp > 0)
return PyLong_FromSsize_t(i);
else if (cmp < 0)
return NULL; // comparison error
}
PyErr_Format(PyExc_ValueError, "%R is not in list", value);
return NULL;
}The linear scan yields O(n) worst‑case time; the function first adjusts negative indices, then compares object identity before falling back to rich comparison.
count – number of occurrences of a value
#define LIST_COUNT_METHODDEF \
{"count", (PyCFunction)list_count, METH_O, list_count__doc__}
static PyObject *list_count(PyListObject *self, PyObject *value) {
Py_ssize_t count = 0;
for (Py_ssize_t i = 0; i < Py_SIZE(self); ++i) {
PyObject *obj = self->ob_item[i];
if (obj == value) {
++count;
continue;
}
Py_INCREF(obj);
int cmp = PyObject_RichCompareBool(obj, value, Py_EQ);
Py_DECREF(obj);
if (cmp > 0)
++count;
else if (cmp < 0)
return NULL; // comparison error
}
return PyLong_FromSsize_t(count);
}Because it must examine every element, count is O(n). The implementation first checks pointer equality (fast path) before invoking the rich‑compare routine.
remove – delete the first matching element
#define LIST_REMOVE_METHODDEF \
{"remove", (PyCFunction)list_remove, METH_O, list_remove__doc__}
static PyObject *list_remove(PyListObject *self, PyObject *value) {
for (Py_ssize_t i = 0; i < Py_SIZE(self); ++i) {
PyObject *obj = self->ob_item[i];
Py_INCREF(obj);
int cmp = PyObject_RichCompareBool(obj, value, Py_EQ);
Py_DECREF(obj);
if (cmp > 0) {
if (list_ass_slice(self, i, i+1, (PyObject *)NULL) == 0)
Py_RETURN_NONE;
return NULL;
} else if (cmp < 0) {
return NULL; // comparison error
}
}
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}The function walks the list, uses the same identity‑then‑equality test as count, and once a match is found calls list_ass_slice (equivalent to self[i:i+1] = []) to delete the slice. The overall cost is O(n).
reverse – in‑place reversal
#define LIST_REVERSE_METHODDEF \
{"reverse", (PyCFunction)list_reverse, METH_NOARGS, list_reverse__doc__}
static PyObject *list_reverse(PyListObject *self, PyObject *Py_UNUSED(ignored)) {
return list_reverse_impl(self);
}
static PyObject *list_reverse_impl(PyListObject *self) {
if (Py_SIZE(self) > 1)
reverse_slice(self->ob_item, self->ob_item + Py_SIZE(self));
Py_RETURN_NONE;
}
static void reverse_slice(PyObject **lo, PyObject **hi) {
--hi; // point to last element
while (lo < hi) {
PyObject *tmp = *lo;
*lo = *hi;
*hi = tmp;
++lo;
--hi;
}
}The algorithm uses two pointers moving towards each other and swaps the referenced objects. It runs in O(n) time and O(1) extra space.
clear – empty the list
#define LIST_CLEAR_METHODDEF \
{"clear", (PyCFunction)list_clear, METH_NOARGS, list_clear__doc__}
static PyObject *list_clear(PyListObject *self, PyObject *Py_UNUSED(ignored)) {
return list_clear_impl(self);
}
static PyObject *list_clear_impl(PyListObject *self) {
_list_clear(self);
Py_RETURN_NONE;
}
static int _list_clear(PyListObject *a) {
PyObject **item = a->ob_item;
if (item != NULL) {
Py_ssize_t i = Py_SIZE(a);
Py_SET_SIZE(a, 0);
a->ob_item = NULL;
a->allocated = 0;
while (--i >= 0)
Py_XDECREF(item[i]);
PyMem_Free(item);
}
return 0;
}All element references are decremented, the underlying array is freed, and the size and allocated fields are reset to zero.
copy – shallow copy of a list
#define LIST_COPY_METHODDEF \
{"copy", (PyCFunction)list_copy, METH_NOARGS, list_copy__doc__}
static PyObject *list_copy(PyListObject *self, PyObject *Py_UNUSED(ignored)) {
return list_copy_impl(self);
}
static PyObject *list_copy_impl(PyListObject *self) {
return list_slice(self, 0, Py_SIZE(self));
}
static PyObject *list_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh) {
Py_ssize_t len = ihigh - ilow;
if (len <= 0)
return PyList_New(0);
PyListObject *np = (PyListObject *)list_new_prealloc(len);
if (np == NULL)
return NULL;
PyObject **src = a->ob_item + ilow;
PyObject **dest = np->ob_item;
for (Py_ssize_t i = 0; i < len; ++i) {
PyObject *v = src[i];
dest[i] = Py_NewRef(v);
}
Py_SET_SIZE(np, len);
return (PyObject *)np;
}The copy operation creates a new list object and copies references to the original elements (shallow copy). Because only the pointers are duplicated, mutating a mutable element in either list is reflected in the other.
Shallow vs. deep copy and common pitfalls
Python variables always hold references (pointers) to objects. Assigning b = a copies the pointer; both names refer to the same object. The list.copy() method or slice syntax lst[:] creates a new list that references the same element objects. For a true deep copy, the copy.deepcopy function recursively copies mutable objects while leaving immutable objects shared.
Common pitfalls arise from the fact that list multiplication ( lst = [[]] * n) replicates the same inner list reference n times, so mutating one inner list mutates all. Similarly, dict.fromkeys(keys, []) assigns the same empty list to every key. Understanding that the underlying storage holds pointers prevents these bugs.
Conclusion
The CPython implementation reveals that Python’s list is a dynamic array of PyObject * pointers. Methods differ mainly in how they manage capacity, shift elements, and handle reference counts. Knowing the exact C logic clarifies time‑complexity guarantees (O(1) amortized for append, O(n) for insert, pop at arbitrary positions, index, count, remove) and explains subtle behaviours such as shallow copies, deep copies, and reference‑count side effects.
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contactand we will review it promptly.
Satori Komeiji's Programming Classroom
Python and Rust developer; I write about any topics you're interested in. Follow me! (#^.^#)
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
