Re: Unique string storage, CONS cell lists

Consultant (amrit@xvt.com)
Tue, 5 Apr 1994 15:54:47 -0600 (MDT)

> About cons-cell lists:
>
> > Speaking of lisp, is anyone interested in a cons cell object?
>

I once wrote a cons-cell style linked list in the style of lisp; this was at
a point where I was interacting with Guido on several language extensions.
I never wrote to him about the LISP module, because studies we did here showed
that the performance benifits were not very noticable over lists/tuples and
I didn't want to introduce yet another sequence datatype without being able
to show a more marked performance improvement.

Anyway, if there is enough interest in lisp processing, then perhaps I was
mistaken.

NOTE: This module was written circa Python 9.9, so there is no hash method
defined. (9.9 did not allow arbitrary hashable objects) Since the lisp
list is an immutable object, it should support hashing. If there is enough
interest in the list datatype, I can add it (or if some other brave soul wants
to, that's OK :-) Lots of the basic code (compare, etc.) came from
tupleobject.c, so one could borrow the hash code from there. (These (nested
parens) are starting (to (be (very lispish!)))))

So, the modules interface is like this:

>>> from lisp import * # you get cons, nil, & list

>>> x = list(0, 1, 2)
>>> print x
(0, 1, 2) # prints like a tuple (to reflect immutability)

>>> `x`
list(0, 1, 2) # repr() prints list constructor

>>> cons('a', x) # normal cons op
('a', 0, 1, 2)

>>> x[1:] # cdr(x); can also use x.rest
(1, 2) # data structure copy can be avoided for lists

>>> x.first # car(x); can also use x[0]
1

>>> x + list(3, 4) # append; efficient due to sublist sharing
(0, 1, 2, 3, 4)

>>> cons('a', cons('c', nil))
('a', 'c')

So, here it is: a true native Python linked list implementation :-)

*** lispmodule.c ***

/***********************************************************
Copyright 1991, 1992 by Stichting Mathematisch Centrum, Amsterdam, The
Netherlands.

All Rights Reserved

Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both that copyright notice and this permission notice appear in
supporting documentation, and that the names of Stichting Mathematisch
Centrum or CWI not be used in advertising or publicity pertaining to
distribution of the software without specific, written prior permission.

STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

******************************************************************/

/* ConsList objects
*
* Exported from this module:
*
* list: create a linked list from the argument list.
* if no arguments are given, the empty list is created.
*
* nil: The empty list constant; same as list()
*
* cons: prepend an object onto a list.
*
* Lists support the following methods:
*
* __add__ append two lists
* __len__ Get the length of a list
* __getitem__ index a list; optimized to work with "for" loops
* __getslice__ Slice a list
* first Get the first element of a list
* rest Get the remainder of a list when the first element
* is removed.
*
* Linked lists are immutable objects; this improves performance considerably.
*/

#include <assert.h>
#include "allobj.h"
#include "strctmem.h"
#include "modsupp.h" /* For getargs() etc. */

/* cons cells are two element pairs of pointers; the car is always a
Python object, and the cdr is another cell. Improper lists are not
supported with this list implementation. For a nested list, the car
pointer will point to another Python ConsList object. To make memory
allocation fast and efficient, cells are allocated from a freelist we keep.
*/

#define BLOCK_SIZE 512
#define N_CELLS (BLOCK_SIZE / sizeof(CELL))

typedef struct cell {
OB_HEAD
unsigned short len; /* length of the sublist */
object *car;
struct cell *cdr;
} consobject, CELL;

extern typeobject Constype; /* Really static, forward */

#define is_consobject(v) ((v)->ob_type == &Constype)

static CELL _Nil; /* the value nil */
static CELL *freelist; /* freelist of cons cells */
static CELL *cons PROTO((object *, CELL *)); /* allocation routine */
static CELL *allocateblock PROTO((void)); /* block allocator */

#define Nil (&_Nil)

static object *
newconsobject(self, arg)
object *self;
object *arg;
{
int len;
register int i;
CELL *result;

if ((len = gettuplesize(arg)) < 0)
goto Fail;

INCREF(Nil);
result = Nil;

for (i = len-1; i >= 0; --i) {
object *item;
if ((item = gettupleitem(arg, i)) == NULL)
goto Fail;

if ((result = cons(item, result)) == NULL)
goto Fail;
}

return (object *) result;

Fail:
DECREF(result);
return NULL;
}

/* conslist methods */

static void
list_dealloc(cell)
CELL *cell;
{
DECREF(cell->car);
DECREF(cell->cdr);

cell->cdr = freelist;
freelist = cell;
}

static int
list_compare(v, w)
consobject *v, *w;
{
int len = (v->len < w->len) ? v->len : w->len;
int i;

for (i = 0; i < len; i++) {
int cmp = cmpobject(v->car, w->car);
if (cmp != 0)
return cmp;

v = v->cdr;
w = w->cdr;
}

return v->len - w->len;
}

static object *
list_getattr(list, name)
consobject *list;
char *name;
{
struct memberlist cell_members[] = {
{"first", T_OBJECT, offsetof(CELL, car), READONLY},
{"rest", T_OBJECT, offsetof(CELL, cdr), READONLY},
{NULL}
};

return getmember((char *) list, cell_members, name);
}

static int
list_print(list, fp, flags)
consobject *list;
FILE *fp;
int flags;
{
CELL *ptr;
fprintf(fp, "(");
for (ptr = list; ptr != Nil; ptr = ptr->cdr) {
if (ptr != list)
fprintf(fp, ", ");
if (printobject(ptr->car, fp, flags) != 0)
return -1;
}
if (list->len == 1)
fprintf(fp, ",");
fprintf(fp, ")");
return 0;
}

object *
list_repr(v)
consobject *v;
{
object *s, *t, *comma;
CELL *ptr;
if (v->len == 0)
s = newstringobject("nil");
else {
s = newstringobject("list(");
comma = newstringobject(", ");
for (ptr = v; ptr != Nil; ptr = ptr->cdr) {
if (ptr != v)
joinstring(&s, comma);
t = reprobject(ptr->car);
joinstring(&s, t);
DECREF(t);
}
DECREF(comma);
t = newstringobject(")");
joinstring(&s, t);
DECREF(t);
}
return s;
}

static int
list_length(a)
consobject *a;
{
return a->len;
}

static object *
list_item(a, i)
consobject *a;
int i;
{
while (--i >= 0)
a = a->cdr;

INCREF(a->car);
return a->car;
}

static CELL *append(L1, L2)
CELL *L1, *L2;
{
if (L1 == Nil) {
INCREF(L2);
return L2;
}
else
return cons(L1->car, append(L1->cdr, L2));
}

static object *
list_concat(x, y)
consobject *x;
consobject *y;
{
if (!is_consobject(y)) {
err_badarg();
return NULL;
}

return (object *) append(x, y);
}

static CELL *
getslice(list, start, len)
CELL *list;
int start, len;
{
if (len == 0) {
INCREF(Nil);
return Nil;
}

else if (start > 0)
return getslice(list->cdr, start - 1, len);

else
return cons(list->car, getslice(list->cdr, start, len - 1));
}

static object *
list_slice(a, low, high)
consobject *a;
int low, high;
{
CELL *ptr;
int n;

if (low < 0)
low = 0;
else if (low > a->len)
low = a->len;
if (high < 0)
high = 0;
if (high < low)
high = low;
else if (high > a->len)
high = a->len;

if (high == a->len) { /* just cdr down */
switch (low) {
case 1:
a = a->cdr; /* fall through */
case 0:
INCREF(a);
return (object *) a;

default:
while (--low >= 0)
a = a->cdr;

INCREF(a);
return (object *) a;
}
}

else
return (object *) getslice(a, low, high - low);
}

static object *
list_repeat(a, n)
consobject *a;
int n;
{
CELL *result;

if (n < 0)
n = 0;

switch (n) {
case 0:
INCREF(Nil);
return (object *) Nil;

case 1:
INCREF(a);
return (object *) a;

default:
result = Nil;
while (n-- > 0)
result = append(result, a);

return (object *) result;
}
}

static sequence_methods list_as_sequence = {
list_length, /*sq_length*/
list_concat, /*sq_concat*/
list_repeat, /*sq_repeat*/
list_item, /*sq_item*/
list_slice, /*sq_slice*/
0, /*sq_ass_item*/
0, /*sq_ass_slice*/
};

static typeobject Constype = {
OB_HEAD_INIT(&Typetype)
0, /*ob_size*/
"ConsCell", /*tp_name*/
sizeof(consobject), /*tp_size*/
0, /*tp_itemsize*/
list_dealloc, /*tp_dealloc*/
list_print, /*tp_print*/
list_getattr, /*tp_getattr*/
0, /*tp_setattr*/
list_compare, /*tp_compare*/
list_repr, /*tp_repr*/
0, /*tp_as_number*/
&list_as_sequence, /*tp_as_sequence*/
0, /*tp_as_mapping*/
};

/* memory allocation functions */

static CELL *cons(obj, list)
object *obj;
CELL *list;
{
CELL *cell;

if (freelist == NULL) {
if ((freelist = allocateblock()) == NULL)
return NULL;
}

cell = freelist;
freelist = freelist->cdr;

cell->car = obj;
cell->cdr = list;
cell->len = list->len + 1;

NEWREF(cell);
INCREF(obj);

return cell;
}

static object *python_cons(self, args)
object *self;
object *args;
{
object *item;
consobject *list;
CELL *result;

if (!getargs(args, "(OO)", &item, &list))
return NULL;

if (!is_consobject(list)) {
err_badarg();
return NULL;
}

INCREF(list);
return (object *) cons(item, list);
}

static CELL *allocateblock()
{
CELL *p, *q;

if ((p = (CELL *) malloc(N_CELLS * sizeof(CELL))) == NULL)
return (CELL *) err_nomem();

q = &p[N_CELLS];
while (--q > p) {
q->cdr = q-1;
q->ob_type = &Constype;
}
q->cdr = NULL;
q->ob_type = &Constype;

return &p[N_CELLS - 1];
}

void initlisp()
{
object *m;
static struct methodlist Lisp_methods[] = {
{"list", newconsobject, 1},
{"cons", python_cons},
{NULL, NULL} /* sentinel */
};

INCREF(None);
INCREF(None);

_Nil.ob_type = &Constype;
_Nil.ob_refcnt = 1;
_Nil.len = 0;
_Nil.cdr = (CELL *) (_Nil.car = None);

m = initmodule("lisp", Lisp_methods);
INCREF(Nil);
dictinsert(getmoduledict(m), "nil", (object *) Nil);

if (err_occurred())
fatal("can't initialize module lisp");
}