Kevin Cuzner's Personal Blog

Electronics, Embedded Systems, and Software are my breakfast, lunch, and dinner.


A New Blog

For the past several years I've struggled with maintaining the wordpress blog on this site. Starting 3 or 4 years ago the blog began to receive sustained stronger than usual traffic from data centers trying to post comments or just brute-forcing the admin login page. This peaked around 2021 when I finally decided to turn off all commenting functionality. This cooled things down except for requiring me to "kick" the server every few days as it ran out of file handles due to some misconfiguration on my part combined with the spam traffic.

This became annoying and I needed to find something that would be even lower maintenance. To that end, I've written my first functioning blogging software since 2006 or so. There is no dynamic content, all of it is statically rendered, and the blog content itself is stored in GitHub with updates managed using GitHub hooks. The content itself is written with ReStructured Text and I think I've created a pretty easy-to-use and low-maintenance blog platform (famous last words). Ask me in a decade if I was actually successful lol.

In addition, I've found that there are newer, cheaper AWS instances that would suit my needs. I started using this instance over 10 years ago and before I shut it down, I had an uptime of almost 2.5 years. It's truly the end of an era:

old-server-uptime.png

Writing reusable USB device descriptors with some XML, Python, and C

A recent project required me to reuse (once again) my USB HID device driver. This is my third or fourth project using this and I had started to find it annoying to need to hand-modify a heavily-commented, self-referencing array of uint8_t's. I figured there must be a better way, so I decided to try something different.

In this post I will present a script that turns this madness, which lives in a separate file:

  1/**
  2 * Device descriptor
  3 */
  4static const USB_DATA_ALIGN uint8_t dev_descriptor[] = {
  5    18, //bLength
  6    1, //bDescriptorType
  7    0x00, 0x02, //bcdUSB
  8    0x00, //bDeviceClass (defined by interfaces)
  9    0x00, //bDeviceSubClass
 10    0x00, //bDeviceProtocl
 11    USB_CONTROL_ENDPOINT_SIZE, //bMaxPacketSize0
 12    0xc0, 0x16, //idVendor
 13    0xdc, 0x05, //idProduct
 14    0x11, 0x00, //bcdDevice
 15    1, //iManufacturer
 16    2, //iProduct
 17    0, //iSerialNumber,
 18    1, //bNumConfigurations
 19};
 20
 21static const USB_DATA_ALIGN uint8_t hid_report_descriptor[] = {
 22    HID_SHORT(0x04, 0x00, 0xFF), //USAGE_PAGE (Vendor Defined)
 23    HID_SHORT(0x08, 0x01), //USAGE (Vendor 1)
 24    HID_SHORT(0xa0, 0x01), //COLLECTION (Application)
 25    HID_SHORT(0x08, 0x01), //  USAGE (Vendor 1)
 26    HID_SHORT(0x14, 0x00), //  LOGICAL_MINIMUM (0)
 27    HID_SHORT(0x24, 0xFF, 0x00), //LOGICAL_MAXIMUM (0x00FF)
 28    HID_SHORT(0x74, 0x08), //  REPORT_SIZE (8)
 29    HID_SHORT(0x94, 64), //  REPORT_COUNT(64)
 30    HID_SHORT(0x80, 0x02), //  INPUT (Data, Var, Abs)
 31    HID_SHORT(0x08, 0x01), //  USAGE (Vendor 1)
 32    HID_SHORT(0x90, 0x02), //  OUTPUT (Data, Var, Abs)
 33    HID_SHORT(0xc0),       //END_COLLECTION
 34};
 35
 36/**
 37 * Configuration descriptor
 38 */
 39static const USB_DATA_ALIGN uint8_t cfg_descriptor[] = {
 40    9, //bLength
 41    2, //bDescriptorType
 42    9 + 9 + 9 + 7 + 7, 0x00, //wTotalLength
 43    1, //bNumInterfaces
 44    1, //bConfigurationValue
 45    0, //iConfiguration
 46    0x80, //bmAttributes
 47    250, //bMaxPower
 48    /* INTERFACE 0 BEGIN */
 49    9, //bLength
 50    4, //bDescriptorType
 51    0, //bInterfaceNumber
 52    0, //bAlternateSetting
 53    2, //bNumEndpoints
 54    0x03, //bInterfaceClass (HID)
 55    0x00, //bInterfaceSubClass (0: no boot)
 56    0x00, //bInterfaceProtocol (0: none)
 57    0, //iInterface
 58        /* HID Descriptor */
 59        9, //bLength
 60        0x21, //bDescriptorType (HID)
 61        0x11, 0x01, //bcdHID
 62        0x00, //bCountryCode
 63        1, //bNumDescriptors
 64        0x22, //bDescriptorType (Report)
 65        sizeof(hid_report_descriptor), 0x00,
 66        /* INTERFACE 0, ENDPOINT 1 BEGIN */
 67        7, //bLength
 68        5, //bDescriptorType
 69        0x81, //bEndpointAddress (endpoint 1 IN)
 70        0x03, //bmAttributes, interrupt endpoint
 71        USB_HID_ENDPOINT_SIZE, 0x00, //wMaxPacketSize,
 72        10, //bInterval (10 frames)
 73        /* INTERFACE 0, ENDPOINT 1 END */
 74        /* INTERFACE 0, ENDPOINT 2 BEGIN */
 75        7, //bLength
 76        5, //bDescriptorType
 77        0x02, //bEndpointAddress (endpoint 2 OUT)
 78        0x03, //bmAttributes, interrupt endpoint
 79        USB_HID_ENDPOINT_SIZE, 0x00, //wMaxPacketSize
 80        10, //bInterval (10 frames)
 81        /* INTERFACE 0, ENDPOINT 2 END */
 82    /* INTERFACE 0 END */
 83};
 84
 85static const USB_DATA_ALIGN uint8_t lang_descriptor[] = {
 86    4, //bLength
 87    3, //bDescriptorType
 88    0x09, 0x04 //wLANGID[0]
 89};
 90
 91static const USB_DATA_ALIGN uint8_t manuf_descriptor[] = {
 92    2 + 15 * 2, //bLength
 93    3, //bDescriptorType
 94    'k', 0x00, //wString
 95    'e', 0x00,
 96    'v', 0x00,
 97    'i', 0x00,
 98    'n', 0x00,
 99    'c', 0x00,
100    'u', 0x00,
101    'z', 0x00,
102    'n', 0x00,
103    'e', 0x00,
104    'r', 0x00,
105    '.', 0x00,
106    'c', 0x00,
107    'o', 0x00,
108    'm', 0x00
109};
110
111static const USB_DATA_ALIGN uint8_t product_descriptor[] = {
112    2 + 14 * 2, //bLength
113    3, //bDescriptorType
114    'L', 0x00,
115    'E', 0x00,
116    'D', 0x00,
117    ' ', 0x00,
118    'W', 0x00,
119    'r', 0x00,
120    'i', 0x00,
121    's', 0x00,
122    't', 0x00,
123    'w', 0x00,
124    'a', 0x00,
125    't', 0x00,
126    'c', 0x00,
127    'h', 0x00
128};
129
130const USBDescriptorEntry usb_descriptors[] = {
131    { 0x0100, 0x0000, sizeof(dev_descriptor), dev_descriptor },
132    { 0x0200, 0x0000, sizeof(cfg_descriptor), cfg_descriptor },
133    { 0x0300, 0x0000, sizeof(lang_descriptor), lang_descriptor },
134    { 0x0301, 0x0409, sizeof(manuf_descriptor), manuf_descriptor },
135    { 0x0302, 0x0409, sizeof(product_descriptor), product_descriptor },
136    { 0x2200, 0x0000, sizeof(hid_report_descriptor), hid_report_descriptor },
137    { 0x0000, 0x0000, 0x00, NULL }
138};

Into these comment blocks which can live anywhere in the source and are somewhat more readable:

  1/**
  2 * <descriptor id="device" type="0x01">
  3 *  <length name="bLength" size="1" />
  4 *  <type name="bDescriptorType" size="1" />
  5 *  <word name="bcdUSB">0x0200</word>
  6 *  <byte name="bDeviceClass">0</byte>
  7 *  <byte name="bDeviceSubClass">0</byte>
  8 *  <byte name="bDeviceProtocol">0</byte>
  9 *  <byte name="bMaxPacketSize0">USB_CONTROL_ENDPOINT_SIZE</byte>
 10 *  <word name="idVendor">0x16c0</word>
 11 *  <word name="idProduct">0x05dc</word>
 12 *  <word name="bcdDevice">0x0010</word>
 13 *  <ref name="iManufacturer" type="0x03" refid="manufacturer" size="1" />
 14 *  <ref name="iProduct" type="0x03" refid="product" size="1" />
 15 *  <byte name="iSerialNumber">0</byte>
 16 *  <count name="bNumConfigurations" type="0x02" size="1" />
 17 * </descriptor>
 18 * <descriptor id="lang" type="0x03" first="first">
 19 *  <length name="bLength" size="1" />
 20 *  <type name="bDescriptorType" size="1" />
 21 *  <foreach type="0x03" unique="unique">
 22 *    <echo name="wLang" />
 23 *  </foreach>
 24 * </descriptor>
 25 * <descriptor id="manufacturer" type="0x03" wIndex="0x0409">
 26 *  <property name="wLang" size="2">0x0409</property>
 27 *  <length name="bLength" size="1" />
 28 *  <type name="bDescriptorType" size="1" />
 29 *  <string name="wString">kevincuzner.com</string>
 30 * </descriptor>
 31 * <descriptor id="product" type="0x03" wIndex="0x0409">
 32 *  <property name="wLang" size="2">0x0409</property>
 33 *  <length name="bLength" size="1" />
 34 *  <type name="bDescriptorType" size="1" />
 35 *  <string name="wString">LED Wristwatch</string>
 36 * </descriptor>
 37 * <descriptor id="configuration" type="0x02">
 38 *  <length name="bLength" size="1" />
 39 *  <type name="bDescriptorType" size="1" />
 40 *  <length name="wTotalLength" size="2" all="all" />
 41 *  <count name="bNumInterfaces" type="0x04" associated="associated" size="1" />
 42 *  <byte name="bConfigurationValue">1</byte>
 43 *  <byte name="iConfiguration">0</byte>
 44 *  <byte name="bmAttributes">0x80</byte>
 45 *  <byte name="bMaxPower">250</byte>
 46 *  <children type="0x04" />
 47 * </descriptor>
 48 */
 49
 50/**
 51 * <include>usb_hid.h</include>
 52 * <descriptor id="hid_interface" type="0x04" childof="configuration">
 53 *  <length name="bLength" size="1" />
 54 *  <type name="bDescriptorType" size="1" />
 55 *  <index name="bInterfaceNumber" size="1" />
 56 *  <byte name="bAlternateSetting">0</byte>
 57 *  <count name="bNumEndpoints" type="0x05" associated="associated" size="1" />
 58 *  <byte name="bInterfaceClass">0x03</byte>
 59 *  <byte name="bInterfaceSubClass">0x00</byte>
 60 *  <byte name="bInterfaceProtocol">0x00</byte>
 61 *  <byte name="iInterface">0</byte>
 62 *  <children type="0x21" />
 63 *  <children type="0x05" />
 64 * </descriptor>
 65 * <descriptor id="hid" type="0x21" childof="hid_interface">
 66 *  <length name="bLength" size="1" />
 67 *  <type name="bDescriptorType" size="1" />
 68 *  <word name="bcdHID">0x0111</word>
 69 *  <byte name="bCountryCode">0x00</byte>
 70 *  <count name="bNumDescriptors" type="0x22" size="1" associated="associated" />
 71 *  <foreach type="0x22" associated="associated">
 72 *    <echo name="bDescriptorType" />
 73 *    <echo name="wLength" />
 74 *  </foreach>
 75 * </descriptor>
 76 * <descriptor id="hid_in_endpoint" type="0x05" childof="hid_interface">
 77 *  <length name="bLength" size="1" />
 78 *  <type name="bDescriptorType" size="1" />
 79 *  <inendpoint name="bEndpointAddress" define="HID_IN_ENDPOINT" />
 80 *  <byte name="bmAttributes">0x03</byte>
 81 *  <word name="wMaxPacketSize">USB_HID_ENDPOINT_SIZE</word>
 82 *  <byte name="bInterval">10</byte>
 83 * </descriptor>
 84 * <descriptor id="hid_out_endpoint" type="0x05" childof="hid_interface">
 85 *  <length name="bLength" size="1" />
 86 *  <type name="bDescriptorType" size="1" />
 87 *  <outendpoint name="bEndpointAddress" define="HID_OUT_ENDPOINT" />
 88 *  <byte name="bmAttributes">0x03</byte>
 89 *  <word name="wMaxPacketSize">USB_HID_ENDPOINT_SIZE</word>
 90 *  <byte name="bInterval">10</byte>
 91 * </descriptor>
 92 * <descriptor id="hid_report" childof="hid" top="top" type="0x22" order="1" wIndexType="0x04">
 93 *  <hidden name="bDescriptorType" size="1">0x22</hidden>
 94 *  <hidden name="wLength" size="2">sizeof(hid_report)</hidden>
 95 *  <raw>
 96 *  HID_SHORT(0x04, 0x00, 0xFF), //USAGE_PAGE (Vendor Defined)
 97 *  HID_SHORT(0x08, 0x01), //USAGE (Vendor 1)
 98 *  HID_SHORT(0xa0, 0x01), //COLLECTION (Application)
 99 *  HID_SHORT(0x08, 0x01), //  USAGE (Vendor 1)
100 *  HID_SHORT(0x14, 0x00), //  LOGICAL_MINIMUM (0)
101 *  HID_SHORT(0x24, 0xFF, 0x00), //LOGICAL_MAXIMUM (0x00FF)
102 *  HID_SHORT(0x74, 0x08), //  REPORT_SIZE (8)
103 *  HID_SHORT(0x94, 64), //  REPORT_COUNT(64)
104 *  HID_SHORT(0x80, 0x02), //  INPUT (Data, Var, Abs)
105 *  HID_SHORT(0x08, 0x01), //  USAGE (Vendor 1)
106 *  HID_SHORT(0x90, 0x02), //  OUTPUT (Data, Var, Abs)
107 *  HID_SHORT(0xc0),       //END_COLLECTION
108 *  </raw>
109 * </descriptor>
110 */

In most of my projects before this one I would have something like the first script shown above sitting in a file by itself, declaring a bunch of uint8_t arrays and a usb_descriptors[] table constant that would be consumed by my USB driver as it searched for USB descriptors. A header file that exposes the usb_descriptors[] table would also be found in the project. Any USB descriptor that had to be returned by the device would be found in this table. To make things more complex, descriptors like the configuration descriptor have to declare all of the device interfaces and so pieces and parts of each separate USB interface component would be interspersed inside of other descriptors.

I've been using this structure for some time after writing my first USB driver after reading through the Teensy driver. This is probably the only structural code that has made it all the way from the Teensy driver into all of my other code.

With this new script I've written there's no more need for manually computing how long a descriptor is or needing to modify the configuration descriptor every time a new interface has been added. All the parts of a descriptor are self-contained in the source file that defines a particular interface and can be easily moved around from project to project.

All the code for this post lives here:

`https://github.com/kcuzner/midi-fader <https://github.com/kcuzner/midi-fader>`__

Arranging components in a circle with Kicad

I've been using kicad for just about all of my designs for a little over 5 years now. It took a little bit of a learning curve, but I've really come to love it, especially with the improvements by CERN that came out in version 4. One of the greatest features, in my opinion, is the Python Scripting Console in the PCB editor (pcbnew). It gives (more or less) complete access to the design hierarchy so that things like footprints can be manipulated in a scripted fashion.

In my most recent design, the LED Watch, I used this to script myself a tool for arranging footprints in a circle. What I want to show today was how I did it and how to use it so that you can make your own scripting tools (or just arrange stuff in a circle).

*The python console can be found in pcbnew under Tools->Scripting Console. *

Step 1: Write the script

When writing a script for pcbnew, it is usually helpful to have some documentation. Some can be found here, though I mostly used "dir" a whole bunch and had it print me the structure of the various things once I found the points to hook in. The documentation is fairly spartan at this point, so that made things easier.

Here's my script:

 1#!/usr/bin/env python2
 2
 3# Random placement helpers because I'm tired of using spreadsheets for doing this
 4#
 5# Kevin Cuzner
 6
 7import math
 8from pcbnew import *
 9
10def place_circle(refdes, start_angle, center, radius, component_offset=0, hide_ref=True, lock=False):
11    """
12    Places components in a circle
13    refdes: List of component references
14    start_angle: Starting angle
15    center: Tuple of (x, y) mils of circle center
16    radius: Radius of the circle in mils
17    component_offset: Offset in degrees for each component to add to angle
18    hide_ref: Hides the reference if true, leaves it be if None
19    lock: Locks the footprint if true
20    """
21    pcb = GetBoard()
22    deg_per_idx = 360 / len(refdes)
23    for idx, rd in enumerate(refdes):
24        part = pcb.FindModuleByReference(rd)
25        angle = (deg_per_idx * idx + start_angle) % 360;
26        print "{0}: {1}".format(rd, angle)
27        xmils = center[0] + math.cos(math.radians(angle)) * radius
28        ymils = center[1] + math.sin(math.radians(angle)) * radius
29        part.SetPosition(wxPoint(FromMils(xmils), FromMils(ymils)))
30        part.SetOrientation(angle * -10)
31        if hide_ref is not None:
32            part.Reference().SetVisible(not hide_ref)
33    print "Placement finished. Press F11 to refresh."

There are several arguments to this function: a list of reference designators (["D1", "D2", "D3"] etc), the angle at which the first component should be placed, the position in mils for the center of the circle, and the radius of the circle in mils. Once the function is invoked, it will find all of the components indicated in the reference designator list and arrange them into the desired circle.

Step 2: Save the script

In order to make life easier, it is best if the script is saved somewhere that the pcbnew python interpreter knows where to look. I found a good location at "/usr/share/kicad/scripting/plugins", but the list of all paths that will be searched can be easily found by opening the python console and executing "import sys" followed by "print(sys.path)". Pick a path that makes sense and save your script there. I saved mine as "placement_helpers.py" since I intend to add more functions to it as situations require.

Step 3: Open your PCB and run the script

Before you can use the scripts on your footprints, they need to be imported. Make sure you execute the "Read Netlist" command before continuing.

The scripting console can be found under Tools->Scripting Console. Once it is opened you will see a standard python (2) command prompt. If you placed your script in a location where the Scripting Console will search, you should be able to do something like the following:

 1PyCrust 0.9.8 - KiCAD Python Shell
 2Python 2.7.13 (default, Feb 11 2017, 12:22:40)
 3[GCC 6.3.1 20170109] on linux2
 4Type "help", "copyright", "credits" or "license" for more information.
 5>>> import placement_helpers
 6>>> placement_helpers.place_circle(["D1", "D2"], 0, (500, 500), 1000)
 7D1: 0
 8D2: 180
 9Placement finished. Press F11 to refresh.
10>>>

Now, pcbnew may not recognize that your PCB has changed and enable the save button. You should do something like lay a trace or some other board modification so that you can save any changes the script made. I'm sure there's a way to trigger this in Python, but I haven't got around to trying it yet.

Conclusion

Hopefully this brief tutorial will either help you to place components in circles in Kicad/pcbnew or will help you to write your own scripts for easing PCB layout. Kicad can be a very capable tool and with its new expanded scripting functionality, the sky seems to be the limit.

Database Abstraction in Python

As I was recently working on trying out the Flask web framework for Python, I ended up wanting to access my MySQL database. Recently at work I have been using entity framework and I have gotten quite used to having a good database abstraction that allows programmatic creation of SQL. While such frameworks exist in Python, I thought it would interesting to try writing one. This is one great example of getting carried away with a seemingly simple task.

I aimed for these things:

  • Tables should be represented as objects which each instance of the object representing a row
  • These objects should be able to generate their own insert, select, and update queries
  • Querying the database should be accomplished by logical predicates, not by strings
  • Update queries should be optimized to only update those fields which have changed
  • The database objects should have support for "immutable" fields that are generated by the database

I also wanted to be able to do relations between tables with foreign keys, but I have decided to stop for now on that. I have a structure outlined, but it isn't necessary enough at this point since all I wanted was a database abstraction for my simple Flask project. I will probably implement it later.

This can be found as a gist here: https://gist.github.com/kcuzner/5246020

Example

Before going into the code, here is an example of what this abstraction can do as it stands. It directly uses the DbObject and DbQuery-inheriting objects which are shown further down in this post.

 1from db import *
 2import hashlib
 3
 4def salt_password(user, unsalted):
 5    if user is None:
 6        return unsalted
 7    m = hashlib.sha512()
 8    m.update(user.username)
 9    m.update(unsalted)
10    return m.hexdigest()
11
12class User(DbObject):
13    dbo_tablename = "users"
14    primary_key = IntColumn("id", allow_none=True, mutable=False)
15    username = StringColumn("username", "")
16    password = PasswordColumn("password", salt_password, "")
17    display_name = StringColumn("display_name", "")
18    def __init__(self, **kwargs):
19        DbObject.__init__(self, **kwargs)
20    @classmethod
21    def load(self, cur, username):
22        selection = self.select('u')
23        selection[0].where(selection[1].username == username)
24        result = selection[0].execute(cur)
25        if len(result) == 0:
26            return None
27        else:
28            return result[0]
29    def match_password(self, password):
30        salted = salt_password(self, password)
31        return salted == self.password
32
33#assume there is a function get_db defined which returns a PEP-249
34#database object
35def main():
36    db = get_db()
37    cur = db.cursor()
38    user = User.load(cur, "some username")
39    user.password = "a new password!"
40    user.save(cur)
41    db.commit()
42
43    new_user = User(username="someone", display_name="Their name")
44    new_user.password = "A password that will be hashed"
45    new_user.save(cur)
46    db.commmit()
47
48    print new_user.primary_key # this will now have a database assigned id

This example first loads a user using a DbSelectQuery. The user is then modified and the DbObject-level function save() is used to save it. Next, a new user is created and saved using the same function. After saving, the primary key will have been populated and will be printed.

Change Tracking Columns

I started out with columns. I needed columns that track changes and have a mapping to an SQL column name. I came up with the following:

  1class ColumnSet(object):
  2    """
  3    Object which is updated by ColumnInstances to inform changes
  4    """
  5    def __init__(self):
  6        self.__columns = {} # columns are sorted by name
  7        i_dict = type(self).__dict__
  8        for attr in i_dict:
  9            obj = i_dict[attr]
 10            if isinstance(obj, Column):
 11                # we get an instance of this column
 12                self.__columns[obj.name] = ColumnInstance(obj, self)
 13
 14    @property
 15    def mutated(self):
 16        """
 17        Returns the mutated columns for this tracker.
 18        """
 19        output = []
 20        for name in self.__columns:
 21            column = self.get_column(name)
 22            if column.mutated:
 23                output.append(column)
 24        return output
 25
 26    def get_column(self, name):
 27        return self.__columns[name]
 28
 29class ColumnInstance(object):
 30    """
 31    Per-instance column data. This is used in ColumnSet objects to hold data
 32    specific to that particular instance
 33    """
 34    def __init__(self, column, owner):
 35        """
 36        column: Column object this is created for
 37        initial: Initial value
 38        """
 39        self.__column = column
 40        self.__owner = owner
 41        self.update(column.default)
 42
 43    def update(self, value):
 44        """
 45        Updates the value for this instance, resetting the mutated flag
 46        """
 47        if value is None and not self.__column.allow_none:
 48            raise ValueError("'None' is invalid for column '" +
 49                             self.__column.name + "'")
 50        if self.__column.validate(value):
 51            self.__value = value
 52            self.__origvalue = value
 53        else:
 54            raise ValueError("'" + str(value) + "' is not valid for column '" +
 55                             self.__column.name + "'")
 56
 57    @property
 58    def column(self):
 59        return self.__column
 60
 61    @property
 62    def owner(self):
 63        return self.__owner
 64
 65    @property
 66    def mutated(self):
 67        return self.__value != self.__origvalue
 68
 69    @property
 70    def value(self):
 71        return self.__value
 72
 73    @value.setter
 74    def value(self, value):
 75        if value is None and not self.__column.allow_none:
 76            raise ValueError("'None' is invalid for column '" +
 77                             self.__column.name + "'")
 78        if not self.__column.mutable:
 79            raise AttributeError("Column '" + self.__column.name + "' is not" +
 80                                 " mutable")
 81        if self.__column.validate(value):
 82            self.__value = value
 83        else:
 84            raise ValueError("'" + value + "' is not valid for column '" +
 85                             self.__column.name + "'")
 86
 87class Column(object):
 88    """
 89    Column descriptor for a column
 90    """
 91    def __init__(self, name, default=None, allow_none=False, mutable=True):
 92        """
 93        Initializes a column
 94
 95        name: Name of the column this maps to
 96        default: Default value
 97        allow_none: Whether none (db null) values are allowed
 98        mutable: Whether this can be mutated by a setter
 99        """
100        self.__name = name
101        self.__allow_none = allow_none
102        self.__mutable = mutable
103        self.__default = default
104
105    def validate(self, value):
106        """
107        In a child class, this will validate values being set
108        """
109        raise NotImplementedError
110
111    @property
112    def name(self):
113        return self.__name
114
115    @property
116    def allow_none(self):
117        return self.__allow_none
118
119    @property
120    def mutable(self):
121        return self.__mutable
122
123    @property
124    def default(self):
125        return self.__default
126
127    def __get__(self, owner, ownertype=None):
128        """
129        Gets the value for this column for the passed owner
130        """
131        if owner is None:
132            return self
133        if not isinstance(owner, ColumnSet):
134            raise TypeError("Columns are only allowed on ColumnSets")
135        return owner.get_column(self.name).value
136
137    def __set__(self, owner, value):
138        """
139        Sets the value for this column for the passed owner
140        """
141        if not isinstance(owner, ColumnSet):
142            raise TypeError("Columns are only allowed on ColumnSets")
143        owner.get_column(self.name).value = value
144
145class StringColumn(Column):
146    def validate(self, value):
147        if value is None and self.allow_none:
148            print "nonevalue"
149            return True
150        if isinstance(value, basestring):
151            print "isstr"
152            return True
153        print "not string", value, type(value)
154        return False
155
156class IntColumn(Column):
157    def validate(self, value):
158        if value is None and self.allow_none:
159            return True
160        if isinstance(value, int) or isinstance(value, long):
161            return True
162        return False
163
164class PasswordColumn(Column):
165    def __init__(self, name, salt_function, default=None, allow_none=False,
166                 mutable=True):
167        """
168        Create a new password column which uses the specified salt function
169
170        salt_function: a function(self, value) which returns the salted string
171        """
172        Column.__init__(self, name, default, allow_none, mutable)
173        self.__salt_function = salt_function
174    def validate(self, value):
175        return True
176    def __set__(self, owner, value):
177        salted = self.__salt_function(owner, value)
178        super(PasswordColumn, self).__set__(owner, salted)

The Column class describes the column and is implemented as a descriptor. Each ColumnSet instance contains multiple columns and holds ColumnInstance objects which hold the individual column per-object properties, such as the value and whether it has been mutated or not. Each column type has a validation function to help screen invalid data from the columns. When a ColumnSet is initiated, it scans itself for columns and at that moment creates its ColumnInstances.

Generation of SQL using logical predicates

The next thing I had to create was the database querying structure. I decided that rather than actually using the ColumnInstance or Column objects, I would use a go-between object that can be assigned a "prefix". A common thing to do in SQL queries is to rename the tables in the query so that you can reference the same table multiple times or use different tables with the same column names. So, for example if I had a table called posts and I also had a table called users and they both shared a column called 'last_update', I could assign a prefix 'p' to the post columns and a prefix 'u' to the user columns so that the final column name would be 'p.last_update' and 'u.last_update' for posts and users respectively.

Another thing I wanted to do was avoid the usage of SQL in constructing my queries. This is similar to the way that LINQ works for C#: A predicate is specified and later translated into an SQL query or a series of operations in memory depending on what is going on. So, in Python one of my queries looks like so:

1class Table(ColumnSet):
2    some_column = StringColumn("column_1", "")
3    another = IntColumn("column_2", 0)
4a_variable = 5
5columns = Table.get_columns('x') # columns with a prefix 'x'
6query = DbQuery() # This base class just makes a where statement
7query.where((columns.some_column == "4") & (columns.another > a_variable)
8print query.sql

This would print out a tuple (" WHERE x.column_1 = %s AND x.column_2 > %s", ["4", 5]). So, how does this work? I used operator overloading to create DbQueryExpression objects. The code is like so:

  1class DbQueryExpression(object):
  2    """
  3    Query expression created from columns, literals, and operators
  4    """
  5    def __and__(self, other):
  6        return DbQueryConjunction(self, other)
  7    def __or__(self, other):
  8        return DbQueryDisjunction(self, other)
  9
 10    def __str__(self):
 11        raise NotImplementedError
 12    @property
 13    def arguments(self):
 14        raise NotImplementedError
 15
 16class DbQueryConjunction(DbQueryExpression):
 17    """
 18    Query expression joining together a left and right expression with an
 19    AND statement
 20    """
 21    def __init__(self, l, r):
 22        DbQueryExpression.__ini__(self)
 23        self.l = l
 24        self.r = r
 25    def __str__(self):
 26        return str(self.l) + " AND " + str(self.r)
 27    @property
 28    def arguments(self):
 29        return self.l.arguments + self.r.arguments
 30
 31class DbQueryDisjunction(DbQueryExpression):
 32    """
 33    Query expression joining together a left and right expression with an
 34    OR statement
 35    """
 36    def __init__(self, l, r):
 37        DbQueryExpression.__init__(self)
 38        self.l = l
 39        self.r = r
 40    def __str__(self):
 41        return str(self.r) + " OR " + str(self.r)
 42    @property
 43    def arguments(self):
 44        return self.l.arguments + self.r.arguments
 45
 46class DbQueryColumnComparison(DbQueryExpression):
 47    """
 48    Query expression comparing a combination of a column and/or a value
 49    """
 50    def __init__(self, l, op, r):
 51        DbQueryExpression.__init__(self)
 52        self.l = l
 53        self.op = op
 54        self.r = r
 55    def __str__(self):
 56        output = ""
 57        if isinstance(self.l, DbQueryColumn):
 58            prefix = self.l.prefix
 59            if prefix is not None:
 60                output += prefix + "."
 61            output += self.l.name
 62        elif self.l is None:
 63            output += "NULL"
 64        else:
 65            output += "%s"
 66        output += self.op
 67        if isinstance(self.r, DbQueryColumn):
 68            prefix = self.r.prefix
 69            if prefix is not None:
 70                output += prefix + "."
 71            output += self.r.name
 72        elif self.r is None:
 73            output += "NULL"
 74        else:
 75            output += "%s"
 76        return output
 77    @property
 78    def arguments(self):
 79        output = []
 80        if not isinstance(self.l, DbQueryColumn) and self.l is not None:
 81            output.append(self.l)
 82        if not isinstance(self.r, DbQueryColumn) and self.r is not None:
 83            output.append(self.r)
 84        return output
 85
 86class DbQueryColumnSet(object):
 87    """
 88    Represents a set of columns attached to a specific DbOject type. This
 89    object dynamically builds itself based on a passed type. The columns
 90    attached to this set may be used in DbQueries
 91    """
 92    def __init__(self, dbo_type, prefix):
 93        d = dbo_type.__dict__
 94        self.__columns = {}
 95        for attr in d:
 96            obj = d[attr]
 97            if isinstance(obj, Column):
 98                column = DbQueryColumn(dbo_type, prefix, obj.name)
 99                setattr(self, attr, column)
100                self.__columns[obj.name] = column
101    def __len__(self):
102        return len(self.__columns)
103    def __getitem__(self, key):
104        return self.__columns[key]
105    def __iter__(self):
106        return iter(self.__columns)
107
108class DbQueryColumn(object):
109    """
110    Represents a Column object used in a DbQuery
111    """
112    def __init__(self, dbo_type, prefix, column_name):
113        self.dbo_type = dbo_type
114        self.name = column_name
115        self.prefix = prefix
116
117    def __lt__(self, other):
118        return DbQueryColumnComparison(self, "<", other)
119    def __le__(self, other):
120        return DbQueryColumnComparison(self, "<=", other)
121    def __eq__(self, other):
122        op = "="
123        if other is None:
124           op = " IS "
125       return DbQueryColumnComparison(self, op, other)
126    def __ne__(self, other):
127        op = "!="
128        if other is None:
129            op = " IS NOT "
130        return DbQueryColumnComparison(self, op, other)
131    def __gt__(self, other):
132        return DbQueryColumnComparison(self, ">", other)
133    def __ge__(self, other):
134        return DbQueryColumnComparison(self, ">=", other)

The __str__ function and arguments property return recursively generated expressions using the column prefixes (in the case of __str__) and the arguments (in the case of arguments). As can be seen, this supports parameterization of queries. To be honest, this part was the most fun since I was surprised it was so easy to make predicate expressions using a minimum of classes. One thing that I didn't like, however, was the fact that the boolean and/or operators cannot be overloaded. For that reason I had to use the bitwise operators, so the expressions aren't entirely correct when being read.

This DbQueryExpression is fed into my DbQuery object which actually does the translation to SQL. In the example above, we saw that I just passed a logical argument into my where function. This actually was a DbQueryExpression since my overloaded operators create DbQueryExpression objects when they are compared. The DbColumnSet object is an dynamically generated object containing the go-between column objects which is created from a DbObject. We will discuss the DbObject a little further down

The DbQuery objects are implemented as follows:

  1class DbQueryError(Exception):
  2    """
  3    Raised when there is an error constructing a query
  4    """
  5    def __init__(self, msg):
  6        self.message = msg
  7    def __str__(self):
  8        return self.message
  9
 10class DbQuery(object):
 11    """
 12    Represents a base SQL Query to a database based upon some DbObjects
 13
 14    All of the methods implemented here are valid on select, update, and
 15    delete statements.
 16    """
 17    def __init__(self, execute_filter=None):
 18        """
 19        callback: Function to call when the DbQuery is executed
 20        """
 21        self.__where = []
 22        self.__limit = None
 23        self.__orderby = []
 24        self.__execute_filter = execute_filter
 25    def where(self, expression):
 26        """Specify an expression to append to the WHERE clause"""
 27        self.__where.append(expression)
 28    def limit(self, value=None):
 29        """Specify the limit to the query"""
 30        self.__limit = value
 31    @property
 32    def sql(self):
 33        query = ""
 34        args = []
 35        if len(self.__where) > 0:
 36            where = self.__where[0]
 37            for clause in self.__where[1:]:
 38                where = where & clause
 39            args = where.arguments
 40            query += " WHERE " + str(where)
 41        if self.__limit is not None:
 42            query += " LIMIT " + self.__limit
 43        return query,args
 44    def execute(self, cur):
 45        """
 46        Executes this query on the passed cursor and returns either the result
 47        of the filter function or the cursor if there is no filter function.
 48        """
 49        query = self.sql
 50        cur.execute(query[0], query[1])
 51        if self.__execute_filter:
 52            return self.__execute_filter(self, cur)
 53        else:
 54            return cur
 55
 56class DbSelectQuery(DbQuery):
 57    """
 58    Creates a select query to a database based upon DbObjects
 59    """
 60    def __init__(self, execute_filter=None):
 61        DbQuery.__init__(self, execute_filter)
 62        self.__select = []
 63        self.__froms = []
 64        self.__joins = []
 65        self.__orderby = []
 66    def select(self, *columns):
 67        """Specify one or more columns to select"""
 68        self.__select += columns
 69    def from_table(self, dbo_type, prefix):
 70        """Specify a table to select from"""
 71        self.__froms.append((dbo_type, prefix))
 72    def join(self, dbo_type, prefix, on):
 73        """Specify a table to join to"""
 74        self.__joins.append((dbo_type, prefix, on))
 75    def orderby(self, *columns):
 76        """Specify one or more columns to order by"""
 77        self.__orderby += columns
 78    @property
 79    def sql(self):
 80        query = "SELECT "
 81        args = []
 82        if len(self.__select) == 0:
 83            raise DbQueryError("No selection in DbSelectQuery")
 84        query += ','.join([col.prefix + "." +
 85                 col.name for col in self.__select])
 86        if len(self.__froms) == 0:
 87            raise DbQueryError("No FROM clause in DbSelectQuery")
 88        for table in self.__froms:
 89            query += " FROM " + table[0].dbo_tablename + " " + table[1]
 90        if len(self.__joins) > 0:
 91            for join in self.__joins:
 92                query += " JOIN " + join[0].dbo_tablename + " " + join[1] +
 93                         " ON " + str(join[2])
 94        query_parent = super(DbSelectQuery, self).sql
 95        query += query_parent[0]
 96        args += query_parent[1]
 97        if len(self.__orderby) > 0:
 98           query += " ORDER BY " +
 99                    ','.join([col.prefix + "." +
100                    col.name for col in self.__orderby])
101        return query,args
102
103class DbInsertQuery(DbQuery):
104    """
105    Creates an insert query to a database based upon DbObjects. This does not
106    include any where or limit expressions
107    """
108    def __init__(self, dbo_type, prefix, execute_filter=None):
109        DbQuery.__init__(self, execute_filter)
110        self.table = (dbo_type, prefix)
111        self.__values = []
112    def value(self, column, value):
113        self.__values.append((column, value))
114    @property
115    def sql(self):
116        if len(self.__values) == 0:
117            raise DbQueryError("No values in insert")
118        tablename = self.table[0].dbo_tablename
119        query = "INSERT INTO {table} (".format(table=tablename)
120        args = [val[1] for val in self.__values
121                if val[0].prefix == self.table[1]]
122        query += ",".join([val[0].name for val in self.__values
123                          if val[0].prefix == self.table[1]])
124        query += ") VALUES ("
125        query += ",".join(["%s" for x in args])
126        query += ")"
127        return query,args
128
129class DbUpdateQuery(DbQuery):
130    """
131    Creates an update query to a database based upon DbObjects
132    """
133    def __init__(self, dbo_type, prefix, execute_filter=None):
134        """
135        Initialize the update query
136
137        dbo_type: table type to be updating
138        prefix: Prefix the columns are known under
139        """
140        DbQuery.__init__(self, execute_filter)
141        self.table = (dbo_type, prefix)
142        self.__updates = []
143    def update(self, left, right):
144        self.__updates.append((left, right))
145    @property
146    def sql(self):
147        if len(self.__updates) == 0:
148            raise DbQueryError("No update in DbUpdateQuery")
149        query = "UPDATE " + self.table[0].dbo_tablename + " " + self.table[1]
150        args = []
151        query += " SET "
152        for update in self.__updates:
153            if isinstance(update[0], DbQueryColumn):
154                query += update[0].prefix + "." + update[0].name
155            else:
156                query += "%s"
157                args.append(update[0])
158            query += "="
159            if isinstance(update[1], DbQueryColumn):
160                query += update[1].prefix + "." + update[1].name
161            else:
162                query += "%s"
163                args.append(update[1])
164        query_parent = super(DbUpdateQuery, self).sql
165        query += query_parent[0]
166        args += query_parent[1]
167        return query, args
168
169class DbDeleteQuery(DbQuery):
170    """
171    Creates a delete query for a database based on a DbObject
172    """
173    def __init__(self, dbo_type, prefix, execute_filter=None):
174        DbQuery.__init__(self, execute_filter)
175        self.table = (dbo_type, prefix)
176    @property
177    def sql(self):
178        query = "DELETE FROM " + self.table[0].dbo_tablename + " " +
179                self.table[1]
180        args = []
181        query_parent = super(DbDeleteQuery, self).sql
182        query += query_parent[0]
183        args += query_parent[1]
184        return query, args

Each of the SELECT, INSERT, UPDATE, and DELETE query types inherits from a base DbQuery which does execution and such. I decided to make the DbQuery object take a PEP 249-style cursor object and execute the query itself. My hope is that this will make this a little more portable since, to my knowledge, I didn't make the queries have any MySQL-specific constructions.

The different query types each implement a variety of statements corresponding to different parts of an SQL query: where(), limit(), orderby(), select(), from_table(), etc. These each take in either a DbQueryColumn (such as is the case with where(), orderby(), select(), etc) or a string to be appended to the query, such as is the case with limit(). I could easily have made limit take in two integers as well, but I was kind of rushing through because I wanted to see if this would even work. The query is built by creating the query object for the basic query type that is desired and then calling its member functions to add things on to the query.

Executing the queries can cause a callback "filter" function to be called which takes in the query and the cursor as arguments. I use this function to create new objects from the data or to update an object. It could probably be used for more clever things as well, but those two cases were my original intent in creating it. If no filter is specified, then the cursor is returned.

Table and row objects

At the highest level of this hierarchy is the DbObject. The DbObject definition actually represents a table in the database with a name and a single primary key column. Each instance represents a row. DbObjects also implement the methods for selecting records of their type and also updating themselves when they are changed. They inherit change tracking from the ColumnSet and use DbQueries to accomplish their querying goals. The code is as follows:

  1class DbObject(ColumnSet):
  2    """
  3    A DbObject is a set of columns linked to a table in the database. This is
  4    synonomous to a row. The following class attributes must be set:
  5
  6    dbo_tablename : string table name
  7    primary_key : Column for the primary key
  8    """
  9    def __init__(self, **cols):
 10        ColumnSet.__init__(self)
 11        for name in cols:
 12            c = self.get_column(name)
 13            c.update(cols[name])
 14
 15    @classmethod
 16    def get_query_columns(self, prefix):
 17        return DbQueryColumnSet(self, prefix)
 18
 19    @classmethod
 20    def select(self, prefix):
 21        """
 22        Returns a DbSelectQuery set up for this DbObject
 23        """
 24        columns = self.get_query_columns(prefix)
 25        def execute(query, cur):
 26            output = []
 27            block = cur.fetchmany()
 28            while len(block) > 0:
 29                for row in block:
 30                    values = {}
 31                    i = 0
 32                    for name in columns:
 33                        values[name] = row[i]
 34                        i += 1
 35                    output.append(self(**values))
 36                block = cur.fetchmany()
 37            return output
 38        query = DbSelectQuery(execute)
 39        query.select(*[columns[name] for name in columns])
 40        query.from_table(self, prefix)
 41        return query, columns
 42
 43    def get_primary_key_name(self):
 44        return type(self).__dict__['primary_key'].name
 45
 46    def save(self, cur):
 47        """
 48        Saves any changes to this object to the database
 49        """
 50        if self.primary_key is None:
 51            # we need to be saved
 52            columns = self.get_query_columns('x')
 53            def execute(query, cur):
 54                self.get_column(self.get_primary_key_name()
 55                                ).update(cur.lastrowid)
 56                selection = []
 57                for name in columns:
 58                    if name == self.get_primary_key_name():
 59                        continue #we have no need to update the primary key
 60                    column_instance = self.get_column(name)
 61                    if not column_instance.column.mutable:
 62                        selection.append(columns[name])
 63                if len(selection) != 0:
 64                    # we get to select to get additional computed values
 65                    def execute2(query, cur):
 66                        row = cur.fetchone()
 67                        index = 0
 68                        for s in selection:
 69                            self.get_column(s.name).update(row[index])
 70                            index += 1
 71                        return True
 72                    query = DbSelectQuery(execute2)
 73                    query.select(*selection)
 74                    query.from_table(type(self), 'x')
 75                    query.where(columns[self.get_primary_key_name()] ==
 76                                self.get_column(self.get_primary_key_name()
 77                                                ).value)
 78                    return query.execute(cur)
 79                return True
 80            query = DbInsertQuery(type(self), 'x', execute)
 81            for name in columns:
 82                column_instance = self.get_column(name)
 83                if not column_instance.column.mutable:
 84                    continue
 85                query.value(columns[name], column_instance.value)
 86            print query.sql
 87            return query.execute(cur)
 88        else:
 89            # we have been modified
 90            modified = self.mutated
 91            if len(modified) == 0:
 92                return True
 93            columns = self.get_query_columns('x')
 94            def execute(query, cur):
 95                for mod in modified:
 96                    mod.update(mod.value)
 97                return True
 98            query = DbUpdateQuery(type(self), 'x', execute)
 99            for mod in modified:
100                query.update(columns[mod.column.name], mod.value)
101            query.where(columns[self.get_primary_key_name()] == self.primary_key)
102            return query.execute(cur)

DbObjects require that the inheriting classes define two properties: dbo_tablename and primary_key. dbo_tablename is just a string giving the name of the table in the database and primary_key is a Column that will be used as the primary key.

To select records from the database, the select() function can be called from the class. This sets up a DbSelectQuery which will return an array of the DbObject that it is called for when the query is executed.

One fallacy of this structure is that at the moment it assumes that the primary key won't be None if it has been set. In other words, the way I did it right now does not allow for null primary keys. The reason it does this is because it says that if the primary key hasn't been set, it needs to generate a DbInsertQuery for the object when save() is called instead of a DbUpdateQuery. Both insert and update queries do not include every field. Immutable fields are always excluded and then later selected or inferred from the cursor object.