Custom SQL in a Django QuerySet
Django QuerySet is useful, but when the extra, annotate() and aggregate() are not enough and custom SQL is needed: the documentation demonstrates how to use the DBAL.
This article explains how to use the ORM with custom SQL to order models by number of conditions met (relevancy).
Introduction
Consider the following simple models:
class Widget(models.Model):
pass
class WidgetFeature(models.Model):
widget = models.ForeignKey('Widget', related_name='features')
name = models.CharField(max_length=50)
def __unicode__(self):
return self.name
Consider a search form with a list of 100 features with one checkbox for each. Django QuerySet make it possible to strictly filter out Widgets that don’t have the checked features.
It would be more user friendly to show a list of 49 widgets (7 pages with 7 widgets) ordered by relevancy. The QuerySet should then calculate a score for each widget and order them based on that score.
Extending QuerySet
QuerySet have a Query object which is actually used to generate the SQL, it happens in django.db.models.sql.query.Query.as_sql(). So both QuerySet and Query should be extended.
In yourapp/managers.py:
from django.db.models.query import QuerySet
from django.db.models.sql.query import Query
from django.db import connection, backend, models
class WidgetScoringQuery(Query):
def __init__(self, *args, **kwargs):
"""
Sets the 'scoring_features' property to an empty list, and call
parent constructor with the password arguments and keyword arguments.
"""
super(WidgetScoringQuery, self).__init__(*args, **kwargs)
self.scoring_features = []
def as_sql(self, with_limits=True, with_col_aliases=False):
"""
Creates the SQL for this query. Returns the SQL string and list of
parameters.
The hack here allows to order result widgets by number of matching
features by name.
If 'with_limits' is False, any limit/offset information is not included
in the query.
"""
self.pre_sql_setup()
out_cols = self.get_columns(with_col_aliases)
ordering = self.get_ordering()
# This must come after 'select' and 'ordering' -- see docstring of
# get_from_clause() for details.
from_, f_params = self.get_from_clause()
where, w_params = self.where.as_sql(qn=self.quote_name_unless_alias)
params = []
for val in self.extra_select.itervalues():
params.extend(val[1])
result = ['SELECT']
if self.distinct:
result.append('DISTINCT')
result.append(', '.join(out_cols + self.ordering_aliases))
# jpic: select "score" from the temporary table
if len(self.scoring_features):
result.append(', `tmp`.`score`')
result.append('FROM')
result.extend(from_)
params.extend(f_params)
# jpic: left outer join the temporary table with scores
if len(self.scoring_features):
result.append(self.LOUTER + ' (select w.id as wid,')
result.append('SUM(case')
for feature_name in self.scoring_features:
result.append('when hf.name = "%s" THEN 1' % feature_name)
result.append('ELSE 0 END) as `score`')
result.append('from `%s` `w`' % self.model._meta.db_table)
# HARDCODE: table name of the related WidgetFeature model is hardcoded here
result.append('left join `yourapp_widgetfeature` `hf` on `w`.`id` = `hf`.`widget_id`')
result.append('group by `wid`) `tmp` on `tmp`.`wid` = `yourapp_widget`.`id`')
if where:
result.append('WHERE %s' % where)
params.extend(w_params)
if self.extra_where:
if not where:
result.append('WHERE')
else:
result.append('AND')
result.append(' AND '.join(self.extra_where))
if self.group_by:
grouping = self.get_grouping()
result.append('GROUP BY %s' % ', '.join(grouping))
if self.having:
having, h_params = self.get_having()
result.append('HAVING %s' % ', '.join(having))
params.extend(h_params)
# jpic: order by descending score, and then widget id.
if len(self.scoring_features):
result.append('order by -`score`, `yourapp_widget`.`id`')
#elif ordering:
# result.append('ORDER BY %s' % ', '.join(ordering))
if with_limits:
if self.high_mark is not None:
result.append('LIMIT %d' % (self.high_mark — self.low_mark))
if self.low_mark:
if self.high_mark is None:
val = self.connection.ops.no_limit_value()
if val:
result.append('LIMIT %d' % val)
result.append('OFFSET %d' % self.low_mark)
params.extend(self.extra_params)
return ' '.join(result), tuple(params)
class WidgetScoringQuerySet(QuerySet):
def set_scoring_features(self, scoring_features):
"""
Sets the list of feature names that score better.
:param scoring_features: List of feature names.
"""
self.scoring_features = scoring_features
self.query.scoring_features = scoring_features
def clone(self, **kwargs):
"""
QuerySets are cloned when filtered, so scoring_features
should be passed to the clone.
"""
clone = super(HousingScoringQuerySet, self).clone()
if hasattr(self, 'scoring_features'):
clone.set_scoring_features(self.scoring_features)
class WidgetManager(models.Manager):
def get_query_set(self):
"""
Returns a WidgetScoringQuerySet which allows to order Widgets
by number of scoring features.
See WidgetScoringQuerySet.set_scoring_features() and
WidgetScoringQuery.as_sql().
"""
return WidgetScoringQuerySet(
model=self.model,
query=WidgetScoringQuery(
self.model,
connection
)
)
Then propagate the change in models.py:
from managers import *
class Widget(models.Model):
objects = WidgetManager()
Usage
After ensuring the search form is clean:
q = Widget._default_manager.get_query_set()
q.set_scoring_features([k for k, v in form.cleaned_data.items() if v])
# apply other filters and add the queryset to the context
# sql will be generated when the QuerySet will be evaluated the first time
Conclusion
It is really easy to make a QuerySet use custom SQL although custom SQL generating is poorly documented.
However, issues will arise if clone() is forgotten, it is its role to pass custom properties.
Credits
- Bruce Dianda for the scoring temporary table query,
- thumbs from #mysql (freenode) for helping preliminary investigations,
Comments
Comment form for «Custom SQL in a Django QuerySet»