Mastering pypika: Build SQL Queries in Python Without Manual String Concatenation

This guide walks through installing pypika and demonstrates how to construct SELECT, WHERE, GROUP BY, JOIN, subqueries, set operations, INSERT, UPDATE, and database‑specific adaptations using Python code, showcasing concrete examples and the full query‑building workflow.

Satori Komeiji's Programming Classroom
Satori Komeiji's Programming Classroom
Satori Komeiji's Programming Classroom
Mastering pypika: Build SQL Queries in Python Without Manual String Concatenation

Installation

Install the library with pip install pypika.

Simple SELECT query

Use Query.from_ to specify the table and select to list fields.

from pypika import Query

query = Query.from_("people").select("id", "name", "age")
print(query)
"""
SELECT "id","name","age" FROM "people"
"""
# Returns a QueryBuilder object
print(query.__class__)
"""
<class 'pypika.queries.QueryBuilder'>
"""
# Convert to string directly
print(str(query))
"""
SELECT "id","name","age" FROM "people"
"""
# Or call get_sql()
print(query.get_sql())
"""
SELECT "id","name","age" FROM "people"
"""

You can also use Table and Field objects to enable aliasing.

from pypika import Query, Table, Field

table = Table("people")
fields = [Field("id"), Field("name"), Field("age")]
query = Query.from_(table).select(*fields)
print(query)
"""
SELECT "id","name","age" FROM "people"
"""
# Alias a table
table = Table("data").as_("d")
fields = [Field("max_count").as_("max_cnt")]
query = Query.from_(table).select(*fields)
print(query)
"""
SELECT "max_count" "max_cnt" FROM "data" "d"
"""

Specifying database and schema

When you need to include a database name, pass a Database object to Table. For PostgreSQL schemas, use a Schema object.

from pypika import Query, Table, Database, Schema

database = Database("fruits")
# Table with database
table = Table("apple", database)
query = Query.from_(table).select("name", "price")
print(query)
"""
SELECT "name","price" FROM "fruits"."apple"
"""
# Incorrect: Table name contains a dot
table = Table("fruits.apple")
query = Query.from_(table).select("name", "price")
print(query)
"""
SELECT "name","price" FROM "fruits.apple"
"""
# Correct format uses separate database and table identifiers

WHERE clause filtering

Combine conditions with & (AND) and | (OR). The library provides methods for common predicates.

from pypika import Query, Field

query = Query.from_("t").select("*").where(
    (Field("salary") >= 10000) &
    (Field("age").between(18, 30)) &
    (Field("name").like("张%")) &
    (Field("department").isin(["销售", "财务"]))
)
print(query)
"""
SELECT * FROM "t" 
WHERE "salary">=10000 AND 
      "age" BETWEEN 18 AND 30 AND 
      "name" LIKE '张%' AND 
      "department" IN ('销售','财务')
"""

GROUP BY and aggregation

Use functions from pypika.functions for aggregation.

from pypika import functions as fn, Field, Query

query = Query.from_("people").select(
    "age", fn.Count(Field("id"))
).where(
    Field("age")[18:30] & (Field("length") < 160)
).groupby("age")
print(query)
"""
SELECT "age",COUNT("id") FROM "people" 
WHERE "age" BETWEEN 18 AND 30 AND "length"<160 
GROUP BY "age"
"""

Aggregations require Field objects; passing a plain string will not work.

HAVING clause

from pypika import Query, Field, functions as fn

query = Query.from_("people").select(
    "age", fn.Count(Field("id"))
).groupby("age").having(fn.Count(Field("id")) > 30)
print(query)
"""
SELECT "age",COUNT("id") FROM "people" 
GROUP BY "age" HAVING COUNT("id")>30
"""

JOIN operations

Left join with using when the join columns share the same name.

from pypika import Query, Table

t1 = Table("t1")
t2 = Table("t2")
query = Query.from_(t1).select(t1.name, t2.age).left_join(t2).using("id")
print(query)
"""
SELECT "t1"."name","t2"."age" FROM "t1" 
LEFT JOIN "t2" USING ("id")
"""

If a field name collides with a Table attribute, specify the table explicitly with Field(..., table=...).

from pypika import Query, Table, Field

t1 = Table("t1")
t2 = Table("t2")
query = Query.from_(t1).select(Field("field", table=t2)).left_join(t2).using("id")
print(query)
"""
SELECT "t2"."field" FROM "t1" LEFT JOIN "t2" USING ("id")
"""

For joins on different column names, use on with equality expressions.

from pypika import Query, Table

t1 = Table("t1")
t2 = Table("t2")
query = Query.from_(t1).select(t2.age, t1.name) \
    .left_join(t2) \
    .on(t1.field("uid") == t2.field("tid")) \
    .where(t1.age > 18)
print(query)
"""
SELECT "t2"."age","t1"."name" FROM "t1" 
LEFT JOIN "t2" ON "t1"."uid"="t2"."tid"
WHERE "t1"."age">18
"""

Nested subqueries

from pypika import Query, Table, functions as fn

t1 = Table("t1")
t2 = Table("t2")
sub_query = Query.from_(t1).select(fn.Avg(t2.age).as_("avg")) \
    .left_join(t2).using("id").where(t1.age > 18)
print(sub_query)
"""
SELECT AVG("t2"."age") "avg" FROM "t1"
LEFT JOIN "t2" USING ("id") WHERE "t1"."age">18
"""
query = Query.from_(t1).select("age", "name").where(
    t1.field("age") > Query.from_(sub_query).select("avg")
)
print(query)
"""
SELECT "age","name" FROM "t1" 
WHERE "age">(
    SELECT "sq0"."avg" FROM (
        SELECT AVG("t2"."age") "avg" 
        FROM "t1" LEFT JOIN "t2" USING ("id") WHERE "t1"."age">18
    ) "sq0"
)
"""

Set operations (UNION, INTERSECT, MINUS, EXCEPT)

from pypika import Query, Table

t1 = Table("t1")
t2 = Table("t2")
query1 = Query.from_(t1).select("name", "salary")
query2 = Query.from_(t2).select("name", "salary")
print(query1.union(query2))
print(query2.union(query1))
# Union can also be expressed with +
print(str(query1 + query2) == str(query1.union(query2)))  # True
print(str(query2 + query1) == str(query2.union(query1)))  # True
print(query1.union_all(query2))
print(query2.union_all(query1))
# Union ALL can also be expressed with *
print(str(query1 * query2) == str(query1.union_all(query2)))  # True
print(str(query2 * query1) == str(query2.union_all(query1)))  # True
# INTERSECT (no dedicated operator)
print(query1.intersect(query2))
# MINUS (set difference) – use minus()
print(query1.minus(query2))
# EXCEPT (symmetric difference) – use except_of()
print(query1.except_of(query2))

WITH clause (Common Table Expressions)

from pypika import Table, Query, AliasedQuery

t = Table("t")
sub_query = Query.from_(t).select("*")
query = Query.with_(sub_query, "alias").from_(AliasedQuery("alias")).select("*")
print(query)
"""
WITH alias AS (SELECT * FROM "t") SELECT * FROM alias
"""

DISTINCT

from pypika import Query, Table

t = Table("t")
query = Query.from_(t).distinct().select(t.id, t.age)
print(query)
"""
SELECT DISTINCT "id","age" FROM "t"
"""

ORDER BY

from pypika import Query, Order

query = Query.from_("t").select("id", "name").orderby("id", order=Order.desc)
print(query)
"""
SELECT "id","name" FROM "t" ORDER BY "id" DESC
"""
# Multiple fields
query = Query.from_("t").select("id", "name").orderby("age", "id")
print(query)
"""
SELECT "id","name" FROM "t" ORDER BY "age","id"
"""
# Mixed ascending/descending
query = Query.from_("t").select("id", "name").orderby("age", order=Order.desc).orderby("id")
print(query)
"""
SELECT "id","name" FROM "t" ORDER BY "age" DESC,"id"
"""

LIMIT and OFFSET

from pypika import Table, Query, Field, functions as fn, Order

table = Table("t")
query = Query.from_(table) \
    .select(fn.Count(Field("id")).as_("count"), "age", "length") \
    .where(table.field("age") > 18) \
    .groupby("age", "length") \
    .having(fn.Count("id") > 10) \
    .orderby("count", order=Order.desc) \
    .orderby("age", order=Order.asc) \
    .limit(10).offset(5)
print(query)
"""
SELECT COUNT("id") "count","age","length" 
FROM "t" WHERE "age">18 
GROUP BY "age","length" 
HAVING COUNT('id')>10 
ORDER BY "count" DESC,"age" ASC 
LIMIT 10 OFFSET 5
"""

INSERT statements

from pypika import Table, Query

t = Table("t")
# Single row insert
query = Query.into(t).insert(1, "古明地觉", 16, "东方地灵殿")
print(query)
"""
INSERT INTO "t" VALUES (1,'古明地觉',16,'东方地灵殿')
"""
# Insert with None (NULL handling)
query = Query.into(t).insert(1, "古明地觉", None, "东方地灵殿")
print(query)
"""
INSERT INTO "t" VALUES (1,'古明地觉',NULL,'东方地灵殿')
"""
# Multiple rows
query = Query.into(t) \
    .insert(1, "古明地觉", 16, "东方地灵殿") \
    .insert(2, "古明地恋", 15, "东方地灵殿")
print(query)
"""
INSERT INTO "t" 
VALUES (1,'古明地觉',16,'东方地灵殿'),
       (2,'古明地恋',15,'东方地灵殿')
"""
# Insert using a tuple of rows
query = Query.into(t).insert(
    (1, "古明地觉", 16, "东方地灵殿"),
    (2, "古明地恋", 15, "东方地灵殿")
)
print(query)
"""
INSERT INTO "t" 
VALUES (1,'古明地觉',16,'东方地灵殿'),
       (2,'古明地恋',15,'东方地灵殿')
"""
# Insert specific columns
query = Query.into(t).columns(
    "id", t.field("name"), t.age, Field("place")
).insert(1, "古明地觉", 16, "东方地灵殿")
print(query)
"""
INSERT INTO "t" ("id","name","age","place") 
VALUES (1,'古明地觉',16,'东方地灵殿')
"""
# Insert from another table
t1 = Table("t1")
t2 = Table("t2")
query = Query.into(t1).columns("id", "name", "age") \
    .from_(t2).select("id", "name", "age").where(Field("age") > 18)
print(query)
"""
INSERT INTO "t1" ("id","name","age") 
SELECT "id","name","age" FROM "t2" WHERE "age">18
"""

UPDATE statements

from pypika import Table, Query

t = Table("t")
# Simple update
query = Query.update(t).set(t.name, "古明地恋")
print(query)
"""
UPDATE "t" SET "name"='古明地恋'
"""
# Update with WHERE
query = Query.update(t).set(t.name, "古明地恋").where(t.id == 1)
print(query)
"""
UPDATE "t" SET "name"='古明地恋' WHERE "id"=1
"""
# Update multiple columns
query = Query.update(t).set(t.name, "古明地恋").set(t.age, 16)
print(query)
"""
UPDATE "t" SET "name"='古明地恋',"age"=16
"""
# Update using another table
t1 = Table("t1")
t2 = Table("t2")
query = Query.update(t1).join(t2).on(t1.uid == t2.tid).set(t1.name, t2.name).where(t1.uid > 10)
print(query)
"""
UPDATE "t1" JOIN "t2" ON "t1"."uid"="t2"."tid" 
SET "name"="t2"."name" WHERE "t1"."uid">10
"""

Database adapters

pypika provides query classes for specific dialects. Use MySQLQuery for backticks, PostgreSQLQuery for double quotes, etc.

from pypika import (
    MySQLQuery,
    PostgreSQLQuery,
    Table,
)

t = Table("t")
print(MySQLQuery.from_(t).select(t.id, t.age))  # SELECT `id`,`age` FROM `t`
print(PostgreSQLQuery.from_(t).select(t.id, t.age))  # SELECT "id","age" FROM "t"

Conclusion

pypika offers a concise, chainable API for generating SQL across many databases. It handles quoting, supports most SQL clauses, and can be extended to advanced features such as window functions.

Original Source

Signed-in readers can open the original source through BestHub's protected redirect.

Sign in to view source
Republication Notice

This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contactadmin@besthub.devand we will review it promptly.

backendPythonSQLdatabasequery builderpypika
Satori Komeiji's Programming Classroom
Written by

Satori Komeiji's Programming Classroom

Python and Rust developer; I write about any topics you're interested in. Follow me! (#^.^#)

0 followers
Reader feedback

How this landed with the community

Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.