Skip to content

Aggregations

ParadeDB allow fast aggregation. it executes the aggregate using the columnar portion of the ParadeDB index, which can significantly accelerate performance compared to vanilla Postgres.


Note

  • If the filter is applied and you encounter the error query is incompatible with pg_search when using an expression or lookup, try using the legacy ParadeDB function by passing the value as a dictionary or by using LookupParameter with legacy=True.

  • All the aggregations is inside the paradedb.aggregates module

Count

Count(field: str = "id")

Example

from paradedb.aggregates import Count
from articles.models import Article

Article.objects.aggregate(count=Count("id", filter=models.Q(id__all=True)))

Term

Term(
    field: str,
    order: typing.Optional[TermAggregateOrder | dict[str,str]] = None,
    size: int = 0,
    segment_size: int = 0,
    min_doc_count: int = 0,
    missing: float | int | None = None,
    show_term_doc_count_error: bool = False,
    aggs: dict | None = None,
)

Example

from paradedb.expressions import All
from paradedb.aggregates import Term, TermAggregateOrder, Count

# using count or _key
Article.objects.aggregate(
        count=Term(
            "rank",
            order={"_count": "desc"},  # _key
            filter = models.Q(id__all=True)
        )
    )


# using SubAggregation
Article.objects.filter(All(match_op=True)).aggregate(
        count=Term(
            "rank",
            order={"max_count": "desc"},
            aggs={
                "max_count": Count("id").build_json(as_dict=True),
            },
        )
    )

TermAggregateOrder

TermAggregateOrder(
    target: Literal["_count", "_key"] | str,
    order: Literal["asc", "desc"] = "asc",
)

Example

TermAggregateOrder("_key", "asc")

HistogramBound

HistogramBound(min: int, max: int)
  • Used by DateHistogram and Histogram

Example

HistogramBound(0, 100)

DateHistogram

DateHistogram(
    field: str,
    fixed_interval: str = None,
    offset: str | None = None,
    min_doc_count: int | None = None,
    hard_bounds: HistogramBound | tuple[float, float] | None = None,
    extended_bounds: HistogramBound | tuple[float, float] | None = None,
    keyed: bool = True,
)

Example

from paradedb.aggregates import DateHistogram

Article.objects.aggregate(
        date_histogram=DateHistogram(
            "created",
            "30d",
            filter=models.Q(id__all=True)
        )
    )

Histogram

Histogram(
    field: str,
    interval: str,
    offset: str | None = None,
    min_doc_count: int | None = None,
    hard_bounds: HistogramBound | tuple[float, float] | None = None,
    extended_bounds: HistogramBound | tuple[float, float] | None = None,
    keyed: bool = True,
    is_normalized_to_ns: bool = False,
)

Example

from paradedb.aggregates import Histogram

Article.objects.aggregate(
    result=Histogram(field="created", interval="10d", filter=models.Q(id__all=True))
)

Range

Range(
    field: str,
    ranges: list[RangeAggegationRange],
    keyed: bool = True,
)

Example

from paradedb.aggregates import Range, RangeAggegationRange

Article.objects.filter(All(match_op=True)).aggregate(
        range_histogram=Range(
            "created",
            ranges=[
                RangeAggegationRange(
                    from_=timezone.datetime(2022, 1, 1), to=timezone.datetime(2022, 1, 2)
                ),
                RangeAggegationRange(from_=date(2022, 1, 2), to=date(2022, 1, 3)),
            ],
        )
    )

RangeAggegationRange

RangeAggegationRange(
    from_: float | int | date | datetime,
    to: float | int | date | datetime,
    key: str | None = None,
)

Avg

Avg(
    field: str,
    missing: float | int | None = None,
)

Example

from paradedb.aggregates import Avg
Article.objects.filter(All(match_op=True)).aggregate(
        avg_rank=aggregates.Avg("rank")
    )

Cardinality

Cardinality(
    field: str,
    missing: float | int | str | None = None,
)

Example

from paradedb.aggregates import Cardinality

Article.objects.filter(All(match_op=True)).aggregate(
        cardinality=Cardinality("rank", filter=models.Q(rank__gte=1) & models.Q(rank__lte=4))
    )

Min

Min(
    field: str,
    missing: float | int | None = None,
)

Example

from paradedb.aggregates import Min
Article.objects.filter(rank__gte=1).aggregate(m=Min("rank", filter=models.Q(id__all=True)))

Max

Max(
    field: str,
    missing: float | int | None = None,
)

Example

from paradedb.aggregates import Max

Article.objects.aggregate(m=Max("rank", filter=models.Q(id__all=True)))

Percentile

Percentile(
    field: str,
    percents: list[float | int] | None = None,
    missing: float | int | None = None,
    keyed: bool = True,
)

Example

from paradedb.aggregates import Percentile

Article.objects.aggregate(
        percentile=Percentile("rank", [10], filter=models.Q(id__all=True))
    )

Stats

Stats(
    field: str,
    missing: float | int | None = None,
)

Example

from paradedb.aggregates import Stats

Article.objects.aggregate(stats=Stats("rank", filter=models.Q(id__all=True)))

Sum

Sum(
    field: str,
    missing: float | int | None = None,
)

Example

from paradedb.aggregates import Sum

Article.objects.aggregate(total_score=Sum("rank", filter=models.Q(id__all=True)))

TopHit

TopHit(
    sort: list[TopHitSort] | list[dict[str, str]],
    size: int,
    from_: int | None = None,
    docvalue_fields=None,
)

Example

from paradedb.aggregates import TopHit, TopHitSort

Article.objects.aggregate(
        top=TopHit(
            size=100,
            sort=[TopHitSort(field="created", order="desc")],
            docvalue_fields=["rank", "created", "id"],
            filter=models.Q(id__all=True),
        )
    )

TopHitSort

TopHitSort(
    field: str,
    order: Literal["asc", "desc"] = "asc",
)

Facet

Facet(aggregate: Aggregate)

Example

from paradedb.aggregates import Facet, Term

Article.objects.annotate(
    facet=Facet(Term("category"))
)