Aggregations¶

ParadeDB allow fast aggregation. it executes the aggregate using the columnar portion of the ParadeDB index, which can significantly accelerate performance compared to vanilla Postgres.

Note

If the filter is applied and you encounter the error query is incompatible with pg_search when using an expression or lookup, try using the legacy ParadeDB function by passing the value as a dictionary or by using LookupParameter with legacy=True.

All the aggregations is inside the paradedb.aggregates module

Count¶

Count(field: str = "id")

see ParadeDB Count .

see Tantivy for more

Example

from paradedb.aggregates import Count
from articles.models import Article

Article.objects.aggregate(count=Count("id", filter=models.Q(id__all=True)))

Term¶

Term(
    field: str,
    order: typing.Optional[TermAggregateOrder | dict[str,str]] = None,
    size: int = 0,
    segment_size: int = 0,
    min_doc_count: int = 0,
    missing: float | int | None = None,
    show_term_doc_count_error: bool = False,
    aggs: dict | None = None,
)

see ParadeDB Terms Aggregation.

see Tantivy for more

Example

from paradedb.expressions import All
from paradedb.aggregates import Term, TermAggregateOrder, Count

# using count or _key
Article.objects.aggregate(
        count=Term(
            "rank",
            order={"_count": "desc"},  # _key
            filter = models.Q(id__all=True)
        )
    )


# using SubAggregation
Article.objects.filter(All(match_op=True)).aggregate(
        count=Term(
            "rank",
            order={"max_count": "desc"},
            aggs={
                "max_count": Count("id").build_json(as_dict=True),
            },
        )
    )

TermAggregateOrder¶

TermAggregateOrder(
    target: Literal["_count", "_key"] | str,
    order: Literal["asc", "desc"] = "asc",
)

Example

TermAggregateOrder("_key", "asc")

HistogramBound¶

HistogramBound(min: int, max: int)

Used by DateHistogram and Histogram

Example

HistogramBound(0, 100)

DateHistogram¶

DateHistogram(
    field: str,
    fixed_interval: str = None,
    offset: str | None = None,
    min_doc_count: int | None = None,
    hard_bounds: HistogramBound | tuple[float, float] | None = None,
    extended_bounds: HistogramBound | tuple[float, float] | None = None,
    keyed: bool = True,
)

see ParadeDB Date Histogram Aggregation.

see Tantivy doc for more

Example

from paradedb.aggregates import DateHistogram

Article.objects.aggregate(
        date_histogram=DateHistogram(
            "created",
            "30d",
            filter=models.Q(id__all=True)
        )
    )

Histogram¶

Histogram(
    field: str,
    interval: str,
    offset: str | None = None,
    min_doc_count: int | None = None,
    hard_bounds: HistogramBound | tuple[float, float] | None = None,
    extended_bounds: HistogramBound | tuple[float, float] | None = None,
    keyed: bool = True,
    is_normalized_to_ns: bool = False,
)

see ParadeDB Histogram

see Tantivy for more

Example

from paradedb.aggregates import Histogram

Article.objects.aggregate(
    result=Histogram(field="created", interval="10d", filter=models.Q(id__all=True))
)

Range¶

Range(
    field: str,
    ranges: list[RangeAggegationRange],
    keyed: bool = True,
)

see ParadeDB Range Aggregation.

see Tantivy for more

Example

from paradedb.aggregates import Range, RangeAggegationRange

Article.objects.filter(All(match_op=True)).aggregate(
        range_histogram=Range(
            "created",
            ranges=[
                RangeAggegationRange(
                    from_=timezone.datetime(2022, 1, 1), to=timezone.datetime(2022, 1, 2)
                ),
                RangeAggegationRange(from_=date(2022, 1, 2), to=date(2022, 1, 3)),
            ],
        )
    )

RangeAggegationRange¶

RangeAggegationRange(
    from_: float | int | date | datetime,
    to: float | int | date | datetime,
    key: str | None = None,
)

Avg¶

Avg(
    field: str,
    missing: float | int | None = None,
)

see ParadeDB Average Aggregation.

see Tantivy for more

Example

from paradedb.aggregates import Avg
Article.objects.filter(All(match_op=True)).aggregate(
        avg_rank=aggregates.Avg("rank")
    )

Cardinality¶

Cardinality(
    field: str,
    missing: float | int | str | None = None,
)

see ParadeDB Cardinality Aggregation.

see Tantivy for more

Example

from paradedb.aggregates import Cardinality

Article.objects.filter(All(match_op=True)).aggregate(
        cardinality=Cardinality("rank", filter=models.Q(rank__gte=1) & models.Q(rank__lte=4))
    )

Min¶

Min(
    field: str,
    missing: float | int | None = None,
)

see ParadeDB Min Aggregation.

see Tantivy for more

Example

from paradedb.aggregates import Min
Article.objects.filter(rank__gte=1).aggregate(m=Min("rank", filter=models.Q(id__all=True)))

Max¶

Max(
    field: str,
    missing: float | int | None = None,
)

see ParadeDB Max Aggregation.

see Tantivy for more

Example

from paradedb.aggregates import Max

Article.objects.aggregate(m=Max("rank", filter=models.Q(id__all=True)))

Percentile¶

Percentile(
    field: str,
    percents: list[float | int] | None = None,
    missing: float | int | None = None,
    keyed: bool = True,
)

see ParadeDB Percentile Aggregation.

see Tantivy for more

Example

from paradedb.aggregates import Percentile

Article.objects.aggregate(
        percentile=Percentile("rank", [10], filter=models.Q(id__all=True))
    )

Stats¶

Stats(
    field: str,
    missing: float | int | None = None,
)

see ParadeDB Stats Aggregation.

see Tantivy for more

Example

from paradedb.aggregates import Stats

Article.objects.aggregate(stats=Stats("rank", filter=models.Q(id__all=True)))

Sum¶

Sum(
    field: str,
    missing: float | int | None = None,
)

see ParadeDB Sum Aggregation.

see Tantivy for more

Example

from paradedb.aggregates import Sum

Article.objects.aggregate(total_score=Sum("rank", filter=models.Q(id__all=True)))

TopHit¶

TopHit(
    sort: list[TopHitSort] | list[dict[str, str]],
    size: int,
    from_: int | None = None,
    docvalue_fields=None,
)

see ParadeDB TopHit Aggregation.

see Tantivy fro more

Example

from paradedb.aggregates import TopHit, TopHitSort

Article.objects.aggregate(
        top=TopHit(
            size=100,
            sort=[TopHitSort(field="created", order="desc")],
            docvalue_fields=["rank", "created", "id"],
            filter=models.Q(id__all=True),
        )
    )

TopHitSort¶

TopHitSort(
    field: str,
    order: Literal["asc", "desc"] = "asc",
)

Facet(aggregate: Aggregate)

see ParadeDB Facet Aggregation.

Example

from paradedb.aggregates import Facet, Term

Article.objects.annotate(
    facet=Facet(Term("category"))
)

Aggregations¶

Count¶

Term¶

TermAggregateOrder¶

HistogramBound¶

DateHistogram¶

Histogram¶

Range¶

RangeAggegationRange¶

Avg¶

Cardinality¶

Min¶

Max¶

Percentile¶

Stats¶

Sum¶

TopHit¶

TopHitSort¶

Facet¶