Aggregations¶
ParadeDB allow fast aggregation. it executes the aggregate using the columnar portion of the ParadeDB index, which can significantly accelerate performance compared to vanilla Postgres.
Note
- If the filter is applied and you encounter the error
query is incompatible with pg_searchwhen using an expression or lookup, try using the legacy ParadeDB function by passing the value as a dictionary or by using LookupParameter with legacy=True.
- All the aggregations is inside the
paradedb.aggregatesmodule
Count¶
Count(field: str = "id")
- see ParadeDB Count .
- see Tantivy for more
Example
from paradedb.aggregates import Count
from articles.models import Article
Article.objects.aggregate(count=Count("id", filter=models.Q(id__all=True)))
Term¶
Term(
field: str,
order: typing.Optional[TermAggregateOrder | dict[str,str]] = None,
size: int = 0,
segment_size: int = 0,
min_doc_count: int = 0,
missing: float | int | None = None,
show_term_doc_count_error: bool = False,
aggs: dict | None = None,
)
Example
from paradedb.expressions import All
from paradedb.aggregates import Term, TermAggregateOrder, Count
# using count or _key
Article.objects.aggregate(
count=Term(
"rank",
order={"_count": "desc"}, # _key
filter = models.Q(id__all=True)
)
)
# using SubAggregation
Article.objects.filter(All(match_op=True)).aggregate(
count=Term(
"rank",
order={"max_count": "desc"},
aggs={
"max_count": Count("id").build_json(as_dict=True),
},
)
)
TermAggregateOrder¶
TermAggregateOrder(
target: Literal["_count", "_key"] | str,
order: Literal["asc", "desc"] = "asc",
)
Example
TermAggregateOrder("_key", "asc")
HistogramBound¶
HistogramBound(min: int, max: int)
- Used by DateHistogram and Histogram
Example
HistogramBound(0, 100)
DateHistogram¶
DateHistogram(
field: str,
fixed_interval: str = None,
offset: str | None = None,
min_doc_count: int | None = None,
hard_bounds: HistogramBound | tuple[float, float] | None = None,
extended_bounds: HistogramBound | tuple[float, float] | None = None,
keyed: bool = True,
)
Example
from paradedb.aggregates import DateHistogram
Article.objects.aggregate(
date_histogram=DateHistogram(
"created",
"30d",
filter=models.Q(id__all=True)
)
)
Histogram¶
Histogram(
field: str,
interval: str,
offset: str | None = None,
min_doc_count: int | None = None,
hard_bounds: HistogramBound | tuple[float, float] | None = None,
extended_bounds: HistogramBound | tuple[float, float] | None = None,
keyed: bool = True,
is_normalized_to_ns: bool = False,
)
- see ParadeDB Histogram
- see Tantivy for more
Example
from paradedb.aggregates import Histogram
Article.objects.aggregate(
result=Histogram(field="created", interval="10d", filter=models.Q(id__all=True))
)
Range¶
Range(
field: str,
ranges: list[RangeAggegationRange],
keyed: bool = True,
)
Example
from paradedb.aggregates import Range, RangeAggegationRange
Article.objects.filter(All(match_op=True)).aggregate(
range_histogram=Range(
"created",
ranges=[
RangeAggegationRange(
from_=timezone.datetime(2022, 1, 1), to=timezone.datetime(2022, 1, 2)
),
RangeAggegationRange(from_=date(2022, 1, 2), to=date(2022, 1, 3)),
],
)
)
RangeAggegationRange¶
RangeAggegationRange(
from_: float | int | date | datetime,
to: float | int | date | datetime,
key: str | None = None,
)
Avg¶
Avg(
field: str,
missing: float | int | None = None,
)
Example
from paradedb.aggregates import Avg
Article.objects.filter(All(match_op=True)).aggregate(
avg_rank=aggregates.Avg("rank")
)
Cardinality¶
Cardinality(
field: str,
missing: float | int | str | None = None,
)
Example
from paradedb.aggregates import Cardinality
Article.objects.filter(All(match_op=True)).aggregate(
cardinality=Cardinality("rank", filter=models.Q(rank__gte=1) & models.Q(rank__lte=4))
)
Min¶
Min(
field: str,
missing: float | int | None = None,
)
- see ParadeDB Min Aggregation.
- see Tantivy for more
Example
from paradedb.aggregates import Min
Article.objects.filter(rank__gte=1).aggregate(m=Min("rank", filter=models.Q(id__all=True)))
Max¶
Max(
field: str,
missing: float | int | None = None,
)
- see ParadeDB Max Aggregation.
- see Tantivy for more
Example
from paradedb.aggregates import Max
Article.objects.aggregate(m=Max("rank", filter=models.Q(id__all=True)))
Percentile¶
Percentile(
field: str,
percents: list[float | int] | None = None,
missing: float | int | None = None,
keyed: bool = True,
)
Example
from paradedb.aggregates import Percentile
Article.objects.aggregate(
percentile=Percentile("rank", [10], filter=models.Q(id__all=True))
)
Stats¶
Stats(
field: str,
missing: float | int | None = None,
)
Example
from paradedb.aggregates import Stats
Article.objects.aggregate(stats=Stats("rank", filter=models.Q(id__all=True)))
Sum¶
Sum(
field: str,
missing: float | int | None = None,
)
- see ParadeDB Sum Aggregation.
- see Tantivy for more
Example
from paradedb.aggregates import Sum
Article.objects.aggregate(total_score=Sum("rank", filter=models.Q(id__all=True)))
TopHit¶
TopHit(
sort: list[TopHitSort] | list[dict[str, str]],
size: int,
from_: int | None = None,
docvalue_fields=None,
)
Example
from paradedb.aggregates import TopHit, TopHitSort
Article.objects.aggregate(
top=TopHit(
size=100,
sort=[TopHitSort(field="created", order="desc")],
docvalue_fields=["rank", "created", "id"],
filter=models.Q(id__all=True),
)
)
TopHitSort¶
TopHitSort(
field: str,
order: Literal["asc", "desc"] = "asc",
)
Facet¶
Facet(aggregate: Aggregate)
Example
from paradedb.aggregates import Facet, Term
Article.objects.annotate(
facet=Facet(Term("category"))
)