Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d3bf48c
feat: add callback parameter to _read_gbq_colab
shuoweil May 14, 2026
4258639
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 15, 2026
7574f22
feat: support execution history filtering by events and job_ids
shuoweil May 15, 2026
eccd236
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 15, 2026
d2eea39
feat: support automatic per-cell execution history filtering
shuoweil May 15, 2026
21d5dd8
format code
shuoweil May 15, 2026
97c7260
fix: avoid mutating extra_query_labels in place
shuoweil May 16, 2026
f080dda
format code
shuoweil May 16, 2026
4bd2735
fix: update pre-commit config to use ruff
shuoweil May 16, 2026
9dd03ed
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 18, 2026
dcae615
fix mypy
shuoweil May 18, 2026
eb548b6
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 18, 2026
7db1330
feat: universal per-cell execution history filtering
shuoweil May 18, 2026
da6682c
format code
shuoweil May 18, 2026
0ef9216
revert .pre-commit-config.yaml to old black/flake8 config
shuoweil May 18, 2026
66fe150
Merge branch 'main' into shuowei-filter-execution-history
shuoweil May 20, 2026
b9c9336
feat: propagate cell execution count to widget queries and rename his…
shuoweil May 20, 2026
5f9d824
refactor: deduplicate ipython execution count
shuoweil May 20, 2026
d9a5593
feat: change default of execution history all_cells to True
shuoweil May 20, 2026
0c8f8c7
fix IPython mocking in unit tests
shuoweil May 20, 2026
81bf807
Merge main
shuoweil May 21, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions packages/bigframes/bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ def to_pandas_batches(
page_size: Optional[int] = None,
max_results: Optional[int] = None,
allow_large_results: Optional[bool] = None,
cell_execution_count: Optional[int] = None,
) -> PandasBatches:
"""Download results one message at a time.

Expand All @@ -713,6 +714,7 @@ def to_pandas_batches(
execution_spec.ExecutionSpec(
promise_under_10gb=under_10gb,
ordered=True,
cell_execution_count=cell_execution_count,
),
)
result_batches = execution_result.batches()
Expand Down
3 changes: 2 additions & 1 deletion packages/bigframes/bigframes/core/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import datetime
import threading
import uuid
from typing import Any, Callable, Literal, Set
from typing import Any, Callable, Literal, Optional, Set

import google.cloud.bigquery._job_helpers
import google.cloud.bigquery.job.query
Expand Down Expand Up @@ -129,6 +129,7 @@ class Event:
class EventEnvelope:
event: Event
progress_bar: ProgressBarType = _DEFAULT
cell_execution_count: Optional[int] = None


@dataclasses.dataclass(frozen=True)
Expand Down
16 changes: 13 additions & 3 deletions packages/bigframes/bigframes/core/global_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import threading
import traceback
import warnings
from typing import TYPE_CHECKING, Callable, Optional, TypeVar
from typing import TYPE_CHECKING, Callable, Iterable, Optional, TypeVar

import google.auth.exceptions

Expand Down Expand Up @@ -124,12 +124,22 @@ def with_default_session(func_: Callable[..., _T], *args, **kwargs) -> _T:
return func_(get_global_session(), *args, **kwargs)


def execution_history() -> "bigframes.session._ExecutionHistory":
def execution_history(
*,
events: Optional[Iterable[bigframes.core.events.Event]] = None,
job_ids: Optional[Iterable[str]] = None,
all_cells: bool = True,
) -> "bigframes.session._ExecutionHistory":
import pandas # noqa: F401

import bigframes.session

return with_default_session(bigframes.session.Session.execution_history)
return with_default_session(
bigframes.session.Session.execution_history,
events=events,
job_ids=job_ids,
all_cells=all_cells,
)


class _GlobalSessionContext:
Expand Down
13 changes: 13 additions & 0 deletions packages/bigframes/bigframes/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,16 @@ def timedelta_to_micros(
) * 1_000_000 + timedelta.microseconds

raise TypeError(f"Unrecognized input type: {type(timedelta)}")


def get_ipython_execution_count() -> typing.Optional[int]:
"""Returns the current IPython cell execution count if running in a notebook, else None."""
try:
import IPython

ipy = IPython.get_ipython()
if ipy is not None and hasattr(ipy, "execution_count"):
return ipy.execution_count
except (ImportError, NameError):
pass
return None
4 changes: 4 additions & 0 deletions packages/bigframes/bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1755,6 +1755,7 @@ def to_pandas_batches(
max_results: Optional[int] = None,
*,
allow_large_results: Optional[bool] = None,
cell_execution_count: Optional[int] = None,
) -> blocks.PandasBatches:
"""Stream DataFrame results to an iterable of pandas DataFrame.

Expand Down Expand Up @@ -1807,6 +1808,7 @@ def to_pandas_batches(
page_size=page_size,
max_results=max_results,
allow_large_results=allow_large_results,
cell_execution_count=cell_execution_count,
)

def _to_pandas_batches(
Expand All @@ -1815,11 +1817,13 @@ def _to_pandas_batches(
max_results: Optional[int] = None,
*,
allow_large_results: Optional[bool] = None,
cell_execution_count: Optional[int] = None,
) -> blocks.PandasBatches:
return self._block.to_pandas_batches(
page_size=page_size,
max_results=max_results,
allow_large_results=allow_large_results,
cell_execution_count=cell_execution_count,
)

def _compute_dry_run(self) -> google.cloud.bigquery.job.QueryJob:
Expand Down
12 changes: 10 additions & 2 deletions packages/bigframes/bigframes/display/anywidget.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):

self._dataframe = dataframe

from bigframes.core.utils import get_ipython_execution_count

self._cell_execution_count = get_ipython_execution_count()

super().__init__()

# Initialize attributes that might be needed by observers first
Expand Down Expand Up @@ -286,7 +290,10 @@ def _reset_batch_cache(self) -> None:
def _reset_batches_for_new_page_size(self) -> None:
"""Reset the batch iterator when page size changes."""
with bigframes.option_context("display.progress_bar", None):
self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
self._batches = self._dataframe.to_pandas_batches(
page_size=self.page_size,
cell_execution_count=self._cell_execution_count,
)

self._reset_batch_cache()

Expand Down Expand Up @@ -318,7 +325,8 @@ def _set_table_html(self) -> None:
current_sort_state = _SortState(tuple(sort_columns), tuple(sort_ascending))
if self._last_sort_state != current_sort_state:
self._batches = df_to_display.to_pandas_batches(
page_size=self.page_size
page_size=self.page_size,
cell_execution_count=self._cell_execution_count,
)
self._reset_batch_cache()
self._last_sort_state = current_sort_state
Expand Down
6 changes: 6 additions & 0 deletions packages/bigframes/bigframes/pandas/io/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ def _try_read_gbq_colab_sessionless_dry_run(
def _read_gbq_colab( # type: ignore[overload-overlap]
query_or_table: str,
*,
callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = ...,
pyformat_args: Optional[Dict[str, Any]] = ...,
dry_run: Literal[False] = ...,
) -> bigframes.dataframe.DataFrame: ...
Expand All @@ -309,6 +310,7 @@ def _read_gbq_colab( # type: ignore[overload-overlap]
def _read_gbq_colab(
query_or_table: str,
*,
callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = ...,
pyformat_args: Optional[Dict[str, Any]] = ...,
dry_run: Literal[True] = ...,
) -> pandas.Series: ...
Expand All @@ -317,6 +319,7 @@ def _read_gbq_colab(
def _read_gbq_colab(
query_or_table: str,
*,
callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = None,
pyformat_args: Optional[Dict[str, Any]] = None,
dry_run: bool = False,
) -> bigframes.dataframe.DataFrame | pandas.Series:
Expand All @@ -328,6 +331,8 @@ def _read_gbq_colab(
Args:
query_or_table (str):
SQL query or table ID (table ID not yet supported).
callback (Optional[Callable[[bigframes.core.events.EventEnvelope], None]]):
Callback to receive query execution events.
pyformat_args (Optional[Dict[str, Any]]):
Parameters to format into the query string.
dry_run (bool):
Expand Down Expand Up @@ -379,6 +384,7 @@ def _read_gbq_colab(
return global_session.with_default_session(
bigframes.session.Session._read_gbq_colab,
query_or_table,
callback=callback,
pyformat_args=pyformat_args,
dry_run=dry_run,
)
Expand Down
2 changes: 2 additions & 0 deletions packages/bigframes/bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,7 @@ def to_pandas_batches(
max_results: Optional[int] = None,
*,
allow_large_results: Optional[bool] = None,
cell_execution_count: Optional[int] = None,
) -> Iterable[pandas.Series]:
"""Stream Series results to an iterable of pandas Series.

Expand Down Expand Up @@ -811,6 +812,7 @@ def to_pandas_batches(
page_size=page_size,
max_results=max_results,
allow_large_results=allow_large_results,
cell_execution_count=cell_execution_count,
)
return map(lambda df: cast(pandas.Series, df.squeeze(1)), batches)

Expand Down
122 changes: 109 additions & 13 deletions packages/bigframes/bigframes/session/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@
class _ExecutionHistory:
def __init__(self, jobs: list[dict]):
self._df = pandas.DataFrame(jobs)
if self._df.empty:
self._df = pandas.DataFrame(
columns=[
"job_id",
"query_id",
"job_type",
"status",
"query",
"total_bytes_processed",
"job_url",
]
)

def to_dataframe(self) -> pandas.DataFrame:
"""Returns the execution history as a pandas DataFrame."""
Expand Down Expand Up @@ -199,9 +211,10 @@ def __init__(
self._clients_provider = clients_provider
self._location = context.location or "US"
else:
credentials, project = (
bigframes._config.auth.resolve_credentials_and_project(context)
)
(
credentials,
project,
) = bigframes._config.auth.resolve_credentials_and_project(context)
if context.location is None:
with bigquery.Client(
project=project,
Expand Down Expand Up @@ -430,12 +443,83 @@ def slot_millis_sum(self):
"""The sum of all slot time used by bigquery jobs in this session."""
return self._metrics.slot_millis

def execution_history(self) -> _ExecutionHistory:
def execution_history(
self,
*,
events: Optional[Iterable[bigframes.core.events.Event]] = None,
job_ids: Optional[Iterable[str]] = None,
all_cells: bool = True,
) -> _ExecutionHistory:
"""Returns the history of executions initiated by BigFrames in the current session.

Use `.to_dataframe()` on the result to get a pandas DataFrame.

Args:
events (Iterable[Event], optional):
Filter execution history to only include jobs associated with the given events.
job_ids (Iterable[str], optional):
Filter execution history to only include jobs matching the given job IDs.
all_cells (bool, optional):
If True, do not filter execution history by notebook cell. If False,
and running in Colab/Jupyter, automatically filter history to only include
jobs executed within the current cell. Defaults to True.
"""
return _ExecutionHistory([job.__dict__ for job in self._metrics.jobs])
jobs = [job.__dict__ for job in self._metrics.jobs]

if events is not None:
event_job_ids = {
getattr(event, "job_id", None)
for event in events
if getattr(event, "job_id", None) is not None
}
event_query_ids = {
getattr(event, "query_id", None)
for event in events
if getattr(event, "query_id", None) is not None
}
jobs = [
job
for job in jobs
if (
job.get("job_id") is not None and job.get("job_id") in event_job_ids
)
or (
job.get("query_id") is not None
and job.get("query_id") in event_query_ids
)
]

elif job_ids is not None:
target_job_ids = set(job_ids)
jobs = [
job
for job in jobs
if (
job.get("job_id") is not None
and job.get("job_id") in target_job_ids
)
or (
job.get("query_id") is not None
and job.get("query_id") in target_job_ids
)
]

elif not all_cells:
try:
import IPython

ipy = IPython.get_ipython()
if ipy is not None and hasattr(ipy, "execution_count"):
current_count = ipy.execution_count
jobs = [
job
for job in jobs
if job.get("cell_execution_count") == current_count
]
except (ImportError, NameError):
pass

return _ExecutionHistory(jobs)

@property
def _allows_ambiguity(self) -> bool:
Expand Down Expand Up @@ -584,6 +668,7 @@ def _read_gbq_colab(
self,
query: str,
*,
callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = ...,
pyformat_args: Optional[Dict[str, Any]] = None,
dry_run: Literal[False] = ...,
) -> dataframe.DataFrame: ...
Expand All @@ -593,6 +678,7 @@ def _read_gbq_colab(
self,
query: str,
*,
callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = ...,
pyformat_args: Optional[Dict[str, Any]] = None,
dry_run: Literal[True] = ...,
) -> pandas.Series: ...
Expand All @@ -601,8 +687,10 @@ def _read_gbq_colab(
def _read_gbq_colab(
self,
query: str,
# TODO: Add a callback parameter that takes some kind of Event object.
*,
callback: Optional[
Callable[[bigframes.core.events.EventEnvelope], None]
] = None,
pyformat_args: Optional[Dict[str, Any]] = None,
dry_run: bool = False,
) -> Union[dataframe.DataFrame, pandas.Series]:
Expand All @@ -615,6 +703,8 @@ def _read_gbq_colab(
query (str):
A SQL query string to execute. Results (if any) are turned into
a DataFrame.
callback (Optional[Callable[[bigframes.core.events.EventEnvelope], None]]):
Callback to receive query execution events.
pyformat_args (dict):
A dictionary of potential variables to replace in ``query``.
Note: strings are _not_ escaped. Use query parameters for these,
Expand All @@ -634,13 +724,19 @@ def _read_gbq_colab(
dry_run=dry_run,
)

return self._loader.read_gbq_query(
query=query,
index_col=bigframes.enums.DefaultIndexKind.NULL,
force_total_order=False,
dry_run=typing.cast(Union[Literal[False], Literal[True]], dry_run),
allow_large_results=allow_large_results,
)
def _run_query():
return self._loader.read_gbq_query(
query=query,
index_col=bigframes.enums.DefaultIndexKind.NULL,
force_total_order=False,
dry_run=typing.cast(Union[Literal[False], Literal[True]], dry_run),
allow_large_results=allow_large_results,
)

if callback is not None:
with self._publisher.subscribe(callback):
return _run_query()
return _run_query()

@overload
def read_gbq_query( # type: ignore[overload-overlap]
Expand Down
Loading
Loading