googleapis · shuoweil · May 14, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
@@ -696,6 +696,7 @@ def to_pandas_batches(
         page_size: Optional[int] = None,
         max_results: Optional[int] = None,
         allow_large_results: Optional[bool] = None,
+        cell_execution_count: Optional[int] = None,
     ) -> PandasBatches:
         """Download results one message at a time.
 
@@ -713,6 +714,7 @@ def to_pandas_batches(
             execution_spec.ExecutionSpec(
                 promise_under_10gb=under_10gb,
                 ordered=True,
+                cell_execution_count=cell_execution_count,
             ),
         )
         result_batches = execution_result.batches()

@@ -20,7 +20,7 @@
 import datetime
 import threading
 import uuid
-from typing import Any, Callable, Literal, Set
+from typing import Any, Callable, Literal, Optional, Set
 
 import google.cloud.bigquery._job_helpers
 import google.cloud.bigquery.job.query
@@ -129,6 +129,7 @@ class Event:
 class EventEnvelope:
     event: Event
     progress_bar: ProgressBarType = _DEFAULT
+    cell_execution_count: Optional[int] = None
 
 
 @dataclasses.dataclass(frozen=True)

@@ -19,7 +19,7 @@
 import threading
 import traceback
 import warnings
-from typing import TYPE_CHECKING, Callable, Optional, TypeVar
+from typing import TYPE_CHECKING, Callable, Iterable, Optional, TypeVar
 
 import google.auth.exceptions
 
@@ -124,12 +124,22 @@ def with_default_session(func_: Callable[..., _T], *args, **kwargs) -> _T:
     return func_(get_global_session(), *args, **kwargs)
 
 
-def execution_history() -> "bigframes.session._ExecutionHistory":
+def execution_history(
+    *,
+    events: Optional[Iterable[bigframes.core.events.Event]] = None,
+    job_ids: Optional[Iterable[str]] = None,
+    all_cells: bool = True,
+) -> "bigframes.session._ExecutionHistory":
+    """Return the default session's execution history.
+
+    All keyword filters are forwarded to ``Session.execution_history``.
+    """
     import pandas  # noqa: F401
 
     import bigframes.session
 
-    return with_default_session(bigframes.session.Session.execution_history)
+    filters = {"events": events, "job_ids": job_ids, "all_cells": all_cells}
+    return with_default_session(bigframes.session.Session.execution_history, **filters)
 
 
 class _GlobalSessionContext:

@@ -249,3 +249,16 @@ def timedelta_to_micros(
         ) * 1_000_000 + timedelta.microseconds
 
     raise TypeError(f"Unrecognized input type: {type(timedelta)}")
+
+
+def get_ipython_execution_count() -> typing.Optional[int]:
+    """Return the current IPython cell execution count.
+
+    Returns None when IPython cannot be imported, when ``get_ipython()``
+    returns None, or when the shell has no ``execution_count`` attribute.
+    """
+    try:
+        import IPython
+        return getattr(IPython.get_ipython(), "execution_count", None)
+    except (ImportError, NameError):
+        return None
@@ -1755,6 +1755,7 @@ def to_pandas_batches(
         max_results: Optional[int] = None,
         *,
         allow_large_results: Optional[bool] = None,
+        cell_execution_count: Optional[int] = None,
     ) -> blocks.PandasBatches:
         """Stream DataFrame results to an iterable of pandas DataFrame.
 
@@ -1807,6 +1808,7 @@ def to_pandas_batches(
             page_size=page_size,
             max_results=max_results,
             allow_large_results=allow_large_results,
+            cell_execution_count=cell_execution_count,
         )
 
     def _to_pandas_batches(
@@ -1815,11 +1817,13 @@ def _to_pandas_batches(
         max_results: Optional[int] = None,
         *,
         allow_large_results: Optional[bool] = None,
+        cell_execution_count: Optional[int] = None,
     ) -> blocks.PandasBatches:
         return self._block.to_pandas_batches(
             page_size=page_size,
             max_results=max_results,
             allow_large_results=allow_large_results,
+            cell_execution_count=cell_execution_count,
         )
 
     def _compute_dry_run(self) -> google.cloud.bigquery.job.QueryJob:

@@ -92,6 +92,10 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
 
         self._dataframe = dataframe
 
+        from bigframes.core.utils import get_ipython_execution_count
+
+        self._cell_execution_count = get_ipython_execution_count()
+
         super().__init__()
 
         # Initialize attributes that might be needed by observers first
@@ -286,7 +290,10 @@ def _reset_batch_cache(self) -> None:
     def _reset_batches_for_new_page_size(self) -> None:
         """Reset the batch iterator when page size changes."""
         with bigframes.option_context("display.progress_bar", None):
-            self._batches = self._dataframe.to_pandas_batches(page_size=self.page_size)
+            self._batches = self._dataframe.to_pandas_batches(
+                page_size=self.page_size,
+                cell_execution_count=self._cell_execution_count,
+            )
 
         self._reset_batch_cache()
 
@@ -318,7 +325,8 @@ def _set_table_html(self) -> None:
             current_sort_state = _SortState(tuple(sort_columns), tuple(sort_ascending))
             if self._last_sort_state != current_sort_state:
                 self._batches = df_to_display.to_pandas_batches(
-                    page_size=self.page_size
+                    page_size=self.page_size,
+                    cell_execution_count=self._cell_execution_count,
                 )
                 self._reset_batch_cache()
                 self._last_sort_state = current_sort_state

@@ -300,6 +300,7 @@ def _try_read_gbq_colab_sessionless_dry_run(
 def _read_gbq_colab(  # type: ignore[overload-overlap]
     query_or_table: str,
     *,
+    callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = ...,
     pyformat_args: Optional[Dict[str, Any]] = ...,
     dry_run: Literal[False] = ...,
 ) -> bigframes.dataframe.DataFrame: ...
@@ -309,6 +310,7 @@ def _read_gbq_colab(  # type: ignore[overload-overlap]
 def _read_gbq_colab(
     query_or_table: str,
     *,
+    callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = ...,
     pyformat_args: Optional[Dict[str, Any]] = ...,
     dry_run: Literal[True] = ...,
 ) -> pandas.Series: ...
@@ -317,6 +319,7 @@ def _read_gbq_colab(
 def _read_gbq_colab(
     query_or_table: str,
     *,
+    callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = None,
     pyformat_args: Optional[Dict[str, Any]] = None,
     dry_run: bool = False,
 ) -> bigframes.dataframe.DataFrame | pandas.Series:
@@ -328,6 +331,8 @@ def _read_gbq_colab(
     Args:
         query_or_table (str):
             SQL query or table ID (table ID not yet supported).
+        callback (Optional[Callable[[bigframes.core.events.EventEnvelope], None]]):
+            Callback to receive query execution events.
         pyformat_args (Optional[Dict[str, Any]]):
             Parameters to format into the query string.
         dry_run (bool):
@@ -379,6 +384,7 @@ def _read_gbq_colab(
     return global_session.with_default_session(
         bigframes.session.Session._read_gbq_colab,
         query_or_table,
+        callback=callback,
         pyformat_args=pyformat_args,
         dry_run=dry_run,
     )

@@ -759,6 +759,7 @@ def to_pandas_batches(
         max_results: Optional[int] = None,
         *,
         allow_large_results: Optional[bool] = None,
+        cell_execution_count: Optional[int] = None,
     ) -> Iterable[pandas.Series]:
         """Stream Series results to an iterable of pandas Series.
 
@@ -811,6 +812,7 @@ def to_pandas_batches(
             page_size=page_size,
             max_results=max_results,
             allow_large_results=allow_large_results,
+            cell_execution_count=cell_execution_count,
         )
         return map(lambda df: cast(pandas.Series, df.squeeze(1)), batches)
 

@@ -113,6 +113,18 @@
 class _ExecutionHistory:
     def __init__(self, jobs: list[dict]):
         self._df = pandas.DataFrame(jobs)
+        if self._df.empty:
+            # Nothing to show: fall back to named, zero-row columns.
+            empty_schema = [
+                "job_id",
+                "query_id",
+                "job_type",
+                "status",
+                "query",
+                "total_bytes_processed",
+                "job_url",
+            ]
+            self._df = pandas.DataFrame(columns=empty_schema)
 
     def to_dataframe(self) -> pandas.DataFrame:
         """Returns the execution history as a pandas DataFrame."""
@@ -199,9 +211,10 @@ def __init__(
             self._clients_provider = clients_provider
             self._location = context.location or "US"
         else:
-            credentials, project = (
-                bigframes._config.auth.resolve_credentials_and_project(context)
-            )
+            (
+                credentials,
+                project,
+            ) = bigframes._config.auth.resolve_credentials_and_project(context)
             if context.location is None:
                 with bigquery.Client(
                     project=project,
@@ -430,12 +443,83 @@ def slot_millis_sum(self):
         """The sum of all slot time used by bigquery jobs in this session."""
         return self._metrics.slot_millis
 
-    def execution_history(self) -> _ExecutionHistory:
+    def execution_history(
+        self,
+        *,
+        events: Optional[Iterable[bigframes.core.events.Event]] = None,
+        job_ids: Optional[Iterable[str]] = None,
+        all_cells: bool = True,
+    ) -> _ExecutionHistory:
         """Returns the history of executions initiated by BigFrames in the current session.
 
         Use `.to_dataframe()` on the result to get a pandas DataFrame.
+
+        At most one filter is applied. ``events`` takes precedence over
+        ``job_ids``, which takes precedence over ``all_cells``.
+
+        Args:
+            events (Iterable[Event], optional):
+                Keep only jobs whose job ID matches an event's ``job_id`` or
+                whose query ID matches an event's ``query_id``. Any iterable
+                is accepted, including a one-shot generator.
+            job_ids (Iterable[str], optional):
+                Keep only jobs whose job ID or query ID is in this collection.
+            all_cells (bool, optional):
+                If True, do not filter execution history by notebook cell. If
+                False, and running in IPython (Colab/Jupyter), keep only jobs
+                recorded with the current cell's execution count. Defaults to
+                True.
+
+        Returns:
+            _ExecutionHistory: The jobs that passed the selected filter.
         """
-        return _ExecutionHistory([job.__dict__ for job in self._metrics.jobs])
+        jobs = [job.__dict__ for job in self._metrics.jobs]
+
+        def _matches(job: dict, job_id_pool: set, query_id_pool: set) -> bool:
+            # A job lacking an identifier (None) never matches on that field.
+            job_id = job.get("job_id")
+            query_id = job.get("query_id")
+            return (job_id is not None and job_id in job_id_pool) or (
+                query_id is not None and query_id in query_id_pool
+            )
+
+        if events is not None:
+            # ``events`` is only promised to be an Iterable, so read it exactly
+            # once: a second pass over a generator would find it exhausted and
+            # silently produce an empty ID set.
+            event_job_ids: set = set()
+            event_query_ids: set = set()
+            for event in events:
+                job_id = getattr(event, "job_id", None)
+                if job_id is not None:
+                    event_job_ids.add(job_id)
+                query_id = getattr(event, "query_id", None)
+                if query_id is not None:
+                    event_query_ids.add(query_id)
+            jobs = [
+                job
+                for job in jobs
+                if _matches(job, event_job_ids, event_query_ids)
+            ]
+
+        elif job_ids is not None:
+            target_ids = set(job_ids)
+            jobs = [job for job in jobs if _matches(job, target_ids, target_ids)]
+
+        elif not all_cells:
+            # Reuse the shared helper instead of repeating the IPython lookup.
+            from bigframes.core.utils import get_ipython_execution_count
+
+            current_count = get_ipython_execution_count()
+            # None means "not inside IPython": leave the history unfiltered.
+            if current_count is not None:
+                jobs = [
+                    job
+                    for job in jobs
+                    if job.get("cell_execution_count") == current_count
+                ]
+
+        return _ExecutionHistory(jobs)
 
     @property
     def _allows_ambiguity(self) -> bool:
@@ -584,6 +668,7 @@ def _read_gbq_colab(
         self,
         query: str,
         *,
+        callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = ...,
         pyformat_args: Optional[Dict[str, Any]] = None,
         dry_run: Literal[False] = ...,
     ) -> dataframe.DataFrame: ...
@@ -593,6 +678,7 @@ def _read_gbq_colab(
         self,
         query: str,
         *,
+        callback: Optional[Callable[[bigframes.core.events.EventEnvelope], None]] = ...,
         pyformat_args: Optional[Dict[str, Any]] = None,
         dry_run: Literal[True] = ...,
     ) -> pandas.Series: ...
@@ -601,8 +687,10 @@ def _read_gbq_colab(
     def _read_gbq_colab(
         self,
         query: str,
-        # TODO: Add a callback parameter that takes some kind of Event object.
         *,
+        callback: Optional[
+            Callable[[bigframes.core.events.EventEnvelope], None]
+        ] = None,
         pyformat_args: Optional[Dict[str, Any]] = None,
         dry_run: bool = False,
     ) -> Union[dataframe.DataFrame, pandas.Series]:
@@ -615,6 +703,8 @@ def _read_gbq_colab(
             query (str):
                 A SQL query string to execute. Results (if any) are turned into
                 a DataFrame.
+            callback (Optional[Callable[[bigframes.core.events.EventEnvelope], None]]):
+                Callback to receive query execution events.
             pyformat_args (dict):
                 A dictionary of potential variables to replace in ``query``.
                 Note: strings are _not_ escaped. Use query parameters for these,
@@ -634,13 +724,19 @@ def _read_gbq_colab(
             dry_run=dry_run,
         )
 
-        return self._loader.read_gbq_query(
-            query=query,
-            index_col=bigframes.enums.DefaultIndexKind.NULL,
-            force_total_order=False,
-            dry_run=typing.cast(Union[Literal[False], Literal[True]], dry_run),
-            allow_large_results=allow_large_results,
-        )
+        def _run_query():
+            return self._loader.read_gbq_query(
+                query=query,
+                index_col=bigframes.enums.DefaultIndexKind.NULL,
+                force_total_order=False,
+                dry_run=typing.cast(Union[Literal[False], Literal[True]], dry_run),
+                allow_large_results=allow_large_results,
+            )
+
+        if callback is not None:
+            with self._publisher.subscribe(callback):
+                return _run_query()
+        return _run_query()
 
     @overload
     def read_gbq_query(  # type: ignore[overload-overlap]