-
Notifications
You must be signed in to change notification settings - Fork 147
Expand file tree
/
Copy pathdataframe_formatter.py
More file actions
843 lines (701 loc) · 28 KB
/
dataframe_formatter.py
File metadata and controls
843 lines (701 loc) · 28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""HTML formatting utilities for DataFusion DataFrames."""
from __future__ import annotations
import warnings
from typing import (
TYPE_CHECKING,
Any,
Protocol,
runtime_checkable,
)
from datafusion._internal import DataFrame as DataFrameInternal
if TYPE_CHECKING:
from collections.abc import Callable
def _validate_positive_int(value: Any, param_name: str) -> None:
"""Validate that a parameter is a positive integer.
Args:
value: The value to validate
param_name: Name of the parameter (used in error message)
Raises:
ValueError: If the value is not a positive integer
"""
if not isinstance(value, int) or value <= 0:
msg = f"{param_name} must be a positive integer"
raise ValueError(msg)
def _validate_bool(value: Any, param_name: str) -> None:
"""Validate that a parameter is a boolean.
Args:
value: The value to validate
param_name: Name of the parameter (used in error message)
Raises:
TypeError: If the value is not a boolean
"""
if not isinstance(value, bool):
msg = f"{param_name} must be a boolean"
raise TypeError(msg)
def _validate_formatter_parameters(
    max_cell_length: int,
    max_width: int,
    max_height: int,
    max_memory_bytes: int,
    min_rows: int,
    max_rows: int | None,
    repr_rows: int | None,
    enable_cell_expansion: bool,
    show_truncation_message: bool,
    use_shared_styles: bool,
    custom_css: str | None,
    style_provider: Any,
) -> int:
    """Check every formatter setting and resolve the effective ``max_rows``.

    Checks run in a fixed order (numeric limits, ``repr_rows``/``max_rows``
    resolution, the ``min_rows`` constraint, boolean flags, ``custom_css``,
    ``style_provider``), so the first failing check determines the error.

    Args:
        max_cell_length: Maximum cell length; must be a positive integer
        max_width: Maximum table width; must be a positive integer
        max_height: Maximum table height; must be a positive integer
        max_memory_bytes: Memory budget; must be a positive integer
        min_rows: Minimum rows to display; must be a positive integer
        max_rows: Maximum rows to display, or None to use the default of 10
        repr_rows: Deprecated alias for ``max_rows``
        enable_cell_expansion: Must be a bool
        show_truncation_message: Must be a bool
        use_shared_styles: Must be a bool
        custom_css: None or a string
        style_provider: None or an object satisfying ``StyleProvider``

    Returns:
        The resolved ``max_rows``: ``repr_rows`` if given, else ``max_rows``,
        else 10.

    Raises:
        ValueError: If a numeric setting is not a positive integer, if
            ``repr_rows`` and ``max_rows`` are both given with different
            values, or if ``min_rows`` exceeds the resolved ``max_rows``
        TypeError: If a flag is not a bool, ``custom_css`` is not a string,
            or ``style_provider`` does not satisfy ``StyleProvider``

    Warns:
        DeprecationWarning: If ``repr_rows`` is supplied
    """
    # Numeric limits, checked in declaration order.
    numeric_settings = (
        ("max_cell_length", max_cell_length),
        ("max_width", max_width),
        ("max_height", max_height),
        ("max_memory_bytes", max_memory_bytes),
        ("min_rows", min_rows),
    )
    for setting_name, setting_value in numeric_settings:
        _validate_positive_int(setting_value, setting_name)

    # Fold the deprecated alias into the effective row limit.
    effective_max_rows = max_rows
    if repr_rows is not None:
        warnings.warn(
            "repr_rows parameter is deprecated, use max_rows instead",
            DeprecationWarning,
            stacklevel=4,
        )
        _validate_positive_int(repr_rows, "repr_rows")
        if effective_max_rows is not None and repr_rows != effective_max_rows:
            msg = "Cannot specify both repr_rows and max_rows; use max_rows only"
            raise ValueError(msg)
        effective_max_rows = repr_rows

    # Fall back to the default when neither name was supplied.
    if effective_max_rows is None:
        effective_max_rows = 10
    _validate_positive_int(effective_max_rows, "max_rows")

    if min_rows > effective_max_rows:
        msg = "min_rows must be less than or equal to max_rows"
        raise ValueError(msg)

    # Boolean flags.
    flag_settings = (
        ("enable_cell_expansion", enable_cell_expansion),
        ("show_truncation_message", show_truncation_message),
        ("use_shared_styles", use_shared_styles),
    )
    for flag_name, flag_value in flag_settings:
        _validate_bool(flag_value, flag_name)

    if not (custom_css is None or isinstance(custom_css, str)):
        msg = "custom_css must be None or a string"
        raise TypeError(msg)

    if not (style_provider is None or isinstance(style_provider, StyleProvider)):
        msg = "style_provider must implement the StyleProvider protocol"
        raise TypeError(msg)

    return effective_max_rows
@runtime_checkable
class CellFormatter(Protocol):
    """Protocol for cell value formatters.

    Any callable that accepts a single value and returns its display string
    satisfies this protocol.
    """

    def __call__(self, value: Any) -> str:
        """Format a cell value to string representation.

        Args:
            value: The cell value to format

        Returns:
            The text to display for the value
        """
        ...
@runtime_checkable
class StyleProvider(Protocol):
    """Protocol for HTML style providers.

    Implementations supply the inline CSS used for table cells and header
    cells. The protocol is runtime-checkable, so ``isinstance`` can be used
    to test that an object defines both methods (signatures are not checked).
    """

    def get_cell_style(self) -> str:
        """Get the CSS style for table cells.

        Returns:
            CSS declarations as a single string
        """
        ...

    def get_header_style(self) -> str:
        """Get the CSS style for header cells.

        Returns:
            CSS declarations as a single string
        """
        ...
class DefaultStyleProvider:
    """Built-in style provider used when no custom provider is supplied."""

    def get_cell_style(self) -> str:
        """Return the inline CSS applied to every data cell.

        Returns:
            CSS declaration string for ``<td>`` elements
        """
        shared_prefix = "border: 1px solid black; padding: 8px; text-align: left; "
        return shared_prefix + "white-space: nowrap;"

    def get_header_style(self) -> str:
        """Return the inline CSS applied to every header cell.

        Returns:
            CSS declaration string for ``<th>`` elements
        """
        fragments = [
            "border: 1px solid black; padding: 8px; text-align: left; ",
            "background-color: #f2f2f2; white-space: nowrap; min-width: fit-content; ",
            "max-width: fit-content;",
        ]
        return "".join(fragments)
class DataFrameHtmlFormatter:
    """Configurable HTML formatter for DataFusion DataFrames.

    This class handles the HTML rendering of DataFrames for display in
    Jupyter notebooks and other rich display contexts.

    This class supports extension through composition. Key extension points:

    - Provide a custom StyleProvider for styling cells and headers
    - Register custom formatters for specific types
    - Provide custom cell builders for specialized cell rendering

    Args:
        max_cell_length: Maximum characters to display in a cell before truncation
        max_width: Maximum width of the HTML table in pixels
        max_height: Maximum height of the HTML table in pixels
        max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB)
        min_rows: Minimum number of rows to display (must be <= max_rows)
        max_rows: Maximum number of rows to display in repr output
        repr_rows: Deprecated alias for max_rows
        enable_cell_expansion: Whether to add expand/collapse buttons for long cell
            values
        custom_css: Additional CSS to include in the HTML output
        show_truncation_message: Whether to display a message when data is truncated
        style_provider: Custom provider for cell and header styles
        use_shared_styles: Whether to load styles and scripts only once per notebook
            session
    """

    def __init__(
        self,
        max_cell_length: int = 25,
        max_width: int = 1000,
        max_height: int = 300,
        max_memory_bytes: int = 2 * 1024 * 1024,  # 2 MB
        min_rows: int = 10,
        max_rows: int | None = None,
        repr_rows: int | None = None,
        enable_cell_expansion: bool = True,
        custom_css: str | None = None,
        show_truncation_message: bool = True,
        style_provider: StyleProvider | None = None,
        use_shared_styles: bool = True,
    ) -> None:
        """Initialize the HTML formatter.

        Args:
            max_cell_length: Maximum length of cell content before truncation.
            max_width: Maximum width of the displayed table in pixels.
            max_height: Maximum height of the displayed table in pixels.
            max_memory_bytes: Maximum memory in bytes for rendered data.
            min_rows: Minimum number of rows to display. Must not exceed
                ``max_rows``.
            max_rows: Maximum number of rows to display. Defaults to 10 when
                neither ``max_rows`` nor ``repr_rows`` is given.
            repr_rows: Deprecated alias for ``max_rows``. Use ``max_rows``
                instead.
            enable_cell_expansion: Whether long cells get an expand/collapse
                button.
            custom_css: Custom CSS emitted in a ``<style>`` tag with the table.
            show_truncation_message: Whether to show a message indicating that
                the data has been truncated.
            style_provider: Provider of CSS styles for the HTML table. If None,
                DefaultStyleProvider is used.
            use_shared_styles: Stored on the instance. In this class only
                ``_build_html_footer`` reads it.

        Raises:
            ValueError: If max_cell_length, max_width, max_height,
                max_memory_bytes, min_rows or max_rows is not a positive
                integer, if min_rows exceeds max_rows, or if repr_rows and
                max_rows are both given with different values.
            TypeError: If enable_cell_expansion, show_truncation_message, or
                use_shared_styles is not a boolean, if custom_css is provided
                but is not a string, or if style_provider is provided but does
                not implement the StyleProvider protocol.
        """
        # Validate all parameters and get resolved max_rows
        resolved_max_rows = _validate_formatter_parameters(
            max_cell_length,
            max_width,
            max_height,
            max_memory_bytes,
            min_rows,
            max_rows,
            repr_rows,
            enable_cell_expansion,
            show_truncation_message,
            use_shared_styles,
            custom_css,
            style_provider,
        )

        self.max_cell_length = max_cell_length
        self.max_width = max_width
        self.max_height = max_height
        self.max_memory_bytes = max_memory_bytes
        self.min_rows = min_rows
        self._max_rows = resolved_max_rows
        self.enable_cell_expansion = enable_cell_expansion
        self.custom_css = custom_css
        self.show_truncation_message = show_truncation_message
        self.style_provider = style_provider or DefaultStyleProvider()
        self.use_shared_styles = use_shared_styles

        # Registry for custom type formatters
        self._type_formatters: dict[type, CellFormatter] = {}

        # Custom cell builders
        self._custom_cell_builder: Callable[[Any, int, int, str], str] | None = None
        self._custom_header_builder: Callable[[Any], str] | None = None

    @property
    def max_rows(self) -> int:
        """Get the maximum number of rows to display.

        Returns:
            The maximum number of rows to display in repr output
        """
        return self._max_rows

    @max_rows.setter
    def max_rows(self, value: int) -> None:
        """Set the maximum number of rows to display.

        Args:
            value: The maximum number of rows

        Raises:
            ValueError: If value is not a positive integer
        """
        # Apply the same check the constructor applies, so an invalid value
        # cannot be smuggled in after construction.
        _validate_positive_int(value, "max_rows")
        self._max_rows = value

    @property
    def repr_rows(self) -> int:
        """Get the maximum number of rows (deprecated name).

        .. deprecated::
            Use :attr:`max_rows` instead. This property is provided for
            backward compatibility.

        Returns:
            The maximum number of rows to display
        """
        return self._max_rows

    @repr_rows.setter
    def repr_rows(self, value: int) -> None:
        """Set the maximum number of rows using deprecated name.

        .. deprecated::
            Use :attr:`max_rows` setter instead. This property is provided for
            backward compatibility.

        Args:
            value: The maximum number of rows

        Raises:
            ValueError: If value is not a positive integer
        """
        warnings.warn(
            "repr_rows is deprecated, use max_rows instead",
            DeprecationWarning,
            stacklevel=2,
        )
        _validate_positive_int(value, "repr_rows")
        self._max_rows = value

    def register_formatter(self, type_class: type, formatter: CellFormatter) -> None:
        """Register a custom formatter for a specific data type.

        Registering the same type again replaces the earlier formatter.

        Args:
            type_class: The type to register a formatter for
            formatter: Function that takes a value of the given type and returns
                a formatted string
        """
        self._type_formatters[type_class] = formatter

    def set_custom_cell_builder(
        self, builder: Callable[[Any, int, int, str], str]
    ) -> None:
        """Set a custom cell builder function.

        Args:
            builder: Function that takes (value, row, col, table_id) and returns HTML
        """
        self._custom_cell_builder = builder

    def set_custom_header_builder(self, builder: Callable[[Any], str]) -> None:
        """Set a custom header builder function.

        Args:
            builder: Function that takes a field and returns HTML
        """
        self._custom_header_builder = builder

    def format_html(
        self,
        batches: list,
        schema: Any,
        has_more: bool = False,
        table_uuid: str | None = None,
    ) -> str:
        """Format record batches as HTML.

        This method is used by DataFrame's _repr_html_ implementation and can be
        called directly when custom HTML rendering is needed.

        Args:
            batches: List of Arrow RecordBatch objects
            schema: Arrow Schema object
            has_more: Whether there are more batches not shown
            table_uuid: Unique ID for the table, used for JavaScript interactions

        Returns:
            HTML string representation of the data, or the plain text
            ``"No data to display"`` when ``batches`` is empty

        Raises:
            TypeError: If batches are provided but schema is None or not
                iterable
        """
        if not batches:
            return "No data to display"

        # Validate schema
        if schema is None or not hasattr(schema, "__iter__"):
            msg = "Schema must be provided"
            raise TypeError(msg)

        # Generate a unique ID if none provided
        table_uuid = table_uuid or f"df-{id(batches)}"

        # Build HTML components
        html = []
        html.extend(self._build_html_header())
        html.extend(self._build_table_container_start())

        # Add table header and body
        html.extend(self._build_table_header(schema))
        html.extend(self._build_table_body(batches, table_uuid))

        html.append("</table>")
        html.append("</div>")

        # Add footer (JavaScript and messages)
        if self.enable_cell_expansion:
            html.append(self._get_javascript())

        # Always add truncation message if needed (independent of styles)
        if has_more and self.show_truncation_message:
            html.append("<div>Data truncated due to size.</div>")

        return "\n".join(html)

    def format_str(
        self,
        batches: list,
        schema: Any,
        has_more: bool = False,
        table_uuid: str | None = None,
    ) -> str:
        """Format record batches as a string.

        This method is used by DataFrame's __repr__ implementation and can be
        called directly when string rendering is needed. It delegates entirely
        to ``DataFrameInternal.default_str_repr``.

        Args:
            batches: List of Arrow RecordBatch objects
            schema: Arrow Schema object
            has_more: Whether there are more batches not shown
            table_uuid: Unique ID for the table; forwarded unchanged

        Returns:
            String representation of the data
        """
        return DataFrameInternal.default_str_repr(batches, schema, has_more, table_uuid)

    def _build_html_header(self) -> list[str]:
        """Build the HTML header with CSS styles.

        Returns:
            A list with the style-injection script (only when cell expansion
            is enabled) followed by a ``<style>`` tag for ``custom_css`` (only
            when it is set). May be empty.
        """
        html: list[str] = []

        # Only inject the shared 'df-styles' element when there is CSS to put
        # in it. The script is guarded by the element's existence, so creating
        # it empty would stop a later expansion-enabled table from ever
        # installing the expandable-cell rules.
        if self.enable_cell_expansion:
            default_css = self._get_default_css()
            script = f"""
<script>
if (!document.getElementById('df-styles')) {{
const style = document.createElement('style');
style.id = 'df-styles';
style.textContent = `{default_css}`;
document.head.appendChild(style);
}}
</script>
"""
            html.append(script)

        if self.custom_css:
            html.append(f"<style>{self.custom_css}</style>")

        return html

    def _build_table_container_start(self) -> list[str]:
        """Build the opening tags for the table container.

        Returns:
            The opening scrollable ``<div>`` (sized from ``max_width`` and
            ``max_height``) and the opening ``<table>`` tag
        """
        html = []
        html.append(
            f'<div style="width: 100%; max-width: {self.max_width}px; '
            f"max-height: {self.max_height}px; overflow: auto; border: "
            '1px solid #ccc;">'
        )
        html.append('<table style="border-collapse: collapse">')
        return html

    def _build_table_header(self, schema: Any) -> list[str]:
        """Build the HTML table header with column names.

        Args:
            schema: Iterable of fields; each field's ``name`` is used unless a
                custom header builder is set

        Returns:
            The ``<thead>`` section as a list of HTML fragments
        """
        html = []
        html.append("<thead>")
        html.append("<tr>")
        for field in schema:
            if self._custom_header_builder:
                html.append(self._custom_header_builder(field))
            else:
                # NOTE(review): field.name is interpolated without HTML
                # escaping.
                html.append(
                    f"<th style='{self.style_provider.get_header_style()}'>"
                    f"{field.name}</th>"
                )
        html.append("</tr>")
        html.append("</thead>")
        return html

    def _build_table_body(self, batches: list, table_uuid: str) -> list[str]:
        """Build the HTML table body with data rows.

        Args:
            batches: Batches exposing ``num_rows`` and ``columns``
            table_uuid: Table ID used to build element IDs for expandable cells

        Returns:
            The ``<tbody>`` section as a list of HTML fragments
        """
        html = []
        html.append("<tbody>")

        # Running row number across all batches; incremented before use, so
        # the first row is numbered 1.
        row_count = 0
        for batch in batches:
            for row_idx in range(batch.num_rows):
                row_count += 1
                html.append("<tr>")

                for col_idx, column in enumerate(batch.columns):
                    # Get the raw value from the column
                    raw_value = self._get_cell_value(column, row_idx)

                    # Always run the type formatters first
                    formatted_value = self._format_cell_value(raw_value)

                    # Then apply either custom cell builder or standard cell
                    # formatting
                    if self._custom_cell_builder:
                        # The custom builder receives the raw value only; the
                        # formatted value is not passed to it.
                        cell_html = self._custom_cell_builder(
                            raw_value, row_count, col_idx, table_uuid
                        )
                        html.append(cell_html)
                    else:
                        # The expansion decision is based on the length of the
                        # raw value's string form, not the formatted text.
                        if (
                            len(str(raw_value)) > self.max_cell_length
                            and self.enable_cell_expansion
                        ):
                            cell_html = self._build_expandable_cell(
                                formatted_value, row_count, col_idx, table_uuid
                            )
                        else:
                            cell_html = self._build_regular_cell(formatted_value)
                        html.append(cell_html)

                html.append("</tr>")

        html.append("</tbody>")
        return html

    def _get_cell_value(self, column: Any, row_idx: int) -> Any:
        """Extract a cell value from a column.

        Args:
            column: Arrow array
            row_idx: Row index

        Returns:
            ``value.as_py()`` when the indexed value has an ``as_py``
            attribute, otherwise the indexed value itself. ``None`` when
            indexing or conversion raises ``AttributeError`` or ``TypeError``.
        """
        try:
            value = column[row_idx]
            if hasattr(value, "as_py"):
                return value.as_py()
        except (AttributeError, TypeError):
            # Best effort: a cell that cannot be read is rendered as None.
            return None
        return value

    def _format_cell_value(self, value: Any) -> str:
        """Format a cell value for display.

        Uses registered type formatters if available. Formatters are tried in
        registration order and the first whose type matches via ``isinstance``
        wins.

        Args:
            value: The cell value to format

        Returns:
            Formatted cell value as string
        """
        # Check for custom type formatters
        for type_cls, formatter in self._type_formatters.items():
            if isinstance(value, type_cls):
                return formatter(value)

        # If no formatter matched, return string representation
        return str(value)

    def _build_expandable_cell(
        self, formatted_value: str, row_count: int, col_idx: int, table_uuid: str
    ) -> str:
        """Build an expandable cell for long content.

        Args:
            formatted_value: Full text of the cell
            row_count: Row number used in the element IDs
            col_idx: Column index used in the element IDs
            table_uuid: Table ID used as the element ID prefix

        Returns:
            A ``<td>`` holding the truncated text, the full text, and a button
            that calls ``toggleDataFrameCellText``
        """
        # NOTE(review): values are interpolated without HTML escaping.
        short_value = str(formatted_value)[: self.max_cell_length]
        return (
            f"<td style='{self.style_provider.get_cell_style()}'>"
            f"<div class='expandable-container'>"
            "<span class='expandable' "
            f"id='{table_uuid}-min-text-{row_count}-{col_idx}'>"
            f"{short_value}</span>"
            "<span class='full-text' "
            f"id='{table_uuid}-full-text-{row_count}-{col_idx}'>"
            f"{formatted_value}</span>"
            f"<button class='expand-btn' "
            f"onclick=\"toggleDataFrameCellText('{table_uuid}',{row_count},{col_idx})\">"
            f"...</button>"
            f"</div>"
            f"</td>"
        )

    def _build_regular_cell(self, formatted_value: str) -> str:
        """Build a regular table cell.

        Args:
            formatted_value: Text to place in the cell

        Returns:
            A ``<td>`` element styled by the style provider
        """
        # NOTE(review): formatted_value is interpolated without HTML escaping.
        return (
            f"<td style='{self.style_provider.get_cell_style()}'>{formatted_value}</td>"
        )

    def _build_html_footer(self, has_more: bool) -> list[str]:
        """Build the HTML footer with JavaScript and messages.

        ``format_html`` does not call this helper; it assembles its own footer
        and does not consult ``use_shared_styles``.

        Args:
            has_more: Whether there is more data than was rendered

        Returns:
            Footer fragments: the JavaScript (only when cell expansion is
            enabled and shared styles are off) and the truncation message
            (only when ``has_more`` and ``show_truncation_message`` are set)
        """
        html = []

        # Add JavaScript for interactivity only if cell expansion is enabled
        # and we're not using the shared styles approach
        if self.enable_cell_expansion and not self.use_shared_styles:
            html.append(self._get_javascript())

        # Add truncation message if needed
        if has_more and self.show_truncation_message:
            html.append("<div>Data truncated due to size.</div>")

        return html

    def _get_default_css(self) -> str:
        """Get default CSS styles for the HTML table.

        Returns:
            CSS rules for the expandable-cell classes
        """
        return """
.expandable-container {
display: inline-block;
max-width: 200px;
}
.expandable {
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
display: block;
}
.full-text {
display: none;
white-space: normal;
}
.expand-btn {
cursor: pointer;
color: blue;
text-decoration: underline;
border: none;
background: none;
font-size: inherit;
display: block;
margin-top: 5px;
}
"""

    def _get_javascript(self) -> str:
        """Get JavaScript code for interactive elements.

        Returns:
            A ``<script>`` block that defines ``toggleDataFrameCellText`` once
            per page, guarded by ``window.__df_formatter_js_loaded__``
        """
        return """
<script>
if (!window.__df_formatter_js_loaded__) {
window.__df_formatter_js_loaded__ = true;
window.toggleDataFrameCellText = function (table_uuid, row, col) {
var shortText = document.getElementById(
table_uuid + "-min-text-" + row + "-" + col
);
var fullText = document.getElementById(
table_uuid + "-full-text-" + row + "-" + col
);
var button = event.target;
if (fullText.style.display === "none") {
shortText.style.display = "none";
fullText.style.display = "inline";
button.textContent = "(less)";
} else {
shortText.style.display = "inline";
fullText.style.display = "none";
button.textContent = "...";
}
};
}
</script>
"""
class FormatterManager:
    """Manager class for the global DataFrame HTML formatter instance.

    The active formatter is stored in the class attribute
    ``_default_formatter`` and is read and replaced through the class methods
    below.
    """

    # Shared formatter; created with default settings when the class is defined.
    _default_formatter: DataFrameHtmlFormatter = DataFrameHtmlFormatter()

    @classmethod
    def set_formatter(cls, formatter: DataFrameHtmlFormatter) -> None:
        """Set the global DataFrame HTML formatter.

        Args:
            formatter: The formatter instance to use globally
        """
        cls._default_formatter = formatter
        # Call the module-level refresh hook after swapping the formatter.
        _refresh_formatter_reference()

    @classmethod
    def get_formatter(cls) -> DataFrameHtmlFormatter:
        """Get the current global DataFrame HTML formatter.

        Returns:
            The global HTML formatter instance
        """
        return cls._default_formatter
def get_formatter() -> DataFrameHtmlFormatter:
    """Get the current global DataFrame HTML formatter.

    This function is used by the DataFrame._repr_html_ implementation to access
    the shared formatter instance. It can also be used directly when custom
    HTML rendering is needed. It is a thin wrapper around
    ``FormatterManager.get_formatter``.

    Returns:
        The global HTML formatter instance

    Example:
        >>> from datafusion.html_formatter import get_formatter
        >>> formatter = get_formatter()
        >>> formatter.max_cell_length = 50  # Increase cell length
    """
    return FormatterManager.get_formatter()
def set_formatter(formatter: DataFrameHtmlFormatter) -> None:
    """Set the global DataFrame HTML formatter.

    Thin wrapper around ``FormatterManager.set_formatter``.

    Args:
        formatter: The formatter instance to use globally

    Example:
        >>> from datafusion.html_formatter import get_formatter, set_formatter
        >>> custom_formatter = DataFrameHtmlFormatter(max_cell_length=100)
        >>> set_formatter(custom_formatter)
    """
    FormatterManager.set_formatter(formatter)
def configure_formatter(**kwargs: Any) -> None:
    """Configure the global DataFrame HTML formatter.

    This function creates a new formatter with the provided configuration
    and sets it as the global formatter for all DataFrames.

    Args:
        **kwargs: Formatter configuration parameters like max_cell_length,
            max_width, max_height, enable_cell_expansion, etc.

    Raises:
        ValueError: If any unknown parameter name is provided, or if
            DataFrameHtmlFormatter rejects a parameter value
        TypeError: If DataFrameHtmlFormatter rejects a parameter's type

    Example:
        >>> from datafusion.html_formatter import configure_formatter
        >>> configure_formatter(
        ...     max_cell_length=50,
        ...     max_height=500,
        ...     enable_cell_expansion=True,
        ...     use_shared_styles=True
        ... )
    """
    # Valid parameters accepted by DataFrameHtmlFormatter
    valid_params = {
        "max_cell_length",
        "max_width",
        "max_height",
        "max_memory_bytes",
        "min_rows",
        "max_rows",
        "repr_rows",
        "enable_cell_expansion",
        "custom_css",
        "show_truncation_message",
        "style_provider",
        "use_shared_styles",
    }

    # Check for invalid parameters
    invalid_params = set(kwargs) - valid_params
    if invalid_params:
        # Sets have no stable iteration order; sort both lists so the error
        # message is identical from run to run.
        msg = (
            f"Invalid formatter parameters: {', '.join(sorted(invalid_params))}. "
            f"Valid parameters are: {', '.join(sorted(valid_params))}"
        )
        raise ValueError(msg)

    # Create and set formatter with validated parameters
    set_formatter(DataFrameHtmlFormatter(**kwargs))
def reset_formatter() -> None:
    """Reset the global DataFrame HTML formatter to default settings.

    This function creates a new formatter with default configuration
    and sets it as the global formatter for all DataFrames. Formatters and
    builders registered on the previous instance are not carried over, since
    a fresh instance replaces it.

    Example:
        >>> from datafusion.html_formatter import reset_formatter
        >>> reset_formatter()  # Reset formatter to default settings
    """
    formatter = DataFrameHtmlFormatter()
    set_formatter(formatter)
def _refresh_formatter_reference() -> None:
    """Refresh formatter reference in any modules using it.

    Intended as a hook so that changes to the formatter are reflected in
    existing DataFrames that might be caching the formatter reference. The
    body currently contains no statements, so calling it has no effect.
    """
    # This is a no-op but signals modules to refresh their reference