Example Usage¶
This section demonstrates usage of the gptables API functions and core classes.
For source code and data used in these examples, please see the examples directory of the package.
from pathlib import Path
import pandas as pd
import gptables as gpt

# Load the example data into a DataFrame.
data_dir = Path(__file__).parents[1]
penguins_df = pd.read_csv(data_dir / "test/data/penguins.csv")

# Any data processing could go here, as long as it ends with a pandas
# DataFrame that you want to write to a spreadsheet.

# Build the GPTable directly from its metadata elements.
penguins_gptable = gpt.GPTable(
    table=penguins_df,
    table_name="penguins_statistics",
    title="The Penguins Dataset",
    subtitles=["This is the first subtitle", "Just another subtitle"],
    scope="Penguins",
    source="Palmer Station, Antarctica",
)

# Every table must be associated to a sheet name for writing.
sheets_by_name = {"Penguins": penguins_gptable}

# Use write_workbook to win!
if __name__ == "__main__":
    target = data_dir / "python_penguins_gptable.xlsx"
    gpt.write_workbook(
        filename=target,
        sheets=sheets_by_name,
        contentsheet_options={"additional_elements": ["subtitles", "scope"]},
    )
    print("Output written at: ", target)
# Read the example data (imports come from the top of the file).
parent_dir = Path(__file__).parents[1]
penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv")

# Any data processing could go here as long as you end with a Pandas
# dataframe that you want to write in a spreadsheet.

# Collect the table metadata in one mapping, then hand it to GPTable
# via **kwargs so each value lands on the matching parameter.
table_elements = dict(
    table_name="penguins_statistics",
    title="The Penguins Dataset",
    subtitles=["This is the first subtitle", "Just another subtitle"],
    scope="Penguins",
    source="Palmer Station, Antarctica",
)
penguins_table = gpt.GPTable(table=penguins_data, **table_elements)

# Every table must be associated to a sheet name for writing.
penguins_sheets = {"Penguins": penguins_table}

# Use write_workbook to win!
if __name__ == "__main__":
    output_path = parent_dir / "python_penguins_gptable.xlsx"
    gpt.write_workbook(
        filename=output_path,
        sheets=penguins_sheets,
        contentsheet_options={"additional_elements": ["subtitles", "scope"]},
    )
    print("Output written at: ", output_path)
# Needed to duplicate the GPTable below; in a real script this import
# belongs at the top of the file with the others.
from copy import deepcopy

# Read data
parent_dir = Path(__file__).parents[1]
penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv")
# Any data processing could go here as long as you end with a Pandas
# dataframe that you want to write in a spreadsheet.

# Define table elements
penguins_table_name = "penguins_statistics"
penguins_title = "The Penguins Dataset"
penguins_subtitles = ["This is the first subtitle", "Just another subtitle"]
penguins_scope = "Penguins"
penguins_source = "Palmer Station, Antarctica"
kwargs = {
    "table_name": penguins_table_name,
    "title": penguins_title,
    "subtitles": penguins_subtitles,
    "scope": penguins_scope,
    "source": penguins_source,
}

# Define our GPTable.
# NOTE: table_name is already supplied through kwargs, so it must not be
# passed explicitly as well - doing both raises
# "TypeError: ... got multiple values for keyword argument 'table_name'".
penguins_table = gpt.GPTable(table=penguins_data, **kwargs)

penguins_table_copy = deepcopy(penguins_table)
penguins_table_copy.set_title("A copy of the first sheet")
# All tables in a single workbook must have a unique name.
penguins_table_copy.set_table_name("penguins_statistics_copy")

penguins_sheets = {"Penguins": penguins_table, "Copy of Penguins": penguins_table_copy}

# Use write_workbook to win!
if __name__ == "__main__":
    output_path = parent_dir / "python_penguins_cover_gptable.xlsx"
    gpt.write_workbook(filename=output_path, sheets=penguins_sheets)
    print("Output written at: ", output_path)
from pathlib import Path  # was missing: Path is used below for file locations

import pandas as pd
import gptables as gpt

# Read data
parent_dir = Path(__file__).parents[1]
penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv")
# Any data processing could go here as long as you end with a Pandas
# dataframe that you want to write in a spreadsheet.

# Define table elements
penguins_table_name = "penguins_statistics"
penguins_title = "The Penguins Dataset"
penguins_subtitles = ["This is the first subtitle", "Just another subtitle"]
penguins_scope = "Penguins"
penguins_source = "Palmer Station, Antarctica"
kwargs = {
    "table_name": penguins_table_name,
    "title": penguins_title,
    "subtitles": penguins_subtitles,
    "scope": penguins_scope,
    "source": penguins_source,
}
penguins_table = gpt.GPTable(table=penguins_data, **kwargs)
penguins_sheets = {"Penguins": penguins_table}

# Use write_workbook to win!
# Simply pass the filepath of the yaml file containing your theme to the
# GPTables Theme class and then to write_workbook.
if __name__ == "__main__":
    output_path = parent_dir / "python_penguins_gptable.xlsx"
    theme_path = str(Path(__file__).parent.parent / "themes/penguins_test_theme.yaml")
    gpt.write_workbook(
        filename=output_path,
        sheets=penguins_sheets,
        theme=gpt.Theme(theme_path),
        contentsheet_options={"additional_elements": ["subtitles", "scope"]},
    )
    print("Output written at: ", output_path)
from pathlib import Path  # was missing: Path is used below for file locations

import pandas as pd
import gptables as gpt

# Read data
parent_dir = Path(__file__).parents[1]
penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv")
# Any data processing could go here as long as you end with a Pandas
# dataframe that you want to write in a spreadsheet.

# Define table elements
penguins_table_name = "penguins_statistics"
penguins_title = "The Penguins Dataset"
penguins_subtitles = ["This is the first subtitle", "Just another subtitle"]
penguins_scope = "Penguins"
penguins_source = "Palmer Station, Antarctica"
kwargs = {
    "table_name": penguins_table_name,
    "title": penguins_title,
    "subtitles": penguins_subtitles,
    "scope": penguins_scope,
    "source": penguins_source,
    # The level 2 index from our Pandas dataframe is put in the first
    # (zeroth with Python indexing) column of the spreadsheet.
    "index_columns": {2: 0},
}

# Define our GPTable.
# NOTE: table_name is already supplied through kwargs, so it must not be
# repeated as an explicit keyword argument - doing both raises
# "TypeError: ... got multiple values for keyword argument 'table_name'".
penguins_table = gpt.GPTable(table=penguins_data, **kwargs)
penguins_sheets = {"Penguins": penguins_table}

# A cover sheet with introductory and contact information.
penguins_cover = gpt.Cover(
    cover_label="Cover",
    title="A Workbook containing two copies of the data",
    intro=["This is some introductory information", "And some more"],
    about=["Even more info about my data", "And a little more"],
    contact=[
        "John Doe",
        "Tel: 345345345",
        "Email: [john.doe@snailmail.com](mailto:john.doe@snailmail.com)",
    ],
)

# Use write_workbook to win!
if __name__ == "__main__":
    output_path = parent_dir / "python_penguins_cover_gptable.xlsx"
    gpt.write_workbook(
        filename=output_path,
        sheets=penguins_sheets,
        cover=penguins_cover,
    )
    print("Output written at: ", output_path)
# Example: adding notes and units to a GPTable, plus a separate notes
# worksheet built from a DataFrame.
from pathlib import Path
import pandas as pd
import gptables as gpt

# Read data
parent_dir = Path(__file__).parents[1]
penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv")
# Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet
# Define table elements
penguins_table_name = "penguins_statistics"
# Notes are added by using $$note$$ in text
penguins_title = "The Penguins Dataset$$noteabouty$$"
penguins_subtitles = [
    "This is the first subtitle$$noteaboutx$$",
    "Just another subtitle",
]
# Notes can also be included in column headers, see below
penguins_table_notes = {
    "species": "$$noteaboutx$$",
    2: "$$noteaboutz$$",
}  # Columns can be referenced either by index or by name
penguins_units = {
    2: "mm",
    "bill_depth_mm": "mm",
    4: "mm",
    "body_mass_g": "g",
}  # As above for column referencing
penguins_scope = "Penguins"
penguins_source = "Palmer Station, Antarctica"
kwargs = {
    "table_name": penguins_table_name,
    "title": penguins_title,
    "subtitles": penguins_subtitles,
    "units": penguins_units,
    "table_notes": penguins_table_notes,
    "scope": penguins_scope,
    "source": penguins_source,
}
# Define our GPTable
penguins_table = gpt.GPTable(table=penguins_data, **kwargs)
penguins_sheets = {"Penguins": penguins_table}
# Notesheet - Note that the ordering of each list only matters with respect to the other lists in the "notes" dictionary.
# GPTables will use the "Note reference" list to ensure the "Note text" is assigned correctly
notes = {
    "Note reference": ["noteaboutz", "noteaboutx", "noteabouty"],
    "Note text": [
        "This is a note about z linking to google.",
        "This is a note about x linking to duckduckgo.",
        "This is a note about y linking to the ONS website.",
    ],
    "Useful link": [
        "[google](https://www.google.com)",
        "[duckduckgo](https://duckduckgo.com/)",
        "[ONS](https://www.ons.gov.uk)",
    ],
}
penguins_notes_table = pd.DataFrame.from_dict(notes)
# Use write_workbook to win!
if __name__ == "__main__":
    output_path = parent_dir / "python_penguins_gptable.xlsx"
    gpt.write_workbook(
        filename=output_path,
        sheets=penguins_sheets,
        notes_table=penguins_notes_table,
        contentsheet_options={"additional_elements": ["subtitles", "scope"]},
    )
    print("Output written at: ", output_path)
# Example: additional formatting via the GPTable API, followed by direct
# XlsxWriter tweaks on the produced workbook before saving.
from pathlib import Path
import pandas as pd
import gptables as gpt

# Read data and arrange
parent_dir = Path(__file__).parents[1]
penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv")
# Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet
# Define table elements
penguins_table_name = "penguins_statistics"
penguins_title = "Penguins"
# Individual words/phrases can have formatting applied without the use of the additional_formatting argument
penguins_subtitles = [
    "The first subtitle",
    [{"bold": True}, "Just", " another subtitle"],
]
penguins_units = {key: "mm" for key in range(2, 5)}
penguins_scope = "Penguins"
# Define additional formatting
# Columns can be referenced by name or number
# Rows may only be referenced by number
# Column and row numbers refer to the table elements, including indexes and column headings
penguins_additional_formatting = [
    {
        "column": {
            # NOTE(review): "Species" and "Island" are capitalised here while
            # other examples use lowercase column names (e.g. "species") -
            # confirm against the CSV headers.
            "columns": ["Species", "Island"],  # str, int or list of either
            "format": {
                "align": "center",
                "italic": True,
            },  # The "Species" and "Island" columns are centre-aligned and made italic
        }
    },
    {
        "column": {"columns": [3], "format": {"left": 1}}
    },  # Gives the fourth column a left border
    {
        "row": {
            "rows": -1,  # Numbers only, but can refer to last row using -1
            "format": {
                "bottom": 1,
                "indent": 2,
            },  # Give the last row a border at the bottom of each cell and indents two levels
        }
    },
]
kwargs = {
    "table_name": penguins_table_name,
    "title": penguins_title,
    "subtitles": penguins_subtitles,
    "units": penguins_units,
    "scope": penguins_scope,
    "source": None,
    "additional_formatting": penguins_additional_formatting,
}
# Define our GPTable
penguins_table = gpt.GPTable(table=penguins_data, **kwargs)
# Use produce workbook to return GPWorkbook
if __name__ == "__main__":
    output_path = parent_dir / "python_penguins_additional_formatting_gptable.xlsx"
    wb = gpt.produce_workbook(filename=output_path, sheets={"Penguins": penguins_table})
    # Carry out additional modifications on the GPWorkbook or GPWorksheets
    # This supports all `XlsxWriter` package functionality
    ws = wb.worksheets()[0]
    ws.set_row(0, 30)  # Set the height of the first row
    # To format cells using the set_row or set_column functions we must use a workbook to create a format object
    italic_format = wb.add_format({"italic": True})
    ws.set_column(
        2, 3, 10, italic_format
    )  # Sets the width of the third and fourth column and makes them italic
    # Note that the first two arguments of set_column are the first and last columns (inclusive) you want to format as opposed
    # to set_row which only affects a single row at a time (the first argument).
    # Finally use the close method to save the output
    wb.close()
    print("Output written at: ", output_path)
# Example: a fuller accessible-workbook build - survey data with units,
# table notes, a cover sheet and a notes worksheet read from CSV.
from pathlib import Path
import pandas as pd
import gptables as gpt

# Read data and arrange
parent_dir = Path(__file__).parent
labour_market_data = pd.read_csv(parent_dir / "survey_data.csv")
labour_market_data.dropna(
    axis=0, how="all", inplace=True
)  # Remove empty rows in the data
labour_market_data.dropna(
    axis=1, how="all", inplace=True
)  # Remove empty columns in the data
col_names = [
    "Time period and dataset code row",
    "Number of people",
    "Economically active",
    "Employment level",
    "Unemployment level",
    "Economically inactive",
    "Economically active rate",
    "Employment rate",
    "Unemployment rate",
    "Economically inactive rate",
]
labour_market_data.columns = col_names
# Define table elements
table_name = "Labour_market_overview_accessibility_example_Nov21"
title = "Number and percentage of population aged 16 and over in each labour market activity group, UK, seasonally adjusted"
subtitles = [
    "This worksheet contains one table. Some cells refer to notes which can be found on the notes worksheet."
]
# Units per column: counts in thousands, rates in percent.
units = {
    1: "thousands",
    2: "thousands",
    3: "thousands",
    4: "thousands",
    5: "thousands",
    6: "%",
    7: "%",
    8: "%",
    9: "%",
}
# Note markers attached to column headers, keyed by column position.
table_notes = {
    2: "$$note 1$$",
    3: "$$note 2$$",
    4: "$$note 2$$",
    5: "$$note 3$$",
    7: "$$note 4$$",
    8: "$$note 4$$",
    9: "$$note 4$$",
}
scope = "Labour Market"
source = "Source: Office for National Statistics"
index = {2: 0}  # Column 0 is a level 2 index
additional_formatting = [
    {
        "row": {
            "rows": [1],
            "format": {"bold": True, "font_size": 14},
        }
    }
]
# or use kwargs to pass these to the appropriate parameters
kwargs = {
    "table_name": table_name,
    "title": title,
    "subtitles": subtitles,
    "units": units,
    "table_notes": table_notes,
    "scope": scope,
    "source": source,
    "index_columns": index,
    "additional_formatting": additional_formatting,
}
# Define our GPTable
survey_table = gpt.GPTable(table=labour_market_data, **kwargs)
sheets = {"sheet 1a": survey_table}
# Cover sheet: section headings are rich-text entries ([format, text]).
cover = gpt.Cover(
    cover_label="Cover",
    title="Labour market overview data tables, UK, December 2020 (accessibility example)",
    intro=[
        "This spreadsheet contains a selection of the data tables published alongside the Office for National Statistics' Labour market overview for December 2020. We have edited these data tables and the accompanying cover sheet, table of contents and notes worksheet to meet the legal accessibility regulations. It is intended to be an example of an accessible spreadsheet. The data tables and accompanying information have not been quality assured. Please see the original statistical release if you are looking for accurate data.",
        "[Labour market overview, UK: December 2020](https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/bulletins/uklabourmarket/december2020)",
    ],
    about=[
        [{"bold": True, "font_size": 14}, "Publication dates"],
        "This data tables in this spreadsheet were originally published at 7:00am 15 December 2020",
        "The next publication was published at 7:00am 26 January 2021.",
        [{"bold": True, "font_size": 14}, "Note on weighting methodology"],
        "Due to the coronavirus (COVID19) pandemic, all face to face interviewing for the Labour Force Survey was suspended and replaced with telephone interviewing. This change in mode for first interviews has changed the non-response bias of the survey, affecting interviews from March 2020 onwards. All data included in this spreadsheet have now been updated and are based on latest weighting methodology.",
        "More information about the impact of COVID19 on the Labour Force Survey",
        "Dataset identifier codes",
        "The four-character identification codes appearing in the tables are the ONS' references for the data series.",
        [{"bold": True, "font_size": 14}, "Comparing quarterly changes"],
        "When comparing quarterly changes ONS recommends comparing with the previous non-overlapping three-month average time period, for example, compare Apr to Jun with Jan to Mar, not with Mar to May.",
        [{"bold": True, "font_size": 14}, "Units, notes and no data"],
        "Some cells in the tables refer to notes which can be found in the notes worksheet. Note markers are presented in square brackets, for example: [note 1].",
        "Some cells have no data, when this is the case the words 'no data' are presented in square brackets, for example: '[no data]'. An explanation of why there is no data is given in the notes worksheet, see the column headings for which notes you should refer to.",
        "Some column headings give units, when this is the case the units are presented in round brackets to differentiate them from note markers.",
        [
            {"bold": True, "font_size": 14},
            "Historic publication dates for labour market statistics",
            " ",
        ],
        "The monthly labour market statistics release was first published in April 1998. Prior to April 1998 there was no integrated monthly release and the Labour Force Survey estimates were published separately, on different dates, from other labour market statistics. From April 2018 the usual publication day for the release was changed from Wednesday to Tuesday.",
        [{"bold": True, "font_size": 14}, "More labour market data"],
        "Other labour market datasets are available on the ONS website.",
        "Labour market statistics time series dataset on the ONS website.",
    ],
    contact=[
        "Tel: 01633455400",
        "Email: [labour.market@ons.gov.uk](mailto:labour.market@ons.gov.uk)",
    ],
)
# Notesheet
notes_table = pd.read_csv(parent_dir / "survey_data_notes.csv")
notes_table.dropna(axis=0, how="all", inplace=True)  # Remove empty rows in the data
notes_table.dropna(axis=1, how="all", inplace=True)  # Remove empty columns in the data
notes_table.columns = ["Note reference", "Note text"]
# Use write_workbook to win!
if __name__ == "__main__":
    output_path = parent_dir / "python_survey_data_gptable.xlsx"
    gpt.write_workbook(
        filename=output_path,
        sheets=sheets,
        cover=cover,
        notes_table=notes_table,
        contentsheet_options={"additional_elements": ["subtitles", "scope"]},
        auto_width=True,
        gridlines="show_all",
        cover_gridlines=True,
    )
    print("Output written at: ", output_path)
R Usage¶
Using gptables in R requires calling Python via the reticulate package.
However, we recommend the aftables R package, maintained by the Presentation Champions Data Visualisation Tools subgroup.