import dataclasses
import typing
Anything subscriptable can be used with this type of adapter. For example,
you might have input data as a list of tuples (e.g. rows read using Python's
built-in `csv` library):
# Rows are plain sequences here, so the keys are positions:
# element 0 holds the home goals and element 1 the away goals.
index_adapter = KeyAdapter(home_goals=0, away_goals=1)
assert index_adapter.home_goals([1, 2]) == 1
assert index_adapter.away_goals([1, 2]) == 2
Or, you might be using a list of dicts.
# Each adapter field is mapped to the dict key that stores it.
dict_adapter = KeyAdapter(
    home_goals='hg',
    away_goals='ag',
    home_team='home',
    away_team='away',
)

# A single match row; reused by the later lumped-adapter example.
example_dict = dict(home='Team 1', away='Team 2', hg=4, ag=3)

# Goals are read from the 'hg' / 'ag' keys ...
assert dict_adapter.home_goals(example_dict) == 4
assert dict_adapter.away_goals(example_dict) == 3
# ... and team names from the 'home' / 'away' keys.
assert dict_adapter.home_team(example_dict) == 'Team 1'
assert dict_adapter.away_team(example_dict) == 'Team 2'
Nested data can be accessed by supplying a list of keys, which are applied in order:
# Each list is a path of keys: first look up 'scoreline', then index into
# the resulting pair (0 = home goals, 1 = away goals).
nested_dict_adapter = KeyAdapter(
    home_goals=['scoreline', 0],
    away_goals=['scoreline', 1],
)

example_nested_dict = {'scoreline': [1, 1]}

assert nested_dict_adapter.home_goals(example_nested_dict) == 1
assert nested_dict_adapter.away_goals(example_nested_dict) == 1
`KeyAdapter` could be used alongside `pd.DataFrame.iterrows` as well; however, iterating over a DataFrame is much faster with `pd.DataFrame.itertuples`, which yields namedtuples whose named fields are accessed as attributes rather than by key.
Likewise, you can't use a `KeyAdapter` with custom objects (e.g. dataclasses).
In these cases, you need an `AttributeAdapter`.
@dataclasses.dataclass()
class ExampleData:
    """Flat match record used to demonstrate attribute-based adapters.

    Field order matters for positional construction: ``hg, ag, home, away``.
    """

    hg: int  # goals scored by the home team
    ag: int  # goals scored by the away team
    home: str  # home team name
    away: str  # away team name
# Same field mapping as the dict example, but values are read as attributes.
attr_adapter = AttributeAdapter(
    home_goals='hg',
    away_goals='ag',
    home_team='home',
    away_team='away',
)

example_attr = ExampleData(
    hg=5,
    ag=1,
    home='Another home team',
    away='Another away team',
)

# Goals come from the .hg / .ag attributes ...
assert attr_adapter.home_goals(example_attr) == 5
assert attr_adapter.away_goals(example_attr) == 1
# ... and team names from .home / .away.
assert attr_adapter.home_team(example_attr) == 'Another home team'
assert attr_adapter.away_team(example_attr) == 'Another away team'
As with `KeyAdapter`, nested attributes can also be fetched using lists:
@dataclasses.dataclass()
class Scoreline:
    """Goals scored by each side in a single match."""

    home: int  # goals scored by the home team
    away: int  # goals scored by the away team


@dataclasses.dataclass()
class ExampleNestedData:
    """Match record whose goals live on a nested ``Scoreline`` object.

    Field order matters for positional construction:
    ``scoreline, home, away``.
    """

    scoreline: Scoreline  # nested goals for both sides
    home: str  # home team name
    away: str  # away team name
# A list is a path of attribute names: ['scoreline', 'home'] reads
# obj.scoreline.home, and likewise for the away goals.
nested_attr_adapter = AttributeAdapter(
    home_goals=['scoreline', 'home'],
    away_goals=['scoreline', 'away'],
    home_team='home',
    away_team='away',
)

example_nested_attr = ExampleNestedData(
    scoreline=Scoreline(home=2, away=5),
    home='Another home team',
    away='Another away team',
)

assert nested_attr_adapter.home_goals(example_nested_attr) == 2
assert nested_attr_adapter.away_goals(example_nested_attr) == 5
# Training data for the lumped adapter:
#   * `example_dict` four times, so 'Team 1' (home) and 'Team 2' (away)
#     each appear 4 times;
#   * one extra match in which 'Team 1' plays *away* (5 appearances in
#     total) and 'Team 3' makes its only appearance, as the home team.
example_lumped_data = [example_dict] * 4 + [
    {
        'away': 'Team 1',
        'home': 'Team 3',
        'hg': 4,
        'ag': 3,
    },
]

# `home_team` and `away_team` share the same placeholder ('Other team'),
# so their appearances are counted together: a team keeps its own name
# only if it shows up at least 5 times as *either* the home or away team.
lumped_dict_adapter = LumpedAdapter(
    base_adapter=dict_adapter,
    home_team=('Other team', 5),
    away_team=('Other team', 5),
)
lumped_dict_adapter.fit(example_lumped_data)
# Bare expression kept from the original: it echoes the fitted adapter in
# an interactive/notebook session and has no effect when run as a script.
lumped_dict_adapter
example_lumped_1 = dict(home='Team 1', away='Team 3', hg=1, ag=2)

# 'Team 1' has at least the minimum number of observations (5), so its
# name passes through unchanged.
assert lumped_dict_adapter.home_team(example_lumped_1) == 'Team 1'
# 'Team 3' appeared only once, so it is replaced by the placeholder.
assert lumped_dict_adapter.away_team(example_lumped_1) == 'Other team'
# Fields with no placeholder configured on the LumpedAdapter are returned
# exactly as before.
assert lumped_dict_adapter.home_goals(example_lumped_1) == 1
assert lumped_dict_adapter.away_goals(example_lumped_1) == 2
Using a lumped adapter can also allow you to handle items which didn't appear in the training set at all:
example_lumped_2 = dict(
    home='Team 2',  # appeared only 4 times: below the threshold of 5
    away='Team 4',  # never appeared in the fitted data
    hg=1,
    ag=2,
)

# Both the under-observed team and the unseen team map to the placeholder.
assert lumped_dict_adapter.home_team(example_lumped_2) == 'Other team'
assert lumped_dict_adapter.away_team(example_lumped_2) == 'Other team'