--- title: TabFusionTransformer keywords: fastai sidebar: home_sidebar summary: "This is a PyTorch implementation of TabFusionTransformer created by Ignacio Oguiza (timeseriesAI@gmail.com)" description: "This is a PyTorch implementation of TabFusionTransformer created by Ignacio Oguiza (timeseriesAI@gmail.com)" nb_path: "nbs/122_models.TabFusionTransformer.ipynb" ---
This implementation is inspired by:
Huang, X., Khetan, A., Cvitkovic, M., & Karnin, Z. (2020). TabTransformer: Tabular Data Modeling Using Contextual Embeddings. arXiv preprint https://arxiv.org/pdf/2012.06678
Official repo: https://github.com/awslabs/autogluon/tree/master/tabular/src/autogluon/tabular/models/tab_transformer
from fastcore.test import test_eq
from fastcore.basics import first
from fastai.data.external import untar_data, URLs
from fastai.tabular.data import TabularDataLoaders
from fastai.tabular.core import Categorify, FillMissing
from fastai.data.transforms import Normalize
import pandas as pd
# Smoke test: run TabFusionTransformer on one batch of the ADULT_SAMPLE data
# and check that the output has one row per sample and one column per class.

# Locate the dataset through fastai's `untar_data` and load the raw CSV.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path/'adult.csv')

# Build the tabular DataLoaders straight from the CSV file; "salary" is the
# target, and the listed preprocessing steps are applied to the columns.
dls = TabularDataLoaders.from_csv(
    path/'adult.csv',
    path=path,
    y_names="salary",
    cat_names=[
        'workclass',
        'education',
        'marital-status',
        'occupation',
        'relationship',
        'race',
    ],
    cont_names=[
        'age',
        'fnlwgt',
        'education-num',
    ],
    procs=[Categorify, FillMissing, Normalize],
)

# Grab the first training batch: categorical inputs, continuous inputs, target.
x_cat, x_cont, yb = first(dls.train)

# The model is configured from the DataLoaders' own metadata.
model = TabFusionTransformer(dls.classes, dls.cont_names, dls.c)

# Expected output shape: (batch size, number of target classes).
test_eq(model(x_cat, x_cont).shape, (dls.train.bs, dls.c))
# Smoke test: run TSTabFusionTransformer on random inputs that combine a time
# series tensor with tabular (categorical + continuous) features.

# Vocabulary of every categorical variable, keyed by column name.
classes = {
    'education': [
        '#na#', '10th', '11th', '12th', '1st-4th', '5th-6th', '7th-8th', '9th',
        'Assoc-acdm', 'Assoc-voc', 'Bachelors', 'Doctorate', 'HS-grad',
        'Masters', 'Preschool', 'Prof-school', 'Some-college',
    ],
    'education-num_na': ['#na#', False, True],
    'marital-status': [
        '#na#', 'Divorced', 'Married-AF-spouse', 'Married-civ-spouse',
        'Married-spouse-absent', 'Never-married', 'Separated', 'Widowed',
    ],
    'occupation': [
        '#na#', '?', 'Adm-clerical', 'Armed-Forces', 'Craft-repair',
        'Exec-managerial', 'Farming-fishing', 'Handlers-cleaners',
        'Machine-op-inspct', 'Other-service', 'Priv-house-serv',
        'Prof-specialty', 'Protective-serv', 'Sales', 'Tech-support',
        'Transport-moving',
    ],
    'race': [
        '#na#', 'Amer-Indian-Eskimo', 'Asian-Pac-Islander', 'Black', 'Other',
        'White',
    ],
    'relationship': [
        '#na#', 'Husband', 'Not-in-family', 'Other-relative', 'Own-child',
        'Unmarried', 'Wife',
    ],
    'workclass': [
        '#na#', '?', 'Federal-gov', 'Local-gov', 'Never-worked', 'Private',
        'Self-emp-inc', 'Self-emp-not-inc', 'State-gov', 'Without-pay',
    ],
}
cont_names = ['a', 'b', 'c']
c_out = 6

# Random inputs, all with 64 rows along the first dimension.
x_ts = torch.randn(64, 3, 10)
# One column per entry in `classes` (7). Values are drawn from {0, 1, 2}
# (`high` is exclusive), which stays within the smallest vocabulary above
# (3 items) -- presumably these are used as category indices.
x_cat = torch.randint(low=0, high=3, size=(64, 7))
# One column per entry in `cont_names` (3).
x_cont = torch.randn(64, 3)

# The model receives dim 1 and the last dim of `x_ts`, the number of outputs,
# and the tabular metadata.
model = TSTabFusionTransformer(x_ts.size(1), c_out, x_ts.size(-1), classes, cont_names)

# Input is a nested tuple: (time series, (categorical, continuous)).
x = (x_ts, (x_cat, x_cont))

# Expected output shape: (first dim of x_ts, c_out).
test_eq(model(x).shape, (x_ts.size(0), c_out))