2 - 5) EDA - Player information
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
from utils import my_histogram, pca_results
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.decomposition import PCA
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
Cell In[50], line 7
5 import seaborn as sns
6 import missingno as msno
----> 7 from utils import my_histogram, pca_results
9 from sklearn.experimental import enable_iterative_imputer
10 from sklearn.impute import IterativeImputer
ImportError: cannot import name 'pca_results' from 'utils' (/Users/youngjun/Jun/projects/european_soccer_match_prediction/src/utils.py)
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
df_player = pd.read_csv("../data/df_player.csv")
df_player
| player_api_id | player_name | birthday | height | weight | player_measured_date | overall_rating | potential | preferred_foot | attacking_work_rate | defensive_work_rate | crossing | finishing | heading_accuracy | short_passing | volleys | dribbling | curve | free_kick_accuracy | long_passing | ball_control | acceleration | sprint_speed | agility | reactions | balance | shot_power | jumping | stamina | strength | long_shots | aggression | interceptions | positioning | vision | penalties | marking | standing_tackle | sliding_tackle | gk_diving | gk_handling | gk_kicking | gk_positioning | gk_reflexes | position_X_mode | position_Y_mode | position_role | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 505942 | Aaron Appindangoye | 1992-02-29 | 182.88 | 187 | 2016-02-18 | 67.0 | 71.0 | right | medium | medium | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 71.0 | 70.0 | 45.0 | 54.0 | 48.0 | 65.0 | 69.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 | 6.0 | 3.0 | defender |
| 1 | 505942 | Aaron Appindangoye | 1992-02-29 | 182.88 | 187 | 2015-11-19 | 67.0 | 71.0 | right | medium | medium | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 71.0 | 70.0 | 45.0 | 54.0 | 48.0 | 65.0 | 69.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 | 6.0 | 3.0 | defender |
| 2 | 505942 | Aaron Appindangoye | 1992-02-29 | 182.88 | 187 | 2015-09-21 | 62.0 | 66.0 | right | medium | medium | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 63.0 | 41.0 | 45.0 | 54.0 | 48.0 | 65.0 | 66.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 | 6.0 | 3.0 | defender |
| 3 | 505942 | Aaron Appindangoye | 1992-02-29 | 182.88 | 187 | 2015-03-20 | 61.0 | 65.0 | right | medium | medium | 48.0 | 43.0 | 70.0 | 60.0 | 43.0 | 50.0 | 44.0 | 38.0 | 63.0 | 48.0 | 60.0 | 64.0 | 59.0 | 46.0 | 65.0 | 54.0 | 58.0 | 54.0 | 76.0 | 34.0 | 62.0 | 40.0 | 44.0 | 53.0 | 47.0 | 62.0 | 63.0 | 66.0 | 5.0 | 10.0 | 9.0 | 7.0 | 7.0 | 6.0 | 3.0 | defender |
| 4 | 505942 | Aaron Appindangoye | 1992-02-29 | 182.88 | 187 | 2007-02-22 | 61.0 | 65.0 | right | medium | medium | 48.0 | 43.0 | 70.0 | 60.0 | 43.0 | 50.0 | 44.0 | 38.0 | 63.0 | 48.0 | 60.0 | 64.0 | 59.0 | 46.0 | 65.0 | 54.0 | 58.0 | 54.0 | 76.0 | 34.0 | 62.0 | 40.0 | 44.0 | 53.0 | 47.0 | 62.0 | 63.0 | 66.0 | 5.0 | 10.0 | 9.0 | 7.0 | 7.0 | 6.0 | 3.0 | defender |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 183973 | 39902 | Zvjezdan Misimovic | 1982-06-05 | 180.34 | 176 | 2009-08-30 | 83.0 | 85.0 | right | medium | low | 84.0 | 77.0 | 59.0 | 89.0 | 77.0 | 84.0 | 86.0 | 78.0 | 84.0 | 85.0 | 66.0 | 72.0 | 77.0 | 86.0 | 73.0 | 76.0 | 58.0 | 72.0 | 67.0 | 81.0 | 56.0 | 78.0 | 86.0 | 88.0 | 83.0 | 22.0 | 31.0 | 30.0 | 9.0 | 20.0 | 84.0 | 20.0 | 20.0 | 5.0 | 9.0 | forward |
| 183974 | 39902 | Zvjezdan Misimovic | 1982-06-05 | 180.34 | 176 | 2009-02-22 | 78.0 | 80.0 | right | medium | low | 74.0 | 76.0 | 53.0 | 84.0 | 77.0 | 85.0 | 86.0 | 74.0 | 73.0 | 86.0 | 66.0 | 67.0 | 77.0 | 74.0 | 73.0 | 75.0 | 58.0 | 66.0 | 65.0 | 73.0 | 61.0 | 64.0 | 72.0 | 88.0 | 70.0 | 32.0 | 31.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 | 5.0 | 9.0 | forward |
| 183975 | 39902 | Zvjezdan Misimovic | 1982-06-05 | 180.34 | 176 | 2008-08-30 | 77.0 | 80.0 | right | medium | low | 74.0 | 71.0 | 53.0 | 84.0 | 77.0 | 85.0 | 86.0 | 74.0 | 73.0 | 86.0 | 66.0 | 67.0 | 77.0 | 74.0 | 73.0 | 75.0 | 58.0 | 66.0 | 65.0 | 73.0 | 67.0 | 64.0 | 72.0 | 88.0 | 70.0 | 32.0 | 31.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 | 5.0 | 9.0 | forward |
| 183976 | 39902 | Zvjezdan Misimovic | 1982-06-05 | 180.34 | 176 | 2007-08-30 | 78.0 | 81.0 | right | medium | low | 74.0 | 64.0 | 57.0 | 86.0 | 77.0 | 87.0 | 86.0 | 73.0 | 73.0 | 91.0 | 61.0 | 60.0 | 77.0 | 69.0 | 73.0 | 72.0 | 58.0 | 67.0 | 59.0 | 78.0 | 63.0 | 63.0 | 68.0 | 88.0 | 53.0 | 28.0 | 32.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 | 5.0 | 9.0 | forward |
| 183977 | 39902 | Zvjezdan Misimovic | 1982-06-05 | 180.34 | 176 | 2007-02-22 | 80.0 | 81.0 | right | medium | low | 74.0 | 68.0 | 57.0 | 88.0 | 77.0 | 87.0 | 86.0 | 53.0 | 78.0 | 91.0 | 58.0 | 64.0 | 77.0 | 66.0 | 73.0 | 72.0 | 58.0 | 67.0 | 59.0 | 78.0 | 63.0 | 63.0 | 68.0 | 88.0 | 53.0 | 38.0 | 32.0 | 30.0 | 9.0 | 9.0 | 78.0 | 7.0 | 15.0 | 5.0 | 9.0 | forward |
183978 rows × 47 columns
player_num_attr = ["player_api_id", "player_measured_date", "height", "weight",
'overall_rating', 'potential', 'crossing', 'finishing', 'heading_accuracy',
'short_passing', 'volleys', 'dribbling', 'curve', 'free_kick_accuracy',
'long_passing', 'ball_control', 'acceleration', 'sprint_speed', 'agility',
'reactions', 'balance', 'shot_power', 'jumping', 'stamina',
'strength', 'long_shots', 'aggression', 'interceptions', 'positioning',
'vision', 'penalties', 'marking', 'standing_tackle', 'sliding_tackle',
'gk_diving', 'gk_handling', 'gk_kicking', 'gk_positioning','gk_reflexes']
df_player_num_attr = df_player[player_num_attr]
1. Missing values
msno.matrix(df_player_num_attr)
<AxesSubplot:>

- If overall_rating is missing, all other attributes are also msissing.
df_player_num_attr.overall_rating.isna().sum()
836
- There are only 836 cases that overall_rating and all other attributes are missing.
len(df_player_num_attr[~df_player_num_attr.overall_rating.isna()].player_api_id.unique())
11060
- Also, even if we drop the cases that overall_rating is missing, we can check that each player’s attributes have been measured at least once.
- So let’s just drop the cases that overall_rating is missing.
df_player_num_attr = df_player_num_attr[~df_player_num_attr.overall_rating.isna()]
msno.matrix(df_player_num_attr.sort_values(["player_api_id", "player_measured_date"]))
<AxesSubplot:>

df_player_num_attr.isna().sum()[df_player_num_attr.isna().sum() > 0]
volleys 1877
curve 1877
agility 1877
balance 1877
jumping 1877
vision 1877
sliding_tackle 1877
dtype: int64
- There are some missing values in voleys, curve, agility, balance, jumping, vision, sliding_tackle variables.
- Let’s check the correlation between attributes to check whether missing values can be imputed by other variables.
plt.figure(figsize = (20, 5))
corr = round(df_player_num_attr[player_num_attr].corr(), 1)
corr = corr.loc[["volleys", "curve", "agility", "balance", "jumping", "vision", "sliding_tackle"], :]
sns.heatmap(corr, annot = True, cmap = "BrBG", vmin = -1, vmax = 1,
linewidths = 0.5, cbar_kws = {"shrink" : 0.5})
<AxesSubplot:>

- We can see that there are absolute correlations over 0.7 between:
- volleys: finishing, dribbling, curve, positioning, long_shots, dribbling, positioning, curve, ball_control, shot_power, penalties
- curve: dribbling, long_shots, crossing, ball_control, free_kick_accuracy, volleys, short_passing, vision, positioning
- agility: acceleration, sprint_speed, dribbling
- balance: agility, height
- jumping: none
- vision: ball_control, short_passing, positioning, dribbling, long_shots, curve, free_kick_accuracy, crossing, volleys, long_passing, penalties, finishing
- sliding_tackle: standing_tackle, marking, interceptions, aggression
- There are plenty of other variables that have high correlation with the variables that have the missing values, except for the balance and jumping.
-
So, it seems to be possible to use the iterative imputation for the missing values from the player attributes, except for the balance and jumping.
- However, since only 1%(1877 / 183142) of the data have missing values in banace and jumping columns, let’s also use the iterative imputation even for the balance and jumping.
target = df_player_num_attr.set_index(["player_api_id", "player_measured_date"])
index = target.index
col_names = target.columns
imp_player_attr = IterativeImputer(max_iter = 10, random_state = 42)
imp_player_attr.fit(target)
df_player_num_attr_imputed = pd.DataFrame(imp_player_attr.transform(target),
columns = col_names, index = index)
msno.matrix(df_player_num_attr_imputed)
<AxesSubplot:>

df_player_num_attr_imputed = df_player_num_attr_imputed.reset_index()
df_player_num_attr_imputed
| player_api_id | player_measured_date | height | weight | overall_rating | potential | crossing | finishing | heading_accuracy | short_passing | volleys | dribbling | curve | free_kick_accuracy | long_passing | ball_control | acceleration | sprint_speed | agility | reactions | balance | shot_power | jumping | stamina | strength | long_shots | aggression | interceptions | positioning | vision | penalties | marking | standing_tackle | sliding_tackle | gk_diving | gk_handling | gk_kicking | gk_positioning | gk_reflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 505942 | 2016-02-18 | 182.88 | 187.0 | 67.0 | 71.0 | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 71.0 | 70.0 | 45.0 | 54.0 | 48.0 | 65.0 | 69.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 |
| 1 | 505942 | 2015-11-19 | 182.88 | 187.0 | 67.0 | 71.0 | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 71.0 | 70.0 | 45.0 | 54.0 | 48.0 | 65.0 | 69.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 |
| 2 | 505942 | 2015-09-21 | 182.88 | 187.0 | 62.0 | 66.0 | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 63.0 | 41.0 | 45.0 | 54.0 | 48.0 | 65.0 | 66.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 |
| 3 | 505942 | 2015-03-20 | 182.88 | 187.0 | 61.0 | 65.0 | 48.0 | 43.0 | 70.0 | 60.0 | 43.0 | 50.0 | 44.0 | 38.0 | 63.0 | 48.0 | 60.0 | 64.0 | 59.0 | 46.0 | 65.0 | 54.0 | 58.0 | 54.0 | 76.0 | 34.0 | 62.0 | 40.0 | 44.0 | 53.0 | 47.0 | 62.0 | 63.0 | 66.0 | 5.0 | 10.0 | 9.0 | 7.0 | 7.0 |
| 4 | 505942 | 2007-02-22 | 182.88 | 187.0 | 61.0 | 65.0 | 48.0 | 43.0 | 70.0 | 60.0 | 43.0 | 50.0 | 44.0 | 38.0 | 63.0 | 48.0 | 60.0 | 64.0 | 59.0 | 46.0 | 65.0 | 54.0 | 58.0 | 54.0 | 76.0 | 34.0 | 62.0 | 40.0 | 44.0 | 53.0 | 47.0 | 62.0 | 63.0 | 66.0 | 5.0 | 10.0 | 9.0 | 7.0 | 7.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 183137 | 39902 | 2009-08-30 | 180.34 | 176.0 | 83.0 | 85.0 | 84.0 | 77.0 | 59.0 | 89.0 | 77.0 | 84.0 | 86.0 | 78.0 | 84.0 | 85.0 | 66.0 | 72.0 | 77.0 | 86.0 | 73.0 | 76.0 | 58.0 | 72.0 | 67.0 | 81.0 | 56.0 | 78.0 | 86.0 | 88.0 | 83.0 | 22.0 | 31.0 | 30.0 | 9.0 | 20.0 | 84.0 | 20.0 | 20.0 |
| 183138 | 39902 | 2009-02-22 | 180.34 | 176.0 | 78.0 | 80.0 | 74.0 | 76.0 | 53.0 | 84.0 | 77.0 | 85.0 | 86.0 | 74.0 | 73.0 | 86.0 | 66.0 | 67.0 | 77.0 | 74.0 | 73.0 | 75.0 | 58.0 | 66.0 | 65.0 | 73.0 | 61.0 | 64.0 | 72.0 | 88.0 | 70.0 | 32.0 | 31.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 |
| 183139 | 39902 | 2008-08-30 | 180.34 | 176.0 | 77.0 | 80.0 | 74.0 | 71.0 | 53.0 | 84.0 | 77.0 | 85.0 | 86.0 | 74.0 | 73.0 | 86.0 | 66.0 | 67.0 | 77.0 | 74.0 | 73.0 | 75.0 | 58.0 | 66.0 | 65.0 | 73.0 | 67.0 | 64.0 | 72.0 | 88.0 | 70.0 | 32.0 | 31.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 |
| 183140 | 39902 | 2007-08-30 | 180.34 | 176.0 | 78.0 | 81.0 | 74.0 | 64.0 | 57.0 | 86.0 | 77.0 | 87.0 | 86.0 | 73.0 | 73.0 | 91.0 | 61.0 | 60.0 | 77.0 | 69.0 | 73.0 | 72.0 | 58.0 | 67.0 | 59.0 | 78.0 | 63.0 | 63.0 | 68.0 | 88.0 | 53.0 | 28.0 | 32.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 |
| 183141 | 39902 | 2007-02-22 | 180.34 | 176.0 | 80.0 | 81.0 | 74.0 | 68.0 | 57.0 | 88.0 | 77.0 | 87.0 | 86.0 | 53.0 | 78.0 | 91.0 | 58.0 | 64.0 | 77.0 | 66.0 | 73.0 | 72.0 | 58.0 | 67.0 | 59.0 | 78.0 | 63.0 | 63.0 | 68.0 | 88.0 | 53.0 | 38.0 | 32.0 | 30.0 | 9.0 | 9.0 | 78.0 | 7.0 | 15.0 |
183142 rows × 39 columns
2. Dimension reduction by PCA
- In the match data, we have the information of 22 players id from the each team.
- Since we will merge the player attributes to the match data, there are too many columns: 22 players * 35 attributes = 770 columns.
df_player_grouped_num_attr = df_player_num_attr_imputed.drop(["player_measured_date", "height", "weight"], axis = 1).groupby("player_api_id").mean()
df_player_grouped_num_attr
| overall_rating | potential | crossing | finishing | heading_accuracy | short_passing | volleys | dribbling | curve | free_kick_accuracy | long_passing | ball_control | acceleration | sprint_speed | agility | reactions | balance | shot_power | jumping | stamina | strength | long_shots | aggression | interceptions | positioning | vision | penalties | marking | standing_tackle | sliding_tackle | gk_diving | gk_handling | gk_kicking | gk_positioning | gk_reflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| player_api_id | |||||||||||||||||||||||||||||||||||
| 2625 | 60.142857 | 61.142857 | 50.142857 | 47.285714 | 46.285714 | 56.500000 | 38.000000 | 54.142857 | 50.000000 | 49.928571 | 65.571429 | 59.142857 | 66.857143 | 63.285714 | 66.428571 | 49.714286 | 66.214286 | 66.714286 | 58.714286 | 77.285714 | 55.785714 | 56.857143 | 71.285714 | 62.071429 | 50.357143 | 55.285714 | 61.928571 | 62.928571 | 63.857143 | 56.571429 | 12.428571 | 12.857143 | 19.071429 | 10.357143 | 10.428571 |
| 2752 | 69.380952 | 70.380952 | 36.428571 | 37.428571 | 75.333333 | 54.809524 | 20.428571 | 44.857143 | 30.428571 | 18.857143 | 59.571429 | 55.571429 | 41.619048 | 42.190476 | 43.333333 | 60.857143 | 44.571429 | 57.428571 | 56.523810 | 60.904762 | 82.238095 | 19.571429 | 80.285714 | 71.428571 | 31.428571 | 52.428571 | 35.428571 | 71.095238 | 70.666667 | 65.571429 | 11.095238 | 6.095238 | 7.095238 | 9.095238 | 15.095238 |
| 2768 | 69.285714 | 70.571429 | 42.238095 | 42.761905 | 66.666667 | 64.714286 | 28.761905 | 40.238095 | 51.761905 | 20.047619 | 57.000000 | 56.380952 | 56.952381 | 58.904762 | 58.047619 | 69.809524 | 60.190476 | 38.761905 | 76.904762 | 59.285714 | 67.428571 | 19.428571 | 62.761905 | 74.714286 | 44.190476 | 57.761905 | 31.904762 | 71.666667 | 70.619048 | 79.714286 | 10.761905 | 15.904762 | 22.714286 | 15.142857 | 12.095238 |
| 2770 | 71.133333 | 73.533333 | 61.866667 | 67.666667 | 67.800000 | 72.533333 | 67.133333 | 75.066667 | 79.800000 | 68.666667 | 70.266667 | 76.733333 | 62.200000 | 59.800000 | 62.600000 | 65.933333 | 61.266667 | 68.333333 | 57.133333 | 51.000000 | 62.600000 | 73.000000 | 56.800000 | 44.933333 | 64.533333 | 71.600000 | 63.933333 | 30.200000 | 37.533333 | 25.000000 | 8.666667 | 15.666667 | 27.200000 | 16.333333 | 17.000000 |
| 2790 | 70.200000 | 75.800000 | 70.000000 | 37.400000 | 55.200000 | 68.000000 | 43.000000 | 60.800000 | 67.000000 | 55.400000 | 69.800000 | 66.800000 | 70.800000 | 71.200000 | 65.000000 | 69.200000 | 67.000000 | 46.400000 | 61.000000 | 70.400000 | 71.000000 | 42.400000 | 68.400000 | 63.400000 | 60.600000 | 60.000000 | 55.400000 | 74.600000 | 74.600000 | 69.000000 | 8.000000 | 17.000000 | 59.000000 | 16.600000 | 17.400000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 744907 | 51.909091 | 65.909091 | 42.454545 | 44.454545 | 47.454545 | 59.454545 | 42.454545 | 56.454545 | 46.454545 | 43.454545 | 58.454545 | 55.454545 | 66.000000 | 64.000000 | 61.000000 | 53.454545 | 74.000000 | 50.454545 | 63.000000 | 54.000000 | 49.909091 | 38.454545 | 36.454545 | 22.454545 | 48.454545 | 51.454545 | 52.454545 | 39.454545 | 48.454545 | 49.454545 | 8.454545 | 13.454545 | 12.454545 | 11.454545 | 8.454545 |
| 746419 | 59.000000 | 66.000000 | 55.000000 | 28.000000 | 50.000000 | 35.000000 | 29.000000 | 67.000000 | 37.000000 | 39.000000 | 35.000000 | 55.000000 | 77.000000 | 76.000000 | 70.000000 | 60.000000 | 74.000000 | 29.000000 | 55.000000 | 63.000000 | 46.000000 | 27.000000 | 59.000000 | 53.000000 | 55.000000 | 40.000000 | 43.000000 | 55.000000 | 69.000000 | 62.000000 | 7.000000 | 10.000000 | 7.000000 | 9.000000 | 9.000000 |
| 748432 | 58.000000 | 68.000000 | 48.000000 | 26.000000 | 57.000000 | 49.000000 | 24.000000 | 41.000000 | 39.000000 | 20.000000 | 41.000000 | 42.000000 | 64.000000 | 68.000000 | 50.000000 | 48.000000 | 52.000000 | 46.000000 | 68.000000 | 50.000000 | 67.000000 | 25.000000 | 52.000000 | 55.000000 | 38.000000 | 36.000000 | 45.000000 | 63.000000 | 69.000000 | 68.000000 | 8.000000 | 8.000000 | 12.000000 | 12.000000 | 6.000000 |
| 750435 | 56.444444 | 70.444444 | 35.000000 | 57.000000 | 55.000000 | 60.000000 | 52.000000 | 58.555556 | 56.000000 | 37.000000 | 54.000000 | 56.333333 | 65.666667 | 63.555556 | 60.222222 | 61.000000 | 78.000000 | 57.000000 | 58.000000 | 57.000000 | 46.000000 | 50.000000 | 34.000000 | 13.000000 | 56.333333 | 58.000000 | 61.000000 | 18.000000 | 19.000000 | 21.000000 | 9.000000 | 10.000000 | 8.000000 | 10.000000 | 11.000000 |
| 750584 | 58.000000 | 66.000000 | 47.000000 | 39.000000 | 53.000000 | 60.000000 | 37.000000 | 54.000000 | 40.000000 | 45.000000 | 55.000000 | 56.000000 | 63.000000 | 60.000000 | 51.000000 | 54.000000 | 76.000000 | 59.000000 | 58.000000 | 55.000000 | 57.000000 | 38.000000 | 62.000000 | 56.000000 | 41.000000 | 47.000000 | 52.000000 | 56.000000 | 66.000000 | 62.000000 | 8.000000 | 9.000000 | 13.000000 | 11.000000 | 13.000000 |
11060 rows × 35 columns
plt.figure(figsize = (20, 20))
corr = round(df_player_grouped_num_attr.corr(), 1)
sns.heatmap(corr, annot = True, cmap = "BrBG", vmin = -1, vmax = 1,
linewidths = 0.5, cbar_kws = {"shrink" : 0.5})
<AxesSubplot:>

- Aslo, correlations between player numerical attributes are very high.
-
So, I will use the PCA to reduce the dimension of the numerical player attributes.
- Note that we set the the last season (2015/2016 seasons) as the test set.
-
So we will fit the PCA to the only train data and transform all data by fitted PC loadings.
- Check the season start month to exclude the player attributes information measured before the 2015 season.
df_match_basic = pd.read_csv("../data/df_match_basic.csv")
for season in ["2015/2016"]:
print(f"== Season: {season}")
print(df_match_basic[df_match_basic.season == season].sort_values("match_date").groupby(["league_id", "season"]).min().match_date)
== Season: 2015/2016
league_id season
1 2015/2016 2015-07-24
1729 2015/2016 2015-08-08
4769 2015/2016 2015-08-07
7809 2015/2016 2015-08-14
10257 2015/2016 2015-08-22
13274 2015/2016 2015-08-08
15722 2015/2016 2015-07-17
17642 2015/2016 2015-08-14
19694 2015/2016 2015-08-01
21518 2015/2016 2015-08-21
24558 2015/2016 2015-07-18
Name: match_date, dtype: object
- We can use the player attributes information measured before the 2015-07-17
player_cutoff_date = pd.to_datetime("2015-07-17")
player_cutoff_date
Timestamp('2015-07-17 00:00:00')
df_player_num_attr_imputed
| player_api_id | player_measured_date | height | weight | overall_rating | potential | crossing | finishing | heading_accuracy | short_passing | volleys | dribbling | curve | free_kick_accuracy | long_passing | ball_control | acceleration | sprint_speed | agility | reactions | balance | shot_power | jumping | stamina | strength | long_shots | aggression | interceptions | positioning | vision | penalties | marking | standing_tackle | sliding_tackle | gk_diving | gk_handling | gk_kicking | gk_positioning | gk_reflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 505942 | 2016-02-18 | 182.88 | 187.0 | 67.0 | 71.0 | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 71.0 | 70.0 | 45.0 | 54.0 | 48.0 | 65.0 | 69.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 |
| 1 | 505942 | 2015-11-19 | 182.88 | 187.0 | 67.0 | 71.0 | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 71.0 | 70.0 | 45.0 | 54.0 | 48.0 | 65.0 | 69.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 |
| 2 | 505942 | 2015-09-21 | 182.88 | 187.0 | 62.0 | 66.0 | 49.0 | 44.0 | 71.0 | 61.0 | 44.0 | 51.0 | 45.0 | 39.0 | 64.0 | 49.0 | 60.0 | 64.0 | 59.0 | 47.0 | 65.0 | 55.0 | 58.0 | 54.0 | 76.0 | 35.0 | 63.0 | 41.0 | 45.0 | 54.0 | 48.0 | 65.0 | 66.0 | 69.0 | 6.0 | 11.0 | 10.0 | 8.0 | 8.0 |
| 3 | 505942 | 2015-03-20 | 182.88 | 187.0 | 61.0 | 65.0 | 48.0 | 43.0 | 70.0 | 60.0 | 43.0 | 50.0 | 44.0 | 38.0 | 63.0 | 48.0 | 60.0 | 64.0 | 59.0 | 46.0 | 65.0 | 54.0 | 58.0 | 54.0 | 76.0 | 34.0 | 62.0 | 40.0 | 44.0 | 53.0 | 47.0 | 62.0 | 63.0 | 66.0 | 5.0 | 10.0 | 9.0 | 7.0 | 7.0 |
| 4 | 505942 | 2007-02-22 | 182.88 | 187.0 | 61.0 | 65.0 | 48.0 | 43.0 | 70.0 | 60.0 | 43.0 | 50.0 | 44.0 | 38.0 | 63.0 | 48.0 | 60.0 | 64.0 | 59.0 | 46.0 | 65.0 | 54.0 | 58.0 | 54.0 | 76.0 | 34.0 | 62.0 | 40.0 | 44.0 | 53.0 | 47.0 | 62.0 | 63.0 | 66.0 | 5.0 | 10.0 | 9.0 | 7.0 | 7.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 183137 | 39902 | 2009-08-30 | 180.34 | 176.0 | 83.0 | 85.0 | 84.0 | 77.0 | 59.0 | 89.0 | 77.0 | 84.0 | 86.0 | 78.0 | 84.0 | 85.0 | 66.0 | 72.0 | 77.0 | 86.0 | 73.0 | 76.0 | 58.0 | 72.0 | 67.0 | 81.0 | 56.0 | 78.0 | 86.0 | 88.0 | 83.0 | 22.0 | 31.0 | 30.0 | 9.0 | 20.0 | 84.0 | 20.0 | 20.0 |
| 183138 | 39902 | 2009-02-22 | 180.34 | 176.0 | 78.0 | 80.0 | 74.0 | 76.0 | 53.0 | 84.0 | 77.0 | 85.0 | 86.0 | 74.0 | 73.0 | 86.0 | 66.0 | 67.0 | 77.0 | 74.0 | 73.0 | 75.0 | 58.0 | 66.0 | 65.0 | 73.0 | 61.0 | 64.0 | 72.0 | 88.0 | 70.0 | 32.0 | 31.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 |
| 183139 | 39902 | 2008-08-30 | 180.34 | 176.0 | 77.0 | 80.0 | 74.0 | 71.0 | 53.0 | 84.0 | 77.0 | 85.0 | 86.0 | 74.0 | 73.0 | 86.0 | 66.0 | 67.0 | 77.0 | 74.0 | 73.0 | 75.0 | 58.0 | 66.0 | 65.0 | 73.0 | 67.0 | 64.0 | 72.0 | 88.0 | 70.0 | 32.0 | 31.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 |
| 183140 | 39902 | 2007-08-30 | 180.34 | 176.0 | 78.0 | 81.0 | 74.0 | 64.0 | 57.0 | 86.0 | 77.0 | 87.0 | 86.0 | 73.0 | 73.0 | 91.0 | 61.0 | 60.0 | 77.0 | 69.0 | 73.0 | 72.0 | 58.0 | 67.0 | 59.0 | 78.0 | 63.0 | 63.0 | 68.0 | 88.0 | 53.0 | 28.0 | 32.0 | 30.0 | 9.0 | 20.0 | 73.0 | 20.0 | 20.0 |
| 183141 | 39902 | 2007-02-22 | 180.34 | 176.0 | 80.0 | 81.0 | 74.0 | 68.0 | 57.0 | 88.0 | 77.0 | 87.0 | 86.0 | 53.0 | 78.0 | 91.0 | 58.0 | 64.0 | 77.0 | 66.0 | 73.0 | 72.0 | 58.0 | 67.0 | 59.0 | 78.0 | 63.0 | 63.0 | 68.0 | 88.0 | 53.0 | 38.0 | 32.0 | 30.0 | 9.0 | 9.0 | 78.0 | 7.0 | 15.0 |
183142 rows × 39 columns
player_cutoff_bool = pd.to_datetime(df_player_num_attr_imputed.player_measured_date) < player_cutoff_date
df_player_grouped_num_attr = df_player_num_attr_imputed.drop(["player_measured_date", "height", "weight"], axis = 1)[player_cutoff_bool].groupby("player_api_id").mean()
df_player_grouped_num_attr
| overall_rating | potential | crossing | finishing | heading_accuracy | short_passing | volleys | dribbling | curve | free_kick_accuracy | long_passing | ball_control | acceleration | sprint_speed | agility | reactions | balance | shot_power | jumping | stamina | strength | long_shots | aggression | interceptions | positioning | vision | penalties | marking | standing_tackle | sliding_tackle | gk_diving | gk_handling | gk_kicking | gk_positioning | gk_reflexes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| player_api_id | |||||||||||||||||||||||||||||||||||
| 2625 | 60.142857 | 61.142857 | 50.142857 | 47.285714 | 46.285714 | 56.500000 | 38.000000 | 54.142857 | 50.000000 | 49.928571 | 65.571429 | 59.142857 | 66.857143 | 63.285714 | 66.428571 | 49.714286 | 66.214286 | 66.714286 | 58.714286 | 77.285714 | 55.785714 | 56.857143 | 71.285714 | 62.071429 | 50.357143 | 55.285714 | 61.928571 | 62.928571 | 63.857143 | 56.571429 | 12.428571 | 12.857143 | 19.071429 | 10.357143 | 10.428571 |
| 2752 | 69.105263 | 70.210526 | 36.157895 | 37.157895 | 74.631579 | 54.368421 | 20.157895 | 45.263158 | 30.157895 | 18.842105 | 59.421053 | 55.421053 | 42.578947 | 42.526316 | 44.315789 | 60.526316 | 45.157895 | 57.157895 | 56.684211 | 60.789474 | 81.947368 | 19.526316 | 79.894737 | 71.473684 | 31.157895 | 52.157895 | 35.157895 | 71.000000 | 70.315789 | 65.421053 | 11.000000 | 6.000000 | 7.000000 | 9.000000 | 15.000000 |
| 2768 | 67.562500 | 69.250000 | 41.687500 | 42.062500 | 65.000000 | 61.562500 | 28.062500 | 39.062500 | 52.312500 | 19.437500 | 56.062500 | 54.312500 | 58.687500 | 60.687500 | 58.062500 | 67.562500 | 59.937500 | 38.062500 | 76.250000 | 64.000000 | 66.000000 | 19.250000 | 62.062500 | 72.812500 | 45.500000 | 57.062500 | 36.875000 | 69.812500 | 68.687500 | 80.062500 | 10.375000 | 16.187500 | 25.750000 | 15.500000 | 12.750000 |
| 2770 | 71.133333 | 73.533333 | 61.866667 | 67.666667 | 67.800000 | 72.533333 | 67.133333 | 75.066667 | 79.800000 | 68.666667 | 70.266667 | 76.733333 | 62.200000 | 59.800000 | 62.600000 | 65.933333 | 61.266667 | 68.333333 | 57.133333 | 51.000000 | 62.600000 | 73.000000 | 56.800000 | 44.933333 | 64.533333 | 71.600000 | 63.933333 | 30.200000 | 37.533333 | 25.000000 | 8.666667 | 15.666667 | 27.200000 | 16.333333 | 17.000000 |
| 2790 | 70.200000 | 75.800000 | 70.000000 | 37.400000 | 55.200000 | 68.000000 | 43.000000 | 60.800000 | 67.000000 | 55.400000 | 69.800000 | 66.800000 | 70.800000 | 71.200000 | 65.000000 | 69.200000 | 67.000000 | 46.400000 | 61.000000 | 70.400000 | 71.000000 | 42.400000 | 68.400000 | 63.400000 | 60.600000 | 60.000000 | 55.400000 | 74.600000 | 74.600000 | 69.000000 | 8.000000 | 17.000000 | 59.000000 | 16.600000 | 17.400000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 744907 | 51.000000 | 65.000000 | 42.000000 | 44.000000 | 47.000000 | 59.000000 | 42.000000 | 56.000000 | 46.000000 | 43.000000 | 58.000000 | 55.000000 | 66.000000 | 64.000000 | 61.000000 | 53.000000 | 74.000000 | 50.000000 | 63.000000 | 54.000000 | 49.000000 | 38.000000 | 36.000000 | 22.000000 | 48.000000 | 51.000000 | 52.000000 | 39.000000 | 48.000000 | 49.000000 | 8.000000 | 13.000000 | 12.000000 | 11.000000 | 8.000000 |
| 746419 | 59.000000 | 66.000000 | 55.000000 | 28.000000 | 50.000000 | 35.000000 | 29.000000 | 67.000000 | 37.000000 | 39.000000 | 35.000000 | 55.000000 | 77.000000 | 76.000000 | 70.000000 | 60.000000 | 74.000000 | 29.000000 | 55.000000 | 63.000000 | 46.000000 | 27.000000 | 59.000000 | 53.000000 | 55.000000 | 40.000000 | 43.000000 | 55.000000 | 69.000000 | 62.000000 | 7.000000 | 10.000000 | 7.000000 | 9.000000 | 9.000000 |
| 748432 | 58.000000 | 68.000000 | 48.000000 | 26.000000 | 57.000000 | 49.000000 | 24.000000 | 41.000000 | 39.000000 | 20.000000 | 41.000000 | 42.000000 | 64.000000 | 68.000000 | 50.000000 | 48.000000 | 52.000000 | 46.000000 | 68.000000 | 50.000000 | 67.000000 | 25.000000 | 52.000000 | 55.000000 | 38.000000 | 36.000000 | 45.000000 | 63.000000 | 69.000000 | 68.000000 | 8.000000 | 8.000000 | 12.000000 | 12.000000 | 6.000000 |
| 750435 | 56.000000 | 70.000000 | 35.000000 | 57.000000 | 55.000000 | 60.000000 | 52.000000 | 58.000000 | 56.000000 | 37.000000 | 54.000000 | 56.000000 | 64.000000 | 62.000000 | 55.600000 | 61.000000 | 78.000000 | 57.000000 | 58.000000 | 57.000000 | 46.000000 | 50.000000 | 34.000000 | 13.000000 | 56.000000 | 58.000000 | 61.000000 | 18.000000 | 19.000000 | 21.000000 | 9.000000 | 10.000000 | 8.000000 | 10.000000 | 11.000000 |
| 750584 | 58.000000 | 66.000000 | 47.000000 | 39.000000 | 53.000000 | 60.000000 | 37.000000 | 54.000000 | 40.000000 | 45.000000 | 55.000000 | 56.000000 | 63.000000 | 60.000000 | 51.000000 | 54.000000 | 76.000000 | 59.000000 | 58.000000 | 55.000000 | 57.000000 | 38.000000 | 62.000000 | 56.000000 | 41.000000 | 47.000000 | 52.000000 | 56.000000 | 66.000000 | 62.000000 | 8.000000 | 9.000000 | 13.000000 | 11.000000 | 13.000000 |
11060 rows × 35 columns
pipe = Pipeline([
('scale',StandardScaler()),
('pca', PCA(n_components = 10, random_state = 42)),
])
res = pipe.fit_transform(df_player_grouped_num_attr)
plt.figure(figsize = (14, 8))
plt.bar(range(1,len(pipe.named_steps.pca.explained_variance_ratio_) + 1), pipe.named_steps.pca.explained_variance_ratio_, alpha=0.5, align='center', label='Individual explained variance')
plt.step(range(1,len(np.cumsum(pipe.named_steps.pca.explained_variance_ratio_)) + 1), np.cumsum(pipe.named_steps.pca.explained_variance_ratio_), where='mid',label='Cumulative explained variance')
plt.ylabel('Explained variance ratio', fontsize = 14)
plt.xlabel('Principal component index', fontsize = 14)
plt.legend(loc = 'best')
plt.tight_layout()
plt.show()

- 5 variables can explain about 80% of the total variance.
-
That is, PCA variables can reduce the number of variables without significant loss of information.
- Let’s check 5 PC variables.
pipe = Pipeline([
('scale',StandardScaler()),
('pca', PCA(n_components = 5, random_state = 42)),
])
res = pipe.fit_transform(df_player_grouped_num_attr)
_ = pca_results(df_player_grouped_num_attr, pipe.named_steps.pca)

- Since our goal is to reduce the numerical attributes variables without significant loss of information, let’s just use 5 PC variables for each player.
- So for each match, we reduced from 22 players * 35 attributes = 770 columns to 22 players * 5 PC = 110 columns.
cols = df_player_grouped_num_attr.columns.tolist()
cols.append("player_api_id")
cols.append("player_measured_date")
df_player_num_attr_pc = pipe.fit_transform(df_player_num_attr_imputed[cols].set_index(["player_api_id", "player_measured_date"]))
df_player_num_attr_pc = pd.DataFrame(df_player_num_attr_pc, columns = ["PC1", "PC2", "PC3", "PC4", "PC5"],
index = df_player_num_attr_imputed[cols].set_index(["player_api_id", "player_measured_date"]).index).reset_index()
df_player_num_attr_pc
| player_api_id | player_measured_date | PC1 | PC2 | PC3 | PC4 | PC5 | |
|---|---|---|---|---|---|---|---|
| 0 | 505942 | 2016-02-18 | 1.046944 | 2.688259 | -1.529901 | 0.360865 | 0.548021 |
| 1 | 505942 | 2015-11-19 | 1.046944 | 2.688259 | -1.529901 | 0.360865 | 0.548021 |
| 2 | 505942 | 2015-09-21 | 1.400003 | 2.036745 | -2.359132 | 0.536742 | 0.502532 |
| 3 | 505942 | 2015-03-20 | 1.615408 | 1.927571 | -2.657865 | 0.548879 | 0.399693 |
| 4 | 505942 | 2007-02-22 | 1.615408 | 1.927571 | -2.657865 | 0.548879 | 0.399693 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 183137 | 39902 | 2009-08-30 | -4.991884 | -2.921361 | 4.090605 | 0.593824 | 2.434563 |
| 183138 | 39902 | 2009-02-22 | -3.766452 | -2.749777 | 2.543465 | 0.492997 | 2.349990 |
| 183139 | 39902 | 2008-08-30 | -3.745176 | -2.589656 | 2.519182 | 0.452451 | 2.349129 |
| 183140 | 39902 | 2007-08-30 | -3.382604 | -2.497503 | 2.251858 | 0.317332 | 2.726097 |
| 183141 | 39902 | 2007-02-22 | -3.659867 | -2.058531 | 1.834322 | 0.277823 | 2.463788 |
183142 rows × 7 columns
player_num_collected = pd.DataFrame(df_player_num_attr_pc.groupby("player_api_id").count().player_measured_date)
player_num_collected.columns = ["num_collected"]
player_num_collected
| num_collected | |
|---|---|
| player_api_id | |
| 2625 | 14 |
| 2752 | 21 |
| 2768 | 21 |
| 2770 | 15 |
| 2790 | 5 |
| ... | ... |
| 744907 | 11 |
| 746419 | 2 |
| 748432 | 2 |
| 750435 | 9 |
| 750584 | 2 |
11060 rows × 1 columns
df_player_num_attr_pc.groupby("player_api_id").count().player_measured_date.describe()
count 11060.000000
mean 16.558951
std 9.361642
min 2.000000
25% 8.000000
50% 16.000000
75% 23.000000
max 56.000000
Name: player_measured_date, dtype: float64
my_histogram(player_num_collected, "num_collected")

- Players’ attributes are collected in different days.
- The player’s characteristic data was collected 2 times for the fewest players and 56 times for the most players.
-
So, we will use player attributes collected closest to match dates.
- Let’s merge all starting players’ PC variables collected closest to match dates.
df_match_player = pd.read_csv("../data/df_match_player.csv")
df_match_basic[["match_api_id", "match_date"]].merge(df_match_player, how = "left", on = "match_api_id")
| match_api_id | match_date | home_player_X1 | home_player_X2 | home_player_X3 | home_player_X4 | home_player_X5 | home_player_X6 | home_player_X7 | home_player_X8 | home_player_X9 | home_player_X10 | home_player_X11 | away_player_X1 | away_player_X2 | away_player_X3 | away_player_X4 | away_player_X5 | away_player_X6 | away_player_X7 | away_player_X8 | away_player_X9 | away_player_X10 | away_player_X11 | home_player_Y1 | home_player_Y2 | home_player_Y3 | home_player_Y4 | home_player_Y5 | home_player_Y6 | home_player_Y7 | home_player_Y8 | home_player_Y9 | home_player_Y10 | home_player_Y11 | away_player_Y1 | away_player_Y2 | away_player_Y3 | away_player_Y4 | away_player_Y5 | away_player_Y6 | away_player_Y7 | away_player_Y8 | away_player_Y9 | away_player_Y10 | away_player_Y11 | home_player_1 | home_player_2 | home_player_3 | home_player_4 | home_player_5 | home_player_6 | home_player_7 | home_player_8 | home_player_9 | home_player_10 | home_player_11 | away_player_1 | away_player_2 | away_player_3 | away_player_4 | away_player_5 | away_player_6 | away_player_7 | away_player_8 | away_player_9 | away_player_10 | away_player_11 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 492473 | 2008-08-17 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 492474 | 2008-08-16 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 492475 | 2008-08-16 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 492476 | 2008-08-17 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 492477 | 2008-08-16 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 25974 | 1992091 | 2015-09-22 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 4.0 | 6.0 | 3.0 | 5.0 | 7.0 | 5.0 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 4.0 | 6.0 | 3.0 | 5.0 | 7.0 | 5.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 6.0 | 6.0 | 8.0 | 8.0 | 8.0 | 11.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 6.0 | 6.0 | 8.0 | 8.0 | 8.0 | 11.0 | 42231.0 | 678384.0 | 95220.0 | 638592.0 | 413155.0 | 45780.0 | 171229.0 | 67333.0 | 119839.0 | 143790.0 | 195215.0 | 462944.0 | 563066.0 | 8800.0 | 67304.0 | 158253.0 | 133126.0 | 186524.0 | 93223.0 | 121115.0 | 232110.0 | 289732.0 |
| 25975 | 1992092 | 2015-09-23 | 1.0 | 3.0 | 5.0 | 7.0 | 2.0 | 4.0 | 6.0 | 8.0 | 5.0 | 4.0 | 6.0 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 3.0 | 5.0 | 7.0 | 3.0 | 5.0 | 7.0 | 1.0 | 3.0 | 3.0 | 3.0 | 7.0 | 7.0 | 7.0 | 7.0 | 9.0 | 11.0 | 11.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 7.0 | 7.0 | 7.0 | 10.0 | 10.0 | 10.0 | 33272.0 | 41621.0 | 25813.0 | 257845.0 | 114735.0 | 42237.0 | 113227.0 | 358156.0 | 32343.0 | 531309.0 | 37257.0 | 42276.0 | 114792.0 | 150007.0 | 178119.0 | 27232.0 | 570830.0 | 260708.0 | 201704.0 | 36382.0 | 34082.0 | 95257.0 |
| 25976 | 1992093 | 2015-09-23 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 4.0 | 6.0 | 3.0 | 5.0 | 7.0 | 5.0 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 4.0 | 6.0 | 3.0 | 5.0 | 7.0 | 5.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 6.0 | 6.0 | 8.0 | 8.0 | 8.0 | 11.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 6.0 | 6.0 | 8.0 | 8.0 | 8.0 | 11.0 | 157856.0 | 274779.0 | 177689.0 | 294256.0 | 42258.0 | 39979.0 | 173936.0 | 147959.0 | 451983.0 | 80564.0 | 289472.0 | 10637.0 | 67349.0 | 202663.0 | 32597.0 | 114794.0 | 188114.0 | 25840.0 | 482200.0 | 95230.0 | 451335.0 | 275122.0 |
| 25977 | 1992094 | 2015-09-22 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 3.0 | 7.0 | 5.0 | 3.0 | 5.0 | 7.0 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 4.0 | 6.0 | 3.0 | 5.0 | 7.0 | 5.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 7.0 | 7.0 | 7.0 | 10.0 | 10.0 | 10.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 6.0 | 6.0 | 8.0 | 8.0 | 8.0 | 11.0 | NaN | 8881.0 | 173534.0 | 39646.0 | 282287.0 | 340790.0 | 393337.0 | 8893.0 | 614454.0 | 93229.0 | 178142.0 | 274776.0 | 121080.0 | 197757.0 | 260964.0 | 231614.0 | 113235.0 | 41116.0 | 462608.0 | 42262.0 | 92252.0 | 194532.0 |
| 25978 | 1992095 | 2015-09-23 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 2.0 | 4.0 | 6.0 | 8.0 | 4.0 | 6.0 | 1.0 | 2.0 | 4.0 | 6.0 | 8.0 | 4.0 | 6.0 | 3.0 | 5.0 | 7.0 | 5.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 7.0 | 7.0 | 7.0 | 7.0 | 10.0 | 10.0 | 1.0 | 3.0 | 3.0 | 3.0 | 3.0 | 6.0 | 6.0 | 8.0 | 8.0 | 8.0 | 11.0 | 274787.0 | 492132.0 | 108451.0 | 25815.0 | 94553.0 | 384376.0 | 598355.0 | 36785.0 | 45174.0 | 302079.0 | 71764.0 | 156175.0 | 95216.0 | 172768.0 | 22834.0 | 458806.0 | 207234.0 | 25772.0 | 40274.0 | 34035.0 | 41726.0 | 527103.0 |
25979 rows × 68 columns
df_match_lineup = df_match_basic[["match_api_id", "match_date"]].merge(df_match_player, how = "left", on = "match_api_id") \
[["match_date", "match_api_id",
"home_player_1", "home_player_2", "home_player_3", "home_player_4",
"home_player_5", "home_player_6", "home_player_7", "home_player_8",
"home_player_9", "home_player_10", "home_player_11",
"away_player_1", "away_player_2", "away_player_3", "away_player_4",
"away_player_5", "away_player_6", "away_player_7", "away_player_8",
"away_player_9", "away_player_10", "away_player_11"]]
msno.matrix(df_match_lineup.sort_values("match_date"))
<AxesSubplot:>

- If there is at least one missing value in the starting lineup, let’s exclude the match from the analysis.
df_match_lineup = df_match_lineup.dropna()
df_match_lineup.shape
(21374, 24)
- Use the pc variables measured closest to the match date for each player.
df_player_num_attr_pc
| player_api_id | player_measured_date | PC1 | PC2 | PC3 | PC4 | PC5 | |
|---|---|---|---|---|---|---|---|
| 0 | 505942 | 2016-02-18 | 1.046944 | 2.688259 | -1.529901 | 0.360865 | 0.548021 |
| 1 | 505942 | 2015-11-19 | 1.046944 | 2.688259 | -1.529901 | 0.360865 | 0.548021 |
| 2 | 505942 | 2015-09-21 | 1.400003 | 2.036745 | -2.359132 | 0.536742 | 0.502532 |
| 3 | 505942 | 2015-03-20 | 1.615408 | 1.927571 | -2.657865 | 0.548879 | 0.399693 |
| 4 | 505942 | 2007-02-22 | 1.615408 | 1.927571 | -2.657865 | 0.548879 | 0.399693 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 183137 | 39902 | 2009-08-30 | -4.991884 | -2.921361 | 4.090605 | 0.593824 | 2.434563 |
| 183138 | 39902 | 2009-02-22 | -3.766452 | -2.749777 | 2.543465 | 0.492997 | 2.349990 |
| 183139 | 39902 | 2008-08-30 | -3.745176 | -2.589656 | 2.519182 | 0.452451 | 2.349129 |
| 183140 | 39902 | 2007-08-30 | -3.382604 | -2.497503 | 2.251858 | 0.317332 | 2.726097 |
| 183141 | 39902 | 2007-02-22 | -3.659867 | -2.058531 | 1.834322 | 0.277823 | 2.463788 |
183142 rows × 7 columns
df_match_player_attr_pcs = df_match_lineup[["match_api_id"]]
for i, col in enumerate(["home_player_1", "home_player_2", "home_player_3", "home_player_4",
"home_player_5", "home_player_6", "home_player_7", "home_player_8",
"home_player_9", "home_player_10", "home_player_11"]):
merged = df_match_lineup[["match_date", "match_api_id", col]].merge(df_player_num_attr_pc,
how = "left", left_on = col, right_on = "player_api_id") \
.sort_values(["player_api_id", "match_date", "player_measured_date"])
merged["date_diff"] = (pd.to_datetime(merged.player_measured_date) - pd.to_datetime(merged.match_date)).dt.days
merged = merged[merged.date_diff < 0]
merged_get_first = merged.sort_values(["player_api_id", "match_date", "date_diff"], ascending = [True, True, False]).groupby(["player_api_id", "match_date"]).first()
merged_get_first = merged_get_first[["match_api_id", "PC1", "PC2", "PC3", "PC4", "PC5"]].rename(columns = {"PC1": f"home_player_{i + 1}_pc_1",
"PC2": f"home_player_{i + 1}_pc_2",
"PC3": f"home_player_{i + 1}_pc_3",
"PC4": f"home_player_{i + 1}_pc_4",
"PC5": f"home_player_{i + 1}_pc_5"})
df_match_player_attr_pcs = df_match_player_attr_pcs.merge(merged_get_first, how = "left", on = "match_api_id")
for i, col in enumerate(["away_player_1", "away_player_2", "away_player_3", "away_player_4",
"away_player_5", "away_player_6", "away_player_7", "away_player_8",
"away_player_9", "away_player_10", "away_player_11"]):
merged = df_match_lineup[["match_date", "match_api_id", col]].merge(df_player_num_attr_pc,
how = "left", left_on = col, right_on = "player_api_id") \
.sort_values(["player_api_id", "match_date", "player_measured_date"])
merged["date_diff"] = (pd.to_datetime(merged.player_measured_date) - pd.to_datetime(merged.match_date)).dt.days
merged = merged[merged.date_diff < 0]
merged_get_first = merged.sort_values(["player_api_id", "match_date", "date_diff"], ascending = [True, True, False]).groupby(["player_api_id", "match_date"]).first()
merged_get_first = merged_get_first[["match_api_id", "PC1", "PC2", "PC3", "PC4", "PC5"]].rename(columns = {"PC1": f"away_player_{i + 1}_pc_1",
"PC2": f"away_player_{i + 1}_pc_2",
"PC3": f"away_player_{i + 1}_pc_3",
"PC4": f"away_player_{i + 1}_pc_4",
"PC5": f"away_player_{i + 1}_pc_5"})
df_match_player_attr_pcs = df_match_player_attr_pcs.merge(merged_get_first, how = "left", on = "match_api_id")
df_match_player_attr_pcs
| match_api_id | home_player_1_pc_1 | home_player_1_pc_2 | home_player_1_pc_3 | home_player_1_pc_4 | home_player_1_pc_5 | home_player_2_pc_1 | home_player_2_pc_2 | home_player_2_pc_3 | home_player_2_pc_4 | home_player_2_pc_5 | home_player_3_pc_1 | home_player_3_pc_2 | home_player_3_pc_3 | home_player_3_pc_4 | home_player_3_pc_5 | home_player_4_pc_1 | home_player_4_pc_2 | home_player_4_pc_3 | home_player_4_pc_4 | home_player_4_pc_5 | home_player_5_pc_1 | home_player_5_pc_2 | home_player_5_pc_3 | home_player_5_pc_4 | home_player_5_pc_5 | home_player_6_pc_1 | home_player_6_pc_2 | home_player_6_pc_3 | home_player_6_pc_4 | home_player_6_pc_5 | home_player_7_pc_1 | home_player_7_pc_2 | home_player_7_pc_3 | home_player_7_pc_4 | home_player_7_pc_5 | home_player_8_pc_1 | home_player_8_pc_2 | home_player_8_pc_3 | home_player_8_pc_4 | home_player_8_pc_5 | home_player_9_pc_1 | home_player_9_pc_2 | home_player_9_pc_3 | home_player_9_pc_4 | home_player_9_pc_5 | home_player_10_pc_1 | home_player_10_pc_2 | home_player_10_pc_3 | home_player_10_pc_4 | home_player_10_pc_5 | home_player_11_pc_1 | home_player_11_pc_2 | home_player_11_pc_3 | home_player_11_pc_4 | home_player_11_pc_5 | away_player_1_pc_1 | away_player_1_pc_2 | away_player_1_pc_3 | away_player_1_pc_4 | away_player_1_pc_5 | away_player_2_pc_1 | away_player_2_pc_2 | away_player_2_pc_3 | away_player_2_pc_4 | away_player_2_pc_5 | away_player_3_pc_1 | away_player_3_pc_2 | away_player_3_pc_3 | away_player_3_pc_4 | away_player_3_pc_5 | away_player_4_pc_1 | away_player_4_pc_2 | away_player_4_pc_3 | away_player_4_pc_4 | away_player_4_pc_5 | away_player_5_pc_1 | away_player_5_pc_2 | away_player_5_pc_3 | away_player_5_pc_4 | away_player_5_pc_5 | away_player_6_pc_1 | away_player_6_pc_2 | away_player_6_pc_3 | away_player_6_pc_4 | away_player_6_pc_5 | away_player_7_pc_1 | away_player_7_pc_2 | away_player_7_pc_3 | away_player_7_pc_4 | away_player_7_pc_5 | away_player_8_pc_1 | away_player_8_pc_2 | away_player_8_pc_3 | away_player_8_pc_4 | away_player_8_pc_5 | away_player_9_pc_1 | away_player_9_pc_2 | away_player_9_pc_3 | away_player_9_pc_4 | away_player_9_pc_5 | away_player_10_pc_1 | away_player_10_pc_2 | away_player_10_pc_3 | away_player_10_pc_4 | away_player_10_pc_5 | away_player_11_pc_1 | away_player_11_pc_2 | away_player_11_pc_3 | away_player_11_pc_4 | away_player_11_pc_5 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 493017 | 9.172915 | -0.705596 | 1.028500 | -0.044401 | 1.246272 | 3.957784 | 1.650964 | -2.348632 | -0.837480 | 0.223750 | -0.817702 | -0.589548 | 0.195433 | -2.144582 | 1.524434 | 3.108730 | 0.633708 | -1.772338 | 0.807727 | 1.684577 | 0.615229 | -0.611994 | -0.845425 | 0.395221 | 3.107710 | -0.038702 | -0.551509 | -0.280096 | -0.068082 | 2.645878 | 1.086047 | -2.583583 | -0.596348 | -1.915882 | -0.987251 | -0.845848 | -0.053597 | 0.746034 | -0.515174 | 1.763346 | 4.244623 | 1.088030 | -2.054898 | -0.414998 | 0.270225 | -0.559441 | 1.233335 | 0.426201 | 0.609910 | 1.364324 | 1.472274 | -0.298277 | -1.530989 | 0.743621 | -0.875547 | 9.794795 | -0.549117 | 1.941560 | 0.281992 | 0.521328 | -1.886782 | 1.005850 | 1.291888 | 0.172012 | 1.212622 | 1.320201 | 1.065244 | -0.406996 | -1.803778 | 0.573483 | 2.628391 | 0.940615 | -0.609534 | -1.586848 | 0.652795 | 3.207876 | 0.685128 | -1.480756 | -1.499414 | 0.134771 | -1.360311 | -2.979972 | 1.181522 | -0.726996 | 1.161218 | -1.735000 | -0.323721 | 0.977427 | -1.047350 | 1.307837 | -0.004187 | 1.646099 | 0.543917 | 0.101722 | 0.374692 | -1.836650 | -2.551881 | 1.212316 | 0.139711 | 1.155972 | -0.916797 | -1.202214 | 0.022668 | -0.118531 | -0.332637 | 0.628660 | -1.751083 | -0.030693 | 0.418133 | -0.206630 |
| 1 | 493025 | 6.467731 | -2.125163 | 3.092089 | -0.930974 | 1.172527 | 0.390653 | 1.341612 | 0.109198 | -0.418330 | 0.921304 | 2.673401 | 1.688787 | -0.346764 | 0.032250 | -0.148238 | 0.544724 | 0.856618 | -0.234054 | -0.185530 | 1.496783 | 0.349558 | -0.322471 | 0.311433 | -0.720916 | 0.765089 | -2.997770 | -1.279240 | 1.492689 | -0.174247 | 1.728035 | -1.262416 | 0.290702 | 1.097804 | 0.377514 | 1.681666 | -1.035827 | -0.034337 | 1.133088 | 0.899750 | 1.451945 | -1.970162 | -1.958911 | 1.072887 | 0.806278 | 2.735079 | -1.206227 | -2.567254 | 0.401355 | -0.240084 | 1.552955 | -0.417405 | -0.845298 | 0.077456 | -1.026087 | -0.249720 | 6.746068 | -1.225452 | 5.441467 | -1.741143 | 0.153029 | -0.720625 | 1.362837 | 0.950063 | -0.430052 | 1.920381 | 1.321520 | 2.234319 | 0.546943 | 0.392576 | 1.081937 | -1.157135 | 1.181325 | 0.901247 | 0.722505 | 0.967595 | -1.573349 | 0.366277 | 1.085719 | -0.362088 | 0.794429 | -2.362155 | 0.960996 | 2.341796 | 0.480765 | 1.235871 | -2.036662 | 1.423269 | 1.975662 | 0.456836 | 0.564672 | -1.002934 | -2.335553 | 0.470491 | -0.111478 | 0.585989 | -1.407944 | -2.205414 | 1.069296 | 0.053596 | 0.809813 | -2.259696 | -2.778135 | 1.816538 | 1.407815 | 0.022775 | 0.169010 | -2.319129 | 0.361658 | 0.935315 | -1.877390 |
| 2 | 493027 | 7.587977 | -0.669761 | 3.351410 | -1.725204 | 0.971832 | -0.659142 | 2.447278 | 1.853202 | -0.962117 | 0.277773 | 0.782575 | 2.059493 | 1.311728 | -0.983126 | 0.652947 | 0.750721 | 3.078327 | 1.228950 | 0.540478 | 0.034199 | 0.361849 | 2.339354 | 1.689313 | 0.713260 | 0.455514 | -3.759299 | 1.204148 | 3.191002 | -0.784951 | 0.913294 | -3.018176 | 0.545416 | 2.535436 | -0.218828 | 2.658318 | -2.589758 | 0.570906 | 1.654008 | 0.345163 | 0.580780 | -3.859427 | -1.870653 | 2.659663 | -0.725774 | 2.171793 | -0.199689 | -2.415793 | -0.317491 | 0.067180 | 0.943378 | -1.056848 | -1.205950 | 1.067483 | 1.246078 | -0.070047 | 8.942737 | -2.146976 | 1.394474 | -1.573403 | -0.305385 | 0.326027 | 1.666843 | 0.694338 | 0.105792 | 1.385551 | 1.716745 | 1.133649 | -0.670094 | -1.374712 | 0.468374 | 1.822806 | 1.126330 | -0.789073 | -1.478142 | 0.878564 | 5.021780 | 0.645976 | -1.954515 | -1.642732 | 0.230055 | 0.882744 | 1.703690 | 0.053693 | 0.563981 | 0.978446 | -0.780143 | -0.700812 | 0.612057 | 1.416142 | 2.059364 | -0.691788 | -2.016431 | 0.356132 | -1.164456 | 2.251789 | 1.464444 | 0.379332 | -0.902796 | -0.837762 | 0.211743 | 1.923084 | -2.893726 | -2.042800 | -1.018973 | -1.292926 | 0.416905 | -2.210474 | -0.336256 | 0.054563 | 2.588626 |
| 3 | 493034 | 9.172915 | -0.705596 | 1.028500 | -0.044401 | 1.246272 | 3.957784 | 1.650964 | -2.348632 | -0.837480 | 0.223750 | -0.817702 | -0.589548 | 0.195433 | -2.144582 | 1.524434 | -0.559441 | 1.233335 | 0.426201 | 0.609910 | 1.364324 | -0.845848 | -0.053597 | 0.746034 | -0.515174 | 1.763346 | 0.615229 | -0.611994 | -0.845425 | 0.395221 | 3.107710 | 3.108730 | 0.633708 | -1.772338 | 0.807727 | 1.684577 | 1.086047 | -2.583583 | -0.596348 | -1.915882 | -0.987251 | 4.244623 | 1.088030 | -2.054898 | -0.414998 | 0.270225 | -0.105750 | 0.211866 | 0.619067 | 0.766076 | -0.979514 | 1.472274 | -0.298277 | -1.530989 | 0.743621 | -0.875547 | 7.587977 | -0.669761 | 3.351410 | -1.725204 | 0.971832 | -0.659142 | 2.447278 | 1.853202 | -0.962117 | 0.277773 | 0.361849 | 2.339354 | 1.689313 | 0.713260 | 0.455514 | 1.052194 | 2.080899 | 0.370792 | -0.782729 | 0.394531 | 2.077104 | 1.903504 | 0.684877 | 1.471026 | -0.492695 | -3.759299 | 1.204148 | 3.191002 | -0.784951 | 0.913294 | -0.631336 | -1.917288 | 0.404582 | -1.604514 | -0.334234 | -3.859427 | -1.870653 | 2.659663 | -0.725774 | 2.171793 | -3.018176 | 0.545416 | 2.535436 | -0.218828 | 2.658318 | -2.589758 | 0.570906 | 1.654008 | 0.345163 | 0.580780 | -1.056848 | -1.205950 | 1.067483 | 1.246078 | -0.070047 |
| 4 | 493040 | 8.942737 | -2.146976 | 1.394474 | -1.573403 | -0.305385 | 0.326027 | 1.666843 | 0.694338 | 0.105792 | 1.385551 | 1.831429 | 1.436093 | -0.753915 | -1.703966 | 1.116009 | 1.716745 | 1.133649 | -0.670094 | -1.374712 | 0.468374 | 1.822806 | 1.126330 | -0.789073 | -1.478142 | 0.878564 | 0.882744 | 1.703690 | 0.053693 | 0.563981 | 0.978446 | -0.780143 | -0.700812 | 0.612057 | 1.416142 | 2.059364 | -0.691788 | -2.016431 | 0.356132 | -1.164456 | 2.251789 | 0.416905 | -2.210474 | -0.336256 | 0.054563 | 2.588626 | 1.923084 | -2.893726 | -2.042800 | -1.018973 | -1.292926 | 1.464444 | 0.379332 | -0.902796 | -0.837762 | 0.211743 | 9.554376 | -0.788436 | 2.277353 | 0.709577 | 0.731361 | 1.970444 | 1.864556 | -0.597320 | 0.168243 | 0.246152 | 4.470705 | 2.449109 | -1.396639 | -0.315235 | 0.008914 | 1.829497 | 1.079852 | -2.363541 | -0.545278 | 0.775451 | 4.019384 | 0.341251 | -2.730811 | -1.093104 | 0.247451 | 0.682845 | 0.218515 | -0.581395 | 0.951872 | 2.794600 | -0.793495 | -0.740710 | 0.492791 | 0.151706 | 2.489086 | 1.933672 | 0.891587 | -1.109525 | -0.792216 | -0.402166 | -0.203211 | -1.625096 | -0.009942 | 0.220891 | 1.091029 | 0.344323 | -3.281820 | -1.047589 | -0.870665 | 0.278254 | 0.817798 | -2.420838 | -0.361229 | 0.689072 | -0.850103 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 21369 | 1992089 | 10.658392 | -1.977648 | 0.867377 | 0.480266 | 1.074043 | 2.309500 | 0.931667 | -3.860209 | 0.540601 | -0.235396 | 4.829365 | 3.813902 | -2.190143 | 2.896975 | 0.303158 | 4.363347 | 3.346856 | -2.511394 | 1.256390 | -1.264653 | 0.693648 | 2.017825 | -1.753765 | -2.152817 | -0.662817 | 2.162406 | -2.749882 | -4.593596 | 0.434768 | 0.841622 | -0.817444 | 1.999270 | -0.449227 | -0.166485 | -0.336412 | 1.345606 | 0.069802 | -3.170566 | -0.790618 | 1.344050 | -1.052756 | 0.131148 | -1.406108 | -0.920376 | 1.149814 | -0.756354 | -0.555587 | -0.498005 | 0.934758 | -2.606863 | -2.571317 | -2.788252 | -0.783057 | -0.228549 | 0.825315 | 11.297455 | -1.943311 | -0.622608 | 1.229927 | 2.987196 | 0.715893 | 1.452546 | -2.111059 | -0.524585 | 0.158115 | 5.742015 | 3.143138 | -3.280495 | 0.901933 | -1.055971 | 3.559549 | 2.023672 | -2.954817 | 1.214646 | 0.395441 | -0.652392 | 0.617508 | -2.119288 | 0.420918 | -0.312378 | -1.179215 | 0.990786 | -0.758331 | 0.280326 | -1.019536 | -0.983237 | 1.033960 | -0.714609 | -0.027433 | 0.645499 | -0.468185 | -2.492838 | -2.135542 | 0.672037 | 1.175961 | 0.089223 | -1.409898 | -2.184536 | 0.860498 | 2.273182 | -1.604118 | -1.071515 | -1.719817 | -0.597994 | 0.268359 | -2.027482 | -0.929774 | -0.429955 | -0.408179 | -0.159691 |
| 21370 | 1992091 | 12.241704 | -2.064268 | 0.575912 | 0.027214 | 0.566339 | 2.941632 | 1.263739 | -2.564271 | -0.317355 | 0.251567 | -0.338978 | 1.001774 | -1.054876 | 0.023669 | 0.503527 | 3.322061 | 2.191727 | -1.431455 | -0.935647 | -2.106780 | 0.805775 | 2.313243 | -1.001739 | -0.734564 | -1.002397 | 0.167600 | 0.748910 | -2.441020 | -0.128875 | 1.354357 | -0.992210 | 1.486864 | -0.837892 | -0.283414 | 0.885329 | -0.144093 | -1.113188 | -1.698684 | 0.131062 | 0.388273 | -1.074581 | -2.141164 | -1.372831 | 0.593159 | -0.060973 | -1.374814 | -2.597658 | -1.353337 | -0.079045 | 0.449154 | 1.613120 | -2.543332 | -2.596794 | 1.483082 | 0.105411 | 12.290704 | -2.430347 | -1.144252 | -0.197618 | 0.806754 | 4.561174 | 1.116819 | -3.853639 | -1.449020 | -0.008049 | 0.859240 | 1.593152 | -1.523836 | 0.182434 | 0.923206 | 3.366277 | 2.096648 | -2.560049 | 1.302320 | 0.621418 | -0.179065 | 1.542439 | -0.883813 | -1.534520 | -0.929437 | -1.274091 | 1.044816 | -0.632610 | 0.214867 | -1.155989 | -1.165921 | 1.062657 | -0.462190 | -0.128853 | 0.815702 | -0.551649 | -3.139936 | -1.871850 | 0.919162 | 1.101761 | -0.895229 | -1.357762 | -1.203311 | 0.502165 | 1.217481 | 0.817539 | -1.879420 | -1.866788 | 1.257348 | -2.040793 | -2.027482 | -0.929774 | -0.429955 | -0.408179 | -0.159691 |
| 21371 | 1992092 | 12.064639 | -2.316174 | 0.433132 | 0.848287 | 2.231699 | 0.041330 | 1.067682 | -1.528655 | 1.481193 | 0.463667 | 0.484107 | 2.132478 | -2.712916 | 0.415998 | -0.482643 | 2.368090 | 1.818320 | -1.835697 | 2.213761 | 1.021893 | 2.542915 | 2.152431 | -2.292237 | -0.564741 | 0.029556 | -0.661431 | 1.259634 | -1.538893 | 0.327645 | 1.036215 | -2.276135 | -1.075133 | -1.057017 | 0.562307 | 1.955697 | 1.429669 | 1.815169 | -2.713671 | -1.202667 | -0.206876 | -2.066585 | -1.486135 | -2.249610 | 0.528683 | 1.248011 | 2.305110 | 0.196755 | -3.015813 | 0.884716 | 1.782990 | 0.202502 | 0.363303 | -2.203897 | 0.890204 | 1.972601 | 11.605200 | -2.180668 | 0.633318 | 0.505193 | 2.121379 | 0.936419 | 1.221556 | -2.522024 | -0.246410 | 0.123592 | 0.472920 | 1.911681 | -1.404507 | -1.552859 | 0.102178 | 2.614211 | 3.470441 | -0.910743 | 4.107990 | 0.561911 | -0.386817 | 2.148362 | -0.789333 | 0.490890 | 0.348217 | 1.412310 | -0.776693 | -3.695553 | -2.537028 | 0.654647 | 0.038726 | 0.194863 | -1.450742 | -1.480179 | -0.329070 | -1.248945 | 0.135936 | -0.373578 | -0.008843 | 1.145546 | -1.268382 | -2.736194 | -2.134065 | -2.336878 | 0.312531 | -1.267814 | -0.952598 | -0.567959 | 1.417181 | -1.031334 | -2.777047 | -2.958610 | -1.015094 | -0.426489 | 1.111898 |
| 21372 | 1992093 | 12.055222 | -2.425069 | 1.339212 | 0.274265 | 0.840987 | 0.807082 | 0.384775 | -3.089207 | -1.255036 | 0.706752 | 3.316945 | 2.584242 | -1.555439 | -0.624218 | -1.494029 | 0.752059 | 2.634155 | 0.029098 | -2.048228 | -2.345172 | -0.477442 | 0.588966 | -1.810286 | -0.671132 | 0.670969 | -4.479330 | 0.957108 | 1.567778 | 1.821481 | 1.887503 | -0.304292 | 2.229022 | -0.374285 | -0.629550 | -0.550464 | -2.488337 | -2.492236 | 0.004280 | -0.328167 | -0.137174 | -1.303862 | -3.461748 | -0.804760 | -0.413108 | -1.193188 | -3.587590 | -2.419330 | 0.831607 | 2.357133 | 0.281695 | -2.476574 | -2.795499 | -0.080093 | 0.062647 | -2.011499 | 12.360092 | -1.868190 | 1.990823 | 0.546306 | 0.372803 | -1.329084 | 1.395726 | -0.722913 | -0.893829 | 0.215497 | 1.165437 | 3.021942 | -0.188859 | 2.311939 | 0.403286 | -4.008986 | 1.662399 | 0.332054 | 0.291516 | 1.172819 | -1.397060 | 1.400592 | -0.140128 | -0.156016 | -0.683065 | -1.079926 | 0.426619 | -0.576034 | -2.142184 | -0.515036 | -0.909491 | 1.934916 | 0.160027 | 0.916388 | 1.269362 | -1.061561 | -2.717686 | -0.665092 | -0.671323 | -2.761000 | -1.823349 | -1.744156 | -0.979761 | 0.332470 | 0.702492 | -1.032879 | -1.066695 | -0.612696 | -0.450766 | 0.290344 | -1.642076 | -3.316316 | 0.011961 | 0.375835 | -3.465042 |
| 21373 | 1992095 | 11.109830 | -2.853322 | 3.703596 | -0.584121 | -0.402950 | -0.371390 | 1.595755 | -1.118035 | -1.088917 | -0.072962 | 0.782687 | 3.074394 | -0.351447 | 0.339711 | -0.876789 | 0.954087 | 3.884860 | 0.323121 | -0.869362 | -1.514651 | -1.424525 | 1.026468 | -0.666682 | -0.521699 | 1.419345 | -2.778054 | -3.370352 | -0.446712 | -0.978029 | -0.758080 | 2.408722 | 1.658858 | -2.251337 | -0.820091 | -0.630519 | -2.159654 | 1.814418 | 0.256719 | 0.567236 | -0.314540 | -2.998560 | -3.922779 | 0.105072 | 0.225998 | 1.222075 | -1.617306 | -2.339904 | -0.992463 | -1.548240 | 0.519653 | -2.910805 | -2.167197 | 0.101099 | 1.144445 | -0.165881 | 12.307758 | -1.719545 | 4.261962 | 0.728169 | 1.500838 | -1.795498 | 2.597198 | 0.399761 | -0.004946 | -0.912142 | 1.097353 | 2.822507 | -0.720652 | -1.295103 | -1.490739 | -1.267830 | 3.175089 | 1.368422 | -0.396539 | -2.110602 | 0.679151 | 1.514007 | -0.744474 | -2.598616 | -1.680172 | -2.472498 | 1.701903 | 0.506631 | -1.382100 | -0.412576 | -3.875220 | 1.593144 | 1.643325 | 2.150146 | 2.170000 | -1.395402 | 0.464636 | -0.435683 | -0.975001 | -1.386601 | -3.586664 | -2.865959 | 0.395006 | 1.476579 | 2.034092 | -4.892234 | -0.601987 | 1.354383 | 0.830095 | -0.426868 | -3.654878 | -2.094599 | 1.339425 | 0.285083 | -2.245146 |
21374 rows × 111 columns
- Save the table.
df_match_player_attr_pcs.to_csv("../data/df_match_player_attr_pcs.csv", index = False)