How to Import FIFA18 Player Data from CSV into MySQL with Python
This step‑by‑step guide shows how to download the FIFA 18 player dataset from GitHub, create a MySQL database and table, read the CSV with Python’s csv.DictReader, generate INSERT statements, batch‑load the data efficiently, and query Argentine players, illustrating a complete end‑to‑end data‑import workflow.
Overview
MySQL is a common database and an essential interview skill. This article demonstrates how to combine Python and SQL to import the FIFA 18 player dataset into a MySQL database.
Download the data
The dataset is available on GitHub at https://github.com/amanthedorkknight/fifa18-all-player-statistics . The file to use is basicplayerdata.csv.
Create database and table
CREATE DATABASE fifa18_db DEFAULT CHARACTER SET utf8 COLLATE utf8_general_ci;
USE fifa18_db;
CREATE TABLE player(
id INT PRIMARY KEY AUTO_INCREMENT,
player_id INT,
name CHAR(64),
age INT,
poster CHAR(64),
nationality CHAR(64),
flag CHAR(64),
overall INT,
potential INT,
club CHAR(64) DEFAULT '',
club_logo CHAR(64),
value CHAR(16),
wage CHAR(16),
special INT
) DEFAULT CHARSET=utf8;Read CSV and map fields
import csv
path = '/home/linux/workdir/data/basicplayerdata.csv'
field = ['', 'ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag', 'Overall', 'Potential', 'Club', 'Club Logo', 'Value', 'Wage', 'Special']
f = open(path)
fcsv = csv.DictReader(f, fieldnames=field)
next(fcsv) # skip header line
line = next(fcsv)
print(line)Map CSV columns to MySQL columns
sqlfield = ['player_id','name','age','poster','nationality','flag','overall','potential','club','club_logo','value','wage','special']
csvfield = ['ID','Name','Age','Photo','Nationality','Flag','Overall','Potential','Club','Club Logo','Value','Wage','Special']
keysinfo = dict(zip(sqlfield, csvfield))Generate INSERT statement for a row
data = {}
for sfield, cfield in keysinfo.items():
ele = line.get(cfield, '')
data.setdefault(sfield, ele)
tablename = 'player'
fields = ','.join(data.keys())
vals = ','.join(["'%s'" % v for v in data.values()])
sql = f"INSERT INTO {tablename}({fields}) VALUES({vals})"
print(sql)Insert the row into MySQL
import pymysql
db = pymysql.connect('localhost', 'root', 'abc123', 'fifa18_db', charset='utf8')
cursor = db.cursor()
cursor.execute(sql)
db.commit()
cursor.close()
db.close()Result of a single insertion
INSERT INTO player(player_id,name,age,poster,nationality,flag,overall,potential,club,club_logo,value,wage,special) VALUES('158023','L. Messi','30','https://cdn.sofifa.org/players/4/18/158023.png','Argentina','https://cdn.sofifa.org/flags/52.png','94','94','FC Barcelona','https://cdn.sofifa.org/teams/2/18/light/241.png','€118.5M','€565K','2161')Wrap the logic into a reusable class
class LoadDataFromCsvToMysql:
def __init__(self, csvpath, csvfield, mysqlfield, table, sqlconfig):
self.dbconfig = sqlconfig
self.inpath = csvpath
self.sqlfield = mysqlfield
self.csvfield = csvfield
self.fieldmap = dict(zip(mysqlfield, csvfield[1:]))
self.tablename = table
def connectSql(self):
self.db = pymysql.connect(**self.dbconfig, charset='utf8')
self.cursor = self.db.cursor()
def disconnectSql(self):
self.cursor.close()
self.db.close()
def processSql(self, sql):
ret = self.cursor.execute(sql)
self.db.commit()
return ret
def loadCsv(self):
self.f = open(self.inpath)
fcsv = csv.DictReader(self.f, fieldnames=self.csvfield)
next(fcsv)
return fcsv
def closeCsv(self):
self.f.close()
def gensql(self, linedata):
data = {}
for sfield, cfield in self.fieldmap.items():
data.setdefault(sfield, linedata.get(cfield, ''))
fields = ','.join(data.keys())
vals = ','.join(["'%s'" % v for v in data.values()])
return f"INSERT INTO {self.tablename}({fields}) VALUES({vals})"
def process(self):
self.connectSql()
fcsv = self.loadCsv()
for line in fcsv:
sql = self.gensql(line)
self.processSql(sql)
self.disconnectSql()
self.closeCsv()Batch insertion performance
Inserting the full 18 000 rows takes about 20 seconds. For better performance, batch 500 rows per INSERT.
Querying data with inheritance
class QueryMysql(LoadDataFromCsvToMysql):
def __init__(self, sqlconfig):
super().__init__('', [], [], '', sqlconfig)
def genSql(self, table, fields, condition=None):
fds = ','.join(fields)
cond = f" where {condition}" if condition else ''
return f"select {fds} from {table}{cond}"
def process(self, tablename, fields, condition=None):
self.connectSql()
sql = self.genSql(tablename, fields, condition)
self.cursor.execute(sql)
items = self.cursor.fetchall()
for item in items:
print(item)
print('all Argentina player:', len(items))
self.disconnectSql()
# usage
sqlconfig = {'host':'localhost','port':3306,'user':'root','passwd':'abc123','db':'fifa18_db'}
obj = QueryMysql(sqlconfig)
obj.process('player', ['name','poster','age'], 'nationality="Argentina"')Query result
('L. Messi', 'https://cdn.sofifa.org/players/4/18/158023.png', 30)
('G. Higuaín', 'https://cdn.sofifa.org/players/4/18/167664.png', 29)
... (total 966 Argentine players)The dataset contains 966 Argentine players in total.
Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contactand we will review it promptly.
21CTO
21CTO (21CTO.com) offers developers community, training, and services, making it your go‑to learning and service platform.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
