JSON-to-MySQL Movie Database¶
The JSON-to-MySQL Movie Database project involves creating a relational database system using MySQL to store comprehensive information about movies, actors, directors, genres, and their relationships. The project also includes developing a Python script for uploading data from JSON files into the MySQL database.
Tables:
- ACTORS: This table stores information about actors.
- DIRECTORS: Contains data about movie directors.
- DIRECTORS_GENRES: A linking table that represents the many-to-many relationship between directors and genres.
- MOVIES: Stores details about movies, including their unique IDs, titles, year, and rakingss.
- MOVIES_GENRES: A linking table connecting movies with genres.
- ROLES: This table contains information about the roles actors play in movies.
- MOVIES_DIRECTORS: Another linking table that establishes the relationship between movies and directors.
Following the next ER DIAGRAM:
Import library¶
In [ ]:
import os
import pandas
import json
import mysql.connector
from tqdm import tqdm
Create Database and verification¶
In [ ]:
conn = mysql.connector.connect(host = "localhost",
user = "root",
passwd = "jsonfile")
cur = conn.cursor()
#creating a new database
cur.execute("CREATE DATABASE IMDBJSON") # TO CREATE A NEW DATABASE UNCOMENT THIS LINE
#getting the list of all the databases which will now include the new database PythonDB
db = cur.execute("SHOW DATABASES")
for x in cur:
print(x) # verify database is created
conn.close() # Close Conection
('imdbjson',) ('information_schema',) ('mysql',) ('performance_schema',) ('sakila',) ('sys',) ('task',) ('world',)
Create tables based on ER SCHEMA¶
In [ ]:
conn = mysql.connector.connect(
host='localhost',
user='root',
password='jsonfile',
database='imdbjson'
)
cur = conn.cursor()
#Creating Directors table
sql ='''CREATE TABLE DIRECTORS (
ID INT(11) NOT NULL DEFAULT 0,
FIRST_NAME VARCHAR(100) DEFAULT NULL,
LAST_NAME VARCHAR(100) DEFAULT NULL,
PRIMARY KEY (ID),
KEY DIRECTORS_FIRST_NAME (FIRST_NAME),
KEY DIRECTORS_LAST_NAME (LAST_NAME)
)ENGINE=INNODB;
'''
cur.execute(sql)
#Creating actors table
sql ='''CREATE TABLE ACTORS (
ID INT(11) NOT NULL DEFAULT 0,
FIRST_NAME VARCHAR(100) DEFAULT NULL,
LAST_NAME VARCHAR(100) DEFAULT NULL,
GENDER CHAR(1) DEFAULT NULL,
PRIMARY KEY (ID),
KEY ACTORS_FIRST_NAME (FIRST_NAME),
KEY ACTORS_LAST_NAME (LAST_NAME)
)ENGINE=INNODB;
'''
cur.execute(sql)
#Creating movies table
sql ='''CREATE TABLE MOVIES (
ID INT(11) NOT NULL DEFAULT 0,
NAME VARCHAR(100) DEFAULT NULL,
YEAR INT(11) DEFAULT NULL,
RANKED FLOAT DEFAULT NULL,
PRIMARY KEY (ID),
KEY MOVIES_NAME (NAME)
)ENGINE=INNODB;
'''
cur.execute(sql)
#Creating directors genres table
sql ='''CREATE TABLE DIRECTORS_GENRES (
DIRECTOR_ID INT(11) NOT NULL,
GENRE VARCHAR(100) NOT NULL,
PROB FLOAT DEFAULT NULL,
PRIMARY KEY (DIRECTOR_ID,GENRE),
KEY DIRECTOR_GENRES_DIRECTOR_ID (DIRECTOR_ID),
CONSTRAINT `DIRECTORS_GENRES__1` FOREIGN KEY (DIRECTOR_ID) REFERENCES DIRECTORS (ID)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=INNODB;
'''
cur.execute(sql)
#Creating movies director table
sql ='''CREATE TABLE MOVIES_DIRECTORS (
DIRECTOR_ID INT(11) NOT NULL,
MOVIE_ID INT(11) NOT NULL,
PRIMARY KEY (DIRECTOR_ID,MOVIE_ID),
KEY MOVIES_DIRECTORS_DIRECTOR_ID (DIRECTOR_ID),
KEY MOVIES_DIRECTORS_MOVIE_ID (MOVIE_ID),
CONSTRAINT `MOVIES_DIRECTORS__1` FOREIGN KEY (DIRECTOR_ID) REFERENCES DIRECTORS (ID)
ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT `MOVIES_DIRECTORS__2` FOREIGN KEY (MOVIE_ID) REFERENCES MOVIES (ID)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=INNODB;
'''
cur.execute(sql)
#Creating roles table
sql ='''CREATE TABLE ROLES (
ACTOR_ID INT(11) NOT NULL,
MOVIE_ID INT(11) NOT NULL,
ROLE VARCHAR(100) NOT NULL,
PRIMARY KEY (ACTOR_ID,MOVIE_ID,ROLE),
KEY ACTOR_ID (ACTOR_ID),
KEY MOVIE_ID (MOVIE_ID),
CONSTRAINT ROLES__1 FOREIGN KEY (ACTOR_ID) REFERENCES ACTORS (ID)
ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT ROLES__2 FOREIGN KEY (MOVIE_ID) REFERENCES MOVIES (ID)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=INNODB;
'''
cur.execute(sql)
#Creating movies genres table
sql ='''CREATE TABLE MOVIES_GENRES (
MOVIE_ID INT(11) NOT NULL,
GENRE VARCHAR(100) NOT NULL,
PRIMARY KEY (MOVIE_ID,GENRE),
KEY MOVIE_GENRES_MOVIE_ID (MOVIE_ID),
CONSTRAINT MOVIES_GENRES__1 FOREIGN KEY (MOVIE_ID) REFERENCES MOVIES (ID)
ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=INNODB;
'''
cur.execute(sql)
Upload JSON files provided to each table¶
In [ ]:
# Define a function to load JSON data and upload it to a MySQL table
def upload_json_data(file_path, table_name, columns, tqdm_color):
with open(file_path, 'r') as f:
data = json.load(f)
print(f"{table_name} table uploading")
for item in tqdm(data, colour=tqdm_color):
values = tuple(item[column] for column in columns)
placeholders = ', '.join(['%s'] * len(columns))
sql = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
# Define your database connection (conn) and cursor (cur) here
cur.execute(sql, values)
conn.commit()
# Call the function for each JSON file and corresponding table
upload_json_data('actors.json', 'ACTORS', ['id', 'first_name', 'last_name', 'gender'], 'yellow')
upload_json_data('directors.json', 'DIRECTORS', ['id', 'first_name', 'last_name'], 'blue')
upload_json_data('movies.json', 'MOVIES', ['id', 'name', 'year', 'ranked'], 'red')
upload_json_data('directorsgenres.json', 'DIRECTORS_GENRES', ['director_id', 'genre', 'prob'], 'yellow')
upload_json_data('moviesgenres.json', 'MOVIES_GENRES', ['movie_id', 'genre'], 'blue')
upload_json_data('moviesdirectors.json', 'MOVIES_DIRECTORS', ['director_id', 'movie_id'], 'red')
upload_json_data('roles.json', 'ROLES', ['actor_id', 'movie_id', 'role'], 'green')
ACTORS table uploading
0%| | 0/10000 [00:00<?, ?it/s]100%|██████████| 10000/10000 [00:20<00:00, 484.46it/s]
DIRECTORS table uploading
100%|██████████| 10000/10000 [00:20<00:00, 497.58it/s]
MOVIES table uploading
100%|██████████| 10000/10000 [00:20<00:00, 494.78it/s]
DIRECTORS_GENRES table uploading
100%|██████████| 17376/17376 [00:46<00:00, 371.77it/s]
MOVIES_GENRES table uploading
100%|██████████| 10136/10136 [00:20<00:00, 497.34it/s]
MOVIES_DIRECTORS table uploading
100%|██████████| 872/872 [00:01<00:00, 500.61it/s]
ROLES table uploading
100%|██████████| 1139/1139 [00:02<00:00, 488.54it/s]
Verify Number of row upload to each table¶
In [ ]:
#Verify data was uploaded
tables=["ACTORS" , "DIRECTORS" , "DIRECTORS_GENRES", "MOVIES" , "MOVIES_GENRES" ,"ROLES","MOVIES_DIRECTORS"]
conn = mysql.connector.connect(
host='localhost',
user='root',
password='jsonfile',
database='imdbjson'
)
cur = conn.cursor()
for table in tables :
cur.execute(f"SELECT COUNT(*) FROM {table}")
result = cur.fetchone()
row_count = result[0]
print("Number of rows in the table:", table," ",row_count)
conn.close()
Number of rows in the table: ACTORS 10000 Number of rows in the table: DIRECTORS 10000 Number of rows in the table: DIRECTORS_GENRES 17376 Number of rows in the table: MOVIES 10000 Number of rows in the table: MOVIES_GENRES 10136 Number of rows in the table: ROLES 1139 Number of rows in the table: MOVIES_DIRECTORS 872