JSON-to-MySQL Movie Database¶

The JSON-to-MySQL Movie Database project involves creating a relational database system using MySQL to store comprehensive information about movies, actors, directors, genres, and their relationships. The project also includes developing a Python script for uploading data from JSON files into the MySQL database.

Tables:

  • ACTORS: This table stores information about actors.
  • DIRECTORS: Contains data about movie directors.
  • DIRECTORS_GENRES: A linking table that represents the many-to-many relationship between directors and genres.
  • MOVIES: Stores details about movies, including their unique IDs, titles, year, and rakingss.
  • MOVIES_GENRES: A linking table connecting movies with genres.
  • ROLES: This table contains information about the roles actors play in movies.
  • MOVIES_DIRECTORS: Another linking table that establishes the relationship between movies and directors.

Following the next ER DIAGRAM:

IMDB

Import library¶

In [ ]:
import os 
import pandas
import json
import mysql.connector
from tqdm import tqdm

Create Database and verification¶

In [ ]:
conn = mysql.connector.connect(host = "localhost",
                                 user = "root",
                                 passwd = "jsonfile")  
cur = conn.cursor()
#creating a new database  
cur.execute("CREATE DATABASE IMDBJSON")  # TO CREATE A NEW DATABASE UNCOMENT THIS LINE  
#getting the list of all the databases which will now include the new database PythonDB  
db = cur.execute("SHOW DATABASES")   
for x in cur:  
    print(x)  # verify database is created 
conn.close() # Close Conection 
('imdbjson',)
('information_schema',)
('mysql',)
('performance_schema',)
('sakila',)
('sys',)
('task',)
('world',)

Create tables based on ER SCHEMA¶

In [ ]:
conn = mysql.connector.connect(
      host='localhost', 
      user='root', 
      password='jsonfile', 
      database='imdbjson'
   )
cur = conn.cursor()
   #Creating Directors table 
sql ='''CREATE TABLE DIRECTORS (
         ID INT(11) NOT NULL DEFAULT 0,
         FIRST_NAME VARCHAR(100) DEFAULT NULL,
         LAST_NAME  VARCHAR(100) DEFAULT NULL,
         PRIMARY KEY (ID),
         KEY DIRECTORS_FIRST_NAME (FIRST_NAME),
         KEY DIRECTORS_LAST_NAME (LAST_NAME)
         )ENGINE=INNODB;
         '''
cur.execute(sql)
   #Creating actors table 
sql ='''CREATE TABLE ACTORS (
         ID INT(11) NOT NULL DEFAULT 0,
         FIRST_NAME VARCHAR(100) DEFAULT NULL,
         LAST_NAME VARCHAR(100) DEFAULT NULL,
         GENDER CHAR(1) DEFAULT NULL,
         PRIMARY KEY (ID),
         KEY ACTORS_FIRST_NAME (FIRST_NAME),
         KEY ACTORS_LAST_NAME (LAST_NAME)
         )ENGINE=INNODB;
         '''
cur.execute(sql)
   #Creating movies table 
sql ='''CREATE TABLE MOVIES (
         ID INT(11) NOT NULL DEFAULT 0,
         NAME VARCHAR(100) DEFAULT NULL,
         YEAR INT(11) DEFAULT NULL,
         RANKED FLOAT DEFAULT NULL,
         PRIMARY KEY (ID),
         KEY MOVIES_NAME (NAME)
         )ENGINE=INNODB;
         '''
cur.execute(sql)
   #Creating directors genres table 
sql ='''CREATE TABLE DIRECTORS_GENRES (
         DIRECTOR_ID INT(11) NOT NULL,
         GENRE VARCHAR(100) NOT NULL,
         PROB FLOAT DEFAULT NULL,
         PRIMARY KEY (DIRECTOR_ID,GENRE),
         KEY DIRECTOR_GENRES_DIRECTOR_ID (DIRECTOR_ID),
         CONSTRAINT `DIRECTORS_GENRES__1` FOREIGN KEY (DIRECTOR_ID) REFERENCES DIRECTORS (ID) 
         ON DELETE CASCADE ON UPDATE CASCADE
         ) ENGINE=INNODB;
         '''
cur.execute(sql)
   #Creating movies director table 
sql ='''CREATE TABLE MOVIES_DIRECTORS (
         DIRECTOR_ID INT(11) NOT NULL,
         MOVIE_ID INT(11) NOT NULL,
         PRIMARY KEY (DIRECTOR_ID,MOVIE_ID),
         KEY MOVIES_DIRECTORS_DIRECTOR_ID (DIRECTOR_ID),
         KEY MOVIES_DIRECTORS_MOVIE_ID (MOVIE_ID),
         CONSTRAINT `MOVIES_DIRECTORS__1` FOREIGN KEY (DIRECTOR_ID) REFERENCES DIRECTORS (ID) 
         ON DELETE CASCADE ON UPDATE CASCADE,
         CONSTRAINT `MOVIES_DIRECTORS__2` FOREIGN KEY (MOVIE_ID) REFERENCES MOVIES (ID) 
         ON DELETE CASCADE ON UPDATE CASCADE
         ) ENGINE=INNODB;
         '''
cur.execute(sql)
   #Creating roles  table 
sql ='''CREATE TABLE ROLES (
         ACTOR_ID INT(11) NOT NULL,
         MOVIE_ID INT(11) NOT NULL,
         ROLE VARCHAR(100) NOT NULL,
         PRIMARY KEY (ACTOR_ID,MOVIE_ID,ROLE),
         KEY ACTOR_ID (ACTOR_ID),
         KEY MOVIE_ID (MOVIE_ID),
         CONSTRAINT ROLES__1 FOREIGN KEY (ACTOR_ID) REFERENCES ACTORS (ID)
         ON DELETE CASCADE ON UPDATE CASCADE,
         CONSTRAINT ROLES__2 FOREIGN KEY (MOVIE_ID) REFERENCES MOVIES (ID) 
         ON DELETE CASCADE ON UPDATE CASCADE
         ) ENGINE=INNODB;
         '''
cur.execute(sql)
   #Creating movies genres table 
sql ='''CREATE TABLE MOVIES_GENRES (
         MOVIE_ID INT(11) NOT NULL,
         GENRE VARCHAR(100) NOT NULL,
         PRIMARY KEY (MOVIE_ID,GENRE),
         KEY MOVIE_GENRES_MOVIE_ID (MOVIE_ID),
         CONSTRAINT MOVIES_GENRES__1 FOREIGN KEY (MOVIE_ID) REFERENCES MOVIES (ID)
         ON DELETE CASCADE ON UPDATE CASCADE
         ) ENGINE=INNODB;
         '''
cur.execute(sql)

Upload JSON files provided to each table¶

In [ ]:
# Define a function to load JSON data and upload it to a MySQL table
def upload_json_data(file_path, table_name, columns, tqdm_color):
    with open(file_path, 'r') as f:
        data = json.load(f)
    
    print(f"{table_name} table uploading")
    
    for item in tqdm(data, colour=tqdm_color):
        values = tuple(item[column] for column in columns)
        placeholders = ', '.join(['%s'] * len(columns))
        sql = f"INSERT INTO {table_name} ({', '.join(columns)}) VALUES ({placeholders})"
        # Define your database connection (conn) and cursor (cur) here
        cur.execute(sql, values) 
        conn.commit()


# Call the function for each JSON file and corresponding table
upload_json_data('actors.json', 'ACTORS', ['id', 'first_name', 'last_name', 'gender'], 'yellow')
upload_json_data('directors.json', 'DIRECTORS', ['id', 'first_name', 'last_name'], 'blue')
upload_json_data('movies.json', 'MOVIES', ['id', 'name', 'year', 'ranked'], 'red')
upload_json_data('directorsgenres.json', 'DIRECTORS_GENRES', ['director_id', 'genre', 'prob'], 'yellow')
upload_json_data('moviesgenres.json', 'MOVIES_GENRES', ['movie_id', 'genre'], 'blue')
upload_json_data('moviesdirectors.json', 'MOVIES_DIRECTORS', ['director_id', 'movie_id'], 'red')
upload_json_data('roles.json', 'ROLES', ['actor_id', 'movie_id', 'role'], 'green')
ACTORS table uploading
  0%|          | 0/10000 [00:00<?, ?it/s]100%|██████████| 10000/10000 [00:20<00:00, 484.46it/s]
DIRECTORS table uploading
100%|██████████| 10000/10000 [00:20<00:00, 497.58it/s]
MOVIES table uploading
100%|██████████| 10000/10000 [00:20<00:00, 494.78it/s]
DIRECTORS_GENRES table uploading
100%|██████████| 17376/17376 [00:46<00:00, 371.77it/s]
MOVIES_GENRES table uploading
100%|██████████| 10136/10136 [00:20<00:00, 497.34it/s]
MOVIES_DIRECTORS table uploading
100%|██████████| 872/872 [00:01<00:00, 500.61it/s]
ROLES table uploading
100%|██████████| 1139/1139 [00:02<00:00, 488.54it/s]

Verify Number of row upload to each table¶

In [ ]:
#Verify data was uploaded
tables=["ACTORS" , "DIRECTORS" , "DIRECTORS_GENRES", "MOVIES" , "MOVIES_GENRES" ,"ROLES","MOVIES_DIRECTORS"]
conn = mysql.connector.connect(
      host='localhost', 
      user='root', 
      password='jsonfile', 
      database='imdbjson'
   )
cur = conn.cursor()
for table in tables : 
   cur.execute(f"SELECT COUNT(*) FROM {table}") 
   result = cur.fetchone()
   row_count = result[0]
   print("Number of rows in the table:", table," ",row_count)
conn.close()
Number of rows in the table: ACTORS   10000
Number of rows in the table: DIRECTORS   10000
Number of rows in the table: DIRECTORS_GENRES   17376
Number of rows in the table: MOVIES   10000
Number of rows in the table: MOVIES_GENRES   10136
Number of rows in the table: ROLES   1139
Number of rows in the table: MOVIES_DIRECTORS   872