# Load data from MySQL into BigQuery.
# Lists all base tables of a MySQL database and uploads each one to a
# BigQuery dataset using pandas / pandas-gbq.
import os
import pandas as pd
import pandas_gbq as pd_gbq
import modin.pandas as pd_mod
from sqlalchemy import create_engine
from google.oauth2 import service_account

# Service-account key file used to authenticate with GCP.
# NOTE(review): the original path is machine-specific; allow overriding it
# through the environment so the script runs on other machines.
SERVICE_ACCOUNT_FILE = os.environ.get(
    'GCP_SERVICE_ACCOUNT_FILE',
    r'C:\Users\Sherry\Downloads\pelagic-earth-353304-5e9ab57c7481.json',
)

credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE)

# BigQuery variables
# NOTE(review): this looks like the key-file stem, not the bare GCP project
# id (usually 'pelagic-earth-353304') — confirm against the GCP console.
PROJECT_ID = 'pelagic-earth-353304-5e9ab57c7481'

DATASET_ID = 'covid_dataset'

# MySQL connection variables — main() builds the SQLAlchemy URI as
# mysql+pymysql://USER:PASSWORD@HOST/DATABASE, so all four must exist.
# Read credentials from the environment instead of hard-coding them.
MYSQL_USER = os.environ.get('MYSQL_USER', 'root')
MYSQL_PASSWORD = os.environ.get('MYSQL_PASSWORD', '')
MYSQL_HOST = os.environ.get('MYSQL_HOST', 'localhost')

MYSQL_DATABASE = 'vaccination'

# Path for the dump directory
DIRECTORY = 'dump'

def main():
    """Copy every base table of the MySQL database into BigQuery.

    Builds a SQLAlchemy connection URI from the module-level MySQL
    settings, lists the base tables of ``MYSQL_DATABASE`` from
    ``information_schema``, reads each table into a DataFrame, and
    uploads it to the BigQuery dataset ``DATASET_ID``.
    """
    con_uri = 'mysql+pymysql://{}:{}@{}/{}'.format(
        MYSQL_USER, MYSQL_PASSWORD, MYSQL_HOST, MYSQL_DATABASE)
    print('Connection url {}'.format(con_uri))

    try:
        engine = create_engine(con_uri, pool_recycle=3600).connect()
    except Exception as e:
        # Without a connection nothing else can run; report and stop
        # instead of falling through to an undefined `engine`.
        print('Error {}'.format(e))
        return

    # Only real tables (not views) belong in the export.
    tables_query = 'SELECT table_name ' \
                   'FROM information_schema.tables ' \
                   'WHERE TABLE_TYPE = "BASE TABLE" ' \
                   'AND TABLE_SCHEMA = "{}";'.format(MYSQL_DATABASE)

    list_tables = pd.read_sql(tables_query, con_uri)

    for _, row in list_tables.iterrows():
        table_id = '{}.{}'.format(DATASET_ID, row['TABLE_NAME'])
        print('Loading Table {}'.format(table_id))

        df = pd_mod.read_sql_table(row['TABLE_NAME'], engine)

        # if_exists='replace' makes the load idempotent on re-runs.
        pd_gbq.to_gbq(df, table_id,
                      project_id=PROJECT_ID,
                      credentials=credentials,
                      if_exists='replace')


if __name__ == '__main__':
    main()
