Load Data From MySQL to BigQuery
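# Required third-party packages (one possible install command):
#   pip install pandas pandas-gbq "modin[ray]" SQLAlchemy PyMySQL google-auth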
import os
import pandas as pd
import pandas_gbq as pd_gbq
import modin.pandas as pd_mod
from sqlalchemy import create_engine
from google.oauth2 import service_account
# Service account file for GCP connection
credentials = service_account.Credentials.from_service_account_file(r'C:\Users\Sherry\Downloads\pelagic-earth-353304-5e9ab57c7481.json')
# BigQuery Variables
PROJECT_ID = 'pelagic-earth-353304-5e9ab57c7481'
DATASET_ID = 'covid_dataset'
# MySQL Variables
MYSQL_USERNAME = 'root1'
MYSQL_PASSWORD = 'root'
MYSQL_HOST = '127.0.0.1'
MYSQL_DATABASE = 'vaccination'
# Path for the dump directory
DIRECTORY = 'dump'
def main():
    # Build the SQLAlchemy connection URI for the local MySQL server
    con_uri = 'mysql+pymysql://{}:{}@{}/{}'.format(
        MYSQL_USERNAME,
        MYSQL_PASSWORD,
        MYSQL_HOST,
        MYSQL_DATABASE
    )
    print('Connection url {}'.format(con_uri))

    try:
        engine = create_engine(con_uri, pool_recycle=3600).connect()
    except Exception as e:
        print('Error {}'.format(e))
        return

    # List every base table in the source database; the alias keeps the
    # DataFrame column name consistent across MySQL versions
    tables_query = 'SELECT table_name AS TABLE_NAME ' \
                   'FROM information_schema.tables ' \
                   'WHERE TABLE_TYPE = "BASE TABLE" ' \
                   'AND TABLE_SCHEMA = "{}";'.format(MYSQL_DATABASE)
    list_tables = pd.read_sql(tables_query, con_uri)

    # Read each MySQL table with Modin and push it to BigQuery
    for index, row in list_tables.iterrows():
        table_id = '{}.{}'.format(DATASET_ID, row['TABLE_NAME'])
        print('Loading Table {}'.format(table_id))
        df = pd_mod.read_sql_table(row['TABLE_NAME'], engine)
        pd_gbq.to_gbq(df, table_id,
                      project_id=PROJECT_ID,
                      credentials=credentials,
                      if_exists='replace',
                      chunksize=10000000,
                      progress_bar=True)


if __name__ == '__main__':
    main()
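Once the script finishes, a quick read back with pandas_gbq.read_gbq is a convenient way to confirm that the tables arrived. The sketch below reuses the credentials, project, and dataset defined above; covid_vaccinations is a hypothetical table name used only for illustration, so substitute any table that actually exists in your MySQL database.

# Minimal verification sketch: count the rows of one loaded table.
# 'covid_vaccinations' is a placeholder table name.
check_query = 'SELECT COUNT(*) AS row_count FROM `{}.covid_vaccinations`'.format(DATASET_ID)
result = pd_gbq.read_gbq(check_query,
                         project_id=PROJECT_ID,
                         credentials=credentials)
print(result['row_count'].iloc[0])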