#data #kyc #_2022
This is one list I consult when doing KYC checks for individuals affiliated to a deal. Accordingly, I like to prep it for full text search by using SQLite.
Link:
Script:
```py
# migrate_canada_sanctions.py
import sqlite3
import pandas as pd
import os
def create_canada_db():
# ------------------------------------ #
# Delete the DB if it exists
# ------------------------------------ #
if os.path.exists("db_canada.db"):
os.remove("db_canada.db")
# ------------------------------------ #
# Make the db
# ------------------------------------ #
db = sqlite3.connect('db_canada.db')
cursor = db.cursor()
cursor.close()
db.close()
# ------------------------------------ #
# Make the dataframe
# ------------------------------------ #
df = pd.read_xml('https://www.international.gc.ca/world-monde/assets/office_docs/international_relations-relations_internationales/sanctions/sema-lmes.xml')
df = df.astype(str)
# rename the column titles to remove spaces and camelcase
column_titles = []
for title in df.columns:
column_titles.append(title.replace(' ','_').replace('-','_').replace(',','').replace('.','').replace('/','_').replace('(','').replace(')','').lower())
df.columns = column_titles
# make a formatted string of the column headers for later use
headers = ''
for title in column_titles:
headers += title
headers += ','
# ------------------------------------ #
# Insert data to db
# ------------------------------------ #
conn = sqlite3.connect('db_canada.db', check_same_thread=False)
df.to_sql(name='canada', con=conn, if_exists='append', index=False)
# ------------------------------------ #
# Make the virtual table
# ------------------------------------ #
conn.execute(f'''
CREATE VIRTUAL TABLE v_canada
USING FTS5 (
{headers[:-1]}
)
;
'''
)
conn.execute(f'''
INSERT INTO v_canada (
{headers[:-1]}
)
SELECT DISTINCT {headers[:-1]}
FROM canada
;
'''
)
conn.commit()
conn.execute('''DROP TABLE canada;''')
conn.execute('''VACUUM;''')
# ------------------------------------ #
# Close the database
# ------------------------------------ #
conn.close()
# ------------------------------------ #
# print statement
# ------------------------------------ #
print(' ')
print(len(df))
print(' ')
print('--------------------------------------------')
print('--------------------------------------------')
print(' Canada Database Build Complete ')
print('--------------------------------------------')
print('--------------------------------------------')
# create_canada_db()
```