#_2022 #kyc
Worldbank sanctions for OSINT / KYC searches. Setup here for full text search in SQLite.
Link:
Script:
```py
# migrate_worlddbank_sanctions.py
import sqlite3
import pandas as pd
import requests
import os
# --------------------------------------------------------------- #
# https://www.worldbank.org/en/projects-operations/procurement/debarred-firms
# this file must be manually downloaded, then placed in the static
# data directory as worldbank_sanctions.csv. You have to delete the
# first two rows, which is extremely annoying
# TODO: Make an RPA script that downloads this data and preps it
# into the static_data directory so this can be totally automated
# --------------------------------------------------------------- #
def create_worldbank_sanctions_db():
# ------------------------------------ #
# Delete the DB if it exists
# ------------------------------------ #
if os.path.exists("db_worldbank_sanctions.db"):
os.remove("db_worldbank_sanctions.db")
# ------------------------------------ #
# Make the db
# ------------------------------------ #
db = sqlite3.connect('db_worldbank_sanctions.db')
cursor = db.cursor()
cursor.close()
db.close()
# ------------------------------------ #
# Make the dataframe
# ------------------------------------ #
# make dataframe from the extract
worldbank_df = pd.read_csv('static_data/worldbank_sanctions.csv')
worldbank_df = worldbank_df.astype(str)
# rename the column titles to remove spaces and camelcase
column_titles = []
for title in worldbank_df.columns:
column_titles.append(title.replace(' ','_').replace('-','_').replace(',','').replace('.','').replace('/','_').replace('(','').replace(')','').lower())
worldbank_df.columns = column_titles
# make a formatted string of the column headers for later use
headers = ''
for title in column_titles:
headers += title
headers += ','
print(headers)
# ------------------------------------ #
# Insert data to db
# ------------------------------------ #
conn = sqlite3.connect('db_worldbank_sanctions.db', check_same_thread=False)
worldbank_df.to_sql(name='worldbank_sanctions', con=conn, if_exists='replace', index=False)
# ------------------------------------ #
# Make the virtual table
# ------------------------------------ #
conn.execute(f'''
CREATE VIRTUAL TABLE v_worldbank_sanctions
USING FTS5 (
{headers[:-1]}
)
;
'''
)
conn.execute(f'''
INSERT INTO v_worldbank_sanctions (
{headers[:-1]}
)
SELECT
{headers[:-1]}
FROM worldbank_sanctions
;
'''
)
conn.commit()
conn.execute('''DROP TABLE worldbank_sanctions;''')
conn.execute('''VACUUM;''')
# # ------------------------------------ #
# # Close the database
# # ------------------------------------ #
# conn.close()
# ------------------------------------ #
# print statement
# ------------------------------------ #
print(' ')
print(len(worldbank_df))
print(' ')
print('--------------------------------------------')
print('--------------------------------------------')
print(' Worldbank Sanctions Database Build Complete ')
print('--------------------------------------------')
print('--------------------------------------------')
# create_worldbank_sanctions_db()
```