#_2022 #kyc Worldbank sanctions for OSINT / KYC searches. Setup here for full text search in SQLite. Link: Script: ```py # migrate_worlddbank_sanctions.py import sqlite3 import pandas as pd import requests import os # --------------------------------------------------------------- # # https://www.worldbank.org/en/projects-operations/procurement/debarred-firms # this file must be manually downloaded, then placed in the static # data directory as worldbank_sanctions.csv. You have to delete the # first two rows, which is extremely annoying # TODO: Make an RPA script that downloads this data and preps it # into the static_data directory so this can be totally automated # --------------------------------------------------------------- # def create_worldbank_sanctions_db(): # ------------------------------------ # # Delete the DB if it exists # ------------------------------------ # if os.path.exists("db_worldbank_sanctions.db"): os.remove("db_worldbank_sanctions.db") # ------------------------------------ # # Make the db # ------------------------------------ # db = sqlite3.connect('db_worldbank_sanctions.db') cursor = db.cursor() cursor.close() db.close() # ------------------------------------ # # Make the dataframe # ------------------------------------ # # make dataframe from the extract worldbank_df = pd.read_csv('static_data/worldbank_sanctions.csv') worldbank_df = worldbank_df.astype(str) # rename the column titles to remove spaces and camelcase column_titles = [] for title in worldbank_df.columns: column_titles.append(title.replace(' ','_').replace('-','_').replace(',','').replace('.','').replace('/','_').replace('(','').replace(')','').lower()) worldbank_df.columns = column_titles # make a formatted string of the column headers for later use headers = '' for title in column_titles: headers += title headers += ',' print(headers) # ------------------------------------ # # Insert data to db # ------------------------------------ # conn = sqlite3.connect('db_worldbank_sanctions.db', check_same_thread=False) worldbank_df.to_sql(name='worldbank_sanctions', con=conn, if_exists='replace', index=False) # ------------------------------------ # # Make the virtual table # ------------------------------------ # conn.execute(f''' CREATE VIRTUAL TABLE v_worldbank_sanctions USING FTS5 ( {headers[:-1]} ) ; ''' ) conn.execute(f''' INSERT INTO v_worldbank_sanctions ( {headers[:-1]} ) SELECT {headers[:-1]} FROM worldbank_sanctions ; ''' ) conn.commit() conn.execute('''DROP TABLE worldbank_sanctions;''') conn.execute('''VACUUM;''') # # ------------------------------------ # # # Close the database # # ------------------------------------ # # conn.close() # ------------------------------------ # # print statement # ------------------------------------ # print(' ') print(len(worldbank_df)) print(' ') print('--------------------------------------------') print('--------------------------------------------') print(' Worldbank Sanctions Database Build Complete ') print('--------------------------------------------') print('--------------------------------------------') # create_worldbank_sanctions_db() ```