#data #_2022 #kyc #OSINT
SEC actions refer to enforcement measures and legal proceedings initiated by the U.S. Securities and Exchange Commission (SEC), the federal agency responsible for regulating securities markets and protecting investors. These actions are typically taken against individuals, companies, or entities that violate U.S. securities laws, such as fraud, insider trading, accounting irregularities, or failure to comply with reporting requirements.
This is a great source of data for KYC / OSINT. As before, I load it into a SQLite database with a full-text search (FTS5) table so it can be searched quickly.
Link: https://www.sec.gov/litigations/sec-action-look-up
Script:
```py
# migrate_sec_actions.py
from bs4 import BeautifulSoup
import requests
from bs4.element import Comment
import sqlite3
import pandas as pd
import requests
import os
import json
import time
import re
def _sec_field_text(card, css_class, default='n/a'):
    """Return the text of the ``field-content`` span inside one card field.

    ``card`` is a BeautifulSoup tag for a single result card and ``css_class``
    is the exact class string of the wrapper ``<div>`` to look for.  ``default``
    is returned when either the wrapper or its inner span is missing, so a
    partially rendered card never raises ``AttributeError``.
    """
    wrapper = card.find("div", {"class": css_class})
    if wrapper is None:
        return default
    span = wrapper.find("span", {"class": "field-content"})
    if span is None:
        return default
    return span.text


def _sec_parse_cards(soup):
    """Return one ``(name, alt_name, age, enforcement_actions, date_filed)``
    tuple per result card found in the parsed page ``soup``.

    Missing fields are reported as the string ``'n/a'``.
    """
    card_pattern = re.compile("^card border-divide views-row view-row-count")
    rows = []
    for card in soup.find_all("div", {"class": card_pattern}):
        # The name lives directly in the card's <h2>, not in a wrapper div.
        name_element = card.find("h2", {"class": "field-content card-title"})
        name = name_element.text if name_element is not None else 'n/a'
        rows.append((
            name,
            _sec_field_text(card, "views-field views-field-field-also-known-as-1"),
            _sec_field_text(card, "views-field views-field-field-age-in-document"),
            # Tabs / newlines are layout residue from the page markup.
            _sec_field_text(card, "views-field views-field-field-action-name-in-document").replace('\t', ''),
            _sec_field_text(card, "views-field views-field-field-date-filed").replace('\n', ''),
        ))
    return rows


def create_sec_db(db_path='db_sec.db', request_delay=2.5, timeout=30, headers=None):
    """Scrape the SEC action look-up pages into a SQLite FTS5 database.

    Any existing file at ``db_path`` is deleted first.  The look-up form is
    then queried with every two-letter ``last_name`` prefix (``aa`` .. ``zz``),
    following pagination for as long as a page comes back full.  Each page is
    appended to a staging table ``sec``; at the end the distinct rows are
    copied into the FTS5 virtual table ``v_sec``, the staging table is dropped
    and the file is vacuumed.

    Args:
        db_path: Path of the SQLite file to (re)create.
        request_delay: Seconds to sleep after each page request.
        timeout: Per-request timeout in seconds passed to ``requests.get``.
        headers: Optional dict of HTTP headers sent with every request.

    Raises:
        requests.HTTPError: If a page request returns an error status.  An
            error page would otherwise parse as "no results" and silently
            truncate the data set.
    """
    page_size = 50
    base_url = 'https://www.sec.gov/litigations/sec-action-look-up'
    columns = ['name', 'alt_name', 'age', 'enforcement_actions', 'date_filed']

    # ------------------------------------ #
    # Delete the DB if it exists
    # ------------------------------------ #
    if os.path.exists(db_path):
        os.remove(db_path)

    # All two-letter prefixes used as the last_name search term.
    letters = 'abcdefghijklmnopqrstuvwxyz'
    char_list = [first + second for first in letters for second in letters]

    # One connection for the whole run (connecting also creates the file);
    # the finally-block guarantees it is closed even if a request fails.
    conn = sqlite3.connect(db_path, check_same_thread=False)
    try:
        # ------------------------------------ #
        # Scrape every prefix, page by page
        # ------------------------------------ #
        for char_set in char_list:
            page = 0
            pagination = True
            while pagination:
                print(f'{char_set} - {page}')
                response = requests.get(
                    base_url,
                    params={
                        'aId': '',
                        'last_name': char_set,
                        'first_name': '',
                        'items_per_page': page_size,
                        'page': page,
                    },
                    headers=headers,
                    timeout=timeout,
                )
                response.raise_for_status()
                soup = BeautifulSoup(response.content, 'html.parser')

                df = pd.DataFrame(_sec_parse_cards(soup), columns=columns)
                # print check (optional)
                print(df)

                # Insert this page into the staging table.
                df.to_sql(name='sec', con=conn, if_exists='append', index=False)

                # sleep (optional) - spaces out successive requests
                time.sleep(request_delay)

                # A full page means there may be another one after it.
                pagination = len(df) == page_size
                if pagination:
                    page += 1

        # ------------------------------------ #
        # Make the virtual table
        # ------------------------------------ #
        conn.execute('''
            CREATE VIRTUAL TABLE v_sec
            USING FTS5 (
                name,
                alt_name,
                age,
                enforcement_actions,
                date_filed
            )
            ;
            '''
        )
        # DISTINCT removes people returned under more than one prefix/page.
        conn.execute('''
            INSERT INTO v_sec (
                name,
                alt_name,
                age,
                enforcement_actions,
                date_filed
            )
            SELECT DISTINCT name,
                alt_name,
                age,
                enforcement_actions,
                date_filed
            FROM sec
            ;
            '''
        )
        conn.commit()
        conn.execute('''DROP TABLE sec;''')
        # VACUUM cannot run inside an open transaction, so commit first.
        conn.commit()
        conn.execute('''VACUUM;''')
    finally:
        # ------------------------------------ #
        # Close the database
        # ------------------------------------ #
        conn.close()

    # ------------------------------------ #
    # print statement
    # ------------------------------------ #
    print('--------------------------------------------')
    print('--------------------------------------------')
    print('     SEC Actions Database Build Complete    ')
    print('--------------------------------------------')
    print('--------------------------------------------')
# create_sec_db()
```