#data #bizdev_utils #kyc #_2023
Channel E2E is a digital platform and publication focused on the end-to-end ecosystem of IT service providers, managed service providers (MSPs), technology resellers, and their business lifecycle. The platform is a resource for professionals involved in the technology services industry, providing insights, news, and guidance on various aspects of running and growing technology-driven businesses.
Oddly enough, the article titles on Channel E2E can be helpful for KYC and BizDev efforts, because they serve as a quick lookup of M&A activity. (All I care about is whether a company has already received prior investment; if it's on this list, I'm not interested.)
Link: https://www.channele2e.com/ (sitemap index: https://www.channele2e.com/sitemap-index.xml)
Script:
```sh
#!/bin/bash
#
# Build a de-duplicated list of Channel E2E news-article titles.
#
# Steps:
#   1. Download the sitemap index and pull out every sitemap URL it lists.
#   2. Download each of those sitemaps and keep the lines that contain a
#      /news/ article URL.
#   3. Turn each URL slug into a readable title: drop the leading
#      "<loc>https://www.channele2e.com/news/", replace "-" with a space and
#      replace the closing "</loc>" with "," (CSV-style line ending).
#   4. Sort and de-duplicate the titles.
#
# Usage:    run with no arguments from the directory that should receive the
#           result file.
# Output:   final_pe_links.csv in the current directory.
# Requires: bash, curl, sed, sort, uniq, mktemp.
#
# The extraction is line-based: it expects each <loc> element of a sitemap to
# sit on its own line.

set -euo pipefail

readonly SITE_URL="https://www.channele2e.com"
readonly OUTPUT_FILE="final_pe_links.csv"

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Read sitemap XML on stdin and write one cleaned-up news title per line to
# stdout. Lines without a /news/ URL are dropped.
extract_news_titles() {
  sed '
    \|https://www\.channele2e\.com/news/|!d
    s|<loc>https://www\.channele2e\.com/news/||
    s|-| |g
    s|</loc>|,|g
    s|^[[:space:]]*||
  '
}

command -v curl > /dev/null || die "curl is required but was not found"

# All intermediate files live in a private temp directory that is removed on
# every exit path, so nothing stale survives an aborted run and no file in the
# current directory other than OUTPUT_FILE is touched.
work_dir=$(mktemp -d) || die "could not create a temporary directory"
readonly work_dir
trap 'rm -rf -- "${work_dir:?}"' EXIT

readonly index_xml="${work_dir}/sitemap_index.xml"
readonly sitemap_urls="${work_dir}/sitemap_urls.txt"
readonly sitemap_xml="${work_dir}/sitemap.xml"
readonly titles="${work_dir}/titles.csv"

# Without the index there is nothing to do, so a failed download is fatal.
# --fail makes HTTP errors count as failures instead of saving the error page.
curl -fsSL "${SITE_URL}/sitemap-index.xml" > "$index_xml" \
  || die "could not download ${SITE_URL}/sitemap-index.xml"
# blank line to create space on the command line
printf ' \n'

# Keep only the lines that reference a sitemap, strip the <loc> tags and trim
# surrounding whitespace. Trailing [[:space:]] also removes a carriage return
# if the index uses CRLF line endings, which would otherwise end up in the URL.
sed '
  \|https://www\.channele2e\.com/sitemap-|!d
  s|<loc>||g
  s|</loc>||g
  s|^[[:blank:]]*||
  s|[[:space:]]*$||
' "$index_xml" > "$sitemap_urls"
[[ -s "$sitemap_urls" ]] || die "no sitemap URLs found in the sitemap index"

: > "$titles"

# Fetch every sitemap and append the titles of just that sitemap, so each
# sitemap is processed exactly once.
while IFS= read -r sitemap_url || [[ -n "$sitemap_url" ]]; do
  [[ -n "$sitemap_url" ]] || continue
  printf '%s\n' "$sitemap_url"

  # A single unreachable sitemap should not discard the rest: warn and go on.
  if curl -fsSL "$sitemap_url" > "$sitemap_xml"; then
    extract_news_titles < "$sitemap_xml" >> "$titles"
  else
    printf 'warning: could not download %s; skipping\n' "$sitemap_url" >&2
  fi

  # additional blank line for readability between sitemaps
  printf ' \n'
done < "$sitemap_urls"

# dedup final outcomes
sort "$titles" | uniq > "$OUTPUT_FILE"
```