프로그래밍/Python
spactrack 파싱 및 엑셀 저장 개발
1q
2021. 2. 6. 21:46
spactrack에 저장된 SPAC 목록을 파싱하여 엑셀 파일로 저장하는 스크립트이다.
# Parse the spactrack SPAC stock list and store it in an Excel file
# python 3.8
import requests
from bs4 import BeautifulSoup
import xlsxwriter
# spactrack's SPAC stock list.
# url1 is requested once as-is; url2 ends in a "{}" placeholder that is
# filled with a page number through str.format().
url1 = (
    "https://sheet2site-staging.herokuapp.com/api/v3/index.php/"
    "?search=&key=1F7gLiGZP_F4tZgQXgEhsHMqlgqdSds3vO0-4hoL6ROQ&e=1"
)
url2 = (
    "https://sheet2site-staging.herokuapp.com/api/v3/load_more.php/"
    "?key=1F7gLiGZP_F4tZgQXgEhsHMqlgqdSds3vO0-4hoL6ROQ"
    "&template=Table%20Template&filter=&search=&e=1"
    "&is_filter_multi=true&length=99&page={}"
)

# Index of the last worksheet row written so far (row 0 holds the header).
row = 0

# Header labels, one per worksheet column, in left-to-right order.
columnlist = [
    "SPAC Ticker",
    "Name",
    "Status",
    "SPAC Target Focus",
    "Target Company(if Deal Announced)",
    "Prominent Leadership / Directors / Advisors",
    "Trust Value(from last filing)",
    "Market Cap",
    "Commons Price",
    "Commons % Change Previous Day",
    "Unit Price",
    "Warrant Price",
    "Unit & Warrant Details",
    "Estimated Unit Split Date",
    "Warrant Intrinsic Value",
    "IPO Date",
    "IPO Size(M)",
    "Underwriter(s)",
    "Estimated Completion Deadline Date",
    "% Progress to Deadline",
    "SEC Filings",
    "Tags",
]
def CreateXlsx():
    """Create the 'spack.xlsx' workbook and return a new worksheet in it.

    Returns:
        The worksheet produced by ``add_worksheet()`` on the new workbook.

    NOTE(review): only the worksheet is returned; the workbook object is not
    handed back, so a caller has nothing to pass to ``CloseXlsx``. Confirm
    whether this helper is still meant to be used.
    """
    book = xlsxwriter.Workbook('spack.xlsx')
    return book.add_worksheet()
def CloseXlsx(w_obj):
    """Close the given workbook by calling its ``close()`` method.

    Args:
        w_obj: Object exposing ``close()``; presumably an xlsxwriter
            workbook -- confirm against callers.
    """
    w_obj.close()
def TestEndofIndex(max_pages=10, timeout=30):
    """Find the last url2 page that comes before the "end" marker page.

    Requests url2 for page 1, 2, ... in order and stops at the first page
    whose response body is exactly the string "end".

    Args:
        max_pages: Exclusive upper bound on the page numbers probed. The
            default of 10 keeps the original behaviour of probing pages 1-9.
        timeout: Seconds to wait for each HTTP response, so that a stalled
            server cannot hang the script indefinitely.

    Returns:
        The number of the page just before the "end" page (0 when page 1 is
        already "end"), or False when no "end" page was seen in the probed
        range. Because ``0 == False`` in Python, callers should test the
        result with ``is False`` rather than ``== False``.
    """
    for page in range(1, max_pages):
        res = requests.get(url2.format(page), timeout=timeout)
        if res.text == "end":
            return page - 1
    return False
def _write_table_rows(worksheet, table_rows, last_row):
    """Write the <td> text of each table row to the next worksheet row.

    Args:
        worksheet: Object exposing ``write(row, col, value)``.
        table_rows: Iterable of parsed ``<tr>`` tags.
        last_row: Index of the last worksheet row already used.

    Returns:
        The index of the last worksheet row consumed by this call.
    """
    for tr in table_rows:
        # Every <tr> consumes one worksheet row, even when it has no <td>
        # cells; this matches the original row numbering.
        last_row += 1
        for col, cell in enumerate(tr.find_all("td")):
            worksheet.write(last_row, col, cell.get_text())
    return last_row


def Parsing(index, worksheet, timeout=30, parser="html.parser"):
    """Download the SPAC table and write every row into ``worksheet``.

    The page at url1 is fetched first (rows matched by ``tbody tr``), then
    url2 pages 1..index (rows matched by ``tr``). Each table row is written
    to the worksheet row following the previous one, one cell per column.
    The module-level ``row`` counter is advanced as rows are written.

    Args:
        index: Number of the last url2 page to fetch; pages 1..index
            inclusive are requested. With 0, only url1 is fetched.
        worksheet: Object exposing ``write(row, col, value)``.
        timeout: Seconds to wait for each HTTP response.
        parser: BeautifulSoup parser name. Passing it explicitly avoids the
            "no parser was explicitly specified" warning and keeps results
            independent of which optional parsers are installed.
    """
    global row

    # First page: the full document, data rows live under <tbody>.
    first_page = BeautifulSoup(requests.get(url1, timeout=timeout).text, parser)
    row = _write_table_rows(worksheet, first_page.select('tbody tr'), row)

    # Pages 1..index of the "load more" endpoint.
    for page in range(1, index + 1):
        html = requests.get(url2.format(page), timeout=timeout).text
        more_rows = BeautifulSoup(html, parser).select('tr')
        row = _write_table_rows(worksheet, more_rows, row)
# Create the output workbook, one worksheet, and a bold format for the header.
w_obj = xlsxwriter.Workbook('spack.xlsx')
worksheet = w_obj.add_worksheet()
bold = w_obj.add_format({'bold': 1})

# Write the header labels into worksheet row 0.
for i, header in enumerate(columnlist):
    worksheet.write(0, i, header, bold)

r_TestEndofIndex = TestEndofIndex()
# Identity check on purpose: TestEndofIndex() can return 0 (the first probed
# page is already "end"), and ``0 != False`` evaluates to False, which used
# to send that valid result down the error path.
if r_TestEndofIndex is not False:
    Parsing(r_TestEndofIndex, worksheet)
    w_obj.close()
else:
    # No "end" page was found; the workbook is not closed on this path.
    input("[ERROR] TestEndofIndex")