master
joey0629 1 year ago
parent 46da2c648b
commit be0e324b80
  1. 16
      Dockerfile
  2. 113
      main.py

@ -2,7 +2,21 @@ FROM python:3.10
# Install Python and pip # Install Python and pip
RUN apt-get update && apt-get install -y python3 python3-pip && apt install nano RUN apt-get update && apt-get install -y python3 python3-pip && apt install nano
RUN apt-get update && apt-get install -y wget bzip2 libxtst6 libgtk-3-0 libx11-xcb-dev libdbus-glib-1-2 libxt6 libpci-dev && rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y \
wget unzip \
&& rm -rf /var/lib/apt/lists/* \
&& wget https://chromedriver.storage.googleapis.com/114.0.5735.90/chromedriver_linux64.zip \
&& unzip chromedriver_linux64.zip \
&& mv chromedriver /usr/local/bin/ \
&& chmod +x /usr/local/bin/chromedriver
# Install Google Chrome
RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
&& echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" | tee /etc/apt/sources.list.d/google-chrome.list \
&& apt-get update && apt-get install -y \
google-chrome-stable \
&& rm -rf /var/lib/apt/lists/*
# Set the working directory in the container # Set the working directory in the container
WORKDIR /app WORKDIR /app

@ -86,14 +86,17 @@ def find_Fomc_bs4(date:str):
def find_cpi(url): def find_cpi(url):
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support import expected_conditions as EC
import time import time
print("Start") print("Start")
options = Options() options = Options()
options.add_argument("--headless") # 啟用無頭模式 # options.add_argument('--headless')
driver = webdriver.Firefox(options = options) options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome( options=options)
driver.get(url) driver.get(url)
# time.sleep(3) # time.sleep(3)
date = driver.find_element(By.XPATH, '//*[@id="bodytext"]/div[1]/pre[1]').text.split('\n')[6].split("-")[1].split(" ")[1] date = driver.find_element(By.XPATH, '//*[@id="bodytext"]/div[1]/pre[1]').text.split('\n')[6].split("-")[1].split(" ")[1]
@ -105,9 +108,6 @@ def find_fomc(url,date:str):
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
print("Start") print("Start")
options = Options() options = Options()
options.add_argument("--headless") # 啟用無頭模式 options.add_argument("--headless") # 啟用無頭模式
@ -121,6 +121,60 @@ def find_fomc(url,date:str):
driver.quit() driver.quit()
return date , value[0].strip('\n') return date , value[0].strip('\n')
def find_pce(date:str):
    """Scrape the PCE release page at ``pce_url + date`` with headless Chrome.

    Args:
        date: URL suffix appended to the module-level ``pce_url``.

    Returns:
        A ``(date, value_1, value_2)`` tuple of strings:
        ``date`` is the fifth space-separated token of the page's ``<h1>``
        text, ``value_1`` is the text of the second paragraph of the release
        body, and ``value_2`` is the last space-separated token of row 13 of
        the first table.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if one of the
            XPath lookups does not match.
        IndexError: if the ``<h1>`` text has fewer than five tokens.
    """
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.chrome.options import Options
    print("Start")
    options = Options()
    options.add_argument("--headless")  # run Chrome without a display
    # Same container-friendly flags that find_cpi passes to Chrome.
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    # The options object must be handed to the driver; constructing Chrome()
    # with no arguments silently discards the flags configured above.
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(pce_url+date)
        date = driver.find_element(By.XPATH, '//*[@id="home"]/h1').text.split(' ')[4]
        value_1 = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/p[2]').text
        value_2 = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]').text.strip(' ').split(' ')[-1]
    finally:
        # Always release the browser, even when a lookup above raises;
        # callers poll this function in a loop, so leaks would accumulate.
        driver.quit()
    return date , value_1 , value_2
def _split_on_periods(elements):
    """Return every '.'-separated fragment of each element's text, flattened."""
    fragments = []
    for element in elements:
        fragments.extend(element.text.split('.'))
    return fragments


def find_pce_bs4(date:str):
    """Fetch ``pce_url + date`` with requests and parse it with BeautifulSoup.

    Args:
        date: URL suffix appended to the module-level ``pce_url``.

    Returns:
        A ``(release_time, value1, value2)`` tuple:
        ``release_time`` is the fifth space-separated token of the first
        element carrying both the ``row`` and ``release-embargo`` classes,
        or ``None`` when no such element (or token) exists;
        ``value1`` is the flattened list of '.'-separated text fragments of
        every ``div`` with class ``col-md-12 release-body``, or ``None`` when
        none is found;
        ``value2`` is the same for every ``td`` with class ``text-left``.
    """
    from bs4 import BeautifulSoup
    import requests
    response = requests.get(pce_url+date)
    soup = BeautifulSoup(response.text, 'html.parser')
    print(soup)
    # Locate the elements with a CSS selector. The two classes must be chained
    # without a space: '.row release-embargo' would instead look for a
    # <release-embargo> tag nested inside a .row element and never match.
    time_elements = soup.select('.row.release-embargo')
    value_elements = soup.find_all('div',class_="col-md-12 release-body" )
    value2_elements = soup.find_all('td',class_="text-left" )
    print(value_elements)
    # Named release_time (not `time`) so the `time` module is not shadowed.
    release_time = None
    if time_elements:
        time_text = time_elements[0].text
        print(time_text)
        tokens = time_text.split(' ')
        # Guard the index: callers poll until the value matches, so a short
        # heading should read as "not available yet" rather than crash.
        if len(tokens) > 4:
            release_time = tokens[4]
        print(release_time)
    if value_elements:
        print(value_elements)
        value1 = _split_on_periods(value_elements)
        print(value1)
    else:
        value1 = None
    if value2_elements:
        value2 = _split_on_periods(value2_elements)
        print(value2)
    else:
        value2 = None
    return release_time, value1 , value2
#非農就業人數 #非農就業人數
def read_pdf_nonfarm(month, year): def read_pdf_nonfarm(month, year):
pdf = pdfplumber.open(f"empsit_{month}_{year}.pdf") pdf = pdfplumber.open(f"empsit_{month}_{year}.pdf")
@ -218,26 +272,49 @@ def broadcast_all_bs4_fomc(target:str):
broadcast_message( broadcast_message(
f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s", f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s",
"-1002033782195") "-1002033782195")
def broadcast_all_pce(target:str,date_target:str):
    """Poll ``find_pce`` until its date equals ``target``, then broadcast it.

    Args:
        target: Date token the scraped page must report before anything is
            sent.
        date_target: URL suffix forwarded to ``find_pce``.

    Sends three messages via ``broadcast_message``: a title line, the scraped
    values, and a timing summary.
    """
    chat_id = "-1002033782195"
    began = time.time()
    # Re-scrape until the page reports the expected release.
    while True:
        release, summary, yearly_change = find_pce(date_target)
        if release == target:
            break
    broadcast_message(release+" PCE Data", chat_id)
    body = ''.join((summary, '\n\n Percent change from month one year ago : ', yearly_change))
    broadcast_message(body, chat_id)
    finished = time.time()
    elapsed = str(round(finished - began, 3))
    broadcast_message(
        f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{elapsed} s",
        chat_id)
def broadcast_all_bs4_pce(target:str, date_target:str='20240131a'):
    """Poll ``find_pce_bs4`` until its date equals ``target``, then broadcast it.

    Args:
        target: Value the scraped release time must equal before anything is
            sent.
        date_target: URL suffix forwarded to ``find_pce_bs4``. Defaults to
            the value that was previously hard-coded here.

    Sends three messages via ``broadcast_message``: a title line, the scraped
    body text, and a timing summary.
    """
    startimee = time.time()
    # find_pce_bs4 returns three values (time, value1, value2); unpacking only
    # two raised ValueError before the loop could ever run.
    date, message, _value2 = find_pce_bs4(date_target)
    while date != target:
        date, message, _value2 = find_pce_bs4(date_target)
    broadcast_message(date+" PCE Data", "-1002033782195")
    # value1 comes back as a list of fragments split on '.'; re-join it into
    # one text. NOTE(review): assumes broadcast_message expects a string, as
    # at its other call sites in this file - confirm.
    if isinstance(message, list):
        message = '.'.join(message)
    broadcast_message(message, "-1002033782195")
    endtimee = time.time()
    broadcast_message(
        f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s",
        "-1002033782195")
if __name__ == "__main__": if __name__ == "__main__":
global nonfarm_url , cpi_url , fomc_url global nonfarm_url , cpi_url , fomc_url , pce_url
nonfarm_url = "https://www.bls.gov/news.release/empsit.nr0.htm" nonfarm_url = "https://www.bls.gov/news.release/empsit.nr0.htm"
cpi_url = "https://www.bls.gov/news.release/cpi.nr0.htm" cpi_url = "https://www.bls.gov/news.release/cpi.nr0.htm"
fomc_url = "https://www.federalreserve.gov/newsevents/pressreleases/monetary" fomc_url = "https://www.federalreserve.gov/newsevents/pressreleases/monetary"
print("Start Time:" , datetime.fromtimestamp(time.time())) pce_url = "https://www.bea.gov/news/2024/personal-income-and-outlays-"#january-2024
schedule.every().day.at("18:00").do(broadcast_all_bs4_fomc, "February") # broadcast_all_pce("February", "february-2024")
while True: # print("Start Time:" , datetime.fromtimestamp(time.time()))
if datetime.now().strftime("%Y-%m-%d %H:%M") == "2024-03-20 18:00": # schedule.every().day.at("18:39").do(broadcast_all_cpi, "FEBRUARY")
schedule.run_pending() # while True:
time.sleep(0.1) # Check every 0.1 seconds # if datetime.now().strftime("%Y-%m-%d %H:%M") == "2024-04-10 18:39":
# schedule.run_pending()
# time.sleep(0.1) # Check every 0.1 seconds
#NonFarm #NonFarm
# text = download_pdf_nonfarm() # text = download_pdf_nonfarm()
# read_nonfarm(url) # read_nonfarm(url)
# print(text) # print(text)
#CPI #CPI
date , value = find_cpi(cpi_url)
# data, value = find_cpi(cpi_url) print(date)
# print(data, value) print(value)
# broadcast_all_cpi("FEBRUARY") # broadcast_all_cpi("FEBRUARY")
# read_CPI(cpi_url)
# broadcast_all_fomc("January")
Loading…
Cancel
Save