diff --git a/Dockerfile b/Dockerfile
index 2d085b6..eba2d8a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,25 @@ FROM python:3.10
 
 # Install Python and pip
 RUN apt-get update && apt-get install -y python3 python3-pip && apt install nano
-RUN apt-get update && apt-get install -y wget bzip2 libxtst6 libgtk-3-0 libx11-xcb-dev libdbus-glib-1-2 libxt6 libpci-dev && rm -rf /var/lib/apt/lists/*
+
+# Install ChromeDriver (pinned to 114.0.5735.90).
+# NOTE(review): google-chrome-stable below is unpinned; confirm ChromeDriver
+# still matches the installed Chrome major version.
+RUN apt-get update && apt-get install -y \
+    wget unzip \
+    && rm -rf /var/lib/apt/lists/* \
+    && wget https://chromedriver.storage.googleapis.com/114.0.5735.90/chromedriver_linux64.zip \
+    && unzip chromedriver_linux64.zip \
+    && rm chromedriver_linux64.zip \
+    && mv chromedriver /usr/local/bin/ \
+    && chmod +x /usr/local/bin/chromedriver
+
+# Install Google Chrome
+RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \
+    && echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" | tee /etc/apt/sources.list.d/google-chrome.list \
+    && apt-get update && apt-get install -y \
+    google-chrome-stable \
+    && rm -rf /var/lib/apt/lists/*
 
 # Set the working directory in the container
 WORKDIR /app
diff --git a/main.py b/main.py
index 964945c..a3f4703 100644
--- a/main.py
+++ b/main.py
@@ -86,14 +86,17 @@ def find_Fomc_bs4(date:str):
 def find_cpi(url):
     from selenium import webdriver
     from selenium.webdriver.common.by import By
-    from selenium.webdriver.firefox.options import Options
+    from selenium.webdriver.chrome.options import Options
+
     from selenium.webdriver.support.ui import WebDriverWait
     from selenium.webdriver.support import expected_conditions as EC
     import time
     print("Start")
     options = Options()
-    options.add_argument("--headless") # 啟用無頭模式
-    driver = webdriver.Firefox(options = options)
+    options.add_argument('--headless') # enable headless mode, as the other scrapers do
+    options.add_argument('--no-sandbox')
+    options.add_argument('--disable-dev-shm-usage')
+    driver = webdriver.Chrome(options=options)
     driver.get(url)
     # time.sleep(3)
     date = driver.find_element(By.XPATH, '//*[@id="bodytext"]/div[1]/pre[1]').text.split('\n')[6].split("-")[1].split(" ")[1]
@@ -105,9 +108,6 @@ def find_fomc(url,date:str):
     from selenium import webdriver
     from selenium.webdriver.common.by import By
     from selenium.webdriver.firefox.options import Options
-    from selenium.webdriver.support.ui import WebDriverWait
-    from selenium.webdriver.support import expected_conditions as EC
-    import time
     print("Start")
     options = Options()
     options.add_argument("--headless") # 啟用無頭模式
@@ -121,6 +121,67 @@ def find_fomc(url,date:str):
     driver.quit()
     return date , value[0].strip('\n')
 
+def find_pce(date:str):
+    """Scrape the BEA Personal Income and Outlays release page with headless Chrome.
+
+    `date` is the URL suffix appended to the module-level `pce_url`.
+    Returns a 3-tuple: the 5th space-separated word of the h1 heading, the text
+    of the p[2] paragraph, and the last space-separated field of table row 13.
+    """
+    from selenium import webdriver
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.chrome.options import Options
+    print("Start")
+    options = Options()
+    options.add_argument("--headless") # enable headless mode
+    options.add_argument('--no-sandbox')
+    options.add_argument('--disable-dev-shm-usage')
+    # Hand the options to the driver; a bare webdriver.Chrome() would ignore them.
+    driver = webdriver.Chrome(options=options)
+    try:
+        driver.get(pce_url+date)
+        release_month = driver.find_element(By.XPATH, '//*[@id="home"]/h1').text.split(' ')[4]
+        value_1 = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/p[2]').text
+        value_2 = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]').text.strip(' ').split(' ')[-1]
+    finally:
+        # Always close the browser, even when an element lookup raises.
+        driver.quit()
+    return release_month , value_1 , value_2
+
+def find_pce_bs4(date:str):
+    """Fetch the PCE release page with requests and parse it with BeautifulSoup.
+
+    `date` is the URL suffix appended to the module-level `pce_url`.
+    Returns (month, value1, value2); each item is None when the page has no
+    matching element. value1 and value2 are lists of '.'-separated text pieces.
+    """
+    from bs4 import BeautifulSoup
+    import requests
+    # A timeout keeps a stalled connection from hanging the caller forever.
+    response = requests.get(pce_url+date, timeout=10)
+    soup = BeautifulSoup(response.text, 'html.parser')
+    # Compound class selector: '.row release-embargo' would look for a
+    # 'release-embargo' tag inside .row and never match.
+    time_elements = soup.select('.row.release-embargo')
+    value_elements = soup.find_all('div',class_="col-md-12 release-body" )
+    value2_elements = soup.find_all('td',class_="text-left" )
+
+    release_month = None
+    if time_elements:
+        release_month = time_elements[0].text.split(' ')[4]
+        print(release_month)
+
+    value1 = None
+    if value_elements:
+        value1 = [part for item in value_elements for part in item.text.split('.')]
+        print(value1)
+
+    value2 = None
+    if value2_elements:
+        value2 = [part for item in value2_elements for part in item.text.split('.')]
+        print(value2)
+
+    return release_month, value1 , value2
 #非農就業人數
 def read_pdf_nonfarm(month, year):
     pdf = pdfplumber.open(f"empsit_{month}_{year}.pdf")
@@ -218,26 +279,56 @@ def broadcast_all_bs4_fomc(target:str):
     broadcast_message(
         f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s",
         "-1002033782195")
+def broadcast_all_pce(target:str,date_target:str):
+    """Poll find_pce(date_target) until its month equals `target`, then broadcast the data."""
+    startimee = time.time()
+    date, message1, message2 = find_pce(date_target)
+    while date != target:
+        date, message1, message2 = find_pce(date_target)
+    broadcast_message(date+" PCE Data", "-1002033782195")
+    broadcast_message(message1+'\n\n Percent change from month one year ago : '+message2, "-1002033782195")
+    endtimee = time.time()
+    broadcast_message(
+        f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s",
+        "-1002033782195")
+
+def broadcast_all_bs4_pce(target:str, date_target:str='20240131a'):
+    """Poll find_pce_bs4(date_target) until its month equals `target`, then broadcast the text.
+
+    `date_target` defaults to the previously hard-coded '20240131a'.
+    """
+    startimee = time.time()
+    # find_pce_bs4 returns three values; the table cells are not broadcast here.
+    date, fragments, _ = find_pce_bs4(date_target)
+    while date != target:
+        date, fragments, _ = find_pce_bs4(date_target)
+    broadcast_message(date+" PCE Data", "-1002033782195")
+    # Rejoin the '.'-split pieces into the original text before sending.
+    broadcast_message('.'.join(fragments or []), "-1002033782195")
+    endtimee = time.time()
+    broadcast_message(
+        f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s",
+        "-1002033782195")
 
 if __name__ == "__main__":
-    global nonfarm_url , cpi_url , fomc_url
+    global nonfarm_url , cpi_url , fomc_url , pce_url
     nonfarm_url = "https://www.bls.gov/news.release/empsit.nr0.htm"
     cpi_url = "https://www.bls.gov/news.release/cpi.nr0.htm"
     fomc_url = "https://www.federalreserve.gov/newsevents/pressreleases/monetary"
-    print("Start Time:" , datetime.fromtimestamp(time.time()))
-    schedule.every().day.at("18:00").do(broadcast_all_bs4_fomc, "February")
-    while True:
-        if datetime.now().strftime("%Y-%m-%d %H:%M") == "2024-03-20 18:00":
-            schedule.run_pending()
-        time.sleep(0.1) # Check every 0.1 seconds
+    pce_url = "https://www.bea.gov/news/2024/personal-income-and-outlays-"#january-2024
+    # broadcast_all_pce("February", "february-2024")
+    # print("Start Time:" , datetime.fromtimestamp(time.time()))
+    # schedule.every().day.at("18:39").do(broadcast_all_cpi, "FEBRUARY")
+    # while True:
+    #     if datetime.now().strftime("%Y-%m-%d %H:%M") == "2024-04-10 18:39":
+    #         schedule.run_pending()
+    #     time.sleep(0.1) # Check every 0.1 seconds
     #NonFarm
     # text = download_pdf_nonfarm()
     # read_nonfarm(url)
     # print(text)
     #CPI
-
-    # data, value = find_cpi(cpi_url)
-    # print(data, value)
+    date , value = find_cpi(cpi_url)
+    print(date)
+    print(value)
     # broadcast_all_cpi("FEBRUARY")
-    # read_CPI(cpi_url)
-    # broadcast_all_fomc("January")
\ No newline at end of file