From 46da2c648b89814dcca1241d8c3b37992b9831e8 Mon Sep 17 00:00:00 2001
From: joey0629
Date: Wed, 20 Mar 2024 21:17:43 +0800
Subject: [PATCH] Fomc Update Bs4

---
 .idea/.gitignore |  2 ++
 main.py          | 99 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/.idea/.gitignore b/.idea/.gitignore
index 13566b8..a9d7db9 100644
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
@@ -6,3 +6,5 @@
 # Datasource local storage ignored files
 /dataSources/
 /dataSources.local.xml
+# GitHub Copilot persisted chat sessions
+/copilot/chatSessions
diff --git a/main.py b/main.py
index 1e58afc..964945c 100644
--- a/main.py
+++ b/main.py
@@ -57,6 +57,32 @@ def find_indicate(url):
     date = driver.find_element(By.XPATH, '//*[@id="test"]/div[2]/article/div/div/div[1]/div[2]/div/div[1]/div/table/tbody/tr[1]/td[1]')
     value = driver.find_element(By.XPATH, '//*[@id="test"]/div[2]/article/div/div/div[1]/div[2]/div/div[1]/div/table/tbody/tr[1]/td[2]')
     return date.text , value.text
+def find_Fomc_bs4(date:str):
+    """Fetch one FOMC press release with requests and pull out its month and policy sentence.
+
+    :param date: release id appended to ``fomc_url``, e.g. ``'20240131a'``.
+    :return: ``(month, sentence)``; either item is ``None`` when the page lacks the
+        ``.article__time`` element or a sentence containing 'In support of'.
+    """
+    from bs4 import BeautifulSoup
+    import requests
+    # Without a timeout a stalled connection would block the caller forever
+    response = requests.get(fomc_url+date+".htm", timeout=10)
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    # Locate the elements with CSS selectors
+    time_elements = soup.select('.article__time')
+    value_elements = soup.find_all('div',class_="col-xs-12 col-sm-8 col-md-8" )
+
+    # First word of the timestamp text (presumably the month name; confirm against the page)
+    month = time_elements[0].text.split(',')[0].split(' ')[0] if time_elements else None
+
+    # Split every matched block on '.' and keep the first piece mentioning 'In support of'
+    pieces = [piece for item in value_elements for piece in item.text.split('.')]
+    matches = [piece for piece in pieces if 'In support of' in piece]
+    sentence = matches[0].strip('\n') if matches else None
+
+    return month, sentence
 def find_cpi(url):
     from selenium import webdriver
     from selenium.webdriver.common.by import By
@@ -64,6 +90,7 @@ def find_cpi(url):
     from selenium.webdriver.support.ui import WebDriverWait
     from selenium.webdriver.support import expected_conditions as EC
     import time
+    print("Start")
     options = Options()
     options.add_argument("--headless") # 啟用無頭模式
     driver = webdriver.Firefox(options = options)
@@ -73,9 +100,31 @@ def find_cpi(url):
     value = driver.find_element(By.XPATH, '//*[@id="bodytext"]/div[1]/pre[1]').text.split('\n')[8:12]
     driver.quit()
     return date , value
-# 是一個持續運行的迴圈,不斷從Telegram伺服器抓取新的消息
-# 然後使用上面定義的消息處理器來處理這些消息。
+def find_fomc(url,date:str):
+    """Selenium counterpart of find_Fomc_bs4: read the release in headless Firefox.
+
+    :param url: URL prefix; ``url + date + ".htm"`` is loaded.
+    :param date: release id, e.g. ``'20240131a'``.
+    :return: ``(month, sentence)``; ``sentence`` is ``None`` when no piece of the
+        article text contains 'In support of'.
+    """
+    from selenium import webdriver
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.firefox.options import Options
+    print("Start")
+    options = Options()
+    options.add_argument("--headless") # enable headless mode
+    driver = webdriver.Firefox(options = options)
+    try:
+        driver.get(url+date+".htm")
+        month = driver.find_element(By.CLASS_NAME, 'article__time').text.split(',')[0].split(' ')[0]
+        pieces = driver.find_element(By.XPATH, '//*[@id="article"]/div[3]').text.split('.')
+    finally:
+        # Close the browser even when a lookup raises, so no Firefox process is left behind
+        driver.quit()
+    matches = [piece for piece in pieces if 'In support of' in piece]
+    return month , (matches[0].strip('\n') if matches else None)
 
 #非農就業人數
 def read_pdf_nonfarm(month, year):
     pdf = pdfplumber.open(f"empsit_{month}_{year}.pdf")
@@ -121,6 +170,7 @@ def read_PCE():
     message = find_indicate_bs4("https://www.bea.gov/data/income-saving/personal-income")
     return message
 
+
 def broadcast_all(target:str):
     startimee = time.time()
     message = read_PCE()
@@ -152,14 +202,49 @@ def broadcast_all_cpi(target:str):
     broadcast_message(
         f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s",
         "-1002033782195")
+def _broadcast_fomc(fetch, target:str, poll_interval:float):
+    """Poll ``fetch()`` until it returns ``target`` with a sentence, then broadcast it and the elapsed time.
+
+    :param fetch: zero-argument callable returning ``(month, sentence)``.
+    :param target: month value that ends the polling.
+    :param poll_interval: seconds slept between two polls.
+    """
+    startimee = time.time()
+    date , message = fetch()
+    while date != target or message is None:
+        # Pause so the loop does not fire requests back-to-back at the server
+        time.sleep(poll_interval)
+        date, message = fetch()
+    broadcast_message(message, "-1002033782195")
+    endtimee = time.time()
+    broadcast_message(
+        f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s",
+        "-1002033782195")
+def broadcast_all_fomc(target:str, release:str='20240131a', poll_interval:float=1.0):
+    """Broadcast the FOMC sentence scraped with Selenium once the release shows ``target``.
+
+    :param target: month to wait for, e.g. ``"January"``.
+    :param release: release id appended to ``fomc_url``.
+    :param poll_interval: seconds slept between two polls.
+    """
+    _broadcast_fomc(lambda: find_fomc(fomc_url, release), target, poll_interval)
+def broadcast_all_bs4_fomc(target:str, release:str='20240131a', poll_interval:float=1.0):
+    """Broadcast the FOMC sentence scraped with BeautifulSoup once the release shows ``target``.
+
+    :param target: month to wait for, e.g. ``"February"``.
+    :param release: release id appended to ``fomc_url``.
+    :param poll_interval: seconds slept between two polls.
+    """
+    _broadcast_fomc(lambda: find_Fomc_bs4(release), target, poll_interval)
 if __name__ == "__main__":
-    global nonfarm_url , cpi_url
+    global nonfarm_url , cpi_url , fomc_url
     nonfarm_url = "https://www.bls.gov/news.release/empsit.nr0.htm"
     cpi_url = "https://www.bls.gov/news.release/cpi.nr0.htm"
+    fomc_url = "https://www.federalreserve.gov/newsevents/pressreleases/monetary"
     print("Start Time:" , datetime.fromtimestamp(time.time()))
-    schedule.every().day.at("17:49").do(broadcast_all_cpi, "JANUARY")
+    schedule.every().day.at("18:00").do(broadcast_all_bs4_fomc, "February")
     while True:
-        if datetime.now().strftime("%Y-%m-%d %H:%M") == "2024-03-12 17:49":
+        if datetime.now().strftime("%Y-%m-%d %H:%M") == "2024-03-20 18:00":
             schedule.run_pending()
         time.sleep(0.1) # Check every 0.1 seconds
     #NonFarm
@@ -171,4 +256,6 @@ if __name__ == "__main__":
 
     # data, value = find_cpi(cpi_url)
     # print(data, value)
-    # read_CPI(cpi_url)
\ No newline at end of file
+    # broadcast_all_cpi("FEBRUARY")
+    # read_CPI(cpi_url)
+    # broadcast_all_fomc("January")
\ No newline at end of file