From 2b5f63240802c8904ca65d1d1e6843efb22170cb Mon Sep 17 00:00:00 2001
From: joey0629
Date: Wed, 21 Feb 2024 00:01:50 +0800
Subject: [PATCH] bs4 update

---
 main.py | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/main.py b/main.py
index 84c404b..4b21dc4 100644
--- a/main.py
+++ b/main.py
@@ -86,7 +86,43 @@ def start():
     url = "https://www.bea.gov/news/2024/personal-income-and-outlays-december-2023"
     text = find_indicate(url)
     return text
 
+def find_indicate_bs4(url):
+    """Fetch ``url`` and return two '+'-separated pieces of its second row.
+
+    The page is downloaded with ``requests`` and parsed with BeautifulSoup's
+    ``html.parser``. The text of the second ``<tr>`` on the page is split on
+    '+', and the first two pieces are returned joined by a single space.
+
+    Raises:
+        requests.RequestException: the download failed, timed out, or the
+            server answered with an HTTP error status.
+        IndexError: the page has fewer than two ``<tr>`` rows, or the second
+            row's text contains no '+'.
+    """
+    from bs4 import BeautifulSoup
+    import requests
+
+    # Without a timeout, requests can block forever on a stalled server.
+    response = requests.get(url, timeout=30)
+    # Fail loudly on 4xx/5xx instead of scraping an error page.
+    response.raise_for_status()
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    # Find the rows with a CSS selector. select() matches on the selector
+    # string alone, so the former class_='item-fact-row' keyword never
+    # narrowed the result; it is dropped without changing which rows match.
+    # NOTE(review): if only fact rows are wanted, use "tr.item-fact-row" and
+    # re-check that index 1 is still the intended row.
+    rows = soup.select("tr")
+    if len(rows) < 2:
+        raise IndexError(f"expected at least two <tr> rows at {url}")
+
+    # Split once and reuse the pieces instead of splitting the text twice.
+    parts = rows[1].text.split('+')
+    if len(parts) < 2:
+        raise IndexError(f"no '+' found in the second <tr> at {url}")
+    return parts[0] + " " + parts[1]
 def find_indicate(url):
     from selenium import webdriver
     from selenium.webdriver.common.by import By
@@ -94,7 +130,7 @@ def find_indicate(url):
     import time
     options = Options()
     # options.add_argument("--headless") # 啟用無頭模式
-    driver = webdriver.Chrome(options=options)
+    driver =webdriver.Chrome(options=options)
     driver.get(url)
     # time.sleep(3)
     chat = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]/td[6]')
@@ -135,7 +171,7 @@ if __name__ == "__main__":
     endtimee = time.time()
     print("Time_NonFarm: ", endtimee-startimee)
     startimee = time.time()
-    print(find_indicate("https://www.bea.gov/news/2024/personal-income-and-outlays-december-2023"))
+    print(find_indicate_bs4("https://www.bea.gov/data/personal-consumption-expenditures-price-index"))
     endtimee = time.time()
     print("Time_NonFarm: ", endtimee-startimee)
     # cfg = Config()