From df468ec5a69aa290c9ef71468154ef8edbc3840b Mon Sep 17 00:00:00 2001 From: joey0629 Date: Wed, 22 May 2024 13:31:26 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=20date=20=E8=88=87=20schedul?= =?UTF-8?q?e=20=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 296 ++++++++++++++++++++++++-------------------------------- 1 file changed, 128 insertions(+), 168 deletions(-) diff --git a/main.py b/main.py index 5976ee8..614b481 100644 --- a/main.py +++ b/main.py @@ -14,7 +14,8 @@ from selenium.common.exceptions import WebDriverException import pytz from selenium.webdriver.common.by import By from selenium.webdriver.chrome.options import Options - +from datetime import timedelta +import threading BOT_TOKEN = '6701395239:AAFE30dqvNihDdni9vYoAbWssO-X5yAmwho' # BOT_TOKEN = "6746720034:AAEMaoV2FwIZ8pz_PF18-bo2a6gFC1eVtVs" #BOT_TOKEN = '6589162555:AAHGhrTQ0wYNtIUySMohnfpxQl1d6blr24Q' @@ -28,149 +29,78 @@ def broadcast_message(message:str,chat_id:str): ) -def find_cpi(url): - from selenium import webdriver - from selenium.webdriver.common.by import By - from selenium.webdriver.chrome.options import Options +def find_cpi(driver): + try: + driver.get(cpi_url) + print("Open") + date = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre[1]').text.split('\n')[6].split("-")[1].split(" ")[1] + value = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre[1]').text.split('\n')[8:12] + return driver, date, value + except Exception as e: + print(f"Error in find_cpi: {e}") + return driver, None, None - from selenium.webdriver.support.ui import WebDriverWait - from selenium.webdriver.support import expected_conditions as EC - import time - print("Start") - options = Options() - options.add_argument('--headless') - options.add_argument('--no-sandbox') - options.add_argument('--disable-dev-shm-usage') - options.add_argument("--window-size=1920,1080") # 可以根據需要調整這個大小 - options.add_argument( - 'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3') - driver = webdriver.Chrome( options=options) - driver.get(url) - print("Open") - # time.sleep(4) - date = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre[1]').text.split('\n')[6].split("-")[1].split(" ")[1] - value = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre[1]').text.split('\n')[8:12] - # driver.quit() - return date , value +def find_fomc(driver, date:str): + try: + driver.get(fomc_url+date+".htm") + print("Open") + date = driver.find_element(By.CLASS_NAME, 'article__time').text.split(',')[0].split(' ')[0] + value = driver.find_element(By.XPATH, '//*[@id="article"]/div[3]').text.split('.') + value = [item for item in value if 'In support of' in item] + return driver, date, value[0].strip('\n') + except Exception as e: + print(f"Error in find_fomc: {e}") + return driver, None, None -def find_fomc(url,date:str): - from selenium import webdriver - from selenium.webdriver.common.by import By - from selenium.webdriver.chrome.options import Options - print("Start") - options = Options() - options.add_argument('--headless') - options.add_argument('--no-sandbox') - options.add_argument('--disable-dev-shm-usage') - options.add_argument("--window-size=1920,1080") # 可以根據需要調整這個大小 - options.add_argument( - 'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3') - driver = webdriver.Chrome( options=options) - driver.get(url+date+".htm") - print("Open") - # time.sleep(3) - date = driver.find_element(By.CLASS_NAME, 'article__time').text.split(',')[0].split(' ')[0] - value = driver.find_element(By.XPATH, '//*[@id="article"]/div[3]').text.split('.') - value = [item for item in value if 'In support of' in item] - driver.quit() - return date , value[0].strip('\n') -def find_pce(date:str): - from selenium import webdriver - from selenium.webdriver.common.by import By - from selenium.webdriver.chrome.options import Options - print("Start") - options = Options() - # options.add_argument('--headless') - options.add_argument('--no-sandbox') - options.add_argument('--disable-dev-shm-usage') - options.add_argument("--window-size=1920,1080") # 可以根據需要調整這個大小 - options.add_argument( - 'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3') - driver = webdriver.Chrome( options=options) +def find_non_farm(driver): + try: + driver.get(nonfarm_url) + print("Open") + date = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre').text.split('\n')[6] + value = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre').text.split('\n')[8:12] + return driver, date, value + except Exception as e: + print(f"Error in find_non_farm: {e}") + return driver, None, None +def find_pce(driver, date_target:str): try: - driver.get(pce_url+date) + driver.get(pce_url+date_target) print("Successfully accessed the website.") time.sleep(1) date = driver.find_element(By.XPATH, '//*[@id="home"]/h1').text.split(' ')[4] value_1 = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/p[2]').text value_2 = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]').text.strip(' ').split(' ')[-1] - return date, value_1, value_2 + return driver, date, value_1, value_2 except WebDriverException as e: print(e) print("Failed to access the website.") - return None , None , None - finally: - driver.quit() -#非農就業人數 -def find_non_farm(url): - from selenium import webdriver - from selenium.webdriver.common.by import By - from selenium.webdriver.chrome.options import Options - - from selenium.webdriver.support.ui import WebDriverWait - from selenium.webdriver.support import expected_conditions as EC - import time - print("Start") - options = Options() - options.add_argument('--headless') - options.add_argument('--no-sandbox') - options.add_argument('--disable-dev-shm-usage') - options.add_argument("--window-size=1920,1080") # 可以根據需要調整這個大小 - options.add_argument( - 'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3') - driver = webdriver.Chrome( options=options) - driver.get(url) - print("Open") - # time.sleep(4) - date = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre').text.split('\n')[6] - value = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre').text.split('\n')[8:12] - # driver.quit() - return date , value -def find_pmi(month:str): - from selenium import webdriver - from selenium.webdriver.common.by import By - from selenium.webdriver.chrome.options import Options - - from selenium.webdriver.support.ui import WebDriverWait - from selenium.webdriver.support import expected_conditions as EC - import time - print("Start") - options = Options() - options.add_argument('--headless') - options.add_argument('--no-sandbox') - options.add_argument('--disable-dev-shm-usage') - options.add_argument("--window-size=1920,1080") # 可以根據需要調整這個大小 - options.add_argument( - 'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3') - driver = webdriver.Chrome( options=options) - driver.get(pmi_url+month+"/") - print("Open") - # time.sleep(4) - agree_button = driver.find_element(By.XPATH, '//*[@id="alert-modal-disclaimer___BV_modal_body_"]/center/input') - agree_button.click() - - date = driver.find_element(By.XPATH, '//*[@id="main"]/div[1]/div/div[2]/div/div[1]/div/div/div[1]/div[1]/h1[2]').text.split(' ')[0] - value = driver.find_element(By.XPATH, '//*[@id="main"]/div[1]/div/div[2]/div/div[1]/div/div/div[1]/div[1]/p[3]').text.split('.')[0:4] - #將value list 串起來成一個string - value_str = ".".join(value)+"." - driver.quit() - return date ,value_str - -def read_pdf_nonfarm(month, year): - pdf = pdfplumber.open(f"empsit_{month}_{year}.pdf") - page = pdf.pages[0] - text = page.extract_text().split('\n') - text = (text[7]+text[8]).split(',') - text = text[0]+text[1]+text[2] - return text + return driver, None, None, None +def find_pmi(driver, date_target:str): + try: + driver.get(pmi_url+date_target+"/") + print("Open") + # time.sleep(4) + agree_button = driver.find_element(By.XPATH, '//*[@id="alert-modal-disclaimer___BV_modal_body_"]/center/input') + agree_button.click() + date = driver.find_element(By.XPATH, '//*[@id="main"]/div[1]/div/div[2]/div/div[1]/div/div/div[1]/div[1]/h1[2]').text.split(' ')[0] + value = driver.find_element(By.XPATH, '//*[@id="main"]/div[1]/div/div[2]/div/div[1]/div/div/div[1]/div[1]/p[3]').text.split('.')[0:4] + #將value list 串起來成一個string + value_str = ".".join(value)+"." + return driver, date, value_str + except Exception as e: + print(f"Error in find_pmi: {e}") + return driver, None, None def broadcast_all_non_farm(target:str): startimee = time.time() - date , message = find_non_farm(nonfarm_url) + driver = webdriver.Chrome(options=options) + driver, date, message = find_non_farm(driver) while date != target: - date, message = find_non_farm(nonfarm_url) + driver.refresh() + print("Non Farm Refresh") + driver, date, message = find_non_farm(driver) message= "\n".join(message) broadcast_message(message, "-1002033782195") endtimee = time.time() @@ -178,13 +108,17 @@ def broadcast_all_non_farm(target:str): f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s", "-1002033782195") broadcast_message(f"The above is the Non Farm for {target}","-1002033782195") + driver.quit() return True def broadcast_all_cpi(target:str): startimee = time.time() - date , message = find_cpi(cpi_url) + driver = webdriver.Chrome(options=options) + driver, date , message = find_cpi(driver) while date != target: - date, message = find_cpi(cpi_url) + driver.refresh() + print("Cpi Refresh") + driver, date, message = find_cpi(driver) message= "\n".join(message) broadcast_message(message, "-1002033782195") endtimee = time.time() @@ -192,23 +126,32 @@ def broadcast_all_cpi(target:str): f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s", "-1002033782195") broadcast_message(f"The above is the CPI for {target}","-1002033782195") + driver.quit() return True def broadcast_all_fomc(target:str,date_target:str): #date_target = "20240131a" startimee = time.time() - date , message = find_fomc(fomc_url,date_target) + driver = webdriver.Chrome(options=options) + driver, date , message = find_fomc(driver,date_target) while date != target: - date, message = find_fomc(fomc_url,date_target) + driver.refresh() + print("Fomc Refresh") + driver, date, message = find_fomc(driver,date_target) broadcast_message(message, "-1002033782195") endtimee = time.time() broadcast_message( f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s", "-1002033782195") broadcast_message(f"The above is the FOMC abstract for {target}","-1002033782195") + driver.quit() + return True def broadcast_all_pce(target:str,date_target:str): startimee = time.time() - date, message1, message2 = find_pce(date_target) + driver = webdriver.Chrome(options=options) + driver, date, message1, message2 = find_pce(driver,date_target) while date != target: - date, message1, message2 = find_pce(date_target) + driver.refresh() + print("PCE Refresh") + driver, date, message1, message2 = find_pce(driver,date_target) broadcast_message(date+" PCE Data", "-1002033782195") broadcast_message(message1+'\n\n Percent change from month one year ago : '+f"**{message2}**", "-1002033782195") endtimee = time.time() @@ -216,6 +159,8 @@ def broadcast_all_pce(target:str,date_target:str): f"Now :{datetime.fromtimestamp(time.time())} , Spend time :{str(round(endtimee - startimee, 3))} s", "-1002033782195") broadcast_message(f"The above is the PCE for {target}","-1002033782195") + driver.quit() + return True def broadcast_pmi(date_target:str): startimee = time.time() driver = webdriver.Chrome(options=options) @@ -223,6 +168,7 @@ def broadcast_pmi(date_target:str): Up_date = date_target.capitalize() while date != Up_date: driver.refresh() + print("PMI Refresh") driver, date, value = find_pmi(driver, date_target) broadcast_message(Up_date+" PMI Data", "-1002033782195") broadcast_message(value, "-1002033782195") @@ -232,72 +178,85 @@ def broadcast_pmi(date_target:str): "-1002033782195") broadcast_message(f"The above is the PMI for {Up_date}","-1002033782195") driver.quit() + return True has_broadcasted = False def wrapper_function_cpi(target): global has_broadcasted + time.sleep(55) result = broadcast_all_cpi(target) if result: has_broadcasted = True def wrapper_function_pce(target,date_target): global has_broadcasted + time.sleep(55) result = broadcast_all_pce(target,date_target) if result: has_broadcasted = True def wrapper_function_fomc(target,date_target): global has_broadcasted + time.sleep(55) result = broadcast_all_fomc(target,date_target=date_target) if result: has_broadcasted = True def wrapper_function_non_farm(target): global has_broadcasted + time.sleep(55) result = broadcast_all_non_farm(target) if result: has_broadcasted = True def wrapper_function_pmi(target): global has_broadcasted + time.sleep(55) result = broadcast_pmi(target) if result: has_broadcasted = True -def convert_to_utc(date_str, time_str): +def convert_to_utc(date_str, time_str, offset_seconds=0): local_tz = pytz.timezone('Asia/Taipei') local_time = datetime.strptime(date_str + " " + time_str, "%Y/%m/%d %H:%M") + local_time = local_time - timedelta(seconds=offset_seconds) # 提前 offset_seconds 秒 local_time = local_tz.localize(local_time) utc_time = local_time.astimezone(pytz.utc) return utc_time.strftime("%Y-%m-%d %H:%M") def print_hello(text): + time.sleep(55) + startime = time.time() + startime = datetime.fromtimestamp(startime) + print("Start Time:" , startime) print("Hello") print(text) + return True schedules = { - convert_to_utc("2024/05/09", "04:34"): {"function": print_hello, "args": ["May"]}, - convert_to_utc("2024/05/09", "04:35"): {"function": print_hello, "args": ["June"]}, - convert_to_utc("2024/06/12", "20:30"): {"function": wrapper_function_cpi, "args": ["June", "june-2024"]}, - convert_to_utc("2024/07/11", "20:30"): {"function": wrapper_function_cpi, "args": ["July", "july-2024"]}, - convert_to_utc("2024/08/14", "20:30"): {"function": wrapper_function_cpi, "args": ["August", "august-2024"]}, - convert_to_utc("2024/09/11", "20:30"): {"function": wrapper_function_cpi, "args": ["September", "september-2024"]}, - convert_to_utc("2024/10/10", "20:30"): {"function": wrapper_function_cpi, "args": ["October", "october-2024"]}, - convert_to_utc("2024/11/13", "21:30"): {"function": wrapper_function_cpi, "args": ["November", "november-2024"]}, - convert_to_utc("2024/12/11", "21:30"): {"function": wrapper_function_cpi, "args": ["December", "december-2024"]}, - convert_to_utc("2024/05/31", "20:30"): {"function": wrapper_function_pce, "args": ["May", "may-2024"]}, - convert_to_utc("2024/06/28", "20:30"): {"function": wrapper_function_pce, "args": ["June", "june-2024"]}, - convert_to_utc("2024/07/26", "20:30"): {"function": wrapper_function_pce, "args": ["July", "july-2024"]}, - convert_to_utc("2024/08/30", "20:30"): {"function": wrapper_function_pce, "args": ["August", "august-2024"]}, - convert_to_utc("2024/09/27", "20:30"): {"function": wrapper_function_pce, "args": ["September", "september-2024"]}, - convert_to_utc("2024/10/31", "20:30"): {"function": wrapper_function_pce, "args": ["October", "october-2024"]}, - convert_to_utc("2024/11/27", "21:30"): {"function": wrapper_function_pce, "args": ["November", "november-2024"]}, - convert_to_utc("2024/12/20", "21:30"): {"function": wrapper_function_pce, "args": ["December", "december-2024"]}, - convert_to_utc("2024/06/07", "20:30"): {"function": wrapper_function_non_farm, "args": ["June", "june-2024"]}, - convert_to_utc("2024/07/05", "20:30"): {"function": wrapper_function_non_farm, "args": ["July", "july-2024"]}, - convert_to_utc("2024/08/02", "20:30"): {"function": wrapper_function_non_farm, "args": ["August", "august-2024"]}, - convert_to_utc("2024/09/06", "20:30"): {"function": wrapper_function_non_farm, "args": ["September", "september-2024"]}, - convert_to_utc("2024/10/04", "20:30"): {"function": wrapper_function_non_farm, "args": ["October", "october-2024"]}, - convert_to_utc("2024/11/01", "20:30"): {"function": wrapper_function_non_farm, "args": ["November", "november-2024"]}, - convert_to_utc("2024/12/06", "21:30"): {"function": wrapper_function_non_farm, "args": ["December", "december-2024"]}, - convert_to_utc("2024/06/13", "02:00"): {"function": wrapper_function_fomc, "args": ["June", "20240613a"]}, - convert_to_utc("2024/08/01", "02:00"): {"function": wrapper_function_fomc, "args": ["August", "20240801a"]}, - convert_to_utc("2024/09/19", "02:00"): {"function": wrapper_function_fomc, "args": ["September", "20240919a"]}, - convert_to_utc("2024/11/08", "02:00"): {"function": wrapper_function_fomc, "args": ["November", "20241108a"]}, - convert_to_utc("2024/12/19", "03:00"): {"function": wrapper_function_fomc, "args": ["December", "20241219a"]}, + convert_to_utc("2024/05/22", "10:33", 5): {"function": print_hello, "args": ["May"]}, + convert_to_utc("2024/05/22", "10:34", 5): {"function": print_hello, "args": ["June"]}, + convert_to_utc("2024/06/12", "20:30", 5): {"function": wrapper_function_cpi, "args": ["MAY"]}, + convert_to_utc("2024/07/11", "20:30", 5): {"function": wrapper_function_cpi, "args": ["JUNE"]}, + convert_to_utc("2024/08/14", "20:30", 5): {"function": wrapper_function_cpi, "args": ["JULY"]}, + convert_to_utc("2024/09/11", "20:30", 5): {"function": wrapper_function_cpi, "args": ["AUGUST"]}, + convert_to_utc("2024/10/10", "20:30", 5): {"function": wrapper_function_cpi, "args": ["SEPTEMBER"]}, + convert_to_utc("2024/11/13", "21:30", 5): {"function": wrapper_function_cpi, "args": ["OCTOBER"]}, + convert_to_utc("2024/12/11", "21:30", 5): {"function": wrapper_function_cpi, "args": ["NOVEMBER"]}, + convert_to_utc("2024/05/31", "20:30", 5): {"function": wrapper_function_pce, "args": ["April", "april-2024"]}, + convert_to_utc("2024/06/28", "20:30", 5): {"function": wrapper_function_pce, "args": ["May", "may-2024"]}, + convert_to_utc("2024/07/26", "20:30", 5): {"function": wrapper_function_pce, "args": ["June", "june-2024"]}, + convert_to_utc("2024/08/30", "20:30", 5): {"function": wrapper_function_pce, "args": ["July", "july-2024"]}, + convert_to_utc("2024/09/27", "20:30", 5): {"function": wrapper_function_pce, "args": ["August", "august-2024"]}, + convert_to_utc("2024/10/31", "20:30", 5): {"function": wrapper_function_pce, "args": ["September", "september-2024"]}, + convert_to_utc("2024/11/27", "21:30", 5): {"function": wrapper_function_pce, "args": ["October", "october-2024"]}, + convert_to_utc("2024/12/20", "21:30", 5): {"function": wrapper_function_pce, "args": ["November", "november-2024"]}, + convert_to_utc("2024/06/07", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["MAY"]}, + convert_to_utc("2024/07/05", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["JUNE"]}, + convert_to_utc("2024/08/02", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["JULY"]}, + convert_to_utc("2024/09/06", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["AUGUST"]}, + convert_to_utc("2024/10/04", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["SEPTEMBER"]}, + convert_to_utc("2024/11/01", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["OCTOBER"]}, + convert_to_utc("2024/12/06", "21:30", 5): {"function": wrapper_function_non_farm, "args": ["NOVEMBER"]}, + convert_to_utc("2024/06/13", "02:00", 5): {"function": wrapper_function_fomc, "args": ["June", "20240613a"]}, + convert_to_utc("2024/08/01", "02:00", 5): {"function": wrapper_function_fomc, "args": ["August", "20240801a"]}, + convert_to_utc("2024/09/19", "02:00", 5): {"function": wrapper_function_fomc, "args": ["September", "20240919a"]}, + convert_to_utc("2024/11/08", "02:00", 5): {"function": wrapper_function_fomc, "args": ["November", "20241108a"]}, + convert_to_utc("2024/12/19", "03:00", 5): {"function": wrapper_function_fomc, "args": ["December", "20241219a"]}, + convert_to_utc("2024/06/03", "21:07", 5): {"function": wrapper_function_pmi, "args": ["April"]}, } if __name__ == "__main__": global nonfarm_url, cpi_url, fomc_url, pce_url, options @@ -313,21 +272,22 @@ if __name__ == "__main__": fomc_url = "https://www.federalreserve.gov/newsevents/pressreleases/monetary" pce_url = "https://www.bea.gov/news/2024/personal-income-and-outlays-"#january-2024 pmi_url = "https://www.ismworld.org/supply-management-news-and-reports/reports/ism-report-on-business/pmi/"#+month/ - ''' + print("Start Time:" , datetime.fromtimestamp(time.time())) - schedule.every().day.at("10:44").do(wrapper_function_fomc, "March", "20240320a") + # schedule.every().day.at("10:44").do(wrapper_function_fomc, "March", "20240320a") for times, task in schedules.items(): func = task["function"] args = task["args"] - schedule.every().day.at(times.split(" ")[1]).do(func, *args) + print(f"Schedule {func.__name__} at {times}") + schedule.every().day.at(times.split(" ")[1]).do(threading.Thread(target=func, args=args).start) while True: schedule.run_pending() if has_broadcasted: print("Broadcast completed") + has_broadcasted = False time.sleep(0.1) # Check every 0.1 seconds - ''' - broadcast_pmi("april") + #NonFarm # date , message = find_non_farm(nonfarm_url) # print(date)