更新 date non farm 抓取方法

master
joey0629 10 months ago
parent 2fd7100854
commit 24bff1dc6f
  1. 56
      main.py

@ -32,7 +32,23 @@ def broadcast_message(message:str,chat_id:str):
},
)
def find_indices(lst):
    """Return slice bounds for the lines that follow a pair of blank entries.

    The list is scanned for empty strings (``''``).  A "pair" is two empty
    strings at adjacent positions.  The second such pair is used when there
    is more than one; otherwise the only pair is used.

    Args:
        lst: Sequence of strings (e.g. the lines of a text block).

    Returns:
        A ``(start, end)`` tuple where ``start`` is the index just past the
        chosen pair and ``end`` is the index of the next empty string after
        that pair, so ``lst[start:end]`` is the run of lines between them.

    Raises:
        ValueError: If ``lst`` contains no two adjacent empty strings.
        IndexError: If no empty string follows the chosen pair.
    """
    # Indices of every empty string in the input.
    empty_indices = [i for i, x in enumerate(lst) if x == '']

    # Positions *within empty_indices* at which an adjacent pair begins.
    pair_positions = [
        pos
        for pos in range(len(empty_indices) - 1)
        if empty_indices[pos + 1] - empty_indices[pos] == 1
    ]
    if not pair_positions:
        raise ValueError(
            "find_indices: no two consecutive empty strings in input"
        )

    # Prefer the second pair; fall back to the first when it is the only one.
    pair_pos = pair_positions[1] if len(pair_positions) > 1 else pair_positions[0]

    # The pair occupies two slots of empty_indices, so the next empty string
    # after it sits two slots further on.
    if pair_pos + 2 >= len(empty_indices):
        raise IndexError(
            "find_indices: no empty string after the consecutive pair"
        )

    start_index = empty_indices[pair_pos]
    end_index = empty_indices[pair_pos + 2]
    return start_index + 2, end_index
def find_cpi(driver):
try:
driver.get(cpi_url)
@ -59,18 +75,14 @@ def find_fomc(driver, date:str):
# NOTE(review): this span appears to be a rendered diff hunk whose +/- markers
# and indentation were lost, so superseded and replacement statements may be
# shown together (two `date =`, two `value =`, two `return` lines). Confirm
# statement order and nesting against the actual file before relying on it.
def find_non_farm(driver):
# Loads `nonfarm_url` in the given driver, reads text from a <pre> element
# located by absolute XPath, and returns a (driver, date, lines) tuple.
# On any exception the error is printed and (driver, None, None) is returned.
try:
driver.get(nonfarm_url)
# Runs a scroll-to-bottom script up to 100 times.
# NOTE(review): which of the following lines belong to the loop body cannot
# be determined here because indentation is missing.
for _ in range(100):
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
# print all of the page source that was loaded
print(driver.page_source.encode("utf-8"))
print("Open")
# Wait for the element to appear
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre')))
# Takes the sixth space-separated token of the page title as the date.
date = driver.title.split(" ")[5]
# Splits the <pre> text into lines and slices out the range chosen by
# find_indices().
value = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre').text.split('\n')
start , end = find_indices(value)
result = value[start:end]
# The next two assignments overwrite `date` and `value` with fixed line
# positions (line 6, lines 8-11) of the same <pre> text.
# NOTE(review): presumably the older, superseded extraction — confirm.
date = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre').text.split('\n')[6]
value = driver.find_element(By.XPATH, '/html/body/div[2]/div[5]/div/div[1]/pre').text.split('\n')[8:12]
return driver, date, value
# NOTE(review): a second return directly after the first; as written it would
# be unreachable. Presumably only one of the two exists in the real file.
return driver, date, result
except Exception as e:
# Best-effort: report the failure and signal it with None placeholders.
print(f"Error in find_non_farm: {e}")
return driver, None, None
@ -276,14 +288,14 @@ schedules = {
convert_to_utc("2024/10/31", "20:30", 5): {"function": wrapper_function_pce, "args": ["September", "september-2024"]},
convert_to_utc("2024/11/27", "21:30", 5): {"function": wrapper_function_pce, "args": ["October", "october-2024"]},
convert_to_utc("2024/12/20", "21:30", 5): {"function": wrapper_function_pce, "args": ["November", "november-2024"]},
convert_to_utc("2024/06/05", "17:30", 5): {"function": wrapper_function_non_farm, "args": ["APRIL"]},
convert_to_utc("2024/06/07", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["MAY"]},
convert_to_utc("2024/07/05", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["JUNE"]},
convert_to_utc("2024/08/02", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["JULY"]},
convert_to_utc("2024/09/06", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["AUGUST"]},
convert_to_utc("2024/10/04", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["SEPTEMBER"]},
convert_to_utc("2024/11/01", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["OCTOBER"]},
convert_to_utc("2024/12/06", "21:30", 5): {"function": wrapper_function_non_farm, "args": ["NOVEMBER"]},
convert_to_utc("2024/06/06", "09:33", 5): {"function": wrapper_function_non_farm, "args": ["M04"]},
convert_to_utc("2024/06/07", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["M05"]},
convert_to_utc("2024/07/05", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["M06"]},
convert_to_utc("2024/08/02", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["M07"]},
convert_to_utc("2024/09/06", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["M08"]},
convert_to_utc("2024/10/04", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["M09"]},
convert_to_utc("2024/11/01", "20:30", 5): {"function": wrapper_function_non_farm, "args": ["M10"]},
convert_to_utc("2024/12/06", "21:30", 5): {"function": wrapper_function_non_farm, "args": ["M11"]},
convert_to_utc("2024/06/13", "02:00", 5): {"function": wrapper_function_fomc, "args": ["June", "20240613a"]},
convert_to_utc("2024/08/01", "02:00", 5): {"function": wrapper_function_fomc, "args": ["August", "20240801a"]},
convert_to_utc("2024/09/19", "02:00", 5): {"function": wrapper_function_fomc, "args": ["September", "20240919a"]},
@ -294,7 +306,7 @@ schedules = {
if __name__ == "__main__":
global nonfarm_url, cpi_url, fomc_url, pce_url, options
options = Options()
# options.add_argument('--headless')
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument("--window-size=1920,1080") # 可以根據需要調整這個大小
@ -308,7 +320,6 @@ if __name__ == "__main__":
print("Start Time:" , datetime.fromtimestamp(time.time()))
# schedule.every().day.at("10:44").do(wrapper_function_fomc, "March", "20240320a")
for times, task in schedules.items():
func = task["function"]
args = task["args"]
@ -321,10 +332,13 @@ if __name__ == "__main__":
has_broadcasted = False
time.sleep(0.1) # Check every 0.1 seconds
# NonFarm
#NonFarm
# driver = webdriver.Chrome(options=options)
# driver, date, message = find_non_farm(driver)
# print("Non Farm Date")
# print(date)
# print("Non Farm Message")
# print(message)
#CPI
# find_cpi("FEBRUARY")

Loading…
Cancel
Save