commit 35b3ceed1b9b6e8c8caa70637b0f030d9a571e3f
Author: joey0629
Date:   Tue Feb 20 12:23:17 2024 +0800

    Init

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/ChatBot_Indicator.iml b/.idea/ChatBot_Indicator.iml
new file mode 100644
index 0000000..637816a
--- /dev/null
+++ b/.idea/ChatBot_Indicator.iml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..09aa061
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..f1617ce
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..2480af5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.10
+
+# Install Python and pip
+RUN apt-get update && apt-get install -y python3 python3-pip
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the dependencies file to the working directory
+COPY requirements.txt .
+
+# Install any dependencies
+RUN pip3 install -r requirements.txt
+
+# Copy the content of the local src directory to the working directory in the container
+COPY . .
\ No newline at end of file
diff --git a/ai_assistant.py b/ai_assistant.py
new file mode 100644
index 0000000..4405e16
--- /dev/null
+++ b/ai_assistant.py
@@ -0,0 +1,95 @@
+import openai
+from openai import OpenAI
+import tiktoken
+from config import Config
+from colorama import Fore, Style
+import time
+class AI_assistant:
+    def __init__(self, cfg: Config):
+        openai.api_key = cfg.open_ai_key
+# openai.proxy = cfg.open_ai_proxy
+        self._chat_model = cfg.open_ai_chat_model
+        self._use_stream = cfg.use_stream
+        self._encoding = tiktoken.encoding_for_model('gpt-4-1106-preview')
+        self._language = cfg.language
+        self._temperature = cfg.temperature
+        self.client = OpenAI(api_key=cfg.open_ai_key)
+        self.assistant = None
+        self.thread = None
+        self.run = None
+    def check_run(self, thread_id, run_id):
+        while True:
+            # Refresh the run object to get the latest status
+            run = self.client.beta.threads.runs.retrieve(
+                thread_id=thread_id,
+                run_id=run_id
+            )
+
+            if run.status == "completed":
+                print(f"{Fore.GREEN} Run is completed.{Style.RESET_ALL}")
+                break
+            elif run.status == "expired":
+                print(f"{Fore.RED}Run is expired.{Style.RESET_ALL}")
+                break
+            else:
+                print(f"{Fore.YELLOW} OpenAI: Run is not yet completed. Waiting...{run.status} {Style.RESET_ALL}")
+                time.sleep(3)  # Wait for 3 seconds before checking again
+    def create_assistant(self, name, instructions, tools, files):
+        self.assistant = self.client.beta.assistants.create(
+            name=name,
+            instructions=instructions,
+            tools=tools,
+            model=self._chat_model,
+            file_ids=files
+        )
+    def create_thread(self):
+        self.thread = self.client.beta.threads.create()
+    def add_message_to_thread(self, role, content):
+        self.client.beta.threads.messages.create(
+            thread_id=self.thread.id,
+            role=role,
+            content=content
+        )
+    def run_assistant(self, instructions):
+        self.run = self.client.beta.threads.runs.create(
+            thread_id=self.thread.id,
+            assistant_id=self.assistant.id,
+            instructions=instructions
+        )
+    def process_messages(self):
+        messages = self.client.beta.threads.messages.list(thread_id=self.thread.id)
+        total_price = 0
+        ans = ""
+        for msg in messages.data:
+            role = msg.role
+            content = msg.content[0].text.value
+            if role == "user":
+                total_price = total_price + self._num_tokens_from_string(content)/1000*0.01
+            elif role == "assistant":
+                total_price = total_price + self._num_tokens_from_string(content)/1000*0.03
+                ans = content
+        return total_price, ans
+    def upload_file(self, file_path):
+        # Upload the file to the thread
+        if file_path != "":
+            file = self.client.files.create(
+                file=open(file_path, "rb"),
+                purpose='assistants'
+            )
+            print("File successfully uploaded. File ID :", file.id)
+
+            return file.id
+    def get_files(self):
+        lists = self.client.files.list()
+        files_id = []
+        for list in lists:
+            files_id.append(list.id)
+        return files_id
+    def delete_all_files(self):
+        files_id = self.get_files()
+        for id in files_id:
+            self.client.files.delete(id=id)
+    def _num_tokens_from_string(self, string: str) -> int:
+        """Returns the number of tokens in a text string."""
+        num_tokens = len(self._encoding.encode(string))
+        return num_tokens
\ No newline at end of file
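Note on check_run above: the loop only exits on the "completed" and "expired" statuses, so a run that ends as "failed" or "cancelled" would keep it polling forever. A minimal, more defensive sketch (the helper name, timeout and poll interval are illustrative and not part of the commit; it assumes the same OpenAI client object):

import time

TERMINAL_STATUSES = {"completed", "failed", "cancelled", "expired"}  # terminal run states

def wait_for_run(client, thread_id, run_id, max_wait=120, poll_interval=3):
    """Poll a run until it reaches a terminal status or max_wait seconds elapse."""
    deadline = time.time() + max_wait
    while time.time() < deadline:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run.status in TERMINAL_STATUSES:
            return run.status
        time.sleep(poll_interval)  # wait before polling again
    return "timed_out"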
diff --git a/chrome.exe b/chrome.exe
new file mode 100644
index 0000000..33c589b
Binary files /dev/null and b/chrome.exe differ
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..5352452
--- /dev/null
+++ b/config.json
@@ -0,0 +1,15 @@
+{
+  "open_ai_key": "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",
+  "temperature": 0.25,
+  "language": "Traditional Chinese",
+  "open_ai_chat_model": "gpt-4-1106-preview",
+  "use_stream": false,
+  "use_postgres": false,
+  "index_path": "./temp",
+  "postgres_url": "postgresql://localhost:5432/mydb",
+  "mode": "tg_bot",
+  "api_port": 9531,
+  "api_host": "localhost",
+  "webui_port": 8009,
+  "webui_host": "localhost"
+}
\ No newline at end of file
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..a551158
--- /dev/null
+++ b/config.py
@@ -0,0 +1,37 @@
+import json
+import os
+
+
+class Config:
+    def __init__(self):
+        config_path = os.path.join(os.path.dirname(__file__), 'config.json')
+        if not os.path.exists(config_path):
+            raise FileNotFoundError(f'config.json not found at {config_path}, '
+                                    f'please copy config.example.json to config.json and modify it.')
+        with open(config_path, 'r') as f:
+            self.config = json.load(f)
+        self.language = self.config.get('language', 'Chinese')
+        self.open_ai_key = self.config.get('open_ai_key')
+        self.open_ai_proxy = self.config.get('open_ai_proxy')
+        self.open_ai_chat_model = self.config.get('open_ai_chat_model', 'gpt-3.5-turbo')
+        if not self.open_ai_key:
+            raise ValueError('open_ai_key is not set')
+        self.temperature = self.config.get('temperature', 0.1)
+        if self.temperature < 0 or self.temperature > 1:
+            raise ValueError('temperature must be between 0 and 1; lower is more conservative, higher is more creative')
+        self.use_stream = self.config.get('use_stream', False)
+        self.use_postgres = self.config.get('use_postgres', False)
+        if not self.use_postgres:
+            self.index_path = self.config.get('index_path', './temp')
+            os.makedirs(self.index_path, exist_ok=True)
+        self.postgres_url = self.config.get('postgres_url')
+        if self.use_postgres and self.postgres_url is None:
+            raise ValueError('postgres_url is not set')
+        self.modes = self.config.get('mode', 'webui').split(',')
+        for mode in self.modes:
+            if mode not in ['console', 'api', 'webui', 'train_model', 'tg_bot', 'console_assistant']:
+                raise ValueError('mode must be one of: console, api, webui, train_model, tg_bot, console_assistant')
+        self.api_port = self.config.get('api_port', 9531)
+        self.api_host = self.config.get('api_host', 'localhost')
+        self.webui_port = self.config.get('webui_port', 7860)
+        self.webui_host = self.config.get('webui_host', '0.0.0.0')
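For reference, a minimal sketch of how this Config class is consumed (the attribute names come from config.py above; the surrounding lines are illustrative):

from config import Config

cfg = Config()  # raises FileNotFoundError / ValueError if config.json is missing or invalid
print(cfg.open_ai_chat_model, cfg.temperature, cfg.modes)
if 'tg_bot' in cfg.modes:
    print('Telegram bot mode is enabled')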
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..62f7e3e
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,7 @@
+version: '3'
+services:
+  app:
+    build: .
+    volumes:
+      - .:/app
+    command: python3 main.py
\ No newline at end of file
diff --git a/empsit/empsit_10_23.pdf b/empsit/empsit_10_23.pdf
new file mode 100644
index 0000000..d333564
Binary files /dev/null and b/empsit/empsit_10_23.pdf differ
diff --git a/empsit/empsit_11_23.pdf b/empsit/empsit_11_23.pdf
new file mode 100644
index 0000000..8b75411
Binary files /dev/null and b/empsit/empsit_11_23.pdf differ
diff --git a/empsit/empsit_12_23.pdf b/empsit/empsit_12_23.pdf
new file mode 100644
index 0000000..de397c2
Binary files /dev/null and b/empsit/empsit_12_23.pdf differ
diff --git a/empsit/empsit_1_24.pdf b/empsit/empsit_1_24.pdf
new file mode 100644
index 0000000..099c2ef
Binary files /dev/null and b/empsit/empsit_1_24.pdf differ
diff --git a/empsit/empsit_7_23.pdf b/empsit/empsit_7_23.pdf
new file mode 100644
index 0000000..e2f8082
Binary files /dev/null and b/empsit/empsit_7_23.pdf differ
diff --git a/empsit/empsit_8_23.pdf b/empsit/empsit_8_23.pdf
new file mode 100644
index 0000000..118c63f
Binary files /dev/null and b/empsit/empsit_8_23.pdf differ
diff --git a/empsit/empsit_9_23.pdf b/empsit/empsit_9_23.pdf
new file mode 100644
index 0000000..54b4ab3
Binary files /dev/null and b/empsit/empsit_9_23.pdf differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..84c404b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,142 @@
+
+import re
+import telebot
+from config import Config
+from ai_assistant import AI_assistant
+import pdfplumber
+import time
+# Initialize a telebot.TeleBot object with the token and keep it in the bot variable
+
+BOT_TOKEN = '6701395239:AAFE30dqvNihDdni9vYoAbWssO-X5yAmwho'
+# BOT_TOKEN = "6746720034:AAEMaoV2FwIZ8pz_PF18-bo2a6gFC1eVtVs"
+#BOT_TOKEN = '6589162555:AAHGhrTQ0wYNtIUySMohnfpxQl1d6blr24Q'
+bot = telebot.TeleBot(BOT_TOKEN)
+
+user_url = None
+user_answer = None
+
+
+
+
+# Message handler for incoming Medium URLs
+# Medium also serves short links, so the QFFERS-specific check is left out for now
+@bot.message_handler(func=lambda msg: re.search(r'http[s]?://(www\.)?(link\.)?medium\.com/', msg.text) if msg.text else False)
+def handle_medium_url(message):
+    # bot.reply_to(message, "想了解什麼訊息呢?問問我吧!")  # not sure whether replying up here would be better
+
+    global user_url, identify
+    user_url = message.text
+    #contents, lang, identify = get_contents(user_url)
+    #save_to_storage(contents, identify)
+    bot.reply_to(message, "想了解什麼訊息呢?問問我吧!")
+
+@bot.message_handler(func=lambda msg: re.search(r'http[s]?://', msg.text) if msg.text else False)
+def handle_other_url(message):
+    bot.reply_to(message, "此網頁不支援唷😂😂\n請試試看輸入 https://link.medium.com/rxe98Z708Db ", disable_web_page_preview=True)
+
+
+
+# Message handler for greetings: reply with an introduction
+@bot.message_handler(func=lambda msg: msg.text.lower() in ["hi", "hello", "嗨", "你好", "早上好", "晚上好", "早安", "晚安", "介紹", "誰"])
+def reply_all(message):
+    print(message)  # print the full update to see what it contains
+    print(message.text)  # just the text
+    user_first_name = message.from_user.first_name
+    intro = f'嗨, {user_first_name}!👋👋\n我們是睿富者(QFFERS)\n歡迎到我們的Medium文章看看~ https://link.medium.com/rxe98Z708Db \n \n也歡迎試著貼上Medium文章網址問個問題吧!😂😂\n'
+    result = start()
+    print(result)
+    bot.reply_to(message, result, disable_web_page_preview=True)
+
+@bot.message_handler(func=lambda msg: True)
+def handle_user_answer(message):
+    global user_answer, startime
+    user_answer = message.text
+    # result = answering(user_answer)
+
+    result = start()
+    print("This is result", result)
+    end = time.time()
+    print("Time: ", end-startime)
+    bot.reply_to(message, result)
+
+# Using assistant API to answer question
+def answering(query):
+    files = ai_assistant.get_files()
+    ai_assistant.create_assistant(
+        name="QFFERS Bot",
+        instructions="你是一個天問Bot機器人,你的任務是請基於用戶上傳的PDF上找尋用戶所要找尋的答案、數值。"
+                     "任務說明:用戶提問時,請仔細分析問題並提供基於上傳PDF。如果答案來自PDF檔案請提供該篇PDF的段落,若沒有資料請回答:我不知道",
+        tools=[{"type": "retrieval"}],
+        files=files
+    )
+    ai_assistant.create_thread()
+    ai_assistant.add_message_to_thread(
+        role="user",
+        content=query
+    )
+    ai_assistant.run_assistant(
+        instructions="Please use the user's language to answer the question. You can only answer according to the uploaded files.")
+    ai_assistant.check_run(thread_id=ai_assistant.thread.id, run_id=ai_assistant.run.id)
+    total_price, content = ai_assistant.process_messages()
+    return content
+
+def start():
+    global startime
+    startime = time.time()
+    url = "https://www.bea.gov/news/2024/personal-income-and-outlays-december-2023"
+    text = find_indicate(url)
+    return text
+
+def find_indicate(url):
+    from selenium import webdriver
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.chrome.options import Options
+    import time
+    options = Options()
+    # options.add_argument("--headless")  # enable headless mode
+    driver = webdriver.Chrome(options=options)
+    driver.get(url)
+    # time.sleep(3)
+    chat = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]/td[6]')
+    return chat.text
+
+# infinity_polling runs a loop that keeps pulling new messages from the Telegram servers
+# and dispatches them to the message handlers defined above.
+
+def tg_bot(cfg: Config):
+    """Run the Telegram bot."""
+    global ai_assistant, storage, bot
+    print("Starting Telegram bot...")
+    # ai_assistant = AI_assistant(cfg)
+    # Start the Telegram bot
+    bot.infinity_polling()
+# Non-farm payroll employment
+def read_pdf_nonfarm(month, year):
+    pdf = pdfplumber.open(f"empsit/empsit_{month}_{year}.pdf")
+    page = pdf.pages[0]
+    text = page.extract_text().split('\n')
+    text = (text[7]+text[8]).split(',')
+    text = text[0]+text[1]+text[2]
+    return text
+def read_nonfarm():
+    startimee = time.time()
+    for i in range(7, 13):
+        print(f"2023年{i}月非農就業人數: ", end="")
+        print(read_pdf_nonfarm(i, 23))
+
+    endtimee = time.time()
+    print("Time: ", endtimee-startimee)
+    # print(text.split('\n')[7:9])
+if __name__ == "__main__":
+    # Non-farm payrolls
+    startimee = time.time()
+    print(f"2023年7月非農就業人數: ", end="")
+    print(read_pdf_nonfarm(7, 23))
+    endtimee = time.time()
+    print("Time_NonFarm: ", endtimee-startimee)
+    startimee = time.time()
+    print(find_indicate("https://www.bea.gov/news/2024/personal-income-and-outlays-december-2023"))
+    endtimee = time.time()
+    print("Time_Indicator: ", endtimee-startimee)
+    # cfg = Config()
+    # tg_bot(cfg)
\ No newline at end of file
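find_indicate in main.py grabs the BEA table cell with a hard-coded XPath and no explicit wait, which is timing-sensitive. A sketch of a more defensive variant, assuming the same page structure and XPath (the headless flag, timeout and helper name are illustrative choices, not taken from the commit):

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

INDICATOR_XPATH = '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]/td[6]'  # same cell as find_indicate

def find_indicate_with_wait(url, timeout=15):
    """Scrape the indicator cell, waiting for it to render instead of reading it immediately."""
    options = Options()
    options.add_argument("--headless=new")  # run without opening a browser window
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        cell = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, INDICATOR_XPATH))
        )
        return cell.text
    finally:
        driver.quit()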
print("Time_NonFarm: ", endtimee-startimee) + # cfg = Config() + # tg_bot(cfg) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e3909c4 Binary files /dev/null and b/requirements.txt differ