commit 35b3ceed1b9b6e8c8caa70637b0f030d9a571e3f
Author: joey0629
Date:   Tue Feb 20 12:23:17 2024 +0800

    Init

diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/ChatBot_Indicator.iml b/.idea/ChatBot_Indicator.iml
new file mode 100644
index 0000000..637816a
--- /dev/null
+++ b/.idea/ChatBot_Indicator.iml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..09aa061
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..f1617ce
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..2480af5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.10
+
+# Install Python and pip
+RUN apt-get update && apt-get install -y python3 python3-pip
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the dependencies file to the working directory
+COPY requirements.txt .
+
+# Install any dependencies
+RUN pip3 install -r requirements.txt
+
+# Copy the content of the local src directory to the working directory in the container
+COPY . .
\ No newline at end of file
diff --git a/ai_assistant.py b/ai_assistant.py
new file mode 100644
index 0000000..4405e16
--- /dev/null
+++ b/ai_assistant.py
@@ -0,0 +1,95 @@
+import openai
+from openai import OpenAI
+import tiktoken
+from config import Config
+from colorama import Fore, Style
+import time
+class AI_assistant:
+    def __init__(self, cfg: Config):
+        openai.api_key = cfg.open_ai_key
+# openai.proxy = cfg.open_ai_proxy
+        self._chat_model = cfg.open_ai_chat_model
+        self._use_stream = cfg.use_stream
+        self._encoding = tiktoken.encoding_for_model('gpt-4-1106-preview')
+        self._language = cfg.language
+        self._temperature = cfg.temperature
+        self.client = OpenAI(api_key=cfg.open_ai_key)
+        self.assistant = None
+        self.thread = None
+        self.run = None
+    def check_run(self, thread_id, run_id):
+        while True:
+            # Refresh the run object to get the latest status
+            run = self.client.beta.threads.runs.retrieve(
+                thread_id=thread_id,
+                run_id=run_id
+            )
+
+            if run.status == "completed":
+                print(f"{Fore.GREEN} Run is completed.{Style.RESET_ALL}")
+                break
+            elif run.status == "expired":
+                print(f"{Fore.RED}Run is expired.{Style.RESET_ALL}")
+                break
+            else:
+                print(f"{Fore.YELLOW} OpenAI: Run is not yet completed. Waiting...{run.status} {Style.RESET_ALL}")
+                time.sleep(3)  # Wait for 3 seconds before checking again
+    def create_assistant(self, name, instructions, tools, files):
+        self.assistant = self.client.beta.assistants.create(
+            name=name,
+            instructions=instructions,
+            tools=tools,
+            model=self._chat_model,
+            file_ids=files
+        )
+    def create_thread(self):
+        self.thread = self.client.beta.threads.create()
+    def add_message_to_thread(self, role, content):
+        self.client.beta.threads.messages.create(
+            thread_id=self.thread.id,
+            role=role,
+            content=content
+        )
+    def run_assistant(self, instructions):
+        self.run = self.client.beta.threads.runs.create(
+            thread_id=self.thread.id,
+            assistant_id=self.assistant.id,
+            instructions=instructions
+        )
+    def process_messages(self):
+        messages = self.client.beta.threads.messages.list(thread_id=self.thread.id)
+        total_price = 0
+        ans = ""
+        for msg in messages.data:
+            role = msg.role
+            content = msg.content[0].text.value
+            if role == "user":
+                total_price = total_price + self._num_tokens_from_string(content)/1000*0.01
+            elif role == "assistant":
+                total_price = total_price + self._num_tokens_from_string(content)/1000*0.03
+                ans = content
+        return total_price, ans
+    def upload_file(self, file_path):
+        # Upload the file to the thread
+        if file_path != "":
+            file = self.client.files.create(
+                file=open(file_path, "rb"),
+                purpose='assistants'
+            )
+            print("File successfully uploaded. File ID :", file.id)
+
+            return file.id
+    def get_files(self):
+        lists = self.client.files.list()
+        files_id = []
+        for list in lists:
+            files_id.append(list.id)
+        return files_id
+    def delete_all_files(self):
+        files_id = self.get_files()
+        for id in files_id:
+            self.client.files.delete(id=id)
+    def _num_tokens_from_string(self, string: str) -> int:
+        """Returns the number of tokens in a text string."""
+        num_tokens = len(self._encoding.encode(string))
+        return num_tokens
\ No newline at end of file
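Note on check_run above: the loop only exits on the "completed" and "expired" statuses, so a run that ends as "failed" or "cancelled" would keep it polling forever. A minimal, more defensive sketch (the helper name, timeout and poll interval are illustrative and not part of the commit; it assumes the same OpenAI client object):

import time

TERMINAL_STATUSES = {"completed", "failed", "cancelled", "expired"}  # terminal run states

def wait_for_run(client, thread_id, run_id, max_wait=120, poll_interval=3):
    """Poll a run until it reaches a terminal status or max_wait seconds elapse."""
    deadline = time.time() + max_wait
    while time.time() < deadline:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run.status in TERMINAL_STATUSES:
            return run.status
        time.sleep(poll_interval)  # wait before polling again
    return "timed_out"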
diff --git a/chrome.exe b/chrome.exe
new file mode 100644
index 0000000..33c589b
Binary files /dev/null and b/chrome.exe differ
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..5352452
--- /dev/null
+++ b/config.json
@@ -0,0 +1,15 @@
+{
+  "open_ai_key": "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",
+  "temperature": 0.25,
+  "language": "Traditional Chinese",
+  "open_ai_chat_model": "gpt-4-1106-preview",
+  "use_stream": false,
+  "use_postgres": false,
+  "index_path": "./temp",
+  "postgres_url": "postgresql://localhost:5432/mydb",
+  "mode": "tg_bot",
+  "api_port": 9531,
+  "api_host": "localhost",
+  "webui_port": 8009,
+  "webui_host": "localhost"
+}
\ No newline at end of file
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..a551158
--- /dev/null
+++ b/config.py
@@ -0,0 +1,37 @@
+import json
+import os
+
+
+class Config:
+    def __init__(self):
+        config_path = os.path.join(os.path.dirname(__file__), 'config.json')
+        if not os.path.exists(config_path):
+            raise FileNotFoundError(f'config.json not found at {config_path}, '
+                                    f'please copy config.example.json to config.json and modify it.')
+        with open(config_path, 'r') as f:
+            self.config = json.load(f)
+        self.language = self.config.get('language', 'Chinese')
+        self.open_ai_key = self.config.get('open_ai_key')
+        self.open_ai_proxy = self.config.get('open_ai_proxy')
+        self.open_ai_chat_model = self.config.get('open_ai_chat_model', 'gpt-3.5-turbo')
+        if not self.open_ai_key:
+            raise ValueError('open_ai_key is not set')
+        self.temperature = self.config.get('temperature', 0.1)
+        if self.temperature < 0 or self.temperature > 1:
+            raise ValueError('temperature must be between 0 and 1; lower is more conservative, higher is more creative')
+        self.use_stream = self.config.get('use_stream', False)
+        self.use_postgres = self.config.get('use_postgres', False)
+        if not self.use_postgres:
+            self.index_path = self.config.get('index_path', './temp')
+            os.makedirs(self.index_path, exist_ok=True)
+        self.postgres_url = self.config.get('postgres_url')
+        if self.use_postgres and self.postgres_url is None:
+            raise ValueError('postgres_url is not set')
+        self.modes = self.config.get('mode', 'webui').split(',')
+        for mode in self.modes:
+            if mode not in ['console', 'api', 'webui', 'train_model', 'tg_bot', 'console_assistant']:
+                raise ValueError('mode must be one of: console, api, webui, train_model, tg_bot, console_assistant')
+        self.api_port = self.config.get('api_port', 9531)
+        self.api_host = self.config.get('api_host', 'localhost')
+        self.webui_port = self.config.get('webui_port', 7860)
+        self.webui_host = self.config.get('webui_host', '0.0.0.0')
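For reference, a minimal sketch of how this Config class is consumed (the attribute names come from config.py above; the surrounding lines are illustrative):

from config import Config

cfg = Config()  # raises FileNotFoundError / ValueError if config.json is missing or invalid
print(cfg.open_ai_chat_model, cfg.temperature, cfg.modes)
if 'tg_bot' in cfg.modes:
    print('Telegram bot mode is enabled')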
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..62f7e3e
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,7 @@
+version: '3'
+services:
+  app:
+    build: .
+    volumes:
+      - .:/app
+    command: python3 main.py
\ No newline at end of file
diff --git a/empsit/empsit_10_23.pdf b/empsit/empsit_10_23.pdf
new file mode 100644
index 0000000..d333564
Binary files /dev/null and b/empsit/empsit_10_23.pdf differ
diff --git a/empsit/empsit_11_23.pdf b/empsit/empsit_11_23.pdf
new file mode 100644
index 0000000..8b75411
Binary files /dev/null and b/empsit/empsit_11_23.pdf differ
diff --git a/empsit/empsit_12_23.pdf b/empsit/empsit_12_23.pdf
new file mode 100644
index 0000000..de397c2
Binary files /dev/null and b/empsit/empsit_12_23.pdf differ
diff --git a/empsit/empsit_1_24.pdf b/empsit/empsit_1_24.pdf
new file mode 100644
index 0000000..099c2ef
Binary files /dev/null and b/empsit/empsit_1_24.pdf differ
diff --git a/empsit/empsit_7_23.pdf b/empsit/empsit_7_23.pdf
new file mode 100644
index 0000000..e2f8082
Binary files /dev/null and b/empsit/empsit_7_23.pdf differ
diff --git a/empsit/empsit_8_23.pdf b/empsit/empsit_8_23.pdf
new file mode 100644
index 0000000..118c63f
Binary files /dev/null and b/empsit/empsit_8_23.pdf differ
diff --git a/empsit/empsit_9_23.pdf b/empsit/empsit_9_23.pdf
new file mode 100644
index 0000000..54b4ab3
Binary files /dev/null and b/empsit/empsit_9_23.pdf differ
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..84c404b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,142 @@
+
+import re
+import telebot
+from config import Config
+from ai_assistant import AI_assistant
+import pdfplumber
+import time
+# Initialize a telebot.TeleBot object with the token and keep it in the bot variable
+
+BOT_TOKEN = '6701395239:AAFE30dqvNihDdni9vYoAbWssO-X5yAmwho'
+# BOT_TOKEN = "6746720034:AAEMaoV2FwIZ8pz_PF18-bo2a6gFC1eVtVs"
+#BOT_TOKEN = '6589162555:AAHGhrTQ0wYNtIUySMohnfpxQl1d6blr24Q'
+bot = telebot.TeleBot(BOT_TOKEN)
+
+user_url = None
+user_answer = None
+
+
+
+
+# Message handler for incoming Medium URLs
+# Medium also serves short links, so the QFFERS-specific check is left out for now
+@bot.message_handler(func=lambda msg: re.search(r'http[s]?://(www\.)?(link\.)?medium\.com/', msg.text) if msg.text else False)
+def handle_medium_url(message):
+    # bot.reply_to(message, "想了解什麼訊息呢?問問我吧!")  # not sure whether replying up here would be better
+
+    global user_url, identify
+    user_url = message.text
+    #contents, lang, identify = get_contents(user_url)
+    #save_to_storage(contents, identify)
+    bot.reply_to(message, "想了解什麼訊息呢?問問我吧!")
+
+@bot.message_handler(func=lambda msg: re.search(r'http[s]?://', msg.text) if msg.text else False)
+def handle_other_url(message):
+    bot.reply_to(message, "此網頁不支援唷😂😂\n請試試看輸入 https://link.medium.com/rxe98Z708Db ", disable_web_page_preview=True)
+
+
+
+# Message handler for greetings: reply with an introduction
+@bot.message_handler(func=lambda msg: msg.text.lower() in ["hi", "hello", "嗨", "你好", "早上好", "晚上好", "早安", "晚安", "介紹", "誰"])
+def reply_all(message):
+    print(message)  # print the full update to see what it contains
+    print(message.text)  # just the text
+    user_first_name = message.from_user.first_name
+    intro = f'嗨, {user_first_name}!👋👋\n我們是睿富者(QFFERS)\n歡迎到我們的Medium文章看看~ https://link.medium.com/rxe98Z708Db \n \n也歡迎試著貼上Medium文章網址問個問題吧!😂😂\n'
+    result = start()
+    print(result)
+    bot.reply_to(message, result, disable_web_page_preview=True)
+
+@bot.message_handler(func=lambda msg: True)
+def handle_user_answer(message):
+    global user_answer, startime
+    user_answer = message.text
+    # result = answering(user_answer)
+
+    result = start()
+    print("This is result", result)
+    end = time.time()
+    print("Time: ", end-startime)
+    bot.reply_to(message, result)
+
+# Using assistant API to answer question
+def answering(query):
+    files = ai_assistant.get_files()
+    ai_assistant.create_assistant(
+        name="QFFERS Bot",
+        instructions="你是一個天問Bot機器人,你的任務是請基於用戶上傳的PDF上找尋用戶所要找尋的答案、數值。"
+                     "任務說明:用戶提問時,請仔細分析問題並提供基於上傳PDF。如果答案來自PDF檔案請提供該篇PDF的段落,若沒有資料請回答:我不知道",
+        tools=[{"type": "retrieval"}],
+        files=files
+    )
+    ai_assistant.create_thread()
+    ai_assistant.add_message_to_thread(
+        role="user",
+        content=query
+    )
+    ai_assistant.run_assistant(
+        instructions="Please use the user's language to answer the question. You can only answer according to the uploaded files.")
+    ai_assistant.check_run(thread_id=ai_assistant.thread.id, run_id=ai_assistant.run.id)
+    total_price, content = ai_assistant.process_messages()
+    return content
+
+def start():
+    global startime
+    startime = time.time()
+    url = "https://www.bea.gov/news/2024/personal-income-and-outlays-december-2023"
+    text = find_indicate(url)
+    return text
+
+def find_indicate(url):
+    from selenium import webdriver
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.chrome.options import Options
+    import time
+    options = Options()
+    # options.add_argument("--headless")  # enable headless mode
+    driver = webdriver.Chrome(options=options)
+    driver.get(url)
+    # time.sleep(3)
+    chat = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]/td[6]')
+    return chat.text
+
+# infinity_polling runs a loop that keeps pulling new messages from the Telegram servers
+# and dispatches them to the message handlers defined above.
+
+def tg_bot(cfg: Config):
+    """Run the Telegram bot."""
+    global ai_assistant, storage, bot
+    print("Starting Telegram bot...")
+    # ai_assistant = AI_assistant(cfg)
+    # Start the Telegram bot
+    bot.infinity_polling()
+# Non-farm payroll employment
+def read_pdf_nonfarm(month, year):
+    pdf = pdfplumber.open(f"empsit/empsit_{month}_{year}.pdf")
+    page = pdf.pages[0]
+    text = page.extract_text().split('\n')
+    text = (text[7]+text[8]).split(',')
+    text = text[0]+text[1]+text[2]
+    return text
+def read_nonfarm():
+    startimee = time.time()
+    for i in range(7, 13):
+        print(f"2023年{i}月非農就業人數: ", end="")
+        print(read_pdf_nonfarm(i, 23))
+
+    endtimee = time.time()
+    print("Time: ", endtimee-startimee)
+    # print(text.split('\n')[7:9])
+if __name__ == "__main__":
+    # Non-farm payrolls
+    startimee = time.time()
+    print(f"2023年7月非農就業人數: ", end="")
+    print(read_pdf_nonfarm(7, 23))
+    endtimee = time.time()
+    print("Time_NonFarm: ", endtimee-startimee)
+    startimee = time.time()
+    print(find_indicate("https://www.bea.gov/news/2024/personal-income-and-outlays-december-2023"))
+    endtimee = time.time()
+    print("Time_Indicator: ", endtimee-startimee)
+    # cfg = Config()
+    # tg_bot(cfg)
\ No newline at end of file
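find_indicate in main.py grabs the BEA table cell with a hard-coded XPath and no explicit wait, which is timing-sensitive. A sketch of a more defensive variant, assuming the same page structure and XPath (the headless flag, timeout and helper name are illustrative choices, not taken from the commit):

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

INDICATOR_XPATH = '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]/td[6]'  # same cell as find_indicate

def find_indicate_with_wait(url, timeout=15):
    """Scrape the indicator cell, waiting for it to render instead of reading it immediately."""
    options = Options()
    options.add_argument("--headless=new")  # run without opening a browser window
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        cell = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, INDICATOR_XPATH))
        )
        return cell.text
    finally:
        driver.quit()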
print("Time_NonFarm: ", endtimee-startimee) + # cfg = Config() + # tg_bot(cfg) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e3909c4 Binary files /dev/null and b/requirements.txt differ