master
joey0629 1 year ago
commit 35b3ceed1b
  1. 8
      .idea/.gitignore
  2. 13
      .idea/ChatBot_Indicator.iml
  3. 6
      .idea/inspectionProfiles/profiles_settings.xml
  4. 7
      .idea/misc.xml
  5. 8
      .idea/modules.xml
  6. 6
      .idea/vcs.xml
  7. 16
      Dockerfile
  8. 95
      ai_assistant.py
  9. BIN
      chrome.exe
  10. 15
      config.json
  11. 37
      config.py
  12. 7
      docker-compose.yml
  13. BIN
      empsit/empsit_10_23.pdf
  14. BIN
      empsit/empsit_11_23.pdf
  15. BIN
      empsit/empsit_12_23.pdf
  16. BIN
      empsit/empsit_1_24.pdf
  17. BIN
      empsit/empsit_7_23.pdf
  18. BIN
      empsit/empsit_8_23.pdf
  19. BIN
      empsit/empsit_9_23.pdf
  20. 142
      main.py
  21. BIN
      requirements.txt

8
.idea/.gitignore vendored

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PackageRequirementsSettings">
<option name="modifyBaseFiles" value="true" />
</component>
</module>

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.10 (ChatBot_Indicator)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (ChatBot_Indicator)" project-jdk-type="Python SDK" />
</project>

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ChatBot_Indicator.iml" filepath="$PROJECT_DIR$/.idea/ChatBot_Indicator.iml" />
</modules>
</component>
</project>

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

@ -0,0 +1,16 @@
FROM python:3.10
# NOTE: the python:3.10 base image already ships Python and pip, so the
# original `apt-get install python3 python3-pip` layer was redundant and
# has been removed (it also bloated the image with a second interpreter).

# Set the working directory in the container
WORKDIR /app

# Copy only the dependency list first so the pip layer is cached and
# re-runs only when requirements.txt changes.
COPY requirements.txt .

# Install dependencies; --no-cache-dir keeps the layer small.
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the application source last so code edits do not invalidate the pip cache.
COPY . .

@ -0,0 +1,95 @@
import openai
from openai import OpenAI
import tiktoken
from config import Config
from colorama import Fore, Style
import time
class AI_assistant:
    """Wrapper around the OpenAI Assistants (beta) API for file-grounded Q&A.

    Owns one OpenAI client plus the current assistant/thread/run handles,
    and provides helpers to upload files, run an assistant over a thread,
    and collect the resulting messages.
    """

    # Run statuses after which polling must stop — the run will never progress.
    _TERMINAL_RUN_STATUSES = ("completed", "failed", "cancelled", "expired")
    # USD per 1K tokens for gpt-4-1106-preview (input / output respectively).
    _INPUT_PRICE_PER_1K = 0.01
    _OUTPUT_PRICE_PER_1K = 0.03
    # Seconds to wait between run-status polls.
    _POLL_INTERVAL = 3

    def __init__(self, cfg: Config):
        openai.api_key = cfg.open_ai_key
        # openai.proxy = cfg.open_ai_proxy
        self._chat_model = cfg.open_ai_chat_model
        self._use_stream = cfg.use_stream
        # Tokenizer used only for local cost estimation in process_messages().
        self._encoding = tiktoken.encoding_for_model('gpt-4-1106-preview')
        self._language = cfg.language
        self._temperature = cfg.temperature
        self.client = OpenAI(api_key=cfg.open_ai_key)
        # Populated by create_assistant / create_thread / run_assistant.
        self.assistant = None
        self.thread = None
        self.run = None

    def check_run(self, thread_id, run_id):
        """Block until the run identified by (thread_id, run_id) is terminal.

        BUG FIX: the original loop only recognised 'completed' and 'expired'
        and therefore polled forever when a run ended as 'failed' or
        'cancelled'; all terminal statuses now break the loop.
        """
        while True:
            # Refresh the run object to get the latest status.
            run = self.client.beta.threads.runs.retrieve(
                thread_id=thread_id,
                run_id=run_id
            )
            if run.status == "completed":
                print(f"{Fore.GREEN} Run is completed.{Style.RESET_ALL}")
                break
            if run.status in self._TERMINAL_RUN_STATUSES:
                # failed / cancelled / expired — report and stop polling.
                print(f"{Fore.RED}Run is {run.status}.{Style.RESET_ALL}")
                break
            print(f"{Fore.YELLOW} OpenAI: Run is not yet completed. Waiting...{run.status} {Style.RESET_ALL}")
            time.sleep(self._POLL_INTERVAL)  # wait before checking again

    def create_assistant(self, name, instructions, tools, files):
        """Create a new assistant and store it on self.assistant."""
        self.assistant = self.client.beta.assistants.create(
            name=name,
            instructions=instructions,
            tools=tools,
            model=self._chat_model,
            file_ids=files
        )

    def create_thread(self):
        """Start a fresh conversation thread (stored on self.thread)."""
        self.thread = self.client.beta.threads.create()

    def add_message_to_thread(self, role, content):
        """Append a message with the given role/content to the current thread."""
        self.client.beta.threads.messages.create(
            thread_id=self.thread.id,
            role=role,
            content=content
        )

    def run_assistant(self, instructions):
        """Launch a run of the current assistant on the current thread."""
        self.run = self.client.beta.threads.runs.create(
            thread_id=self.thread.id,
            assistant_id=self.assistant.id,
            instructions=instructions
        )

    def process_messages(self):
        """Estimate the token cost of the thread and extract an assistant reply.

        Returns:
            (total_price, ans): estimated USD cost over all messages, and the
            text of the last assistant message iterated (messages are returned
            by the API in its default order — presumably newest first, so this
            is the earliest assistant reply; TODO confirm against API docs).
        """
        messages = self.client.beta.threads.messages.list(thread_id=self.thread.id)
        total_price = 0
        ans = ""
        for msg in messages.data:
            content = msg.content[0].text.value
            if msg.role == "user":
                total_price += self._num_tokens_from_string(content) / 1000 * self._INPUT_PRICE_PER_1K
            elif msg.role == "assistant":
                total_price += self._num_tokens_from_string(content) / 1000 * self._OUTPUT_PRICE_PER_1K
                ans = content
        return total_price, ans

    def upload_file(self, file_path):
        """Upload a local file for assistant retrieval.

        Returns the new file id, or None when file_path is "" (no-op),
        matching the original implicit-None behaviour.
        """
        if file_path != "":
            # Close the handle deterministically; the original leaked it.
            with open(file_path, "rb") as fh:
                file = self.client.files.create(
                    file=fh,
                    purpose='assistants'
                )
            print("File successfully uploaded. File ID :", file.id)
            return file.id

    def get_files(self):
        """Return the ids of every file stored in the OpenAI account."""
        files_id = []
        for entry in self.client.files.list():  # renamed: `list` shadowed the builtin
            files_id.append(entry.id)
        return files_id

    def delete_all_files(self):
        """Delete every file in the OpenAI account (irreversible)."""
        for file_id in self.get_files():  # renamed: `id` shadowed the builtin
            self.client.files.delete(id=file_id)

    def _num_tokens_from_string(self, string: str) -> int:
        """Returns the number of tokens in a text string."""
        num_tokens = len(self._encoding.encode(string))
        return num_tokens

Binary file not shown.

@ -0,0 +1,15 @@
{
"open_ai_key": "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",
"temperature": 0.25,
"language": "Traditional Chinese",
"open_ai_chat_model": "gpt-4-1106-preview",
"use_stream": false,
"use_postgres": false,
"index_path": "./temp",
"postgres_url": "postgresql://localhost:5432/mydb",
"mode": "tg_bot",
"api_port": 9531,
"api_host": "localhost",
"webui_port": 8009,
"webui_host": "localhost"
}

@ -0,0 +1,37 @@
import json
import os
class Config:
    """Application configuration loaded from config.json next to this file.

    Raises:
        FileNotFoundError: if config.json is missing.
        ValueError: if open_ai_key is unset, temperature is out of range,
            postgres is enabled without a URL, or a mode is unknown.
    """

    # Every run mode the application understands.
    VALID_MODES = ('console', 'api', 'webui', 'train_model', 'tg_bot', 'console_assistant')

    def __init__(self):
        config_path = os.path.join(os.path.dirname(__file__), 'config.json')
        if not os.path.exists(config_path):
            raise FileNotFoundError(f'config.json not found at {config_path}, '
                                    f'please copy config.example.json to config.json and modify it.')
        with open(config_path, 'r') as f:
            self.config = json.load(f)
        self.language = self.config.get('language', 'Chinese')
        self.open_ai_key = self.config.get('open_ai_key')
        self.open_ai_proxy = self.config.get('open_ai_proxy')
        self.open_ai_chat_model = self.config.get('open_ai_chat_model', 'gpt-3.5-turbo')
        if not self.open_ai_key:
            raise ValueError('open_ai_key is not set')
        self.temperature = self.config.get('temperature', 0.1)
        if self.temperature < 0 or self.temperature > 1:
            raise ValueError('temperature must be between 0 and 1, less is more conservative, more is more creative')
        self.use_stream = self.config.get('use_stream', False)
        self.use_postgres = self.config.get('use_postgres', False)
        if not self.use_postgres:
            # Local index storage is used when Postgres is disabled.
            self.index_path = self.config.get('index_path', './temp')
            os.makedirs(self.index_path, exist_ok=True)
        self.postgres_url = self.config.get('postgres_url')
        if self.use_postgres and self.postgres_url is None:
            raise ValueError('postgres_url is not set')
        # 'mode' is a comma-separated list; tolerate surrounding whitespace
        # (generalization: "api, webui" was previously rejected).
        self.modes = [m.strip() for m in self.config.get('mode', 'webui').split(',')]
        for mode in self.modes:
            if mode not in self.VALID_MODES:
                # BUG FIX: the old message named only three of the six valid
                # modes and omitted the offending value.
                raise ValueError(f'invalid mode {mode!r}; must be one of: {", ".join(self.VALID_MODES)}')
        self.api_port = self.config.get('api_port', 9531)
        self.api_host = self.config.get('api_host', 'localhost')
        self.webui_port = self.config.get('webui_port', 7860)
        self.webui_host = self.config.get('webui_host', '0.0.0.0')

@ -0,0 +1,7 @@
version: '3'
services:
app:
build: .
volumes:
- .:/app
command: python3 main.py

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,142 @@
import os
import re
import time

import pdfplumber
import telebot

from ai_assistant import AI_assistant
from config import Config
# Initialise the telebot.TeleBot instance used by all handlers below.
# SECURITY: the Telegram bot token was hard-coded here (and two older tokens
# were left in comments), i.e. leaked in version control. All of them must be
# revoked via @BotFather. The token is now read from the environment; the old
# value remains only as a backward-compatible fallback until rotation is done.
BOT_TOKEN = os.environ.get('TELEGRAM_BOT_TOKEN', '6701395239:AAFE30dqvNihDdni9vYoAbWssO-X5yAmwho')
bot = telebot.TeleBot(BOT_TOKEN)

# Conversation state shared across handlers (single-user design).
user_url = None
user_answer = None
# Handler for incoming Medium URLs. Medium also issues short links
# (link.medium.com), so no stricter source check is applied here.
@bot.message_handler(func=lambda msg: bool(msg.text and re.search(r'http[s]?://(www\.)?(link\.)?medium\.com/', msg.text)))
def handle_medium_url(message):
    """Remember the Medium URL the user sent and invite a question."""
    global user_url, identify
    user_url = message.text
    bot.reply_to(message, "想了解什麼訊息呢?問問我吧!")
# Fallback for every other URL: politely refuse and suggest a supported link.
@bot.message_handler(func=lambda msg: bool(msg.text and re.search(r'http[s]?://', msg.text)))
def handle_other_url(message):
    """Tell the user that only Medium links are supported."""
    bot.reply_to(message, "此網頁不支援唷😂😂\n請試試看輸入 https://link.medium.com/rxe98Z708Db ", disable_web_page_preview=True)
# Greeting handler: replies to common greetings with the indicator value.
# ROBUSTNESS FIX: guard against messages without text — the original called
# msg.text.lower() and would raise AttributeError on None.
@bot.message_handler(func=lambda msg: msg.text is not None and msg.text.lower() in ["hi", "hello", "", "你好", "早上好", "晚上好", "早安", "晚安", "介紹", ""])
def reply_all(message):
    """Fetch the current indicator value and send it to the greeting user.

    NOTE(review): the original also built a personalised intro string with a
    Medium link but never sent it; that dead code was removed — confirm
    whether the intro was meant to be part of the reply.
    """
    print(message)       # full update object — useful when debugging handlers
    print(message.text)  # just the text payload
    result = start()
    print(result)
    bot.reply_to(message, result, disable_web_page_preview=True)
# Catch-all handler: any remaining text message is treated as a question.
@bot.message_handler(func=lambda msg: True)
def handle_user_answer(message):
    """Record the user's question, fetch the indicator, and time the round trip."""
    global user_answer, startime
    user_answer = message.text
    # The assistant-based path (answering(user_answer)) is currently disabled.
    result = start()
    print("This is result", result)
    elapsed_end = time.time()
    print("Time: ", elapsed_end - startime)  # startime is set inside start()
    bot.reply_to(message, result)
# Answer a question via the OpenAI Assistants API, grounded on uploaded PDFs.
def answering(query):
    """Build a one-off assistant/thread for *query* and return its answer text."""
    uploaded_ids = ai_assistant.get_files()
    ai_assistant.create_assistant(
        name="QFFERS Bot",
        instructions=(
            "你是一個天問Bot機器人,你的任務是請基於用戶上傳的PDF上找尋用戶所要找尋的答案、數值。"
            "任務說明:用戶提問時,請仔細分析問題並提供基於上傳PDF。如果答案來自PDF檔案請提供該篇PDF的段落,若沒有資料請回答:我不知道"
        ),
        tools=[{"type": "retrieval"}],
        files=uploaded_ids,
    )
    ai_assistant.create_thread()
    ai_assistant.add_message_to_thread(role="user", content=query)
    ai_assistant.run_assistant(
        instructions="Please user's language to answer the question. You can only answer according to the uploaded files.")
    # Block until the run finishes, then pull the reply out of the thread.
    ai_assistant.check_run(thread_id=ai_assistant.thread.id, run_id=ai_assistant.run.id)
    _cost, answer = ai_assistant.process_messages()
    return answer
def start(url="https://www.bea.gov/news/2024/personal-income-and-outlays-december-2023"):
    """Scrape the tracked indicator value and return its text.

    Args:
        url: BEA news-release page to scrape. Defaults to the December 2023
            personal income and outlays release, matching the previously
            hard-coded behaviour (generalized into a parameter).

    Returns:
        The indicator cell text extracted by find_indicate().

    Side effect:
        Resets the module-level ``startime`` timer that the Telegram handlers
        use to report round-trip time.
    """
    global startime
    startime = time.time()
    return find_indicate(url)
def find_indicate(url):
    """Open *url* in Chrome via Selenium and return one table cell's text.

    The XPath targets row 13 / column 6 of the first table on the BEA release
    page — brittle against page-layout changes (NOTE(review): confirm the cell
    still holds the intended indicator after any site redesign).

    Raises:
        selenium.common.exceptions.NoSuchElementException: if the cell is absent.
    """
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.chrome.options import Options

    options = Options()
    # options.add_argument("--headless")  # enable to run without a visible browser
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        cell = driver.find_element(By.XPATH, '//*[@id="home"]/div[2]/div/div/div[1]/table/tbody/tr[13]/td[6]')
        return cell.text
    finally:
        # BUG FIX: always quit the browser — the original leaked one Chrome
        # process per call because the driver was never closed.
        driver.quit()
# Long-polling loop: keeps fetching new messages from the Telegram servers
# and dispatches them to the handlers registered above.
def tg_bot(cfg: Config):
    """Run the Telegram bot."""
    global ai_assistant, storage, bot
    print("Starting Telegram bot...")
    # ai_assistant = AI_assistant(cfg)  # assistant features currently disabled
    bot.infinity_polling()
# Non-farm payrolls
def read_pdf_nonfarm(month, year):
    """Extract the non-farm payroll headline from a BLS employment-situation PDF.

    Args:
        month: release month, used to build the file name empsit_{month}_{year}.pdf.
        year: two-digit year (e.g. 23 for 2023).

    Returns:
        Lines 8-9 of page 1 joined, with only the first two commas removed
        (behaviour preserved from the original: chunks beyond the third
        comma-separated piece are dropped).
    """
    # BUG FIX: use a context manager — the original never closed the PDF,
    # leaking a file handle per call.
    with pdfplumber.open(f"empsit/empsit_{month}_{year}.pdf") as pdf:
        first_page = pdf.pages[0]
        lines = first_page.extract_text().split('\n')
    pieces = (lines[7] + lines[8]).split(',')
    return pieces[0] + pieces[1] + pieces[2]
def read_nonfarm():
    """Print the non-farm payroll headline for July-December 2023, with timing."""
    t0 = time.time()
    for month in range(7, 13):
        # BUG FIX: these are the 2023 releases (empsit_{month}_23.pdf), but
        # the original label said 2022年.
        print(f"2023年{month}月非農就業人數: ", end="")
        print(read_pdf_nonfarm(month, 23))
    t1 = time.time()
    print("Time: ", t1 - t0)
if __name__ == "__main__":
    # Smoke test 1: read one non-farm payroll PDF and time it.
    t0 = time.time()
    print("2023年7月非農就業人數: ", end="")
    print(read_pdf_nonfarm(7, 23))
    print("Time_NonFarm: ", time.time() - t0)

    # Smoke test 2: scrape the BEA indicator page and time it.
    t0 = time.time()
    print(find_indicate("https://www.bea.gov/news/2024/personal-income-and-outlays-december-2023"))
    # BUG FIX: this label previously read "Time_NonFarm" although it times
    # the BEA indicator scrape, not the PDF read.
    print("Time_Indicator: ", time.time() - t0)

    # cfg = Config()
    # tg_bot(cfg)

Binary file not shown.
Loading…
Cancel
Save