From 6b4758ac6b53d5e78280f6a493428ad863c96fa4 Mon Sep 17 00:00:00 2001
From: Tony Okeke <tonykabilanokeke@gmail.com>
Date: Mon, 26 Feb 2024 00:19:26 -0500
Subject: [PATCH 1/7] update .gitignore: ignore .env/, drop dist/, build/ and *.pyc

---
 .gitignore | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index ad8c20e..a2a8dea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1 @@
-dist/
-build/
-*.pyc
+.env/

From b1a1a832dfb8c8eb10dab0ffdd908597984e6a33 Mon Sep 17 00:00:00 2001
From: Tony Okeke <tonykabilanokeke@gmail.com>
Date: Mon, 26 Feb 2024 00:19:50 -0500
Subject: [PATCH 2/7] migrate to openai 1.0.0 API

---
 .env                                          |   1 +
 gpthistory.egg-info/PKG-INFO                  |   6 ++
 gpthistory.egg-info/entry_points.txt          |   6 +-
 gpthistory.egg-info/requires.txt              |   6 +-
 .../__pycache__/__init__.cpython-310.pyc      | Bin 0 -> 152 bytes
 .../__pycache__/gpthistory.cpython-310.pyc    | Bin 0 -> 3048 bytes
 .../__pycache__/helpers.cpython-310.pyc       | Bin 0 -> 3067 bytes
 gpthistory/gpthistory.py                      |  91 +++++++++---------
 gpthistory/helpers.py                         |  48 ++++-----
 setup.py                                      |  22 ++---
 10 files changed, 94 insertions(+), 86 deletions(-)
 create mode 100644 .env
 create mode 100644 gpthistory/__pycache__/__init__.cpython-310.pyc
 create mode 100644 gpthistory/__pycache__/gpthistory.cpython-310.pyc
 create mode 100644 gpthistory/__pycache__/helpers.cpython-310.pyc

diff --git a/.env b/.env
new file mode 100644
index 0000000..a390e60
--- /dev/null
+++ b/.env
@@ -0,0 +1 @@
+OPENAI_API_KEY="<your-openai-api-key>"
\ No newline at end of file
diff --git a/gpthistory.egg-info/PKG-INFO b/gpthistory.egg-info/PKG-INFO
index 5cbb48a..da0adaa 100644
--- a/gpthistory.egg-info/PKG-INFO
+++ b/gpthistory.egg-info/PKG-INFO
@@ -2,6 +2,12 @@ Metadata-Version: 2.1
 Name: gpthistory
 Version: 0.3
 Summary: A tool for searching through your chatgpt conversation history
+Home-page: UNKNOWN
 Author: Shrikar Archak
 Author-email: shrikar84@gmail.com
+License: UNKNOWN
+Platform: UNKNOWN
 License-File: LICENSE.md
+
+UNKNOWN
+
diff --git a/gpthistory.egg-info/entry_points.txt b/gpthistory.egg-info/entry_points.txt
index ce90c75..d13f257 100644
--- a/gpthistory.egg-info/entry_points.txt
+++ b/gpthistory.egg-info/entry_points.txt
@@ -1,2 +1,4 @@
-[console_scripts]
-gpthistory = gpthistory.gpthistory:main
+
+        [console_scripts]
+        gpthistory=gpthistory.gpthistory:app
+    
\ No newline at end of file
diff --git a/gpthistory.egg-info/requires.txt b/gpthistory.egg-info/requires.txt
index dbd3260..2cbb3f1 100644
--- a/gpthistory.egg-info/requires.txt
+++ b/gpthistory.egg-info/requires.txt
@@ -1,5 +1,5 @@
-Click
-python-dotenv
+numpy
 openai
 pandas
-numpy
+python-dotenv
+typer
diff --git a/gpthistory/__pycache__/__init__.cpython-310.pyc b/gpthistory/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c7e5b61ee4a144c6d9cf2c3d3062a8e7eacd0cbc
GIT binary patch
literal 152
zcmd1j<>g`kg8NmsQ$h4&5P=LBfgA@QE@lA|DGb33nv8xc8Hzx{2;!Huer{fgeqvEk
zVs>V+esN}MNpgmMQEEYcv3`0%Nk(RINq$i!GBZ9tGcU6wK3=b&@)n0pZhlH>PO2Tq
L&|)Sa!NLFl32!91

literal 0
HcmV?d00001

diff --git a/gpthistory/__pycache__/gpthistory.cpython-310.pyc b/gpthistory/__pycache__/gpthistory.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2d1fd228ab56d8520127ce9d9afd7c8ee66725b7
GIT binary patch
literal 3048
zcma)8&2JmW72nw}E|;ICBwMl(bYVbAh$K`{6lkl;ZW60<4HU9l324D2Al5rWa;g2G
zXNLM9mPG;iszc9h0rD82mtK3yAJD&G&|6PA_vRkd_lAlk6$#KK=Iy-Md2i-@{T}mL
z%{Sor<eh)Ay}Dui3z?gb1I#Bd#NUBXgObGPF5jd}G;elI{kFQcemh+U-e%&arEV$p
zx?WoDmWjcPxxX;F6=sgA^O~l0YAp!0sk5-U4d#rRFz!&7mM%=#@620lWkI^DtTJC)
z7}R6N=x*PnW$M4S&q?>*)p~`k!)le*^y&s%q4gt^f6glB27&t?=+SO`1NS%f+m>pv
zQ^~_f_9RSt<B&_CRt79%Jd~`*(wB_VI2#DH8ih$TNpMx><DQIV!o(Z$`)Q>!7|UTS
zWX{i~wP+a1IHT;e6N%$#d-MHJL^4iUTLd2ko9`#=m?c^GmQR@w;Q+q2P37p1%I)Vo
z4W+XCal#^k7rlC#@OEK5fg%0@U!*Z2FwD6nZSWnTCi+S`bGI-SWJU(Y%!FaVunV$l
zeEIRrDI98T81vG?0RFu%%$YkYNv|lKlbI*Wg?A1yII{WE!lU+vL7gKD=H}c7Id=oJ
z5~HwybM?%Incj!otKikTtjw#lB<(*Le=%`Q^QhN6CvVqk^Lk+xR$<ffpCRV6asj_l
zW8PdC4P(PVp86XGP<IEY&H_R!+BT3~7&Bik-xXS&Rka6N=mGvWN`SA8R-{d9w0>dE
z{KD6|e67pBsS7l(!q*1)t$AV1YDI0nM(-@h=q_#1l?yVfWM)yGRf;PA8Z4+>dFE?v
z)2g=aUQvO2uP+SnUcIOn6}q-#z}+^AO5qQ{1G9$SeHY{#*B&Suqx<@XReJBjg1lTm
zFxnf_pZEbhpH1R~24NQHGz$7WPlGTR#K$ZPFuMX8$}qs>2u@E=wa8(cD36^&4)*dR
zRZ7G07;=_l?kGc<F`YhmhB+C$1l<yJ(*V<z!7*|>nBIGmXUB{S$lN>=%X1ms4xl_r
zcw#T6$`=gsIL~lyW3BKSXoqu4aW+oEi0!>0#aBdGf{m4vPbA}z($+9PQDrbBjzTza
zy4Fh>AF$xjqu^n}`f%MWIeVzw<uYoE@??@`V*2yv@Fln^L@*|}F2w)}B*@rF5Z$`A
z%mWb~vmnhgcGgk;wZL@kFqdI+dq0?G@)h)uqaD59exS@;C@&3<7>&75_BfP7Rf0Jd
ztn#1j@BZR<y%(Pz{!-Z^k!Q+I@{oeKWU1gLs<IN6DHq(&GOFAp9}E~*cAWKdWsa#T
z??UMIIB4Sy+-}Dr3sqS#a52V5xjKk&3C=>=0|Zfyc9?QrO&C8@Hs$%4TPW+wJb{2K
z7rX$@MIHs(P3~#cm?g@)3ZGDw>-Y)fU;yF8G6_n_ks`;ctN~C<nY6F`Xu>&s@4>vm
z^An*%1=qxdN}NTUrSLgSR9WAU!aCo;lf8bNg$b}<8T?aNc=Kp0&E!_dd3Y3yEfF&r
z4YxQO=VI#`Ilp__8K0>NCM{h$XdU#47eMa1zGa&>v57}kVXTw7>61D!|6smpmR3oN
zG)WbB&+=eT6-Hh2P183`;$7M{%t#%+)<J%iaKuzqI$~!hIj6T!7v1?5b^i*CH0FS{
zfUt<Q=E$1c(gEywF5OW{I@F{V;0`Is$eWk3pS=duEQrP*T=hqQk%-TLbf&Bopy}NF
z!kSq#d*;Y`0o@Fc`Ti9$<5?xZO2FQtv;eHor+dp4eFNN4*7*BFS~)Nz;3)NxN3LC+
zexM=ZfQ3970+3vp7@Wk=w6Z`B*|OmEuIa{d9~K-|+12LlU{mlrU^;IBx#b<i7#qd`
z46zCXyU=TWOSEwpkP&WS?cWl@TDc`OFbd;KcLvR@Am`+d<}34HnLGdqP9Mrl0+^Vc
zj}?(Rr<UY1P^0`EEQ2Ud$B#b;{@<v3)tG)Lw85R7B!|W=9>XL|U(ygKZ&$t*1SAMh
zmxBc@;7oG*zsGpr%HtvY8IbAv_Z^|@VCrrP^xO2uPXI!nzBmkSqHK`F+0k||lyWS#
zx3;iMJ2{|67<a&wt!N7b-~H(SdZwKvwr9D7m`yU;30@>D6ilN%y^b*S3@&76zkP>q
zq7m;Rd4S{{Bp)F80h0HSe27FBv;%mSoGlT{5|RU|<WTu>CO8IBm030c41j+|04!%?
zL_8J^^R~x#QOD144}d}fErC(~BV_&SzfjA$h@c$_MPmLGWe_)R#MoVW`ZusD9s;R4
zSlcfAyvvr=w3=p1BY}Ncwe0`+C7bZy!ZzjgdMVtq*K6PBzebe-5;R%4W9WjR(6*`)
zA~oc^YFz_TXUGz;WeMyWa+lURhf5Ptv349ZgFMcl-<+b#iC|oN5pXLMaRfo{#{*?Q
z+uwVx$^#~UwQLG-HS8+Ao{k0fy)*@J<?TUZKg9luh{X4ipe;9CqMf62-`h!ZI!V}L
ltU`hD^b8Ns5PyJ*?z&}L;H697a;+9|f7&G8eV^Eu{{=c$O0)m~

literal 0
HcmV?d00001

diff --git a/gpthistory/__pycache__/helpers.cpython-310.pyc b/gpthistory/__pycache__/helpers.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..36d2d91438589b7b190375ee38d6c080771841c5
GIT binary patch
literal 3067
zcmZ`*&2QYs6`$ePa=EJyuht)%G%eG>sTQpzH$|H^aNMf4nhGss11^eon}DD>qZK7C
zNzaTFYr!pA$U)je4!!jQ1++&4J@oIH9CGNTI_Ks?-S=i!yOQ9L;G3_<$9cc^dv91Q
zECd9uFNUA<vPsB)aB%)|Vek`p<i9~M!e~Z%la}_V8CyNuv`)`8t=IFQwX)j4@A-71
zTkqAme@rOhbT2q*Fndh77d2tDaI$FR5_86sxy&2ey=7kCy8=&()tLX81#i_)R(N|%
zdskVV1<y$D8f&m7wAa}JTZHxoTVl)5c37LW*vd27dyUaYq<i%gc1^asR5!9BW)UkC
z&kyzDgNK{j8(Y!F!>#BSo4<WQe?M%jCA+bbJQm6BZ~^;i&iK={L>>;iH&<hssC2-)
zGQ1VuT+R3)&+>RS9&jn+KCE^v?V3gH?G$1VtAyePrk4Y)2hTlt<b4o*GNKhZ0aV9y
zWL1_3D+@*zjO@yWkqsljiaGnZMo*}jar<NpI+RrCqc{9c5W=<jREaoIq2f<fSjIxh
za7Pq_5Pb<*tl~AJrfbvP>RM9d0Eg1eX(v)gCD#rb(5|rn{@8u4v<Fi>XI)1-Xh{dC
z6B!jz14o1p;yDq^(7bqWeUPj5ScrH(mFqI)D%o8Zye#B;zf`-aRE0P?-`(X|$%R}i
zk8}&{q8HKp1J}T&Tm<1b0d;6VTW~v6v|uW+K%8ARbdatIJaP@hABh5J6+nDUD|&3-
zCm%HcW>Nvv6}?VAu{z|}K*}d|W&x1)@NI+8qcTfX80R6na1^GwD#FJQ%3Ure5DE<H
zHpW`Sd7o=L<9XNCuF=<l(T!v}<eH`il8LwiZ|Rmq>k%4_rbfSs`$Yh)KeU}TbzX`a
zFu1@RbQ9TQn1lL??9n}NrB4}U)@K&*h%&oFm;;a7A%++48+Sh7ITx|w=n)@0=8UCz
zKLmP)aroqbi=*&B;>?4R=Nnrg<iy0w7uPTbw`Q;0idlT?_U(6`gWqXh9soxM1>>2(
z*ceE3eWna>cm)`YQ(fOY`>6I3!C@zeocXf$1(#)!OMV%q8~y37BcmA24a3JImMqV4
zs71a1Ej!{Z7+!!2lWdk~e}w@inj**qMvtvy3QTzGLu+J>Y-OF;4EVt8>*OEgpC+x8
zv*+%4mCYRH0<;eESnV099Oie(ec;l_t!U-G3pStBRSlR$S$*FTo5ofEGoUkuvSr4N
zi3W}_UCmr`mb?qYjf+8=ISSbeA%e_2gw%j)1Sow)piUWhIDB&&XAI>G@re<`v-iSN
zq^;<ncn!n_#Lfuz2o_`;M4wdTBhS!^bjV+<Czv5ryn%0HmO~7!wcFYj#mARp7u?Ap
z$CF}E-n|Q&U!v_c8GdUP(z%aocd{ahGkF&}XYb7UG<^Nu1@~9)?H=U&vKzi1-ekk2
zb6%Q>H@1r~=O16fjB&*i#aY(}dgccMlM5!pw3ikAJ{Q_abHMuR=-M|>geVY%uBR6+
zL_i1gy^HIhp<M&6z?{?zYEaIZdKs3LSNQ)ucunGm9p@TtQx6!8oNhs-Ytm)79Xogl
zweGv1a{6Bzf(Fm*g7ex`1)P)r0-*?;5J2$`IkCnBPZ4D!Jx30#IF(&FV++XZGHc%*
zd6ipv9WnyC?m>^)J5Ubsy+4BuGvmx=(6U$6%n1!=^e0$p+{(fNI`TgyI}}gqXMW|I
z^r(MIVWxg=Ca7u^B+sZ()vKUt?ASo2VE9u*{d;ki9AsFaR8fX1RT-bqHV*sgA<r)q
zsX6@9QjB%J|M$P)b^2q0g#perIPs#CZJ)k`>y<QL0lhEJ)BI16oM$wj2kyBg-Udw*
zY50e@vXf?t3(lhH7vw}03%gha@jL8vK@3-c)f2BG@G6>k^(wZ1<@Ut)(HN%MMEn4H
zu!p7D9ws&~?chVy#~pNQ+R96D6}s9}1!kA2liG%VAULs#MAu@`mxWN>rncCQcoS7!
z+-<IxW;jQaLv86&8P)-qiDXhh0oTqTmiu~TwzG3fi}(Qm41T841XTSq<aE<~X^M<q
zo{Z*i8o#=hco!Y|AqsPRwqJ$o2e2&h%xt+nZCbwVn}39_*)6+8eHvI|3l^bzy@So`
z+S!vuuB}39uZ(jROKp`*yHFg;Bk?29F||N@up9VW6j;A>4QmC|R$YUsR1~?u>;nwT
zG}`A!+9_kT3zmyCha&M5?H<6frh~2R`<uUs9&Y^V=eqGYmT3YtZ71z(XKVXs4|E;=
zu6{YGLVgtCFJ%;oZ{rs5_r{bd6E;%;j2q`DVaQ=%@$bM<e30?GSmGrXBcJ%-I^Ioq
VJbTf;VK3VO^)0{Y*Zg3y{eSPb9y|a5

literal 0
HcmV?d00001

diff --git a/gpthistory/gpthistory.py b/gpthistory/gpthistory.py
index 8adc06e..c9570d6 100644
--- a/gpthistory/gpthistory.py
+++ b/gpthistory/gpthistory.py
@@ -1,107 +1,110 @@
-import click
+import typer
 import json
 import os
 import pandas as pd
 import logging
-from gpthistory.helpers import extract_text_parts, generate_embeddings, calculate_top_titles
+from gpthistory.helpers import (
+    extract_text_parts,
+    generate_embeddings,
+    calculate_top_titles,
+)
+
+main = typer.Typer()
 
 # Define the path to the index file in the user's home directory
-INDEX_PATH = os.path.join(os.path.expanduser('~'), '.gpthistory', 'chatindex.csv')
+INDEX_PATH = os.path.join(os.path.expanduser("~"), ".gpthistory", "chatindex.csv")
 
 # Configure the logger
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
-@click.group()
-def main():
-    """
-    Simple CLI for searching within a chat data
-    """
-    pass
 
 @main.command()
-@click.option('--file', type=click.Path(exists=True), help='Input file')
-def build_index(file):
+def build_index(file: typer.FileText):
     """
-    Build an index from a given chat data file
+    Build an index from a given chat data file
     """
-    # TODO: Implement index building
-    # Write the index to the predefined path
     # Make sure the directory exists
     os.makedirs(os.path.dirname(INDEX_PATH), exist_ok=True)
-    
+
     # Load the chat data from the given file
-    with open(file) as f:
-        data = json.load(f)
-    
+    data = json.load(file)
+
     chat_ids = []
     section_ids = []
     texts = []
     for entry in data:
-        for k, v in entry['mapping'].items():
+        for k, v in entry["mapping"].items():
             text_data = extract_text_parts(v)
-            if len(text_data) > 0 and text_data[0] != '':
+            if len(text_data) > 0 and text_data[0] != "":
                 # Add relevant chat information to the index
-                chat_ids.append(entry['id'])
+                chat_ids.append(entry["id"])
                 section_ids.append(k)
                 texts.append(text_data[0])
     logger.info(f"Index built and stored at: {INDEX_PATH}")
     logger.info(f"Conversations indexed: {len(chat_ids)}")
-    df = pd.DataFrame({'chat_id': chat_ids, 'section_id': section_ids, 'text': texts})
-    df = df[~df.text.isna()] 
-    df['id'] = df['chat_id']
+    df = pd.DataFrame({"chat_id": chat_ids, "section_id": section_ids, "text": texts})
+    df = df[~df.text.isna()]
+    df["id"] = df["chat_id"]
     df.set_index("id", inplace=True)
 
     # Handle incremental index updates
-    current_df = pd.DataFrame()    
+    current_df = pd.DataFrame()
     rows_only_in_df = pd.DataFrame()
     incremental = False
     if os.path.exists(INDEX_PATH):
         incremental = True
-        current_df = pd.read_csv(INDEX_PATH, sep='|')
-        current_df['id'] = current_df['chat_id']
+        current_df = pd.read_csv(INDEX_PATH, sep="|")
+        current_df["id"] = current_df["chat_id"]
         current_df.set_index("id", inplace=True)
         # Use merge with indicator=True to find rows present in one DataFrame but not the other
-        merged_df = df.merge(current_df, how='outer', indicator=True)
+        merged_df = df.merge(current_df, how="outer", indicator=True)
         # Query rows only present in df1
-        rows_only_in_df = merged_df.query('_merge == "left_only"').drop(columns='_merge')
+        rows_only_in_df = merged_df.query('_merge == "left_only"').drop(
+            columns="_merge"
+        )
     else:
         rows_only_in_df = df
-    
+
     if incremental and len(rows_only_in_df) > 0:
         logger.info("Only generating embeddings for new conversations to save money.")
-    
+
     # Generate and add embeddings to the index
     embeddings = generate_embeddings(rows_only_in_df.text.tolist())
-    rows_only_in_df['embeddings'] = embeddings
+    rows_only_in_df["embeddings"] = embeddings
     final_df = pd.concat([rows_only_in_df, current_df])
     logger.info(f"Total conversations: {len(final_df)}")
-    final_df.to_csv(INDEX_PATH, sep='|', index=False)
+    final_df.to_csv(INDEX_PATH, sep="|", index=False)
+
 
 @main.command()
-@click.argument('keyword', required=True)
-def search(keyword):
+def search(keyword: str):
     """
     Search a keyword within the index
     """
-    # TODO: Implement search function
-    # Load the index from the predefined path
     logger.info("Searching for keyword: %s", keyword)
     if os.path.exists(INDEX_PATH):
-        df = pd.read_csv(INDEX_PATH, sep='|')
-        df['embeddings'] = df.embeddings.apply(lambda x: [float(t) for t in json.loads(x)])
+        df = pd.read_csv(INDEX_PATH, sep="|")
+        df["embeddings"] = df.embeddings.apply(
+            lambda x: [float(t) for t in json.loads(x)]
+        )
         filtered = df[df.text.str.contains(keyword)]
-        
+
         # Calculate top titles and their corresponding chat IDs
         chat_ids, top_titles, top_scores = calculate_top_titles(df, keyword)
-        
+
         for i, t in enumerate(top_titles):
             logger.info("%s: %s", chat_ids[i], t)
-            logger.info("ChatGPT Conversation link: https://chat.openai.com/c/%s", chat_ids[i])
+            logger.info(
+                "ChatGPT Conversation link: https://chat.openai.com/c/%s", chat_ids[i]
+            )
             logger.info("--------------------------------------")
     else:
-        click.echo("Index not found. Please build the index first.")
+        typer.echo("Index not found. Please build the index first.")
         return
 
+
 if __name__ == "__main__":
     main()
diff --git a/gpthistory/helpers.py b/gpthistory/helpers.py
index 56bf08f..156d29a 100644
--- a/gpthistory/helpers.py
+++ b/gpthistory/helpers.py
@@ -2,7 +2,9 @@
 import os
 import pandas as pd
 import numpy as np
-import openai
+from openai import OpenAI
+
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
 from dotenv import load_dotenv
 import logging
 
@@ -10,56 +12,55 @@
 load_dotenv()
 
 # Set up OpenAI API key
-openai.api_key = os.environ.get('OPENAI_API_KEY')
 
 # Define the path to the index file in the user's home directory
-INDEX_PATH = os.path.join(os.path.expanduser('~'), '.chatsearch', 'chatindex.csv')
+INDEX_PATH = os.path.join(os.path.expanduser("~"), ".chatsearch", "chatindex.csv")
 
 # Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
+
 def extract_text_parts(data):
     """
     Extract text parts from chat data.
     """
     text_parts = []
-    message = data.get('message')
+    message = data.get("message")
     if message:
-        content = message.get('content')
-        if content and content.get('content_type') == 'text':
-            text_parts.extend(content.get('parts', []))
+        content = message.get("content")
+        if content and content.get("content_type") == "text":
+            text_parts.extend(content.get("parts", []))
     return text_parts
 
+
 def split_into_batches(array, batch_size):
     """
     Split an array into batches.
     """
     for i in range(0, len(array), batch_size):
-        yield array[i:i + batch_size]
+        yield array[i : i + batch_size]
+
 
 def generate_query_embedding(query):
     """
     Generate an embedding for a query using OpenAI API.
     """
-    response = openai.Embedding.create(
-        input=[query],
-        model="text-embedding-ada-002"
-    )
-    return response['data'][0]['embedding']
+    response = client.embeddings.create(input=[query], model="text-embedding-ada-002")
+    return response.data[0].embedding
+
 
-def generate_embeddings(conversations):
+def generate_embeddings(conversations):
     """
     Generate embeddings for conversations using OpenAI API.
     """
     embeddings = []
     for i, batch in enumerate(split_into_batches(conversations, 100)):
         logger.info(f"Generating Embeddings for batch: {i + 1}")
-        response = openai.Embedding.create(
-            input=batch,
-            model="text-embedding-ada-002"
-        )
-        tmp_embedding = [row['embedding'] for row in response['data']]
+        response = client.embeddings.create(input=batch, model="text-embedding-ada-002")
+        tmp_embedding = [row.embedding for row in response.data]
         embeddings += tmp_embedding
     if len(embeddings) > 0:
         logger.info("Conversations (Chunks) = %d", len(conversations))
@@ -68,12 +69,13 @@ def generate_embeddings(conversations):
         logger.info("No new conversations detected")
     return embeddings
 
+
 def calculate_top_titles(df, query, top_n=1000):
     """
     Calculate top titles for a given query using embeddings.
     """
     # Extract the embeddings from the DataFrame
-    embedding_array = np.array(df['embeddings'].tolist())
+    embedding_array = np.array(df["embeddings"].tolist())
     query_embedding = generate_query_embedding(query)
     # Calculate the dot product between the query embedding and all embeddings in the DataFrame
     dot_scores = np.dot(embedding_array, query_embedding)
@@ -81,8 +83,8 @@ def calculate_top_titles(df, query, top_n=1000):
     # Filter out titles with dot scores below the threshold
     mask = dot_scores >= 0.8
     filtered_dot_scores = dot_scores[mask]
-    filtered_titles = df.loc[mask, 'text'].tolist()
-    filtered_chat_ids = df.loc[mask, 'chat_id'].tolist()
+    filtered_titles = df.loc[mask, "text"].tolist()
+    filtered_chat_ids = df.loc[mask, "chat_id"].tolist()
 
     # Sort the filtered titles based on the dot scores (in descending order)
     sorted_indices = np.argsort(filtered_dot_scores)[::-1][:top_n]
diff --git a/setup.py b/setup.py
index 93a0f68..2d91494 100644
--- a/setup.py
+++ b/setup.py
@@ -1,22 +1,16 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='gpthistory',
-    version='0.3',
-    description='A tool for searching through your chatgpt conversation history',
-    author='Shrikar Archak',
-    author_email='shrikar84@gmail.com',
+    name="gpthistory",
+    version="0.3",
+    description="A tool for searching through your chatgpt conversation history",
+    author="Shrikar Archak",
+    author_email="shrikar84@gmail.com",
     packages=find_packages(),
     include_package_data=True,
-    install_requires=[
-        'Click',
-        'python-dotenv',
-        'openai',
-        'pandas',
-        'numpy'
-    ],
-    entry_points='''
+    install_requires=["typer", "python-dotenv", "openai", "pandas", "numpy"],
+    entry_points="""
         [console_scripts]
         gpthistory=gpthistory.gpthistory:main
-    ''',
+    """,
 )

From 0f14e4f676556303660f4802d93ffd9e5acf6a34 Mon Sep 17 00:00:00 2001
From: Tony Okeke <tonykabilanokeke@gmail.com>
Date: Mon, 26 Feb 2024 00:19:50 -0500
Subject: [PATCH 3/7] migrate to openai 1.0.0 API

---
 .env                                          |   1 +
 gpthistory.egg-info/PKG-INFO                  |   6 ++
 gpthistory.egg-info/entry_points.txt          |   6 +-
 gpthistory.egg-info/requires.txt              |   6 +-
 .../__pycache__/__init__.cpython-310.pyc      | Bin 0 -> 152 bytes
 .../__pycache__/gpthistory.cpython-310.pyc    | Bin 0 -> 3048 bytes
 .../__pycache__/helpers.cpython-310.pyc       | Bin 0 -> 3082 bytes
 gpthistory/gpthistory.py                      |  91 +++++++++---------
 gpthistory/helpers.py                         |  46 ++++-----
 setup.py                                      |  22 ++---
 10 files changed, 93 insertions(+), 85 deletions(-)
 create mode 100644 .env
 create mode 100644 gpthistory/__pycache__/__init__.cpython-310.pyc
 create mode 100644 gpthistory/__pycache__/gpthistory.cpython-310.pyc
 create mode 100644 gpthistory/__pycache__/helpers.cpython-310.pyc

diff --git a/.env b/.env
new file mode 100644
index 0000000..a390e60
--- /dev/null
+++ b/.env
@@ -0,0 +1 @@
+OPENAI_API_KEY="<your-openai-api-key>"
\ No newline at end of file
diff --git a/gpthistory.egg-info/PKG-INFO b/gpthistory.egg-info/PKG-INFO
index 5cbb48a..da0adaa 100644
--- a/gpthistory.egg-info/PKG-INFO
+++ b/gpthistory.egg-info/PKG-INFO
@@ -2,6 +2,12 @@ Metadata-Version: 2.1
 Name: gpthistory
 Version: 0.3
 Summary: A tool for searching through your chatgpt conversation history
+Home-page: UNKNOWN
 Author: Shrikar Archak
 Author-email: shrikar84@gmail.com
+License: UNKNOWN
+Platform: UNKNOWN
 License-File: LICENSE.md
+
+UNKNOWN
+
diff --git a/gpthistory.egg-info/entry_points.txt b/gpthistory.egg-info/entry_points.txt
index ce90c75..d13f257 100644
--- a/gpthistory.egg-info/entry_points.txt
+++ b/gpthistory.egg-info/entry_points.txt
@@ -1,2 +1,4 @@
-[console_scripts]
-gpthistory = gpthistory.gpthistory:main
+
+        [console_scripts]
+        gpthistory=gpthistory.gpthistory:app
+    
\ No newline at end of file
diff --git a/gpthistory.egg-info/requires.txt b/gpthistory.egg-info/requires.txt
index dbd3260..2cbb3f1 100644
--- a/gpthistory.egg-info/requires.txt
+++ b/gpthistory.egg-info/requires.txt
@@ -1,5 +1,5 @@
-Click
-python-dotenv
+numpy
 openai
 pandas
-numpy
+python-dotenv
+typer
diff --git a/gpthistory/__pycache__/__init__.cpython-310.pyc b/gpthistory/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c7e5b61ee4a144c6d9cf2c3d3062a8e7eacd0cbc
GIT binary patch
literal 152
zcmd1j<>g`kg8NmsQ$h4&5P=LBfgA@QE@lA|DGb33nv8xc8Hzx{2;!Huer{fgeqvEk
zVs>V+esN}MNpgmMQEEYcv3`0%Nk(RINq$i!GBZ9tGcU6wK3=b&@)n0pZhlH>PO2Tq
L&|)Sa!NLFl32!91

literal 0
HcmV?d00001

diff --git a/gpthistory/__pycache__/gpthistory.cpython-310.pyc b/gpthistory/__pycache__/gpthistory.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2d1fd228ab56d8520127ce9d9afd7c8ee66725b7
GIT binary patch
literal 3048
zcma)8&2JmW72nw}E|;ICBwMl(bYVbAh$K`{6lkl;ZW60<4HU9l324D2Al5rWa;g2G
zXNLM9mPG;iszc9h0rD82mtK3yAJD&G&|6PA_vRkd_lAlk6$#KK=Iy-Md2i-@{T}mL
z%{Sor<eh)Ay}Dui3z?gb1I#Bd#NUBXgObGPF5jd}G;elI{kFQcemh+U-e%&arEV$p
zx?WoDmWjcPxxX;F6=sgA^O~l0YAp!0sk5-U4d#rRFz!&7mM%=#@620lWkI^DtTJC)
z7}R6N=x*PnW$M4S&q?>*)p~`k!)le*^y&s%q4gt^f6glB27&t?=+SO`1NS%f+m>pv
zQ^~_f_9RSt<B&_CRt79%Jd~`*(wB_VI2#DH8ih$TNpMx><DQIV!o(Z$`)Q>!7|UTS
zWX{i~wP+a1IHT;e6N%$#d-MHJL^4iUTLd2ko9`#=m?c^GmQR@w;Q+q2P37p1%I)Vo
z4W+XCal#^k7rlC#@OEK5fg%0@U!*Z2FwD6nZSWnTCi+S`bGI-SWJU(Y%!FaVunV$l
zeEIRrDI98T81vG?0RFu%%$YkYNv|lKlbI*Wg?A1yII{WE!lU+vL7gKD=H}c7Id=oJ
z5~HwybM?%Incj!otKikTtjw#lB<(*Le=%`Q^QhN6CvVqk^Lk+xR$<ffpCRV6asj_l
zW8PdC4P(PVp86XGP<IEY&H_R!+BT3~7&Bik-xXS&Rka6N=mGvWN`SA8R-{d9w0>dE
z{KD6|e67pBsS7l(!q*1)t$AV1YDI0nM(-@h=q_#1l?yVfWM)yGRf;PA8Z4+>dFE?v
z)2g=aUQvO2uP+SnUcIOn6}q-#z}+^AO5qQ{1G9$SeHY{#*B&Suqx<@XReJBjg1lTm
zFxnf_pZEbhpH1R~24NQHGz$7WPlGTR#K$ZPFuMX8$}qs>2u@E=wa8(cD36^&4)*dR
zRZ7G07;=_l?kGc<F`YhmhB+C$1l<yJ(*V<z!7*|>nBIGmXUB{S$lN>=%X1ms4xl_r
zcw#T6$`=gsIL~lyW3BKSXoqu4aW+oEi0!>0#aBdGf{m4vPbA}z($+9PQDrbBjzTza
zy4Fh>AF$xjqu^n}`f%MWIeVzw<uYoE@??@`V*2yv@Fln^L@*|}F2w)}B*@rF5Z$`A
z%mWb~vmnhgcGgk;wZL@kFqdI+dq0?G@)h)uqaD59exS@;C@&3<7>&75_BfP7Rf0Jd
ztn#1j@BZR<y%(Pz{!-Z^k!Q+I@{oeKWU1gLs<IN6DHq(&GOFAp9}E~*cAWKdWsa#T
z??UMIIB4Sy+-}Dr3sqS#a52V5xjKk&3C=>=0|Zfyc9?QrO&C8@Hs$%4TPW+wJb{2K
z7rX$@MIHs(P3~#cm?g@)3ZGDw>-Y)fU;yF8G6_n_ks`;ctN~C<nY6F`Xu>&s@4>vm
z^An*%1=qxdN}NTUrSLgSR9WAU!aCo;lf8bNg$b}<8T?aNc=Kp0&E!_dd3Y3yEfF&r
z4YxQO=VI#`Ilp__8K0>NCM{h$XdU#47eMa1zGa&>v57}kVXTw7>61D!|6smpmR3oN
zG)WbB&+=eT6-Hh2P183`;$7M{%t#%+)<J%iaKuzqI$~!hIj6T!7v1?5b^i*CH0FS{
zfUt<Q=E$1c(gEywF5OW{I@F{V;0`Is$eWk3pS=duEQrP*T=hqQk%-TLbf&Bopy}NF
z!kSq#d*;Y`0o@Fc`Ti9$<5?xZO2FQtv;eHor+dp4eFNN4*7*BFS~)Nz;3)NxN3LC+
zexM=ZfQ3970+3vp7@Wk=w6Z`B*|OmEuIa{d9~K-|+12LlU{mlrU^;IBx#b<i7#qd`
z46zCXyU=TWOSEwpkP&WS?cWl@TDc`OFbd;KcLvR@Am`+d<}34HnLGdqP9Mrl0+^Vc
zj}?(Rr<UY1P^0`EEQ2Ud$B#b;{@<v3)tG)Lw85R7B!|W=9>XL|U(ygKZ&$t*1SAMh
zmxBc@;7oG*zsGpr%HtvY8IbAv_Z^|@VCrrP^xO2uPXI!nzBmkSqHK`F+0k||lyWS#
zx3;iMJ2{|67<a&wt!N7b-~H(SdZwKvwr9D7m`yU;30@>D6ilN%y^b*S3@&76zkP>q
zq7m;Rd4S{{Bp)F80h0HSe27FBv;%mSoGlT{5|RU|<WTu>CO8IBm030c41j+|04!%?
zL_8J^^R~x#QOD144}d}fErC(~BV_&SzfjA$h@c$_MPmLGWe_)R#MoVW`ZusD9s;R4
zSlcfAyvvr=w3=p1BY}Ncwe0`+C7bZy!ZzjgdMVtq*K6PBzebe-5;R%4W9WjR(6*`)
zA~oc^YFz_TXUGz;WeMyWa+lURhf5Ptv349ZgFMcl-<+b#iC|oN5pXLMaRfo{#{*?Q
z+uwVx$^#~UwQLG-HS8+Ao{k0fy)*@J<?TUZKg9luh{X4ipe;9CqMf62-`h!ZI!V}L
ltU`hD^b8Ns5PyJ*?z&}L;H697a;+9|f7&G8eV^Eu{{=c$O0)m~

literal 0
HcmV?d00001

diff --git a/gpthistory/__pycache__/helpers.cpython-310.pyc b/gpthistory/__pycache__/helpers.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6bc330f06bc7b55acd7f229bf0ae73d1c6d35aff
GIT binary patch
literal 3082
zcmZ`*&2QYs73c74xzw&CuNBFP-GoW&MB7%9kpfL?I8IedZPgaC0S861s7uhC(TXcB
zxgJuAwV;X?a*+1mLoWrIqJVTXkUyY*%b>URP@Qx0#r?fmY4rhT2)_Ax^Jd<AzxQVH
zX45xN{`=Oy#U~BJ_z!i?7YCi6;4l6K!VS(+qcf^mhv~lAv2<;B9Mop&_G_IQGek|W
zo_AR1))C|ObJp=O>!cI?MyJ6>5}KW+XdD>eWxdJ6DNU!j3;r7Sj;ziV(d@PGU*&b~
zKR3bGJe(149I?(UpWuyWMrV#U`6TLVe2Pz_p66G13-z0Pme24vp0Un#&K?@=xnrE(
z*l085-XDl;WnE3)f3UW(vL3EHSPy@-_9<p+(I5%8#U7dyX&&*A=cUMYU$Q?8Czj%^
zs4PSz<E>$n+DXR6)1|oB9ky>SL`7UCebFw0#o*>bDt1MhMdN;76h+j<Y}-_ho>cB;
zF8fg#Gdk5tu~EDD@8K_2LAu61tBgaK{D|$Fl_`VDM9)Oesx0&@^t5reX^tH-J>qnY
zBk(YzBE{X|_x%86?P)2axC~11v<wE3EQ?@M=KX+l33yaSOPWp9PEM<8ah^f_QZ-H*
zVYxRD$|eEI(GsAK)fJ_3F(fkHwv|nilutaNW|4I$A_-wH4B0~U^7e8+E0-fFqwS<v
zE)r43Tgy@m@?yC=D7TWL%;nzs=9WkYLKaJdJ=KC-(jq+D;7!OZCP8f5XEyU$3!ly8
z42EJ8<g{355SC;7#S+M$j1s0TVd4W;u>)(>cr*bs8x>4lu{q;&bKdw3u=%9UO&HP|
zzNanpaF8Zt5M=?Wuoom*nFo*I$}LfhOeo2z8{}&lWnH1HRAg;SIhtSjnl~(xp-?Q*
zCK-{dSfyG8Ne@YAcp~(DI<E&}{&B{hVfHIYDqlbjsY&S3$axEm(&(`sROvF#xcS@!
zj#$s)*1XZPp^82HNUQR5kqH@<g0vC+$Aa@D>jr>k5Cu<mgxm{u3K|(jOn^ukY4~!M
z?7ui(vKaAb@z$*mUO?w08|(ld{hW(bTcYhKH%=2ml0RNiU`z^ZEvIo@RUc1~+4R=R
zlcE^pSs^al=vw#W*r8?&$7`1%svtYG>;{uJ(YRnGimh>c{S6(8uhJj@aCTrGFaY4)
z-<kX7zEzrs76%NtHD~<8_@|Dh((XAux3aj+9az-nF0VZ^D!W%(1%URQidD{skocrt
zdVmw--nK1QwImt=&k+)s?)xJS>QjvBv59fSHijz~y)#x2Aq(Fi9Cr~9NI|g9H+bln
zL%-oWCk`ebuATaKBt(Pjc5qBsmDh>f0J&hVQ&c^~L_y)yH7etgOIYH@yzy7_355ld
z@6u`tZTQYyx}_|ce|Fht{+$$o9q0YQ-H*ZfHOZbah6`|It|55l#bW7Bn#WOE+(qMb
zkw((+`n?OPFWlSO$+nAj@KJD+52wy)s0Y5akq4Rh?2^5>C`DWf-mU|6*-l^Yu0u+>
zY2NJ$sq7?!o!=(40wV7aA)LxtkhZIu7hEu6n#YuKv{~hMsNF34gEOrz8)W8nntzCO
z#WfJyo@Fy^8lTS^y0$I<6|&j~m^}U;Qdx|DTySmc_@}YrE0EH_T>w<i8;9nRLH9*z
z5hnL-%-EGx*+(W2>u_`1*>@|aa_5cxT4nUm;?^b-AFcZfWatrlJVKJ)s)h^N!RLOC
zjKZDDq*S@@ePL|si~E^Zc{(QQUoec+xi=p1t6JrOccQ9Sel@Xa0YU!oryB3~qBPz~
zDIJyhASjbE6(g)hK{weI*@cvJ7855)H-OsRFaJ*E_+v>H!3~DHEll{v@hg<qVw;lg
z*XObQXGG*Fw$FU_!juc(l;mW*4z6q_DN>i<;mHogh?S(t?}9v{#`i#m3qbNns}Nd+
zBduPC@NYbx{2>XW_!<!c3C=JzKEp`nr4#&~_~``gnliJ2oI_K&Wlj;LQ=_u*y20I)
z$9VV1ZjsBf-B2dql;0*+hfbTRsj;2I(e*WTDd#F5V!|TMk*$^8kBV(|b$qgOQ<Jm-
z42EY@5`)z{wVZ0`olY#HrboTAm0GX1DSt!?{g?<@S<Sr8uIrf5@zZiV)-XNG)31RW
zR?BKJkNKwjB{-0>en8a?W%r6aQ)Z6aXb@#QDwH|k%0X5b?8)1-jho}K7$uVOm8#(}
zn8-Yn^xV+-LAf|6MkPHgRE_cna;>rlQMm=_d6MDIdrGo*aE~c}ePea)x8Z}8Pkx~$
z9!EtIBcW|3Sg^kFv-_%!x74pkDaZ>$dO(JuT%^4Sp*lxt*Xb;vRXGO>4G?V_?+z~C
nomAYV++Mr~;u#*)he=q=Y@l|nYmRNX%zfosFjCV)nQZ+Zcf=zE

literal 0
HcmV?d00001

diff --git a/gpthistory/gpthistory.py b/gpthistory/gpthistory.py
index 8adc06e..c9570d6 100644
--- a/gpthistory/gpthistory.py
+++ b/gpthistory/gpthistory.py
@@ -1,107 +1,110 @@
-import click
+import typer
 import json
 import os
 import pandas as pd
 import logging
-from gpthistory.helpers import extract_text_parts, generate_embeddings, calculate_top_titles
+from gpthistory.helpers import (
+    extract_text_parts,
+    generate_embeddings,
+    calculate_top_titles,
+)
+
+main = typer.Typer()
 
 # Define the path to the index file in the user's home directory
-INDEX_PATH = os.path.join(os.path.expanduser('~'), '.gpthistory', 'chatindex.csv')
+INDEX_PATH = os.path.join(os.path.expanduser("~"), ".gpthistory", "chatindex.csv")
 
 # Configure the logger
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
-@click.group()
-def main():
-    """
-    Simple CLI for searching within a chat data
-    """
-    pass
 
 @main.command()
-@click.option('--file', type=click.Path(exists=True), help='Input file')
-def build_index(file):
+def build_index(file: typer.FileText):
     """
-    Build an index from a given chat data file
+    Build an index from a given chat data file
     """
-    # TODO: Implement index building
-    # Write the index to the predefined path
     # Make sure the directory exists
     os.makedirs(os.path.dirname(INDEX_PATH), exist_ok=True)
-    
+
     # Load the chat data from the given file
-    with open(file) as f:
-        data = json.load(f)
-    
+    data = json.load(file)
+
     chat_ids = []
     section_ids = []
     texts = []
     for entry in data:
-        for k, v in entry['mapping'].items():
+        for k, v in entry["mapping"].items():
             text_data = extract_text_parts(v)
-            if len(text_data) > 0 and text_data[0] != '':
+            if len(text_data) > 0 and text_data[0] != "":
                 # Add relevant chat information to the index
-                chat_ids.append(entry['id'])
+                chat_ids.append(entry["id"])
                 section_ids.append(k)
                 texts.append(text_data[0])
     logger.info(f"Index built and stored at: {INDEX_PATH}")
     logger.info(f"Conversations indexed: {len(chat_ids)}")
-    df = pd.DataFrame({'chat_id': chat_ids, 'section_id': section_ids, 'text': texts})
-    df = df[~df.text.isna()] 
-    df['id'] = df['chat_id']
+    df = pd.DataFrame({"chat_id": chat_ids, "section_id": section_ids, "text": texts})
+    df = df[~df.text.isna()]
+    df["id"] = df["chat_id"]
     df.set_index("id", inplace=True)
 
     # Handle incremental index updates
-    current_df = pd.DataFrame()    
+    current_df = pd.DataFrame()
     rows_only_in_df = pd.DataFrame()
     incremental = False
     if os.path.exists(INDEX_PATH):
         incremental = True
-        current_df = pd.read_csv(INDEX_PATH, sep='|')
-        current_df['id'] = current_df['chat_id']
+        current_df = pd.read_csv(INDEX_PATH, sep="|")
+        current_df["id"] = current_df["chat_id"]
         current_df.set_index("id", inplace=True)
         # Use merge with indicator=True to find rows present in one DataFrame but not the other
-        merged_df = df.merge(current_df, how='outer', indicator=True)
+        merged_df = df.merge(current_df, how="outer", indicator=True)
         # Query rows only present in df1
-        rows_only_in_df = merged_df.query('_merge == "left_only"').drop(columns='_merge')
+        rows_only_in_df = merged_df.query('_merge == "left_only"').drop(
+            columns="_merge"
+        )
     else:
         rows_only_in_df = df
-    
+
     if incremental and len(rows_only_in_df) > 0:
         logger.info("Only generating embeddings for new conversations to save money.")
-    
+
     # Generate and add embeddings to the index
     embeddings = generate_embeddings(rows_only_in_df.text.tolist())
-    rows_only_in_df['embeddings'] = embeddings
+    rows_only_in_df["embeddings"] = embeddings
     final_df = pd.concat([rows_only_in_df, current_df])
     logger.info(f"Total conversations: {len(final_df)}")
-    final_df.to_csv(INDEX_PATH, sep='|', index=False)
+    final_df.to_csv(INDEX_PATH, sep="|", index=False)
+
 
 @main.command()
-@click.argument('keyword', required=True)
-def search(keyword):
+def search(keyword: str):
     """
     Search a keyword within the index
     """
-    # TODO: Implement search function
-    # Load the index from the predefined path
     logger.info("Searching for keyword: %s", keyword)
     if os.path.exists(INDEX_PATH):
-        df = pd.read_csv(INDEX_PATH, sep='|')
-        df['embeddings'] = df.embeddings.apply(lambda x: [float(t) for t in json.loads(x)])
+        df = pd.read_csv(INDEX_PATH, sep="|")
+        df["embeddings"] = df.embeddings.apply(
+            lambda x: [float(t) for t in json.loads(x)]
+        )
         filtered = df[df.text.str.contains(keyword)]
-        
+
         # Calculate top titles and their corresponding chat IDs
         chat_ids, top_titles, top_scores = calculate_top_titles(df, keyword)
-        
+
         for i, t in enumerate(top_titles):
             logger.info("%s: %s", chat_ids[i], t)
-            logger.info("ChatGPT Conversation link: https://chat.openai.com/c/%s", chat_ids[i])
+            logger.info(
+                "ChatGPT Conversation link: https://chat.openai.com/c/%s", chat_ids[i]
+            )
             logger.info("--------------------------------------")
     else:
-        click.echo("Index not found. Please build the index first.")
+        typer.echo("Index not found. Please build the index first.")
         return
 
+
 if __name__ == "__main__":
     main()
diff --git a/gpthistory/helpers.py b/gpthistory/helpers.py
index 56bf08f..003203a 100644
--- a/gpthistory/helpers.py
+++ b/gpthistory/helpers.py
@@ -2,7 +2,9 @@
 import os
 import pandas as pd
 import numpy as np
-import openai
+from openai import OpenAI
+
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
 from dotenv import load_dotenv
 import logging
 
@@ -10,43 +12,45 @@
 load_dotenv()
 
 # Set up OpenAI API key
-openai.api_key = os.environ.get('OPENAI_API_KEY')
 
 # Define the path to the index file in the user's home directory
-INDEX_PATH = os.path.join(os.path.expanduser('~'), '.chatsearch', 'chatindex.csv')
+INDEX_PATH = os.path.join(os.path.expanduser("~"), ".chatsearch", "chatindex.csv")
 
 # Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
 logger = logging.getLogger(__name__)
 
+
 def extract_text_parts(data):
     """
     Extract text parts from chat data.
     """
     text_parts = []
-    message = data.get('message')
+    message = data.get("message")
     if message:
-        content = message.get('content')
-        if content and content.get('content_type') == 'text':
-            text_parts.extend(content.get('parts', []))
+        content = message.get("content")
+        if content and content.get("content_type") == "text":
+            text_parts.extend(content.get("parts", []))
     return text_parts
 
+
 def split_into_batches(array, batch_size):
     """
     Split an array into batches.
     """
     for i in range(0, len(array), batch_size):
-        yield array[i:i + batch_size]
+        yield array[i : i + batch_size]
+
 
 def generate_query_embedding(query):
     """
     Generate an embedding for a query using OpenAI API.
     """
-    response = openai.Embedding.create(
-        input=[query],
-        model="text-embedding-ada-002"
-    )
-    return response['data'][0]['embedding']
+    response = client.embeddings.create(input=[query], model="text-embedding-ada-002")
+    return response.data[0].embedding
+
 
 def generate_embeddings(conversations):
     """
@@ -55,11 +59,8 @@ def generate_embeddings(conversations):
     embeddings = []
     for i, batch in enumerate(split_into_batches(conversations, 100)):
         logger.info(f"Generating Embeddings for batch: {i + 1}")
-        response = openai.Embedding.create(
-            input=batch,
-            model="text-embedding-ada-002"
-        )
-        tmp_embedding = [row['embedding'] for row in response['data']]
+        response = client.embeddings.create(input=batch, model="text-embedding-ada-002")
+        tmp_embedding = [row["embedding"] for row in response.data]
         embeddings += tmp_embedding
     if len(embeddings) > 0:
         logger.info("Conversations (Chunks) = %d", len(conversations))
@@ -68,12 +69,13 @@ def generate_embeddings(conversations):
         logger.info("No new conversations detected")
     return embeddings
 
+
 def calculate_top_titles(df, query, top_n=1000):
     """
     Calculate top titles for a given query using embeddings.
     """
     # Extract the embeddings from the DataFrame
-    embedding_array = np.array(df['embeddings'].tolist())
+    embedding_array = np.array(df["embeddings"].tolist())
     query_embedding = generate_query_embedding(query)
     # Calculate the dot product between the query embedding and all embeddings in the DataFrame
     dot_scores = np.dot(embedding_array, query_embedding)
@@ -81,8 +83,8 @@ def calculate_top_titles(df, query, top_n=1000):
     # Filter out titles with dot scores below the threshold
     mask = dot_scores >= 0.8
     filtered_dot_scores = dot_scores[mask]
-    filtered_titles = df.loc[mask, 'text'].tolist()
-    filtered_chat_ids = df.loc[mask, 'chat_id'].tolist()
+    filtered_titles = df.loc[mask, "text"].tolist()
+    filtered_chat_ids = df.loc[mask, "chat_id"].tolist()
 
     # Sort the filtered titles based on the dot scores (in descending order)
     sorted_indices = np.argsort(filtered_dot_scores)[::-1][:top_n]
diff --git a/setup.py b/setup.py
index 93a0f68..2d91494 100644
--- a/setup.py
+++ b/setup.py
@@ -1,22 +1,16 @@
 from setuptools import setup, find_packages
 
 setup(
-    name='gpthistory',
-    version='0.3',
-    description='A tool for searching through your chatgpt conversation history',
-    author='Shrikar Archak',
-    author_email='shrikar84@gmail.com',
+    name="gpthistory",
+    version="0.3",
+    description="A tool for searching through your chatgpt conversation history",
+    author="Shrikar Archak",
+    author_email="shrikar84@gmail.com",
     packages=find_packages(),
     include_package_data=True,
-    install_requires=[
-        'Click',
-        'python-dotenv',
-        'openai',
-        'pandas',
-        'numpy'
-    ],
-    entry_points='''
+    install_requires=["typer", "python-dotenv", "openai", "pandas", "numpy"],
+    entry_points="""
         [console_scripts]
         gpthistory=gpthistory.gpthistory:main
-    ''',
+    """,
 )

From 40603bb639cf45b4410efac0e0aa411d69792200 Mon Sep 17 00:00:00 2001
From: Tony Okeke <tonykabilanokeke@gmail.com>
Date: Mon, 26 Feb 2024 00:22:35 -0500
Subject: [PATCH 4/7] fix key leak

---
 .env                                             |   1 -
 .gitignore                                       |   3 ++-
 gpthistory/__pycache__/__init__.cpython-310.pyc  | Bin 152 -> 0 bytes
 .../__pycache__/gpthistory.cpython-310.pyc       | Bin 3048 -> 0 bytes
 gpthistory/__pycache__/helpers.cpython-310.pyc   | Bin 3082 -> 0 bytes
 5 files changed, 2 insertions(+), 2 deletions(-)
 delete mode 100644 .env
 delete mode 100644 gpthistory/__pycache__/__init__.cpython-310.pyc
 delete mode 100644 gpthistory/__pycache__/gpthistory.cpython-310.pyc
 delete mode 100644 gpthistory/__pycache__/helpers.cpython-310.pyc

diff --git a/.env b/.env
deleted file mode 100644
index a390e60..0000000
--- a/.env
+++ /dev/null
@@ -1 +0,0 @@
-OPENAI_API_KEY="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"
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index a2a8dea..d50a09f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-.env/
+.env
+__pycache__/
diff --git a/gpthistory/__pycache__/__init__.cpython-310.pyc b/gpthistory/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index c7e5b61ee4a144c6d9cf2c3d3062a8e7eacd0cbc..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 152
zcmd1j<>g`kg8NmsQ$h4&5P=LBfgA@QE@lA|DGb33nv8xc8Hzx{2;!Huer{fgeqvEk
zVs>V+esN}MNpgmMQEEYcv3`0%Nk(RINq$i!GBZ9tGcU6wK3=b&@)n0pZhlH>PO2Tq
L&|)Sa!NLFl32!91

diff --git a/gpthistory/__pycache__/gpthistory.cpython-310.pyc b/gpthistory/__pycache__/gpthistory.cpython-310.pyc
deleted file mode 100644
index 2d1fd228ab56d8520127ce9d9afd7c8ee66725b7..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3048
zcma)8&2JmW72nw}E|;ICBwMl(bYVbAh$K`{6lkl;ZW60<4HU9l324D2Al5rWa;g2G
zXNLM9mPG;iszc9h0rD82mtK3yAJD&G&|6PA_vRkd_lAlk6$#KK=Iy-Md2i-@{T}mL
z%{Sor<eh)Ay}Dui3z?gb1I#Bd#NUBXgObGPF5jd}G;elI{kFQcemh+U-e%&arEV$p
zx?WoDmWjcPxxX;F6=sgA^O~l0YAp!0sk5-U4d#rRFz!&7mM%=#@620lWkI^DtTJC)
z7}R6N=x*PnW$M4S&q?>*)p~`k!)le*^y&s%q4gt^f6glB27&t?=+SO`1NS%f+m>pv
zQ^~_f_9RSt<B&_CRt79%Jd~`*(wB_VI2#DH8ih$TNpMx><DQIV!o(Z$`)Q>!7|UTS
zWX{i~wP+a1IHT;e6N%$#d-MHJL^4iUTLd2ko9`#=m?c^GmQR@w;Q+q2P37p1%I)Vo
z4W+XCal#^k7rlC#@OEK5fg%0@U!*Z2FwD6nZSWnTCi+S`bGI-SWJU(Y%!FaVunV$l
zeEIRrDI98T81vG?0RFu%%$YkYNv|lKlbI*Wg?A1yII{WE!lU+vL7gKD=H}c7Id=oJ
z5~HwybM?%Incj!otKikTtjw#lB<(*Le=%`Q^QhN6CvVqk^Lk+xR$<ffpCRV6asj_l
zW8PdC4P(PVp86XGP<IEY&H_R!+BT3~7&Bik-xXS&Rka6N=mGvWN`SA8R-{d9w0>dE
z{KD6|e67pBsS7l(!q*1)t$AV1YDI0nM(-@h=q_#1l?yVfWM)yGRf;PA8Z4+>dFE?v
z)2g=aUQvO2uP+SnUcIOn6}q-#z}+^AO5qQ{1G9$SeHY{#*B&Suqx<@XReJBjg1lTm
zFxnf_pZEbhpH1R~24NQHGz$7WPlGTR#K$ZPFuMX8$}qs>2u@E=wa8(cD36^&4)*dR
zRZ7G07;=_l?kGc<F`YhmhB+C$1l<yJ(*V<z!7*|>nBIGmXUB{S$lN>=%X1ms4xl_r
zcw#T6$`=gsIL~lyW3BKSXoqu4aW+oEi0!>0#aBdGf{m4vPbA}z($+9PQDrbBjzTza
zy4Fh>AF$xjqu^n}`f%MWIeVzw<uYoE@??@`V*2yv@Fln^L@*|}F2w)}B*@rF5Z$`A
z%mWb~vmnhgcGgk;wZL@kFqdI+dq0?G@)h)uqaD59exS@;C@&3<7>&75_BfP7Rf0Jd
ztn#1j@BZR<y%(Pz{!-Z^k!Q+I@{oeKWU1gLs<IN6DHq(&GOFAp9}E~*cAWKdWsa#T
z??UMIIB4Sy+-}Dr3sqS#a52V5xjKk&3C=>=0|Zfyc9?QrO&C8@Hs$%4TPW+wJb{2K
z7rX$@MIHs(P3~#cm?g@)3ZGDw>-Y)fU;yF8G6_n_ks`;ctN~C<nY6F`Xu>&s@4>vm
z^An*%1=qxdN}NTUrSLgSR9WAU!aCo;lf8bNg$b}<8T?aNc=Kp0&E!_dd3Y3yEfF&r
z4YxQO=VI#`Ilp__8K0>NCM{h$XdU#47eMa1zGa&>v57}kVXTw7>61D!|6smpmR3oN
zG)WbB&+=eT6-Hh2P183`;$7M{%t#%+)<J%iaKuzqI$~!hIj6T!7v1?5b^i*CH0FS{
zfUt<Q=E$1c(gEywF5OW{I@F{V;0`Is$eWk3pS=duEQrP*T=hqQk%-TLbf&Bopy}NF
z!kSq#d*;Y`0o@Fc`Ti9$<5?xZO2FQtv;eHor+dp4eFNN4*7*BFS~)Nz;3)NxN3LC+
zexM=ZfQ3970+3vp7@Wk=w6Z`B*|OmEuIa{d9~K-|+12LlU{mlrU^;IBx#b<i7#qd`
z46zCXyU=TWOSEwpkP&WS?cWl@TDc`OFbd;KcLvR@Am`+d<}34HnLGdqP9Mrl0+^Vc
zj}?(Rr<UY1P^0`EEQ2Ud$B#b;{@<v3)tG)Lw85R7B!|W=9>XL|U(ygKZ&$t*1SAMh
zmxBc@;7oG*zsGpr%HtvY8IbAv_Z^|@VCrrP^xO2uPXI!nzBmkSqHK`F+0k||lyWS#
zx3;iMJ2{|67<a&wt!N7b-~H(SdZwKvwr9D7m`yU;30@>D6ilN%y^b*S3@&76zkP>q
zq7m;Rd4S{{Bp)F80h0HSe27FBv;%mSoGlT{5|RU|<WTu>CO8IBm030c41j+|04!%?
zL_8J^^R~x#QOD144}d}fErC(~BV_&SzfjA$h@c$_MPmLGWe_)R#MoVW`ZusD9s;R4
zSlcfAyvvr=w3=p1BY}Ncwe0`+C7bZy!ZzjgdMVtq*K6PBzebe-5;R%4W9WjR(6*`)
zA~oc^YFz_TXUGz;WeMyWa+lURhf5Ptv349ZgFMcl-<+b#iC|oN5pXLMaRfo{#{*?Q
z+uwVx$^#~UwQLG-HS8+Ao{k0fy)*@J<?TUZKg9luh{X4ipe;9CqMf62-`h!ZI!V}L
ltU`hD^b8Ns5PyJ*?z&}L;H697a;+9|f7&G8eV^Eu{{=c$O0)m~

diff --git a/gpthistory/__pycache__/helpers.cpython-310.pyc b/gpthistory/__pycache__/helpers.cpython-310.pyc
deleted file mode 100644
index 6bc330f06bc7b55acd7f229bf0ae73d1c6d35aff..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3082
zcmZ`*&2QYs73c74xzw&CuNBFP-GoW&MB7%9kpfL?I8IedZPgaC0S861s7uhC(TXcB
zxgJuAwV;X?a*+1mLoWrIqJVTXkUyY*%b>URP@Qx0#r?fmY4rhT2)_Ax^Jd<AzxQVH
zX45xN{`=Oy#U~BJ_z!i?7YCi6;4l6K!VS(+qcf^mhv~lAv2<;B9Mop&_G_IQGek|W
zo_AR1))C|ObJp=O>!cI?MyJ6>5}KW+XdD>eWxdJ6DNU!j3;r7Sj;ziV(d@PGU*&b~
zKR3bGJe(149I?(UpWuyWMrV#U`6TLVe2Pz_p66G13-z0Pme24vp0Un#&K?@=xnrE(
z*l085-XDl;WnE3)f3UW(vL3EHSPy@-_9<p+(I5%8#U7dyX&&*A=cUMYU$Q?8Czj%^
zs4PSz<E>$n+DXR6)1|oB9ky>SL`7UCebFw0#o*>bDt1MhMdN;76h+j<Y}-_ho>cB;
zF8fg#Gdk5tu~EDD@8K_2LAu61tBgaK{D|$Fl_`VDM9)Oesx0&@^t5reX^tH-J>qnY
zBk(YzBE{X|_x%86?P)2axC~11v<wE3EQ?@M=KX+l33yaSOPWp9PEM<8ah^f_QZ-H*
zVYxRD$|eEI(GsAK)fJ_3F(fkHwv|nilutaNW|4I$A_-wH4B0~U^7e8+E0-fFqwS<v
zE)r43Tgy@m@?yC=D7TWL%;nzs=9WkYLKaJdJ=KC-(jq+D;7!OZCP8f5XEyU$3!ly8
z42EJ8<g{355SC;7#S+M$j1s0TVd4W;u>)(>cr*bs8x>4lu{q;&bKdw3u=%9UO&HP|
zzNanpaF8Zt5M=?Wuoom*nFo*I$}LfhOeo2z8{}&lWnH1HRAg;SIhtSjnl~(xp-?Q*
zCK-{dSfyG8Ne@YAcp~(DI<E&}{&B{hVfHIYDqlbjsY&S3$axEm(&(`sROvF#xcS@!
zj#$s)*1XZPp^82HNUQR5kqH@<g0vC+$Aa@D>jr>k5Cu<mgxm{u3K|(jOn^ukY4~!M
z?7ui(vKaAb@z$*mUO?w08|(ld{hW(bTcYhKH%=2ml0RNiU`z^ZEvIo@RUc1~+4R=R
zlcE^pSs^al=vw#W*r8?&$7`1%svtYG>;{uJ(YRnGimh>c{S6(8uhJj@aCTrGFaY4)
z-<kX7zEzrs76%NtHD~<8_@|Dh((XAux3aj+9az-nF0VZ^D!W%(1%URQidD{skocrt
zdVmw--nK1QwImt=&k+)s?)xJS>QjvBv59fSHijz~y)#x2Aq(Fi9Cr~9NI|g9H+bln
zL%-oWCk`ebuATaKBt(Pjc5qBsmDh>f0J&hVQ&c^~L_y)yH7etgOIYH@yzy7_355ld
z@6u`tZTQYyx}_|ce|Fht{+$$o9q0YQ-H*ZfHOZbah6`|It|55l#bW7Bn#WOE+(qMb
zkw((+`n?OPFWlSO$+nAj@KJD+52wy)s0Y5akq4Rh?2^5>C`DWf-mU|6*-l^Yu0u+>
zY2NJ$sq7?!o!=(40wV7aA)LxtkhZIu7hEu6n#YuKv{~hMsNF34gEOrz8)W8nntzCO
z#WfJyo@Fy^8lTS^y0$I<6|&j~m^}U;Qdx|DTySmc_@}YrE0EH_T>w<i8;9nRLH9*z
z5hnL-%-EGx*+(W2>u_`1*>@|aa_5cxT4nUm;?^b-AFcZfWatrlJVKJ)s)h^N!RLOC
zjKZDDq*S@@ePL|si~E^Zc{(QQUoec+xi=p1t6JrOccQ9Sel@Xa0YU!oryB3~qBPz~
zDIJyhASjbE6(g)hK{weI*@cvJ7855)H-OsRFaJ*E_+v>H!3~DHEll{v@hg<qVw;lg
z*XObQXGG*Fw$FU_!juc(l;mW*4z6q_DN>i<;mHogh?S(t?}9v{#`i#m3qbNns}Nd+
zBduPC@NYbx{2>XW_!<!c3C=JzKEp`nr4#&~_~``gnliJ2oI_K&Wlj;LQ=_u*y20I)
z$9VV1ZjsBf-B2dql;0*+hfbTRsj;2I(e*WTDd#F5V!|TMk*$^8kBV(|b$qgOQ<Jm-
z42EY@5`)z{wVZ0`olY#HrboTAm0GX1DSt!?{g?<@S<Sr8uIrf5@zZiV)-XNG)31RW
zR?BKJkNKwjB{-0>en8a?W%r6aQ)Z6aXb@#QDwH|k%0X5b?8)1-jho}K7$uVOm8#(}
zn8-Yn^xV+-LAf|6MkPHgRE_cna;>rlQMm=_d6MDIdrGo*aE~c}ePea)x8Z}8Pkx~$
z9!EtIBcW|3Sg^kFv-_%!x74pkDaZ>$dO(JuT%^4Sp*lxt*Xb;vRXGO>4G?V_?+z~C
nomAYV++Mr~;u#*)he=q=Y@l|nYmRNX%zfosFjCV)nQZ+Zcf=zE


From 4038a50cd8f8bed7607cf8781f086525c11574bf Mon Sep 17 00:00:00 2001
From: Tony Okeke <tonykabilanokeke@gmail.com>
Date: Mon, 26 Feb 2024 00:25:41 -0500
Subject: [PATCH 5/7] remove .env

---
 .env | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 .env

diff --git a/.env b/.env
deleted file mode 100644
index a390e60..0000000
--- a/.env
+++ /dev/null
@@ -1 +0,0 @@
-OPENAI_API_KEY="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"
\ No newline at end of file

From 5adbc26919b19535b2c606236ecfa2bdb07cf91f Mon Sep 17 00:00:00 2001
From: Tony Okeke <tonykabilanokeke@gmail.com>
Date: Mon, 26 Feb 2024 00:26:21 -0500
Subject: [PATCH 6/7] fix typo

---
 .../__pycache__/helpers.cpython-310.pyc       | Bin 3067 -> 3082 bytes
 gpthistory/helpers.py                         |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/gpthistory/__pycache__/helpers.cpython-310.pyc b/gpthistory/__pycache__/helpers.cpython-310.pyc
index 36d2d91438589b7b190375ee38d6c080771841c5..a6a1f8cf61d3e0fd3ab51b17dc96e55d5f40669e 100644
GIT binary patch
delta 1362
zcmZ8g&2QX96rbm>wY_$-**N>r4@gT>vQ!k5N(4}q5|Ky*L>nbsSX5Tqo$1D|*WQes
zqEs7&YI{NoQe_FLoU#{0B_xD;;lP<Q=TZNGUbt}VA-wUb3(AcAoA>7ZX8b<py{vv$
zb7R-F368JkpR+F>yXWEe$5$>NDxuLUGz&mjfk9v%TT~qZ)sCQPQ$qEy*mIf=5R@Tw
zn=W&P1i27Q9Z%<2q6YE{)EueJ8RiCM{AQ^|?IQ(w?y<+_M$oL#B6S{-W|g{h3fCH)
zrX^hKTXcq&Q8+^@)T48cpm~<UHfdB}NLOz*Kp40BELz<VQ@1wPZ?0~%RyQ|VpRIq1
zmO{Jlx9+ih6pLZprY#z$EV}<phZS%wJkDjjPYEw)o62_m3@XVKf~MJ^femGx8gMWB
zL;0y#mi!!Mvz}V5yeZixg7U4Wd7zzec|Ln+&1Ju;`lZ)UNl5^KG9X=`K$Roq00t1K
zRIQUh!w|IWxw>-*NNniB=!8CtQem^+9Y(1ibram<terAmLW-~^hK3^a-95(lg~?gc
zkE4W*9UFTm$+a$v7{{}%oY<P2eFv?HhEUcV;0uIYBOU3%t6`uZzmM@ten9Aw14u~<
z1FY{*8NwR5eKC8fEo}vbH8O+)eE<V}8A-cVY7Srs)V!zhwNyD)soX)%2n(I-_Lw2{
z0oE|zD~@2SHJt3BzH6PufAVt(c_vH7{6`n%jtmXsR<6D)R33kGI$ryF=qG6>?)6u0
zAoCvu!6OeAF%5AR8w=SJW9!hu97cP+JTG~%gb~KwF5^P;qq{MGUH)iZ!UYL3>%4-{
zFofHQqx*~}?bOF~k4bej3H?qx?d<SNQg+i`|G!M1cF|jw&WwvPDMzx52a_6trd7a$
z5>6YOeAQI@1Yh6Mv!4rfn4U*Y_s1u4ytu)g>@RbDZSo|aDtr;`T)Kphedewo;<I72
z)!AvMEuSW1XJ#emEri=rxPtItG5gK-vYvHy`#rfgxsgl9@T%#_RjHqb?DR!nzq+VV
z5K7eNRn&!%#<CiGA^Y1p4{v0S{e5{{KmNYte;{Et`^B!qM)s$DfqjY_4dB=0a!zPL
z5=TOb@%{JP5p5?z=~JO&<@)>lL*xh}#*T0MTvk~Wu%mq*M_hJpUPEDw`<&~!UDg=e
zDf^`OdGNO6coJl@%J)zBk_73(|Dw<HAk!tx>%Exng=|H3Npcy%Bqqq|Kv{8cHPo7}
RsRkG)wrZ-TYvPzH{|iUlIXeIV

delta 1302
zcmZ8g&2Jk;6rZ=d-Y<J?*V)8&QA*N?!m3h4i~507QdQKdB1DOlUT{H+>zSBM*SpT_
zrl^slRJbRk9;&$jr}$E-7eMM?z#qUFF&sGe%#BL4Z`P`*nAN;_@6GS+ym`NQ-?d&e
zz0~tug5$4)KiTN9w+z2LK6~Z}VzX;fLbcc$+Fct6>JU3!huISXgu$+RT&DUIm>E{k
z@{X&8tWjeMv_#FR-mNod_XN%cwW$3_!zkx?f%#MDE>efO4@q~4mT3joCiQ3)*A}hO
zI<D;t)Ta%)@DREuDQuHq@wrOzW&pArr#%|dG-t^@S^aFQbMx9}cx`Jl{G{{wv(vBv
zo`pzmqj*YqT@1BtSQ5W#O=yTev?oV(wOfc2sL4h)ilaP?k~|IX^z#1KEIW<<OvP&t
ze8UIhC2yh82NbV|hKhmnW1Iy(C%dq#jmZEgQ0<X+2xFjnn@|I1sZH<-=D|nq%8K|$
z?{8HxjH~}ihYjyAN~2^D=+aC_Ea^qk>AZGIv(H&CXMrY5d;5&vmo{hFC`~dpyTj}v
z;wNKyF;KZECQZWt4a|S*2454>SZ;lf&H*`uoa8V@8YkKWu9Gj<M6GoG7PZ^tI+?&>
zX$<4iC4}ucxiy9fQ0K0}JGpkOQx`pmnIeaUU7jJ7Jw|#8Uu6n2EAWIU?U`*nm7f%@
zdHL$HQdO#W4{iN@sq^%kH}l6G>0XJWEbpho(Z&Yaf2$Jv<lx-_OT@S4JBF&6cw%lJ
z*;uq>f5<r2rC>&yaXJ_<E{!PJN%<)R^0$>(QNl+Nn9}Q~$vwuiULIlfq&u&B#+!(c
zrGk8}xR+i&9EEeh{}SDh3z$-I`hS@01B}bm$%e56KA1>=qM%{8P=PvrhVH&JfS-Hz
zv(<*$IwA)@{-DP5tK1d8+nvVzVLsLP8QkYhvF^0}`BUZZq4T!d2*hpY#?b|(&V%?x
zG_gHuJ5h{vVKmI6JZ9Mpp`LmX|3wk6pqA8<5nn`Ant7^f&)1ZpV{1u0PW#g8@j;gI
zJg7*G?(kK$TguZ>n8+F%idFYPbH*vZgj@Ww5=Bq>;w#688{#*2)!RgG0Q|gK-V!g|
zQx`u}I8`cX&bpW@s->k!#6FM~Hg?3*gjWi8Zxn@h*?qBH{&eT83T-H%I<m-@pHrGT
s<&9d*YbNb0!<6pFY(sTh(Wy4EVfIyUHTA0A((Aelwq{ps%XX{&D}_NShX4Qo

diff --git a/gpthistory/helpers.py b/gpthistory/helpers.py
index 156d29a..003203a 100644
--- a/gpthistory/helpers.py
+++ b/gpthistory/helpers.py
@@ -52,7 +52,7 @@ def generate_query_embedding(query):
     return response.data[0].embedding
 
 
-def genoerate_embeddings(conversations):
+def generate_embeddings(conversations):
     """
     Generate embeddings for conversations using OpenAI API.
     """

From 00316b0adc4c388dcca6aa0abbb040be9ab9ea48 Mon Sep 17 00:00:00 2001
From: Tony Okeke <tonykabilanokeke@gmail.com>
Date: Mon, 26 Feb 2024 15:24:54 -0500
Subject: [PATCH 7/7] feat: update code to work with OpenAI v1.0.0 API

feat: make search more flexible by exposing topk and thr parameters
---
 gpthistory.egg-info/PKG-INFO                  |  12 ++--
 gpthistory.egg-info/entry_points.txt          |   6 +-
 gpthistory.egg-info/requires.txt              |   7 ++-
 .../__pycache__/__init__.cpython-310.pyc      | Bin 152 -> 0 bytes
 .../__pycache__/gpthistory.cpython-310.pyc    | Bin 3048 -> 0 bytes
 .../__pycache__/helpers.cpython-310.pyc       | Bin 3082 -> 0 bytes
 gpthistory/gpthistory.py                      |  56 ++++++++++-------
 gpthistory/helpers.py                         |  58 ++++++++++++------
 setup.py                                      |   9 ++-
 9 files changed, 94 insertions(+), 54 deletions(-)
 delete mode 100644 gpthistory/__pycache__/__init__.cpython-310.pyc
 delete mode 100644 gpthistory/__pycache__/gpthistory.cpython-310.pyc
 delete mode 100644 gpthistory/__pycache__/helpers.cpython-310.pyc

diff --git a/gpthistory.egg-info/PKG-INFO b/gpthistory.egg-info/PKG-INFO
index da0adaa..af91d66 100644
--- a/gpthistory.egg-info/PKG-INFO
+++ b/gpthistory.egg-info/PKG-INFO
@@ -2,12 +2,12 @@ Metadata-Version: 2.1
 Name: gpthistory
 Version: 0.3
 Summary: A tool for searching through your chatgpt conversation history
-Home-page: UNKNOWN
 Author: Shrikar Archak
 Author-email: shrikar84@gmail.com
-License: UNKNOWN
-Platform: UNKNOWN
 License-File: LICENSE.md
-
-UNKNOWN
-
+Requires-Dist: typer
+Requires-Dist: python-dotenv
+Requires-Dist: openai
+Requires-Dist: pandas
+Requires-Dist: numpy
+Requires-Dist: loguru
diff --git a/gpthistory.egg-info/entry_points.txt b/gpthistory.egg-info/entry_points.txt
index d13f257..ce90c75 100644
--- a/gpthistory.egg-info/entry_points.txt
+++ b/gpthistory.egg-info/entry_points.txt
@@ -1,4 +1,2 @@
-
-        [console_scripts]
-        gpthistory=gpthistory.gpthistory:app
-    
\ No newline at end of file
+[console_scripts]
+gpthistory = gpthistory.gpthistory:main
diff --git a/gpthistory.egg-info/requires.txt b/gpthistory.egg-info/requires.txt
index 2cbb3f1..2f2931b 100644
--- a/gpthistory.egg-info/requires.txt
+++ b/gpthistory.egg-info/requires.txt
@@ -1,5 +1,6 @@
-numpy
+typer
+python-dotenv
 openai
 pandas
-python-dotenv
-typer
+numpy
+loguru
diff --git a/gpthistory/__pycache__/__init__.cpython-310.pyc b/gpthistory/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index c7e5b61ee4a144c6d9cf2c3d3062a8e7eacd0cbc..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 152
zcmd1j<>g`kg8NmsQ$h4&5P=LBfgA@QE@lA|DGb33nv8xc8Hzx{2;!Huer{fgeqvEk
zVs>V+esN}MNpgmMQEEYcv3`0%Nk(RINq$i!GBZ9tGcU6wK3=b&@)n0pZhlH>PO2Tq
L&|)Sa!NLFl32!91

diff --git a/gpthistory/__pycache__/gpthistory.cpython-310.pyc b/gpthistory/__pycache__/gpthistory.cpython-310.pyc
deleted file mode 100644
index 2d1fd228ab56d8520127ce9d9afd7c8ee66725b7..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3048
zcma)8&2JmW72nw}E|;ICBwMl(bYVbAh$K`{6lkl;ZW60<4HU9l324D2Al5rWa;g2G
zXNLM9mPG;iszc9h0rD82mtK3yAJD&G&|6PA_vRkd_lAlk6$#KK=Iy-Md2i-@{T}mL
z%{Sor<eh)Ay}Dui3z?gb1I#Bd#NUBXgObGPF5jd}G;elI{kFQcemh+U-e%&arEV$p
zx?WoDmWjcPxxX;F6=sgA^O~l0YAp!0sk5-U4d#rRFz!&7mM%=#@620lWkI^DtTJC)
z7}R6N=x*PnW$M4S&q?>*)p~`k!)le*^y&s%q4gt^f6glB27&t?=+SO`1NS%f+m>pv
zQ^~_f_9RSt<B&_CRt79%Jd~`*(wB_VI2#DH8ih$TNpMx><DQIV!o(Z$`)Q>!7|UTS
zWX{i~wP+a1IHT;e6N%$#d-MHJL^4iUTLd2ko9`#=m?c^GmQR@w;Q+q2P37p1%I)Vo
z4W+XCal#^k7rlC#@OEK5fg%0@U!*Z2FwD6nZSWnTCi+S`bGI-SWJU(Y%!FaVunV$l
zeEIRrDI98T81vG?0RFu%%$YkYNv|lKlbI*Wg?A1yII{WE!lU+vL7gKD=H}c7Id=oJ
z5~HwybM?%Incj!otKikTtjw#lB<(*Le=%`Q^QhN6CvVqk^Lk+xR$<ffpCRV6asj_l
zW8PdC4P(PVp86XGP<IEY&H_R!+BT3~7&Bik-xXS&Rka6N=mGvWN`SA8R-{d9w0>dE
z{KD6|e67pBsS7l(!q*1)t$AV1YDI0nM(-@h=q_#1l?yVfWM)yGRf;PA8Z4+>dFE?v
z)2g=aUQvO2uP+SnUcIOn6}q-#z}+^AO5qQ{1G9$SeHY{#*B&Suqx<@XReJBjg1lTm
zFxnf_pZEbhpH1R~24NQHGz$7WPlGTR#K$ZPFuMX8$}qs>2u@E=wa8(cD36^&4)*dR
zRZ7G07;=_l?kGc<F`YhmhB+C$1l<yJ(*V<z!7*|>nBIGmXUB{S$lN>=%X1ms4xl_r
zcw#T6$`=gsIL~lyW3BKSXoqu4aW+oEi0!>0#aBdGf{m4vPbA}z($+9PQDrbBjzTza
zy4Fh>AF$xjqu^n}`f%MWIeVzw<uYoE@??@`V*2yv@Fln^L@*|}F2w)}B*@rF5Z$`A
z%mWb~vmnhgcGgk;wZL@kFqdI+dq0?G@)h)uqaD59exS@;C@&3<7>&75_BfP7Rf0Jd
ztn#1j@BZR<y%(Pz{!-Z^k!Q+I@{oeKWU1gLs<IN6DHq(&GOFAp9}E~*cAWKdWsa#T
z??UMIIB4Sy+-}Dr3sqS#a52V5xjKk&3C=>=0|Zfyc9?QrO&C8@Hs$%4TPW+wJb{2K
z7rX$@MIHs(P3~#cm?g@)3ZGDw>-Y)fU;yF8G6_n_ks`;ctN~C<nY6F`Xu>&s@4>vm
z^An*%1=qxdN}NTUrSLgSR9WAU!aCo;lf8bNg$b}<8T?aNc=Kp0&E!_dd3Y3yEfF&r
z4YxQO=VI#`Ilp__8K0>NCM{h$XdU#47eMa1zGa&>v57}kVXTw7>61D!|6smpmR3oN
zG)WbB&+=eT6-Hh2P183`;$7M{%t#%+)<J%iaKuzqI$~!hIj6T!7v1?5b^i*CH0FS{
zfUt<Q=E$1c(gEywF5OW{I@F{V;0`Is$eWk3pS=duEQrP*T=hqQk%-TLbf&Bopy}NF
z!kSq#d*;Y`0o@Fc`Ti9$<5?xZO2FQtv;eHor+dp4eFNN4*7*BFS~)Nz;3)NxN3LC+
zexM=ZfQ3970+3vp7@Wk=w6Z`B*|OmEuIa{d9~K-|+12LlU{mlrU^;IBx#b<i7#qd`
z46zCXyU=TWOSEwpkP&WS?cWl@TDc`OFbd;KcLvR@Am`+d<}34HnLGdqP9Mrl0+^Vc
zj}?(Rr<UY1P^0`EEQ2Ud$B#b;{@<v3)tG)Lw85R7B!|W=9>XL|U(ygKZ&$t*1SAMh
zmxBc@;7oG*zsGpr%HtvY8IbAv_Z^|@VCrrP^xO2uPXI!nzBmkSqHK`F+0k||lyWS#
zx3;iMJ2{|67<a&wt!N7b-~H(SdZwKvwr9D7m`yU;30@>D6ilN%y^b*S3@&76zkP>q
zq7m;Rd4S{{Bp)F80h0HSe27FBv;%mSoGlT{5|RU|<WTu>CO8IBm030c41j+|04!%?
zL_8J^^R~x#QOD144}d}fErC(~BV_&SzfjA$h@c$_MPmLGWe_)R#MoVW`ZusD9s;R4
zSlcfAyvvr=w3=p1BY}Ncwe0`+C7bZy!ZzjgdMVtq*K6PBzebe-5;R%4W9WjR(6*`)
zA~oc^YFz_TXUGz;WeMyWa+lURhf5Ptv349ZgFMcl-<+b#iC|oN5pXLMaRfo{#{*?Q
z+uwVx$^#~UwQLG-HS8+Ao{k0fy)*@J<?TUZKg9luh{X4ipe;9CqMf62-`h!ZI!V}L
ltU`hD^b8Ns5PyJ*?z&}L;H697a;+9|f7&G8eV^Eu{{=c$O0)m~

diff --git a/gpthistory/__pycache__/helpers.cpython-310.pyc b/gpthistory/__pycache__/helpers.cpython-310.pyc
deleted file mode 100644
index a6a1f8cf61d3e0fd3ab51b17dc96e55d5f40669e..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 3082
zcmZ`*&2QYs73c74xzw&CuNBFP-Iht}RNGdPkpfL?I8IedX_W@D0S869txM3H(TXcB
zxgJuAwV;X?a*+1mLoWr|qJVTX&_5u5%b>SjqH}J(xW6|mtv=uk!8c!T-pqUN_ufq2
zZ2AVu?}Gn|PaB5uKkA$>4mv->UwjF|4bD=dGpbpK>Au;qbZvJW)Mo1TYn>W1L`|@s
zcUb4v5##oA*6}gxq!ax{r@=-Nnw_R-92nqby~)EVO{cjF{u=j=tj-nD?6vS;<#p~q
zH^J9DoDpvvvCb@?;EiWSXO1`dB<gE?ich1S=T~?O^_zT_&+s>%vCehQ9vbbrW1QaD
zXfx&BABb#aT}|G9u(q+X9<DrC4}Z1xTg=p=K@x6@Jv1lMJmMkGOOfrqWPcn^EX7+<
zS%^r+Tf-)`lZ=a}OL4I~Y~NgninvVrqFn@w!OewK?20st#{Ir1il~d(wy7LFsoc$6
z_M<XpbgGkLqjvG%!(Xg|bd7yh8HX_W5!*K_QwEiZo{64SS?F2lY2$9w96Mxs#OWGG
z;9*8Zio3%f`T@$?(^5up8I<B_84My>7Qv>>`vK_^@TiQIG@GiOoL1H1JcIhBYMeB}
za&I7%O#+moB|smmD@x^JNMyWiE1M)KpLjyeBI{5@62e{>vW4p9?d5(}E=N*E+exup
zB%+MBmZccv#d3F0ZY4#T%f0i>Es+j{ES3g)ss*{EMR>Nsn~+&dg4njtZ055TKAXuI
z48<nMX|d2CEXVkZC6GTGB}`kw#0RWm2iB_bXaZ(7Dww)rbH?Z9yzy&b^GThXFr+nn
zS6k@eAWh03$^uehFG#X74<5snTcQ}5P?A$O$k#H;x<Xm0$l8{2G{5pSZ&)Nlp;)3#
zG9p*8O0^1-9+J@TMCkYEydH@8rx|;O*{>w2d;vM6CZR_o=i6wMMvwKNN|$lQ&F3a?
z#CjID=8c{WRqWvhT9sdjOvtDdq>bo57Mv$pHvlw)D0s3X<X*5-(8wra0z}G4!<VyU
z|Hbi=#fV3Xw{E@n0y-zzU<df<=Uk-P5^YDhahed4{PBtcV^UyiIgR70`gnrOrngp}
z6vZIV3US#+*SaUi4mD#qUb_rY1=*oxH<-MM#sw=;Y>ngV@90o`l?DNTvjg*h0RV6R
z-rP6$t<pTSIAFl7Ipbf(zjZW~cF*a#mBnrDz@j#HdF`1|*}d8-0JQH^ta9Fm#3%LA
z1DqK5wr#npCD90Yj*!4~-yd;MpJG&xO^hS9F<iOmow0%lS@;IwxQlo|3W9aM!9&L!
z`VHSYaWMIC?bN>`AsSq_gJZ&~yiVi>$OU_yqUs?g3JRyLQ5lb1!V)*;jlY>sC@h$K
zn^se3!*}M=EoI64v&%m7@1zLqIPVYcehAL5N%o8}T!1Tc4Z$-n7E5>1JdV=hE*htc
zG?Iqb?_E%R;ojCxwq3M?4}zO~ICV}#J@BoKJjldnm+ZwwDdJM_b{(k8cKUjE9a73o
z^KMs2WhWWz{2fv&Ao3Ow!l|4EX}hX<!386xc}yusn^k_7+Rd^*IMeE~L1td3`TJN`
zTm!M~SvJF_@%gNwYuoZ)A*;QI$>Xn(%3}QEf@@pHKaCat0VxgK1wi$@acCYHbYGMf
zVRGNbj9pokePjZ$4mY=*eYbKdciz~qRYngjZfzp*(Yn7vh90rUBP7|aYPg^seD3GS
zDBP(`N|pQG7sjT(xSx5Ir(>f21;a?4d*czms#P9%C#rhoR}-5S5abVkuJL{^O5>fB
z(ovZYf-)&nF~VvTbdz0?T}Vl1F>#V~1E}5o=Rc?%e<;Z!xWRC@g$ds{eueT{Y*W(x
z`aHJ(f`~lD_L<LKm~sJ}lAMg!!IjM<Md}hfJlUZbv63|TeUL}g_yNdp0Z1Nc6+)|U
zq}A&X{*A|zKPF)mUn4>w!5OB;XBf%6bb>z+Kb@dmQ)V`hb7(5J%qgODYE%|pH@KVf
z81Ek0Epl158_MLH@*QGz=(L%d8rwM>U0+j|a<1|rCM@C{*;?8CsMuCl$0s{CHAx%5
zV0bnqF<8A*%c+Lm>BKT>del2xsr71`@+YLwPl=F~)y(Vcx{etgKP|^&4b!tc{TjGo
zwX7ENm~YCD!GV<ZBdTsFyI16yGIQKUgDB%sq09kS4zj{vPu`|&+#HX^D3O$}R1J^8
zMCO^K=Z4M?%Edu3D(PXNYLq{aYn45S$}LFGlMHv>Q<A-ddrbN38>?%-2_LL{`b#zO
zI4Y7D32igMg7uA$?yEZ9QlE@ckQavZfDA*qNP7`Nb&k@m(^){Pat;<6AlfwE9bCRU
msklqIy?7VIGd!pdldzWAK<!%B9NTi4`^vXqq^5^5+4?^sS|WV_

diff --git a/gpthistory/gpthistory.py b/gpthistory/gpthistory.py
index c9570d6..20fd4d4 100644
--- a/gpthistory/gpthistory.py
+++ b/gpthistory/gpthistory.py
@@ -2,7 +2,7 @@
 import json
 import os
 import pandas as pd
-import logging
+from rich import print
 from gpthistory.helpers import (
     extract_text_parts,
     generate_embeddings,
@@ -14,12 +14,6 @@
 # Define the path to the index file in the user's home directory
 INDEX_PATH = os.path.join(os.path.expanduser("~"), ".gpthistory", "chatindex.csv")
 
-# Configure the logger
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
-
 
 @main.command()
 def build_index(file: typer.FileText):
@@ -43,8 +37,8 @@ def build_index(file: typer.FileText):
                 chat_ids.append(entry["id"])
                 section_ids.append(k)
                 texts.append(text_data[0])
-    logger.info(f"Index built and stored at: {INDEX_PATH}")
-    logger.info(f"Conversations indexed: {len(chat_ids)}")
+    print(f"[cyan]Index built and stored at:[/cyan] {INDEX_PATH}")
+    print(f"[cyan]Conversations indexed:[/cyan] {len(chat_ids)}")
     df = pd.DataFrame({"chat_id": chat_ids, "section_id": section_ids, "text": texts})
     df = df[~df.text.isna()]
     df["id"] = df["chat_id"]
@@ -69,22 +63,29 @@ def build_index(file: typer.FileText):
         rows_only_in_df = df
 
     if incremental and len(rows_only_in_df) > 0:
-        logger.info("Only generating embeddings for new conversations to save money.")
+        print(
+            "[yellow]Only generating embeddings for new conversations to save money.[/yellow]"
+        )
+
+    import pickle
+
+    with open("convos.pkl", "wb") as f:
+        pickle.dump(rows_only_in_df, f)
 
     # Generate and add embeddings to the index
     embeddings = generate_embeddings(rows_only_in_df.text.tolist())
     rows_only_in_df["embeddings"] = embeddings
     final_df = pd.concat([rows_only_in_df, current_df])
-    logger.info(f"Total conversations: {len(final_df)}")
+    print(f"[cyan]Total conversations:[/cyan] {len(final_df)}")
     final_df.to_csv(INDEX_PATH, sep="|", index=False)
 
 
 @main.command()
-def search(keyword: str):
+def search(keyword: str, topk: int = 5, thr: float | None = None):
     """
-    Search a keyword within the index
+    Search a keyword within the index with an optional threshold argument.
     """
-    logger.info("Searching for keyword: %s", keyword)
+    print(f"[cyan]Searching for:[/cyan] '{keyword}'")
     if os.path.exists(INDEX_PATH):
         df = pd.read_csv(INDEX_PATH, sep="|")
         df["embeddings"] = df.embeddings.apply(
@@ -92,17 +93,30 @@ def search(keyword: str):
         )
         filtered = df[df.text.str.contains(keyword)]
 
-        # Calculate top titles and their corresponding chat IDs
-        chat_ids, top_titles, top_scores = calculate_top_titles(df, keyword)
+        if filtered.shape[0] == 0:
+            print(
+                "[yellow]No exact matches found. Performing solely embedding search.[/yellow]"
+            )
+            filtered = df.copy()
+
+        # Calculate top titles and their corresponding chat IDs based on the threshold
+        chat_ids, top_titles, top_scores = calculate_top_titles(
+            filtered, keyword, thr, topk
+        )
 
         for i, t in enumerate(top_titles):
-            logger.info("%s: %s", chat_ids[i], t)
-            logger.info(
-                "ChatGPT Conversation link: https://chat.openai.com/c/%s", chat_ids[i]
+            print(
+                f"""\
+--------------------------------------------------------------------------------
+[cyan bold]url:[/cyan bold] [green]https://chat.openai.com/c/{chat_ids[i]}[/green]
+[cyan bold]score:[/cyan bold] {top_scores[i]:.2f}
+
+{t}
+-------------------------------------------------------------------------------\
+"""
             )
-            logger.info("--------------------------------------")
     else:
-        typer.echo("Index not found. Please build the index first.")
+        print("Index not found. Please build the index first.")
         return
 
 
diff --git a/gpthistory/helpers.py b/gpthistory/helpers.py
index 003203a..7218aee 100644
--- a/gpthistory/helpers.py
+++ b/gpthistory/helpers.py
@@ -1,26 +1,38 @@
-import json
 import os
-import pandas as pd
+import tiktoken
 import numpy as np
 from openai import OpenAI
-
-client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
 from dotenv import load_dotenv
-import logging
 
 # Load environment variables
 load_dotenv()
 
 # Set up OpenAI API key
+client = OpenAI(
+    api_key=os.environ.get("OPENAI_API_KEY"),
+    base_url="http://oai.hconeai.com/v1",
+    default_headers={
+        "Helicone-Auth": f"Bearer {os.environ.get('HELICONE_API_KEY')}",
+        "Helicone-Property-project": "gpthistory",
+    },
+)
+
+# Tokenizer used to count/truncate tokens, and the name of the embedding model to request
+tokenizer = tiktoken.get_encoding("cl100k_base")
+EMBEDDING_MODEL = "text-embedding-3-small"
 
 # Define the path to the index file in the user's home directory
 INDEX_PATH = os.path.join(os.path.expanduser("~"), ".chatsearch", "chatindex.csv")
 
-# Set up logging
-logging.basicConfig(
-    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
-)
-logger = logging.getLogger(__name__)
+
+def count_tokens(text):
+    return len(tokenizer.encode(text))
+
+
+def get_first_n_tokens(text: str, n: int) -> str:
+    tokens = tokenizer.encode(text)
+    first_n_tokens = tokens[:n]
+    return tokenizer.decode(first_n_tokens)
 
 
 def extract_text_parts(data):
@@ -48,7 +60,7 @@ def generate_query_embedding(query):
     """
     Generate an embedding for a query using OpenAI API.
     """
-    response = client.embeddings.create(input=[query], model="text-embedding-ada-002")
+    response = client.embeddings.create(input=[query], model=EMBEDDING_MODEL)
     return response.data[0].embedding
 
 
@@ -58,22 +70,26 @@ def generate_embeddings(conversations):
     """
     embeddings = []
     for i, batch in enumerate(split_into_batches(conversations, 100)):
-        logger.info(f"Generating Embeddings for batch: {i + 1}")
-        response = client.embeddings.create(input=batch, model="text-embedding-ada-002")
-        tmp_embedding = [row["embedding"] for row in response.data]
+        # No per-batch logging here; clip any text over 8000 tokens before the embeddings request
+        for i, text in enumerate(batch):
+            if count_tokens(text) > 8000:
+                batch[i] = get_first_n_tokens(text, 8000)
+        response = client.embeddings.create(input=batch, model=EMBEDDING_MODEL)
+        tmp_embedding = [r.embedding for r in response.data]
         embeddings += tmp_embedding
     if len(embeddings) > 0:
-        logger.info("Conversations (Chunks) = %d", len(conversations))
-        logger.info("Embeddings = %d", len(embeddings))
+        print(f"[cyan]Conversations (Chunks):[/cyan] {len(conversations)}")
+        print(f"[cyan]Embeddings:[/cyan] {len(embeddings)}")
     else:
-        logger.info("No new conversations detected")
+        print("[yellow]No new conversations detected[/yellow]")
     return embeddings
 
 
-def calculate_top_titles(df, query, top_n=1000):
+def calculate_top_titles(df, query, thr=0.8, top_n=1000):
     """
     Calculate top titles for a given query using embeddings.
     """
+
     # Extract the embeddings from the DataFrame
     embedding_array = np.array(df["embeddings"].tolist())
     query_embedding = generate_query_embedding(query)
@@ -81,7 +97,11 @@ def calculate_top_titles(df, query, top_n=1000):
     dot_scores = np.dot(embedding_array, query_embedding)
 
     # Filter out titles with dot scores below the threshold
-    mask = dot_scores >= 0.8
+    if thr is not None:
+        mask = dot_scores >= thr
+    else:
+        mask = np.ones_like(dot_scores, dtype=bool)
+
     filtered_dot_scores = dot_scores[mask]
     filtered_titles = df.loc[mask, "text"].tolist()
     filtered_chat_ids = df.loc[mask, "chat_id"].tolist()
diff --git a/setup.py b/setup.py
index 2d91494..23a6e1d 100644
--- a/setup.py
+++ b/setup.py
@@ -8,7 +8,14 @@
     author_email="shrikar84@gmail.com",
     packages=find_packages(),
     include_package_data=True,
-    install_requires=["typer", "python-dotenv", "openai", "pandas", "numpy"],
+    install_requires=[
+        "typer",
+        "python-dotenv",
+        "openai",
+        "pandas",
+        "numpy",
+        "rich",
+    ],
     entry_points="""
         [console_scripts]
         gpthistory=gpthistory.gpthistory:main