Skip to content

Commit d6d1c43

Browse files
committed
Minor changes after some labelling process
1 parent 4b36620 commit d6d1c43

14 files changed

Lines changed: 123 additions & 67 deletions

File tree

.DS_Store

2 KB
Binary file not shown.

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,6 @@ ENV/
9999

100100
# mypy
101101
.mypy_cache/
102+
103+
# Added by JCS
104+
*.DS_Store

labelfactory/.DS_Store

0 Bytes
Binary file not shown.
-6 KB
Binary file not shown.

labelfactory/dataanalyzer/ROCanalyzer.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ def compute_tpfn(p, NFP, NFN):
8585
return th, pos
8686

8787

88-
def plotROCs(p, y, w, rs0al1, relabels, category=None):
88+
def plotROCs(p, y, w, rs0al1, relabels, category=None, fpath=None):
8989

9090
""" Plot three possible decision thresholds for a classifier with
9191
predictions p for samples with labels y, depending on the use of
@@ -146,9 +146,14 @@ def plotROCs(p, y, w, rs0al1, relabels, category=None):
146146
plt.plot(FPR[pos], TPR[pos], 'r.', markersize=10)
147147
plt.plot(FPRrs[pos_rs], TPRrs[pos_rs], 'g.', markersize=10)
148148
plt.plot(FPRw[pos_w], TPRw[pos_w], 'b.', markersize=10)
149+
plt.axis('square')
150+
plt.xlim((0, 1))
151+
plt.ylim((0, 1))
149152
plt.show(block=False)
153+
if fpath is not None:
154+
plt.savefig(fpath)
150155

151-
# ROC based on sklearn (the results are assentially the same)
156+
# ROC based on sklearn (the results are essentially the same)
152157
# from sklearn.metrics import roc_curve
153158
# FPR2, TPR2, tt = roc_curve(y_sorted, p_sorted)
154159
# FPRrs2, TPRrs2, tt = roc_curve(y_sorted2, p_sorted2)

labelfactory/labelfactory.py

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
else:
3232
import Tkinter as tk
3333

34+
import ipdb
35+
3436
# The following is to capture a user tag automatically.
3537
try:
3638
import pwd
@@ -75,14 +77,6 @@ def run_labeler(project_path, url, transfer_mode, user, export_labels,
7577
labeling windows
7678
"""
7779

78-
# To complete the migration to python 3, I should replace all "raw_input"
79-
# by "input". Transitorily, to preserve compatibility with python 2, I
80-
# simply rename input to raw_input
81-
if sys.version_info.major == 3:
82-
raw_input2 = input
83-
else:
84-
raw_input2 = raw_input
85-
8680
#######
8781
# Start
8882
#######
@@ -93,22 +87,17 @@ def run_labeler(project_path, url, transfer_mode, user, export_labels,
9387
tm_options))
9488

9589
# # Check if project folder exists. Otherwise create a default one.
96-
# if len(sys.argv) > 1:
97-
# project_path = sys.argv[1]
98-
# else:
99-
# project_path = raw_input2("Select the (absolute or relative) path " +
100-
# "to the labeling project folder: ")
10190
if project_path is None:
102-
project_path = raw_input2("Select the (absolute or relative) path " +
103-
"to the labeling project folder: ")
91+
project_path = input("Select the (absolute or relative) path to " +
92+
"the labeling project folder: ")
10493

10594
if not project_path.endswith('/'):
10695
project_path = project_path + '/'
10796

10897
# Check if project folder exists. This is necessary to follow
10998
if not os.path.isdir(project_path):
110-
createfolder = raw_input2("Folder " + project_path +
111-
"does not exist. Create? (y/n)")
99+
createfolder = input(f"Folder {project_path} does not exist. " +
100+
"Create? (y/n)")
112101

113102
if createfolder == "y":
114103
os.makedirs(project_path) # Create folder
@@ -154,8 +143,7 @@ def run_labeler(project_path, url, transfer_mode, user, export_labels,
154143
'DataPaths', 'db_label_info_tablename'),
155144
'history_tablename': cf.get(
156145
'DataPaths', 'db_history_tablename'),
157-
'ref_name': cf.get(
158-
'DataPaths', 'db_ref_name'),
146+
'ref_name': cf.get('DataPaths', 'db_ref_name'),
159147
'mode': cf.get('DataPaths', 'db_mode')}
160148
elif source_type == 'mongodb' or dest_type == 'mongodb':
161149
db_info = {'name': cf.get('DataPaths', 'db_name'),
@@ -287,14 +275,13 @@ def run_labeler(project_path, url, transfer_mode, user, export_labels,
287275
user0 = getpass.getuser() # For Windows
288276

289277
# Ask for a labeler name.
290-
userId = raw_input2(
291-
"Please write you user name [{0}]: ".format(user0))
278+
userId = input(f"Please write you user name [{user0}]: ")
292279
if userId == "":
293280
userId = user0
294-
print("Your User Name is {0}".format(userId))
281+
print(f"Your User Name is {userId}")
295282
else:
296283
userId = user
297-
print("Your User Name is {0}".format(userId))
284+
print(f"Your User Name is {userId}")
298285
else:
299286
userId = None
300287
if user is not None:
@@ -309,7 +296,7 @@ def run_labeler(project_path, url, transfer_mode, user, export_labels,
309296
# Verify that parentclass names are in the category set
310297
for c in parentcat:
311298
if c not in categories:
312-
log.error("Unknown category {} ".format(c) + "in the parentcat " +
299+
log.error(f"Unknown category {c} in the parentcat " +
313300
"dictionary. Revise the configuration file")
314301
sys.exit()
315302
elif parentcat[c] not in categories:

labelfactory/labeling/.DS_Store

0 Bytes
Binary file not shown.

labelfactory/labeling/baseDM.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,7 @@ def importPredicts(self, category=None):
565565
# Load predictions from file, if it exists
566566
if os.path.isfile(preds_file):
567567
print("---- Importing predictions from category " + category)
568-
with open(preds_file, 'r') as f:
568+
with open(preds_file, 'rb') as f:
569569
preds = pickle.load(f)
570570

571571
# Move the predictions file to the "used" folder.
@@ -653,6 +653,7 @@ def computeWID(self, url, mode='yes'):
653653
# This replacement does not affect the url if it is a domain site.
654654
# But it may transform the url of specific web pages.
655655
wid = wid.replace("/", "__")
656+
656657
elif mode == 'www':
657658

658659
if url[0:4] == 'www.':

labelfactory/labeling/datamanager.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,18 @@
2222
# Services from the project
2323
# sys.path.append(os.getcwd())
2424

25+
# #############################################################################
26+
# #############################################################################
27+
# #############################################################################
28+
#
29+
# WARNING:
30+
# THIS CLASS IS DEPRECATED. IT HAS BEEN REPLACED BY baseDM.py AND
31+
# ITS DERIVED CLASSES. HOWEVER, SOME OLD SCRIPTS ARE STILL USING THIS
32+
# CLASS. SO IT IS PRESERVED.
33+
#
34+
# #############################################################################
35+
# #############################################################################
36+
# #############################################################################
2537

2638
class DataManager(object):
2739

@@ -315,6 +327,8 @@ def loadData(self):
315327
# Load prediction dataframes stored in pickle files
316328
df_preds = pd.read_pickle(self.datapreds_file)
317329

330+
ipdb.set_trace()
331+
318332
return df_labels, df_preds, labelhistory
319333

320334
def get_df(self, data, labelhistory):
@@ -790,6 +804,7 @@ def computeWID(self, url, mode='yes'):
790804
# This replacement does not affect the url if it is a domain site.
791805
# But it may transform the url of specific web pages.
792806
wid = wid.replace("/", "__")
807+
793808
elif mode == 'www':
794809

795810
if url[0:4] == 'www.':

labelfactory/labeling/dmFiles.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,22 @@ def loadData(self):
142142
df_preds = pd.DataFrame(index=[], columns=cols)
143143
print(df_preds)
144144

145+
# df_preds = df_preds[pd.notnull(df_preds['b2c_on'])]
146+
147+
# df_labels.rename(
148+
# index={x: self.computeWID(x) for x in df_labels.index},
149+
# inplace=True)
150+
# df_preds.rename(
151+
# index={x: self.computeWID(x) for x in df_preds.index},
152+
# inplace=True)
153+
# labelhistory = {self.computeWID(x): labelhistory[x]
154+
# for x in labelhistory}
155+
156+
# df_labels.replace({'O++t++h++e++r': 'Other'}, inplace=True)
157+
# df_labels.replace({'b++2++c++_++o++n': 'b2c_on'}, inplace=True)
158+
# df_labels.replace({'b++2++c++_++r++e++a++d++y': 'b2c_ready'},
159+
# inplace=True)
160+
145161
return df_labels, df_preds, labelhistory
146162

147163
def saveData(self, df_labels, df_preds, labelhistory, save_preds=True):

0 commit comments

Comments
 (0)