Skip to content

Commit 3684d96

Browse files
author
Jesús Cid Sueiro
committed
pending commit oll changes
1 parent 45fdc0c commit 3684d96

7 files changed

Lines changed: 43 additions & 40 deletions

File tree

.DS_Store

0 Bytes
Binary file not shown.

labelfactory/.DS_Store

0 Bytes
Binary file not shown.

labelfactory/labelfactory.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,6 @@ def run_labeler(project_path, url, transfer_mode, user, export_labels,
348348

349349
# Load data from the standard datasets.
350350
df_labels, df_preds, labelhistory = data_mgr.loadData()
351-
breakpoint()
352351

353352
# Load new labels and predictions from the input folder
354353
log.info("-- Loading new data from the input folder")

labelfactory/labeling/LabelGUIController.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@
1414
else:
1515
import Tkinter as tk
1616

17-
import ipdb
18-
1917

2018
class LabelGUIController(object):
2119

@@ -30,9 +28,9 @@ class LabelGUIController(object):
3028
def __init__(self, newurls, newwids, newqueries, preds, labels, urls,
3129
categories, alphabet, datatype='url', cat_model='single',
3230
parent_cat={}, text2label=None):
33-
34-
""" This method initialize the sampler object. As part of this process
35-
it creates the AL objects required for the sample generation.
31+
"""
32+
This method initializes the sampler object. As part of this process it
33+
creates the AL objects required for the sample generation.
3634
3735
:Attributes:
3836
:newurls: A list of urls to label
@@ -71,15 +69,14 @@ def __init__(self, newurls, newwids, newqueries, preds, labels, urls,
7169
self.text2label = text2label
7270

7371
def takeandshow_sample(self):
72+
"""
73+
Gets next sample id from the list and visualizes the sample.
74+
The type of visualization depends on the type of sample:
75+
if sample id is a url, a browser is opened.
76+
if sample id is not a url, data is printed.
7477
75-
""" Gets next sample id from the list and visualize the sample.
76-
The type of visualization depends of the type of sample:
77-
if sample id is a url, a browser is opened.
78-
if sample id is not a url, data is printed.
79-
80-
Note that the sample identifiers are stored in variable self.url
81-
for historical reasons. This variable does not necessarily stores
82-
urls.
78+
Note that the sample identifiers are stored in variable self.url for
79+
historical reasons. This variable does not necessarily store urls.
8380
"""
8481

8582
self.url = None

labelfactory/labeling/baseDM.py

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,6 @@ class BaseDM(object):
6565
def __init__(self, source_type, dest_type, file_info, db_info,
6666
categories, parentcat, ref_class, alphabet,
6767
compute_wid='yes', unknown_pred=0):
68-
6968
"""
7069
Stores files, folder and path names into the data structure of the
7170
DataManager object.
@@ -290,17 +289,17 @@ def df2data(self, df_labels, df_preds):
290289
return data
291290

292291
def importData(self):
292+
"""
293+
Read data from the input folder.
294+
Only labels with positive or negative label are loaded
293295
294-
""" Read data from the input folder.
295-
Only labels with positive or negative label are loaded
296-
297-
:Args:
298-
:-: None. File locations and the set of categories are taken
299-
from the class attributes
296+
:Args:
297+
:-: None. File locations and the set of categories are taken
298+
from the class attributes
300299
301-
:Returns:
302-
:df_labels: Pandas dataframe of labels
303-
:df_preds: Pandas dataframe of predictions
300+
:Returns:
301+
:df_labels: Pandas dataframe of labels
302+
:df_preds: Pandas dataframe of predictions
304303
"""
305304

306305
# Warning.
@@ -373,8 +372,11 @@ def importData(self):
373372
'category {2} \r').format(nk, ntot, cat),
374373
end="")
375374

376-
urls_dict[wid] = preds[cat][wid]['url']
377-
pred_dict[wid] = preds[cat][wid]['pred']
375+
if preds[cat][wid] is not None:
376+
urls_dict[wid] = preds[cat][wid]['url']
377+
pred_dict[wid] = preds[cat][wid]['pred']
378+
else:
379+
print(f"{wid}")
378380

379381
df2_preds['url'].update(pd.Series(urls_dict))
380382
df2_preds[cat].update(pd.Series(pred_dict))
@@ -434,7 +436,7 @@ def importData(self):
434436
# Make sure that any changes here are also done there
435437
# (I know, this is not a good programming style..)
436438
info = ['marker', 'relabel', 'weight', 'userId', 'date']
437-
arrays = [len(info)*['info'] + len(self.categories)*['label'],
439+
arrays = [len(info) * ['info'] + len(self.categories) * ['label'],
438440
info + self.categories]
439441
tuples = list(zip(*arrays))
440442
mindex = pd.MultiIndex.from_tuples(tuples)
@@ -508,21 +510,20 @@ def importLabels(self, category):
508510
return labels
509511

510512
def importPredicts(self, category=None):
513+
"""
514+
Get dictionary of predictions relative to a given category
511515
512-
""" Get dictionary of predictions relative to a given category
513-
514-
:Args:
515-
:category: The category to load (from a pkl file)
516-
If None, all categories are read from a unique
517-
csv file
516+
:Args:
517+
:category: The category to load (from a pkl file)
518+
If None, all categories are read from a unique csv file
518519
519-
:Returns:
520-
:preds: Dictionary of predictions
521-
- If category is not None, preds[wid] has the
522-
prediction for url wid about the given category.
523-
- If category is not None, preds is a dataframe
524-
with the wid as uid column and one column with
525-
predictions for each category.
520+
:Returns:
521+
:preds: Dictionary of predictions
522+
- If category is not None, preds[wid] has the prediction
523+
for url wid about the given category.
524+
- If category is None, preds is a dataframe with the
525+
wid as uid column and one column with predictions for
526+
each category.
526527
"""
527528

528529
# The default category is the reference class used by the
-6 KB
Binary file not shown.

labelfactory/labeling/urlsampler.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ def get_urls_batch(self, max_urls=10,):
192192
print([newwids[k] for k in range(n) if relabels[k] == 1 and
193193
markers[k] == 1])
194194

195+
print(f"Predictions for the reference class, {self.ref_class}:")
196+
print({w: self.preds[self.ref_class][w] for w in newwids})
197+
195198
return newurls, newwids, newqueries
196199

197200
def get_single_url(self, target_wid):
@@ -212,6 +215,7 @@ def get_single_url(self, target_wid):
212215
# Make sure that the target url has been previously labeled.
213216
# The code is not ready to label urls without a previous label,
214217
# because that would require changing the active learning weights
218+
215219
if target_wid not in self.markers:
216220
sys.exit("The target url has no label. Labeling of a specific " +
217221
"url should be done to revise existing labels only")
@@ -225,6 +229,8 @@ def get_single_url(self, target_wid):
225229

226230
# Print the four types of webs to label
227231
print("Relabeling {0} ".format(newurls[0]))
232+
print(f"Predictions for the reference class, {self.ref_class}:")
233+
print({w: self.preds[self.ref_class][w] for w in newqueries})
228234

229235
return newurls, newwids, newqueries
230236

0 commit comments

Comments
 (0)