Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions vis_services/lib/author_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def augment_graph_data(author_graph, data):
return {"fullGraph" : author_graph}

# make two dicts: a dict with all info for a certain paper (bib_dict)
# and a dict for each author that lists all bibcodes they helped write
# and a dict for each author that lists all identifiers they helped write
name_dict = defaultdict(list)
bib_dict= {}

Expand All @@ -90,12 +90,12 @@ def augment_graph_data(author_graph, data):
read_count = d.get("read_count")
if not read_count:
read_count = 0
bib_dict[d.get("bibcode")] = {"title" : d.get("title", [""])[0],
bib_dict[d.get("scix_id")] = {"title" : d.get("title", [""])[0],
"citation_count": citation_count,
"read_count": read_count,
"authors" : d.get("author_norm", [])}
for author in d.get("author_norm", []):
name_dict[author].append(d.get("bibcode"))
name_dict[author].append(d.get("scix_id"))
#don't allow any more author nodes than max_num_auth_nodes
if len(author_graph['nodes'])>max_num_auth_nodes:
node_cutoff = sorted([d["nodeWeight"] for d in author_graph['nodes']], reverse=True)[:max_num_auth_nodes+1][-1]
Expand Down Expand Up @@ -147,8 +147,8 @@ def augment_graph_data(author_graph, data):
name = G.nodes[child].get("nodeName")
size = G.nodes[child].get("nodeWeight")
bibs = sorted(name_dict[name], key=lambda x:bib_dict[x]["citation_count"], reverse=True)
total_citations = sum([bib_dict[bibcode]["citation_count"] for bibcode in name_dict[name]])
total_reads = sum([bib_dict[bibcode]["read_count"] for bibcode in name_dict[name]])
total_citations = sum([bib_dict[scix_id]["citation_count"] for scix_id in name_dict[name]])
total_reads = sum([bib_dict[scix_id]["read_count"] for scix_id in name_dict[name]])
children.append({"name":name, "size": size, "papers":bibs, "citation_count" : total_citations, "read_count" : total_reads, "numberName" : child})
else:
G.remove_node(child)
Expand Down
18 changes: 9 additions & 9 deletions vis_services/lib/paper_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@
# Helper functions
def _get_reference_mapping(data):
'''
Construct the reference dictionary for a set of bibcodes
Construct the reference dictionary for a set of SciX IDs
'''
refdict = {}
for doc in data:
if 'reference' in doc:
refdict[doc['bibcode']] = set(doc['reference'])
refdict[doc['scix_id']] = set(doc['reference'])
return refdict

def _get_paper_data(data):
Expand All @@ -41,7 +41,7 @@ def _get_paper_data(data):
'''
infodict = {}
for doc in data:
infodict[doc['bibcode']] = doc
infodict[doc['scix_id']] = doc
return infodict

def _sort_and_cut_results(resdict,cutoff=1500):
Expand Down Expand Up @@ -189,7 +189,7 @@ def augment_graph_data(data, max_groups):
# Main machinery
def get_papernetwork(solr_data, max_groups, weighted=True, equalization=False, do_cutoff=False):
'''
Given a list of bibcodes, this function builds the papers network based on co-citations
Given a list of identifiers, this function builds the papers network based on co-citations
If 'weighted' is true, we will normalize the co-occurrence frequency with the total number
of papers in the set, otherwise we will work with the actual co-occurrence frequencies.
If 'equalization' is true, histogram equalization will be applied to the force values in
Expand All @@ -205,7 +205,7 @@ def get_papernetwork(solr_data, max_groups, weighted=True, equalization=False, d
of the number of references in the linked nodes.
'''
# Get the paper list from the Solr data
papers_list = [a['bibcode'] for a in solr_data]
papers_list = [a['scix_id'] for a in solr_data]
# First construct the reference dictionary, and a unique list of cited papers
reference_dictionary = _get_reference_mapping(solr_data)
# From now on we'll only work with publications that actually have references
Expand Down Expand Up @@ -287,11 +287,11 @@ def get_papernetwork(solr_data, max_groups, weighted=True, equalization=False, d
#because the nodes must be inserted at the proper index
nodes = [None]*len(ref_papers)
for paper in solr_data:
if paper['bibcode'] not in selected_papers:
if paper['scix_id'] not in selected_papers:
continue
index = ref_papers[paper["bibcode"]]
nodes[index] ={'nodeName':paper['bibcode'],
'nodeWeight':paper.get('citation_count',1),
index = ref_papers[paper["scix_id"]]
nodes[index] ={'nodeName':paper['scix_id'],
'nodeWeight':paper.get('citation_count',1),
'citation_count':paper.get('citation_count',0),
'read_count':paper.get('read_count',0),
'title':paper.get('title','NA')[0],
Expand Down
3,222 changes: 3,221 additions & 1 deletion vis_services/tests/stubdata/test_input/author_network_second_parameter.json

Large diffs are not rendered by default.

Loading
Loading