Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions ADSCitationCapture/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,23 @@ def fetch_metadata(base_doi_url, base_datacite_url, doi):

return content if record_found else None

# An ORCID iD is four dash-separated groups of four characters. The last
# character is a checksum which may be a digit or an upper-case "X", so a
# digits-only pattern would silently skip valid identifiers.
_ORCID_ID_PATTERN = r'(\d{4}-\d{4}-\d{4}-\d{3}[\dX])'

# Tag forms that may carry an ORCID inside an affiliation string, in priority
# order: the current <ID system="ORCID"> form first, then the legacy <ORCID>
# form. "[^\s]*" skips any non-whitespace prefix before the identifier itself.
# Compiled once at import time instead of on every call.
_ORCID_TAG_REGEXES = (
    re.compile(r'(.*)<ID system="ORCID">[^\s]*' + _ORCID_ID_PATTERN + r'</ID>'),
    re.compile(r'(.*)<ORCID>[^\s]*' + _ORCID_ID_PATTERN + r'</ORCID>'),
)


def extract_orcids_from_affs(affiliations):
    """
    Split ORCID tags out of a list of affiliation strings.

    Each affiliation is checked for an embedded ORCID tag (current form first,
    then the legacy one). When a tag is found, the ORCID iD is collected and
    the affiliation is reduced to the text that precedes the tag (that text is
    kept as-is, including any trailing whitespace; anything from the tag
    onwards is dropped). When no tag is found, the affiliation is kept
    untouched and "-" is used as the ORCID placeholder.

    :param affiliations: iterable of affiliation strings.
    :return: tuple ``(orcids, stripped_affs)`` of two lists, each with one
             entry per input affiliation, in the same order.
    """
    orcids = []
    stripped_affs = []

    for aff in affiliations:
        for tag_regex in _ORCID_TAG_REGEXES:
            # Match once and reuse the result for both captured groups.
            found = tag_regex.match(aff)
            if found:
                stripped_affs.append(found.group(1))
                orcids.append(found.group(2))
                break
        else:
            # No pattern matched: keep the affiliation and flag the ORCID as missing.
            stripped_affs.append(aff)
            orcids.append("-")
    return orcids, stripped_affs

def build_bibcode(metadata, doi_re, bibstem):
"""
Expand Down
12 changes: 11 additions & 1 deletion ADSCitationCapture/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from adsmsg import DenormalizedRecord, NonBibRecord, Status, CitationChangeContentType
from bs4 import BeautifulSoup
from adsputils import setup_logging
from ADSCitationCapture.doi import extract_orcids_from_affs


# ============================= INITIALIZATION ==================================== #
# - Use app logger:
Expand Down Expand Up @@ -125,7 +127,8 @@ def build_record(app, citation_change, parsed_metadata, citations, db_versions,
'read_count': len(readers),
'title': [title],
'publisher': source,
'version': version
'version': version,
'orcid_pub': []
}
if version is None: # Concept DOIs may not contain version
del record_dict['version']
Expand All @@ -147,6 +150,13 @@ def build_record(app, citation_change, parsed_metadata, citations, db_versions,
if is_release:
record_dict['property'].append('RELEASE')

if record_dict["aff"] != ["-" for i in range(0, len(record_dict["aff"]))]:
record_dict['orcid_pub'], record_dict['aff'] = extract_orcids_from_affs(record_dict['aff'])
if set(record_dict['orcid_pub'])=={'-'}:
del record_dict['orcid_pub']
else:
del record_dict['orcid_pub']

record = DenormalizedRecord(**record_dict)
nonbib_record = _build_nonbib_record(app, citation_change, record, db_versions, status, readers=readers)
return record, nonbib_record
Expand Down
9 changes: 6 additions & 3 deletions ADSCitationCapture/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,7 @@ def task_maintenance_canonical(dois, bibcodes):
task_output_results.delay(custom_citation_change, parsed_metadata, existing_citation_bibcodes, db_versions=registered_record.get('associated_works', {"":""}), readers=readers)

@app.task(queue='maintenance_metadata')
def task_maintenance_metadata(dois, bibcodes, reset=False):
def task_maintenance_metadata(dois, bibcodes, reparse=False):
"""
Maintenance operation:
- Get all the registered citation targets (or only a subset of them if DOIs and/or bibcodes are specified)
Expand All @@ -545,8 +545,11 @@ def task_maintenance_metadata(dois, bibcodes, reset=False):

curated_metadata = registered_record.get('curated_metadata', {})

logger.debug("Curated metadata for {} is {}".format(registered_record['content'], registered_record['curated_metadata']))
raw_metadata = doi.fetch_metadata(app.conf['DOI_URL'], app.conf['DATACITE_URL'], registered_record['content'])
logger.debug("Curated metadata for {} is {}".format(registered_record['content'], registered_record['curated_metadata']))
if not reparse:
raw_metadata = doi.fetch_metadata(app.conf['DOI_URL'], app.conf['DATACITE_URL'], registered_record['content'])
else:
raw_metadata = db.get_citation_target_metadata(app, registered_record['content']).get("raw")
if raw_metadata:
parsed_metadata = doi.parse_metadata(raw_metadata)
is_software = parsed_metadata.get('doctype', '').lower() == "software"
Expand Down
1 change: 1 addition & 0 deletions ADSCitationCapture/tests/data/sample_bib_record_orcid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"abstract": "Cubix is a ROOT based graphical interface providing a large number of tools for gamma-ray spectrocopy analysis like peak fits, calibrations, gamma-gamma analysis... It is linked with the TkN library to provide a direct access to nuclear databases. The source code is available on Gitlab: https://gitlab.in2p3.fr/ip2igamma/cubix/cubix The documentation website : https://cubix.in2p3.fr The Cubix project is governed by the CeCILL-B license under French law and abiding by the rules of distribution of free software. You can use, modify and/or redistribute the software under the terms of the CeCILL-B license as circulated by CEA, CNRS and INRIA at the following link www.cecill.info.", "aff": ["CNRS D\u00e9l\u00e9gation Rh\u00f4ne-Auvergne "], "author": ["Dudouet, J\u00e9r\u00e9mie"], "author_count": 1, "author_facet": ["Dudouet, J"], "author_facet_hier": ["0/Dudouet, J", "1/Dudouet, J/Dudouet, J\u00e9r\u00e9mie"], "author_norm": ["Dudouet, J"], "bibcode": "2023zndo..10683242D", "bibstem": ["zndo"], "bibstem_facet": "zndo", "citation": [""], "citation_count": "1", "database": ["general", "astronomy"], "date": "2023-01-01T00:30:00.000000Z", "doctype": "software", "doctype_facet_hier": ["0/Non-Article", "1/Non-Article/Software"], "doi": ["10.5281/zenodo.11020"], "eid": "10.5281/zenodo.11020", "email": ["-"], "first_author": "Dudouet, J\u00e9r\u00e9mie", "first_author_facet_hier": ["0/Dudouet, J", "1/Dudouet, J/Dudouet, J\u00e9r\u00e9mie"], "first_author_norm": "Dudouet, J", "identifier": ["2023zndo..10683242D", "10.5281/zenodo.11020"], "keyword": ["GUI", "gamma-ray spectroscopy", "ROOT", "fits"], "keyword_facet": ["GUI", "gamma-ray spectroscopy", "ROOT", "fits"], "keyword_norm": ["-", "-", "-", "-"], "keyword_schema": ["-", "-", "-", "-"], "links_data": ["{\"access\": \"\", \"instances\": \"\", \"title\": \"\", \"type\": \"electr\", \"url\": \"https://doi.org/10.5281/zenodo.11020\"}"], "orcid_pub": ["0000-0002-9018-6763"], "property": ["ESOURCE", "NONARTICLE", 
"NOT REFEREED", "PUB_OPENACCESS", "OPENACCESS", "RELEASE"], "pub": "Zenodo", "pub_raw": "Zenodo", "pubdate": "2023-01-01", "title": ["Cubix"], "year": "2023", "entry_date": "1970-01-01T00:00:00.000000Z", "data_count": 1, "esources": ["PUB_HTML"], "citation_count_norm": 1.0, "publisher": "Zenodo", "version": "1.0"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"bibcode": "2023zndo..10683242D", "boost": 0.5, "citation_count": 1, "esource": ["PUB_HTML"], "property": ["ESOURCE", "NONARTICLE", "NOT REFEREED", "PUB_OPENACCESS", "OPENACCESS", "RELEASE"], "data_links_rows": [{"link_type": "ESOURCE", "link_sub_type": "PUB_HTML", "url": ["https://doi.org/10.5281/zenodo.11020"], "title": [""]}], "status": "updated", "citation_count_norm": 1.0}
27 changes: 27 additions & 0 deletions ADSCitationCapture/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,3 +350,30 @@ def _init_mock_data(self):
'associated_works': "",
'version': "v2.0.0"
}]
self.mock_data["10.5281/zenodo.10683242"] = {
'raw': '<?xml version="1.0" encoding="UTF-8"?> <resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd"> <identifier identifierType="DOI">10.5281/ZENODO.10683242</identifier> <creators> <creator> <creatorName nameType="Personal">Dudouet, Jérémie</creatorName> <givenName>Jérémie</givenName> <familyName>Dudouet</familyName> <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="">0000-0002-9018-6763</nameIdentifier> <affiliation>CNRS Délégation Rhône-Auvergne</affiliation> </creator> </creators> <titles> <title>Cubix</title> </titles> <publisher>Zenodo</publisher> <publicationYear>2024</publicationYear> <resourceType resourceTypeGeneral="Software"/> <subjects> <subject>GUI</subject> <subject>gamma-ray spectroscopy</subject> <subject>ROOT</subject> <subject>fits</subject> </subjects> <dates> <date dateType="Issued">2024-02-20</date> <date dateType="Created">2023</date> <date dateType="Available" dateInformation="First official release">2024</date> </dates> <language>en</language> <alternateIdentifiers> <alternateIdentifier alternateIdentifierType="oai">oai:zenodo.org:10683242</alternateIdentifier> </alternateIdentifiers> <relatedIdentifiers> <relatedIdentifier relatedIdentifierType="DOI" relationType="Requires" resourceTypeGeneral="Software">10.5281/zenodo.10255692</relatedIdentifier> <relatedIdentifier relatedIdentifierType="DOI" relationType="IsVersionOf">10.5281/zenodo.10683241</relatedIdentifier> </relatedIdentifiers> <sizes/> <formats/> <version>1.0</version> <rightsList> <rights rightsURI="http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html" rightsIdentifier="cecill-b" rightsIdentifierScheme="SPDX" schemeURI="https://spdx.org/licenses/">CeCILL-B Free Software License Agreement</rights> </rightsList> <descriptions> <description descriptionType="Abstract">Cubix is a ROOT based graphical interface 
providing a large number of tools for gamma-ray spectrocopy analysis like peak fits, calibrations, gamma-gamma analysis... It is linked with the TkN library to provide a direct access to nuclear databases. The source code is available on Gitlab: https://gitlab.in2p3.fr/ip2igamma/cubix/cubix The documentation website : https://cubix.in2p3.fr The Cubix project is governed by the CeCILL-B license under French law and abiding by the rules of distribution of free software. You can use, modify and/or redistribute the software under the terms of the CeCILL-B license as circulated by CEA, CNRS and INRIA at the following link www.cecill.info.</description> </descriptions> </resource>',
'parsed': {
"bibcode": "2023zndo..10683242D",
"authors": ["Dudouet, J\u00e9r\u00e9mie"],
"normalized_authors": ["Dudouet, J"],
"affiliations": ["CNRS D\u00e9l\u00e9gation Rh\u00f4ne-Auvergne <ID system=\"ORCID\">0000-0002-9018-6763</ID>"],
"title": "Cubix",
"pubdate": "2023",
"properties": {"DOI": "10.5281/ZENODO.10683242"},
"keywords": ["GUI", "gamma-ray spectroscopy", "ROOT", "fits"],
"abstract": "Cubix is a ROOT based graphical interface providing a large number of tools for gamma-ray spectrocopy analysis like peak fits, calibrations, gamma-gamma analysis... It is linked with the TkN library to provide a direct access to nuclear databases.\n\nThe source code is available on Gitlab: https://gitlab.in2p3.fr/ip2igamma/cubix/cubix\n\nThe documentation website : https://cubix.in2p3.fr\n\n\n\nThe Cubix project is governed by the CeCILL-B license under French law and abiding by the rules of distribution of free software. You can use, modify and/or redistribute the software under the terms of the CeCILL-B license as circulated by CEA, CNRS and INRIA at the following link www.cecill.info.",
"references": [],
"citations": [],
"doctype": "software",
"version": "1.0",
"versions": [],
"version_of": ["10.5281/zenodo.10683241"],
"forked_from": [],
"forks": [],
"described_by": [],
"description_of": [],
"source": "Zenodo"},
'status': 'REGISTERED',
'content': "10.5281/zenodo.248351",
'content_type': "DOI",
}
18 changes: 18 additions & 0 deletions ADSCitationCapture/tests/test_forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,24 @@ def test_build_bib_record_no_associated_works(self):
self.assertEqual(bib_record.toJSON(),expect_bib_record)
self.assertEqual(nonbib_record.toJSON(),expect_nonbib_record)

def test_build_bib_record_orcid(self):
    """
    build_record output for the ORCID mock entry must equal the stored
    bib and nonbib JSON fixtures.
    """
    # Load the expected bib record first, then the expected nonbib record.
    fixture_paths = (
        "ADSCitationCapture/tests/data/sample_bib_record_orcid.json",
        "ADSCitationCapture/tests/data/sample_nonbib_record_orcid.json",
    )
    expected_records = []
    for relative_path in fixture_paths:
        with open(os.path.join(self.app.conf['PROJ_HOME'], relative_path)) as handle:
            expected_records.append(json.load(handle))
    expect_bib_record, expect_nonbib_record = expected_records

    changes = self._common_citation_changes_doi(adsmsg.Status.updated)
    first_change = tasks._protobuf_to_adsmsg_citation_change(changes.changes[0])
    # Software record taken from the mock data set.
    mock_entry = self.mock_data["10.5281/zenodo.10683242"]
    built_bib, built_nonbib = forward.build_record(
        self.app,
        first_change,
        mock_entry['parsed'],
        [''],
        {"": ""},
    )

    self.assertEqual(built_bib.toJSON(), expect_bib_record)
    self.assertEqual(built_nonbib.toJSON(), expect_nonbib_record)

def test_build_bib_record_associated_works(self):
content_filename = os.path.join(self.app.conf['PROJ_HOME'], "ADSCitationCapture/tests/data/sample_bib_record_associated.json")
with open(content_filename) as f:
Expand Down
12 changes: 9 additions & 3 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def maintenance_canonical(dois, bibcodes):
# Send to master updated citation bibcodes in their canonical form
tasks.task_maintenance_canonical.delay(dois, bibcodes)

def maintenance_metadata(dois, bibcodes):
def maintenance_metadata(dois, bibcodes, reparse=False):
"""
Refetch metadata and send updates to master (if any)
"""
Expand All @@ -84,7 +84,7 @@ def maintenance_metadata(dois, bibcodes):
logger.info("MAINTENANCE task: requested a metadata update for '{}' records".format(n_requested))

# Send to master updated metadata
tasks.task_maintenance_metadata.delay(dois, bibcodes)
tasks.task_maintenance_metadata.delay(dois, bibcodes, reparse=reparse)

def maintenance_resend(dois, bibcodes, broker=False, only_nonbib=False):
"""
Expand Down Expand Up @@ -352,6 +352,12 @@ def _build_diagnostics(bibcodes=None, json_payloads=None):
action='store_true',
default=False,
help='Update DOI metadata for the provided list of citation target bibcodes, or if none is provided, for all the current existing citation targets.')
maintenance_parser.add_argument(
'--reparse',
dest='reparse',
action='store_true',
default=False,
help='Calls maintenance task to reparse existing metadata.')
maintenance_parser.add_argument(
'--readers',
dest='import_readers',
Expand Down Expand Up @@ -445,7 +451,7 @@ def _build_diagnostics(bibcodes=None, json_payloads=None):
bibcodes = args.bibcodes
# Process
if args.metadata:
maintenance_metadata(dois, bibcodes)
maintenance_metadata(dois, bibcodes, args.reparse)
elif args.canonical:
maintenance_canonical(dois, bibcodes)
elif args.resend:
Expand Down