adsabs · tjacovich · Jun 26, 2025 · Jun 26, 2025 · Jun 26, 2025 · Jun 26, 2025
diff --git a/ADSCitationCapture/doi.py b/ADSCitationCapture/doi.py
@@ -107,6 +107,23 @@ def fetch_metadata(base_doi_url, base_datacite_url, doi):
 
     return content if record_found else None
 
+def extract_orcids_from_affs(affiliations):
+    """
+    Separate embedded ORCID tags from a list of affiliation strings.
+
+    An affiliation may carry an ORCID tag, written either as
+    `<ID system="ORCID">...</ID>` or as the legacy `<ORCID>...</ORCID>`.
+
+    :param affiliations: iterable of affiliation strings.
+    :return: tuple `(orcids, stripped_affs)` of two lists, both index-aligned
+        with `affiliations`. `orcids` holds the bare ORCID iD, or "-" when no
+        tag is found. `stripped_affs` holds the text preceding the tag (any
+        trailing whitespace is kept), or the unchanged affiliation when no tag
+        is found.
+    """
+    # The last character of an ORCID iD is a checksum that may be "X".
+    orcid_id = r'(\d{4}-\d{4}-\d{4}-\d{3}[\dX])'
+    # Current tag is tried first, then the legacy one. [^\s]* tolerates a
+    # non-whitespace prefix in front of the iD (e.g. a URL form of the iD).
+    orcid_tag_regexes = (
+        re.compile(r'(.*)<ID system="ORCID">[^\s]*' + orcid_id + r'</ID>'),
+        re.compile(r'(.*)<ORCID>[^\s]*' + orcid_id + r'</ORCID>'),
+    )
+    orcids = []
+    stripped_affs = []
+    for aff in affiliations:
+        # Run each pattern at most once and keep the first successful match.
+        found = next(filter(None, (regex.match(aff) for regex in orcid_tag_regexes)), None)
+        if found:
+            # groups: (text before the tag, bare ORCID iD)
+            stripped, orcid = found.groups()
+        else:
+            stripped, orcid = aff, "-"
+        stripped_affs.append(stripped)
+        orcids.append(orcid)
+    return orcids, stripped_affs
 
 def build_bibcode(metadata, doi_re, bibstem):
     """

diff --git a/ADSCitationCapture/forward.py b/ADSCitationCapture/forward.py
@@ -7,6 +7,8 @@
 from adsmsg import DenormalizedRecord, NonBibRecord, Status, CitationChangeContentType
 from bs4 import BeautifulSoup
 from adsputils import setup_logging
+from ADSCitationCapture.doi import extract_orcids_from_affs
+
 
 # ============================= INITIALIZATION ==================================== #
 # - Use app logger:
@@ -125,7 +127,8 @@ def build_record(app, citation_change, parsed_metadata, citations, db_versions,
         'read_count': len(readers),
         'title': [title],
         'publisher': source,
-        'version': version
+        'version': version,
+        'orcid_pub': []
     }
     if version is None: # Concept DOIs may not contain version
         del record_dict['version']
@@ -147,6 +150,13 @@ def build_record(app, citation_change, parsed_metadata, citations, db_versions,
     if is_release:
         record_dict['property'].append('RELEASE')
 
+    if record_dict["aff"] != ["-" for i in range(0, len(record_dict["aff"]))]:
+        record_dict['orcid_pub'], record_dict['aff'] = extract_orcids_from_affs(record_dict['aff'])
+        if set(record_dict['orcid_pub'])=={'-'}:
+            del record_dict['orcid_pub']
+    else:
+        del record_dict['orcid_pub']
+
     record = DenormalizedRecord(**record_dict)
     nonbib_record = _build_nonbib_record(app, citation_change, record, db_versions, status, readers=readers)
     return record, nonbib_record

diff --git a/ADSCitationCapture/tasks.py b/ADSCitationCapture/tasks.py
@@ -521,7 +521,7 @@ def task_maintenance_canonical(dois, bibcodes):
             task_output_results.delay(custom_citation_change, parsed_metadata, existing_citation_bibcodes, db_versions=registered_record.get('associated_works', {"":""}), readers=readers)
 
 @app.task(queue='maintenance_metadata')
-def task_maintenance_metadata(dois, bibcodes, reset=False):
+def task_maintenance_metadata(dois, bibcodes, reparse=False):
     """
     Maintenance operation:
     - Get all the registered citation targets (or only a subset of them if DOIs and/or bibcodes are specified)
@@ -545,8 +545,11 @@ def task_maintenance_metadata(dois, bibcodes, reset=False):
 
         curated_metadata = registered_record.get('curated_metadata', {})
 
-        logger.debug("Curated metadata for {} is {}".format(registered_record['content'], registered_record['curated_metadata']))    
-        raw_metadata = doi.fetch_metadata(app.conf['DOI_URL'], app.conf['DATACITE_URL'], registered_record['content'])
+        logger.debug("Curated metadata for {} is {}".format(registered_record['content'], registered_record['curated_metadata']))   
+        if not reparse: 
+            raw_metadata = doi.fetch_metadata(app.conf['DOI_URL'], app.conf['DATACITE_URL'], registered_record['content'])
+        else:
+            raw_metadata = db.get_citation_target_metadata(app, registered_record['content']).get("raw")
         if raw_metadata:
             parsed_metadata = doi.parse_metadata(raw_metadata)
             is_software = parsed_metadata.get('doctype', '').lower() == "software"

diff --git a/ADSCitationCapture/tests/data/sample_bib_record_orcid.json b/ADSCitationCapture/tests/data/sample_bib_record_orcid.json
@@ -0,0 +1 @@
+{"abstract": "Cubix is a ROOT based graphical interface providing a large number of tools for gamma-ray spectrocopy analysis like peak fits, calibrations, gamma-gamma analysis... It is linked with the TkN library to provide a direct access to nuclear databases.  The source code is available on Gitlab: https://gitlab.in2p3.fr/ip2igamma/cubix/cubix  The documentation website : https://cubix.in2p3.fr    The Cubix project is governed by the CeCILL-B license under French law and abiding by the rules of distribution of free software. You can use, modify and/or redistribute the software under the terms of the CeCILL-B license as circulated by CEA, CNRS and INRIA at the following link www.cecill.info.", "aff": ["CNRS D\u00e9l\u00e9gation Rh\u00f4ne-Auvergne "], "author": ["Dudouet, J\u00e9r\u00e9mie"], "author_count": 1, "author_facet": ["Dudouet, J"], "author_facet_hier": ["0/Dudouet, J", "1/Dudouet, J/Dudouet, J\u00e9r\u00e9mie"], "author_norm": ["Dudouet, J"], "bibcode": "2023zndo..10683242D", "bibstem": ["zndo"], "bibstem_facet": "zndo", "citation": [""], "citation_count": "1", "database": ["general", "astronomy"], "date": "2023-01-01T00:30:00.000000Z", "doctype": "software", "doctype_facet_hier": ["0/Non-Article", "1/Non-Article/Software"], "doi": ["10.5281/zenodo.11020"], "eid": "10.5281/zenodo.11020", "email": ["-"], "first_author": "Dudouet, J\u00e9r\u00e9mie", "first_author_facet_hier": ["0/Dudouet, J", "1/Dudouet, J/Dudouet, J\u00e9r\u00e9mie"], "first_author_norm": "Dudouet, J", "identifier": ["2023zndo..10683242D", "10.5281/zenodo.11020"], "keyword": ["GUI", "gamma-ray spectroscopy", "ROOT", "fits"], "keyword_facet": ["GUI", "gamma-ray spectroscopy", "ROOT", "fits"], "keyword_norm": ["-", "-", "-", "-"], "keyword_schema": ["-", "-", "-", "-"], "links_data": ["{\"access\": \"\", \"instances\": \"\", \"title\": \"\", \"type\": \"electr\", \"url\": \"https://doi.org/10.5281/zenodo.11020\"}"], "orcid_pub": ["0000-0002-9018-6763"], "property": ["ESOURCE", 
"NONARTICLE", "NOT REFEREED", "PUB_OPENACCESS", "OPENACCESS", "RELEASE"], "pub": "Zenodo", "pub_raw": "Zenodo", "pubdate": "2023-01-01", "title": ["Cubix"], "year": "2023", "entry_date": "1970-01-01T00:00:00.000000Z", "data_count": 1, "esources": ["PUB_HTML"], "citation_count_norm": 1.0, "publisher": "Zenodo", "version": "1.0"}
diff --git a/ADSCitationCapture/tests/data/sample_nonbib_record_orcid.json b/ADSCitationCapture/tests/data/sample_nonbib_record_orcid.json
@@ -0,0 +1 @@
+{"bibcode": "2023zndo..10683242D", "boost": 0.5, "citation_count": 1, "esource": ["PUB_HTML"], "property": ["ESOURCE", "NONARTICLE", "NOT REFEREED", "PUB_OPENACCESS", "OPENACCESS", "RELEASE"], "data_links_rows": [{"link_type": "ESOURCE", "link_sub_type": "PUB_HTML", "url": ["https://doi.org/10.5281/zenodo.11020"], "title": [""]}], "status": "updated", "citation_count_norm": 1.0}
diff --git a/ADSCitationCapture/tests/test_base.py b/ADSCitationCapture/tests/test_base.py
@@ -350,3 +350,30 @@ def _init_mock_data(self):
                     'associated_works': "",
                     'version': "v2.0.0"    
         }]
+        self.mock_data["10.5281/zenodo.10683242"] = {
+                'raw': '<?xml version="1.0" encoding="UTF-8"?> <resource xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://datacite.org/schema/kernel-4" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">   <identifier identifierType="DOI">10.5281/ZENODO.10683242</identifier>   <creators>     <creator>       <creatorName nameType="Personal">Dudouet, Jérémie</creatorName>       <givenName>Jérémie</givenName>       <familyName>Dudouet</familyName>       <nameIdentifier nameIdentifierScheme="ORCID" schemeURI="">0000-0002-9018-6763</nameIdentifier>       <affiliation>CNRS Délégation Rhône-Auvergne</affiliation>     </creator>   </creators>   <titles>     <title>Cubix</title>   </titles>   <publisher>Zenodo</publisher>   <publicationYear>2024</publicationYear>   <resourceType resourceTypeGeneral="Software"/>   <subjects>     <subject>GUI</subject>     <subject>gamma-ray spectroscopy</subject>     <subject>ROOT</subject>     <subject>fits</subject>   </subjects>   <dates>     <date dateType="Issued">2024-02-20</date>     <date dateType="Created">2023</date>     <date dateType="Available" dateInformation="First official release">2024</date>   </dates>   <language>en</language>   <alternateIdentifiers>     <alternateIdentifier alternateIdentifierType="oai">oai:zenodo.org:10683242</alternateIdentifier>   </alternateIdentifiers>   <relatedIdentifiers>     <relatedIdentifier relatedIdentifierType="DOI" relationType="Requires" resourceTypeGeneral="Software">10.5281/zenodo.10255692</relatedIdentifier>     <relatedIdentifier relatedIdentifierType="DOI" relationType="IsVersionOf">10.5281/zenodo.10683241</relatedIdentifier>   </relatedIdentifiers>   <sizes/>   <formats/>   <version>1.0</version>   <rightsList>     <rights rightsURI="http://www.cecill.info/licences/Licence_CeCILL-B_V1-en.html" rightsIdentifier="cecill-b" rightsIdentifierScheme="SPDX" schemeURI="https://spdx.org/licenses/">CeCILL-B Free 
Software License Agreement</rights>   </rightsList>   <descriptions>     <description descriptionType="Abstract">Cubix is a ROOT based graphical interface providing a large number of tools for gamma-ray spectrocopy analysis like peak fits, calibrations, gamma-gamma analysis... It is linked with the TkN library to provide a direct access to nuclear databases. The source code is available on Gitlab: https://gitlab.in2p3.fr/ip2igamma/cubix/cubix The documentation website : https://cubix.in2p3.fr The Cubix project is governed by the CeCILL-B license under French law and abiding by the rules of distribution of free software. You can use, modify and/or redistribute the software under the terms of the CeCILL-B license as circulated by CEA, CNRS and INRIA at the following link www.cecill.info.</description>   </descriptions> </resource>',
+                'parsed': {
+                    "bibcode": "2023zndo..10683242D", 
+                    "authors": ["Dudouet, J\u00e9r\u00e9mie"], 
+                    "normalized_authors": ["Dudouet, J"], 
+                    "affiliations": ["CNRS D\u00e9l\u00e9gation Rh\u00f4ne-Auvergne <ID system=\"ORCID\">0000-0002-9018-6763</ID>"], 
+                    "title": "Cubix", 
+                    "pubdate": "2023", 
+                    "properties": {"DOI": "10.5281/ZENODO.10683242"}, 
+                    "keywords": ["GUI", "gamma-ray spectroscopy", "ROOT", "fits"], 
+                    "abstract": "Cubix is a ROOT based graphical interface providing a large number of tools for gamma-ray spectrocopy analysis like peak fits, calibrations, gamma-gamma analysis... It is linked with the TkN library to provide a direct access to nuclear databases.\n\nThe source code is available on Gitlab: https://gitlab.in2p3.fr/ip2igamma/cubix/cubix\n\nThe documentation website : https://cubix.in2p3.fr\n\n\n\nThe Cubix project is governed by the CeCILL-B license under French law and abiding by the rules of distribution of free software. You can use, modify and/or redistribute the software under the terms of the CeCILL-B license as circulated by CEA, CNRS and INRIA at the following link www.cecill.info.", 
+                    "references": [], 
+                    "citations": [],
+                    "doctype": "software", 
+                    "version": "1.0", 
+                    "versions": [], 
+                    "version_of": ["10.5281/zenodo.10683241"], 
+                    "forked_from": [], 
+                    "forks": [], 
+                    "described_by": [], 
+                    "description_of": [], 
+                    "source": "Zenodo"},
+                'status': 'REGISTERED',
+                'content': "10.5281/zenodo.248351",
+                'content_type': "DOI",
+        }
diff --git a/ADSCitationCapture/tests/test_forward.py b/ADSCitationCapture/tests/test_forward.py
@@ -52,6 +52,24 @@ def test_build_bib_record_no_associated_works(self):
         self.assertEqual(bib_record.toJSON(),expect_bib_record)
         self.assertEqual(nonbib_record.toJSON(),expect_nonbib_record)
 
+    def test_build_bib_record_orcid(self):
+        """Bib/nonbib records built from ORCID-tagged affiliations must equal the stored fixtures."""
+        expected = []
+        for fixture in ("ADSCitationCapture/tests/data/sample_bib_record_orcid.json",
+                        "ADSCitationCapture/tests/data/sample_nonbib_record_orcid.json"):
+            with open(os.path.join(self.app.conf['PROJ_HOME'], fixture)) as fixture_file:
+                expected.append(json.load(fixture_file))
+        expect_bib_record, expect_nonbib_record = expected
+
+        changes = self._common_citation_changes_doi(adsmsg.Status.updated)
+        citation_change = tasks._protobuf_to_adsmsg_citation_change(changes.changes[0])
+        # Software record whose mocked affiliation carries an ORCID tag.
+        parsed_metadata = self.mock_data["10.5281/zenodo.10683242"]['parsed']
+        bib_record, nonbib_record = forward.build_record(
+            self.app, citation_change, parsed_metadata, [''], {"": ""})
+        self.assertEqual(bib_record.toJSON(), expect_bib_record)
+        self.assertEqual(nonbib_record.toJSON(), expect_nonbib_record)
+
     def test_build_bib_record_associated_works(self):
         content_filename = os.path.join(self.app.conf['PROJ_HOME'], "ADSCitationCapture/tests/data/sample_bib_record_associated.json")
         with open(content_filename) as f:

diff --git a/run.py b/run.py
@@ -73,7 +73,7 @@ def maintenance_canonical(dois, bibcodes):
     # Send to master updated citation bibcodes in their canonical form
     tasks.task_maintenance_canonical.delay(dois, bibcodes)
 
-def maintenance_metadata(dois, bibcodes):
+def maintenance_metadata(dois, bibcodes, reparse=False):
     """
     Refetch metadata and send updates to master (if any)
     """
@@ -84,7 +84,7 @@ def maintenance_metadata(dois, bibcodes):
         logger.info("MAINTENANCE task: requested a metadata update for '{}' records".format(n_requested))
 
     # Send to master updated metadata
-    tasks.task_maintenance_metadata.delay(dois, bibcodes)
+    tasks.task_maintenance_metadata.delay(dois, bibcodes, reparse=reparse)
 
 def maintenance_resend(dois, bibcodes, broker=False, only_nonbib=False):
     """
@@ -352,6 +352,12 @@ def _build_diagnostics(bibcodes=None, json_payloads=None):
                         action='store_true',
                         default=False,
                         help='Update DOI metadata for the provided list of citation target bibcodes, or if none is provided, for all the current existing citation targets.')
+    maintenance_parser.add_argument(
+                        '--reparse',
+                        dest='reparse',
+                        action='store_true',
+                        default=False,
+                        help='Calls maintenance task to reparse existing metadata.')
     maintenance_parser.add_argument(
                         '--readers',
                         dest='import_readers',
@@ -445,7 +451,7 @@ def _build_diagnostics(bibcodes=None, json_payloads=None):
                 bibcodes = args.bibcodes
             # Process
             if args.metadata:
-                maintenance_metadata(dois, bibcodes)
+                maintenance_metadata(dois, bibcodes, args.reparse)
             elif args.canonical:
                 maintenance_canonical(dois, bibcodes)
             elif args.resend:
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"abstract": "Cubix is a ROOT based graphical interface providing a large number of tools for gamma-ray spectrocopy analysis like peak fits, calibrations, gamma-gamma analysis... It is linked with the TkN library to provide a direct access to nuclear databases. The source code is available on Gitlab: https://gitlab.in2p3.fr/ip2igamma/cubix/cubix The documentation website : https://cubix.in2p3.fr The Cubix project is governed by the CeCILL-B license under French law and abiding by the rules of distribution of free software. You can use, modify and/or redistribute the software under the terms of the CeCILL-B license as circulated by CEA, CNRS and INRIA at the following link www.cecill.info.", "aff": ["CNRS D\u00e9l\u00e9gation Rh\u00f4ne-Auvergne "], "author": ["Dudouet, J\u00e9r\u00e9mie"], "author_count": 1, "author_facet": ["Dudouet, J"], "author_facet_hier": ["0/Dudouet, J", "1/Dudouet, J/Dudouet, J\u00e9r\u00e9mie"], "author_norm": ["Dudouet, J"], "bibcode": "2023zndo..10683242D", "bibstem": ["zndo"], "bibstem_facet": "zndo", "citation": [""], "citation_count": "1", "database": ["general", "astronomy"], "date": "2023-01-01T00:30:00.000000Z", "doctype": "software", "doctype_facet_hier": ["0/Non-Article", "1/Non-Article/Software"], "doi": ["10.5281/zenodo.11020"], "eid": "10.5281/zenodo.11020", "email": ["-"], "first_author": "Dudouet, J\u00e9r\u00e9mie", "first_author_facet_hier": ["0/Dudouet, J", "1/Dudouet, J/Dudouet, J\u00e9r\u00e9mie"], "first_author_norm": "Dudouet, J", "identifier": ["2023zndo..10683242D", "10.5281/zenodo.11020"], "keyword": ["GUI", "gamma-ray spectroscopy", "ROOT", "fits"], "keyword_facet": ["GUI", "gamma-ray spectroscopy", "ROOT", "fits"], "keyword_norm": ["-", "-", "-", "-"], "keyword_schema": ["-", "-", "-", "-"], "links_data": ["{\"access\": \"\", \"instances\": \"\", \"title\": \"\", \"type\": \"electr\", \"url\": \"https://doi.org/10.5281/zenodo.11020\"}"], "orcid_pub": ["0000-0002-9018-6763"], "property": ["ESOURCE", "NONARTICLE", 
"NOT REFEREED", "PUB_OPENACCESS", "OPENACCESS", "RELEASE"], "pub": "Zenodo", "pub_raw": "Zenodo", "pubdate": "2023-01-01", "title": ["Cubix"], "year": "2023", "entry_date": "1970-01-01T00:00:00.000000Z", "data_count": 1, "esources": ["PUB_HTML"], "citation_count_norm": 1.0, "publisher": "Zenodo", "version": "1.0"}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"bibcode": "2023zndo..10683242D", "boost": 0.5, "citation_count": 1, "esource": ["PUB_HTML"], "property": ["ESOURCE", "NONARTICLE", "NOT REFEREED", "PUB_OPENACCESS", "OPENACCESS", "RELEASE"], "data_links_rows": [{"link_type": "ESOURCE", "link_sub_type": "PUB_HTML", "url": ["https://doi.org/10.5281/zenodo.11020"], "title": [""]}], "status": "updated", "citation_count_norm": 1.0}