From 2dc75f4ec095469add4fe7392b85dd9a582ed1a7 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 4 Jul 2025 13:07:08 +0200 Subject: [PATCH 1/2] Add MedGen handling Follow-up to https://github.com/biopragmatics/bioregistry/pull/1567 --- src/curies_processing/rules.json | 21 +++++++++++++++++++++ tests/test_api.py | 17 +++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/src/curies_processing/rules.json b/src/curies_processing/rules.json index adb3c61..4a38f5b 100644 --- a/src/curies_processing/rules.json +++ b/src/curies_processing/rules.json @@ -644,6 +644,16 @@ "KEGG DRUG": "KEGG.DRUG", "KEGG.COMPOUND:c": "KEGG.COMPOUND:C", "LIPID MAPS:": "LIPIDMAPS:", + "MEDGEN:C0": "umls:C0", + "MEDGEN:C1": "umls:C1", + "MEDGEN:C2": "umls:C2", + "MEDGEN:C3": "umls:C3", + "MEDGEN:C4": "umls:C4", + "MEDGEN:C5": "umls:C5", + "MEDGEN:C6": "umls:C6", + "MEDGEN:C7": "umls:C7", + "MEDGEN:C8": "umls:C8", + "MEDGEN:C9": "umls:C9", "MONDOLEX:": "MONDO:", "MedlinePlus: ": "MedlinePlus:", "MicrO.owl/MICRO:": "MICRO:", @@ -676,6 +686,7 @@ "TS-": "caloha:", "UM-BBD_enzymeID:r": "umbbd.reaction:r", "UMLS CUI:": "UMLS:", + "UMLS:CN": "medgen.cid:CN", "URL: ": "URL:", "URL: http\\://": "http://", "URL:http\\://": "http://", @@ -699,6 +710,16 @@ "http:http\\://en.wikipedia.org/wiki/": "wikipedia.en:", "http:http\\:en.wikipedia.org/wiki/": "wikipedia.en:", "http:www": "http://www", + "medgen:C0": "umls:C0", + "medgen:C1": "umls:C1", + "medgen:C2": "umls:C2", + "medgen:C3": "umls:C3", + "medgen:C4": "umls:C4", + "medgen:C5": "umls:C5", + "medgen:C6": "umls:C6", + "medgen:C7": "umls:C7", + "medgen:C8": "umls:C8", + "medgen:C9": "umls:C9", "odo:ADCAD_": "adcad:", "odo:MOSAIC_": "mosaic:", "omim:PS": "omim.ps:", diff --git a/tests/test_api.py b/tests/test_api.py index fa9bd40..a7c949f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -14,6 +14,8 @@ def test_wrap(self) -> None: """Test wrapping a converter.""" c = curies.get_obo_converter() c.add_prefix("NLXDYS", 
"http://uri.neuinfo.org/nif/nifstd/nlx_dys_") + c.add_prefix("medgen", "https://www.ncbi.nlm.nih.gov/medgen/") + c.add_prefix("medgen.cid", "https://bioregistry.io/medgen.cid:") c = curies_processing.wrap(c) r1 = c.parse("GOC:CJM", strict=True) @@ -23,3 +25,18 @@ def test_wrap(self) -> None: r2 = c.parse("NIFSTD:nlx_dys_20090602", strict=True) self.assertEqual("NLXDYS", r2.prefix) self.assertEqual("20090602", r2.identifier) + + # test that numeric MedGen identifiers parse without remapping + r4 = c.parse("medgen:12345", strict=True) + self.assertEqual("medgen", r4.prefix) + self.assertEqual("12345", r4.identifier) + + # test remapping of C-prefixed MedGen identifiers to UMLS + r3 = c.parse("medgen:C123456", strict=True) + self.assertEqual("umls", r3.prefix) + self.assertEqual("C123456", r3.identifier) + + # test remapping of CN-prefixed MedGen identifiers to medgen.cid + r5 = c.parse("medgen:CN970821", strict=True) + self.assertEqual("medgen.cid", r5.prefix) + self.assertEqual("CN970821", r5.identifier) From ef7879e78c3e25721c40b463ea51db64e0785d2a Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 4 Jul 2025 13:12:01 +0200 Subject: [PATCH 2/2] Add MedGen CN-prefixed remapping rules to rules.json --- src/curies_processing/rules.json | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/curies_processing/rules.json b/src/curies_processing/rules.json index 4a38f5b..19856b4 100644 --- a/src/curies_processing/rules.json +++ b/src/curies_processing/rules.json @@ -654,6 +654,16 @@ "MEDGEN:C7": "umls:C7", "MEDGEN:C8": "umls:C8", "MEDGEN:C9": "umls:C9", + "MEDGEN:CN0": "medgen.cid:CN0", + "MEDGEN:CN1": "medgen.cid:CN1", + "MEDGEN:CN2": "medgen.cid:CN2", + "MEDGEN:CN3": "medgen.cid:CN3", + "MEDGEN:CN4": "medgen.cid:CN4", + "MEDGEN:CN5": "medgen.cid:CN5", + "MEDGEN:CN6": "medgen.cid:CN6", + "MEDGEN:CN7": "medgen.cid:CN7", + "MEDGEN:CN8": "medgen.cid:CN8", + "MEDGEN:CN9": "medgen.cid:CN9", "MONDOLEX:": "MONDO:", "MedlinePlus: ": "MedlinePlus:", "MicrO.owl/MICRO:": "MICRO:", @@ -720,6 +730,16 @@ "medgen:C7": "umls:C7", "medgen:C8": "umls:C8", "medgen:C9": "umls:C9", + 
"medgen:CN0": "medgen.cid:CN0", + "medgen:CN1": "medgen.cid:CN1", + "medgen:CN2": "medgen.cid:CN2", + "medgen:CN3": "medgen.cid:CN3", + "medgen:CN4": "medgen.cid:CN4", + "medgen:CN5": "medgen.cid:CN5", + "medgen:CN6": "medgen.cid:CN6", + "medgen:CN7": "medgen.cid:CN7", + "medgen:CN8": "medgen.cid:CN8", + "medgen:CN9": "medgen.cid:CN9", "odo:ADCAD_": "adcad:", "odo:MOSAIC_": "mosaic:", "omim:PS": "omim.ps:",