From f15b47ceeecef014c11a04e95cd91a3632207fa5 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Thu, 23 Oct 2025 22:27:40 +0200
Subject: [PATCH 1/5] Fix file inconsistencies

---
 abbreviations.csv |  2 +-
 abbreviations.smi | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/abbreviations.csv b/abbreviations.csv
index db7dcfa..9328bd2 100644
--- a/abbreviations.csv
+++ b/abbreviations.csv
@@ -7,7 +7,7 @@ NH2,N
 –OCH2O–,*OCO*
 s-Bu,*C(C)CC
 i-Pr,*C(C)C
-2,6-(CH3)2C6H3,*C1=C(C)C=CC=C1(C)
+"2,6-(CH3)2C6H3",*C1=C(C)C=CC=C1(C)
 PMB,*OCC1=CC=C(C=C1)OC
 Bpin,CC1(C)OB([*])OC1(C)C
 PMP,COC1=CC=C([*])C=C1
diff --git a/abbreviations.smi b/abbreviations.smi
index 10b80e5..90a9078 100644
--- a/abbreviations.smi
+++ b/abbreviations.smi
@@ -1,14 +1,14 @@
-Abbreviation SMILES
-C6H4F *c1ccc(F)cc1
-Mes *c1c(C)cc(C)cc1(C)
-S S
-N N
-NH2 N
-–OCH2O– *OCO*
-s-Bu *C(C)CC
-i-Pr *C(C)C
-2,6-(CH3)2C6H3 *C1=C(C)C=CC=C1(C)
-PMB *OCC1=CC=C(C=C1)OC
+Abbreviation	SMILES
+C6H4F	*c1ccc(F)cc1
+Mes	*c1c(C)cc(C)cc1(C)
+S	S
+N	N
+NH2	N
+–OCH2O–	*OCO*
+s-Bu	*C(C)CC
+i-Pr	*C(C)C
+2,6-(CH3)2C6H3	*C1=C(C)C=CC=C1(C)
+PMB	*OCC1=CC=C(C=C1)OC
 Bpin	CC1(C)OB([*])OC1(C)C
 PMP	COC1=CC=C([*])C=C1
 OPP	O=P(OP(O[*])(O)=O)(O)O
@@ -268,6 +268,6 @@ Ad	[*]C12CC3CC(C2)CC(C3)C1
 ADMB	O=C(O[*])C(C)(C)CCOC(C)=O
 Adoc	O=C([*])OC12CC3CC(C2)CC(C3)C1
 Adpoc	O=C([*])OC(C)(C12CC3CC(C2)CC(C3)C1)C
-Alloc O=C([*])OCC=C
+Alloc	O=C([*])OCC=C
 AOC	O=C([*])OCC=C
-Bns O=S(CC1=CC=CC=C1)([O*])=O
+Bns	O=S(CC1=CC=CC=C1)([O*])=O

From 78faa59b35fd29490780e426ac93ec2f34e6e5ed Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Thu, 23 Oct 2025 22:34:03 +0200
Subject: [PATCH 2/5] Fix malformed "Py 2" row in abbreviations.csv

---
 abbreviations.csv | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/abbreviations.csv b/abbreviations.csv
index 9328bd2..92792d2 100644
--- a/abbreviations.csv
+++ b/abbreviations.csv
@@ -134,7 +134,7 @@ O-n-C8H17,CCCCCCCCO[*]
 OBn,[*]OCC1=CC=CC=C1
 OHC,O=C[*]
 PO3Bn2,O=P(OCC1=CC=CC=C1)([*])OCC2=CC=CC=C2
-Py,2	[*]C1=NC=CC=C1
+"Py 2",[*]C1=NC=CC=C1
 SCE,[*]SCCC#N
 TBSO,C[Si](C(C)(C)C)(O[*])C
 decanyl,CCCCCCCCCC[*]

From d0785d14f94e13edc7e4959b7680a8ca6164daeb Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Thu, 23 Oct 2025 22:39:06 +0200
Subject: [PATCH 3/5] Add missing entries in CSV

---
 abbreviations.csv | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/abbreviations.csv b/abbreviations.csv
index 92792d2..83c15a1 100644
--- a/abbreviations.csv
+++ b/abbreviations.csv
@@ -257,3 +257,17 @@ SEt,CCS[*]
 SnBu3,[*][Sn](CCCC)(CCCC)CCCC
 TMSE,C[Si](C)(C)CC[*]
 n-Hexyl,CCCCCC[*]
+AAM,O=C1C2=CC=CC=C2N[*]N1
+ABn,[N-]=[N+]=NC1=CC=C(C=C1)C[*]
+ABO,[*]C1(O2)OCCC2CO1
+Ac,O=C([*])C
+ACBZ,O=C([*])OCC1=CC=C(N=[N+]=[N-])C=C1
+AcHmb,COC1=CC=C(C(OC(C)=O)=C1)C[*]
+Acm,CC(NC[*])=O
+Ad,[*]C12CC3CC(C2)CC(C3)C1
+ADMB,O=C(O[*])C(C)(C)CCOC(C)=O
+Adoc,O=C([*])OC12CC3CC(C2)CC(C3)C1
+Adpoc,O=C([*])OC(C)(C12CC3CC(C2)CC(C3)C1)C
+Alloc,O=C([*])OCC=C
+AOC,O=C([*])OCC=C
+Bns,O=S(CC1=CC=CC=C1)([O*])=O

From 0d3aeafa86c762701bd33f33621c103a0f50ae58 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Thu, 23 Oct 2025 22:40:42 +0200
Subject: [PATCH 4/5] Add linting script

---
 .github/workflows/check_abbreviations.yml |  5 +++
 lint.py                                   | 38 +++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 lint.py

diff --git a/.github/workflows/check_abbreviations.yml b/.github/workflows/check_abbreviations.yml
index b97a264..d6ef172 100644
--- a/.github/workflows/check_abbreviations.yml
+++ b/.github/workflows/check_abbreviations.yml
@@ -15,3 +15,8 @@ jobs:
       - uses: actions/checkout@v3
       - name: Run duplicate check
         run: ./scripts/check_duplicates_abbreviations.sh
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+      - name: Lint abbreviation files
+        run: uv run lint.py
diff --git a/lint.py b/lint.py
new file mode 100644
index 0000000..705d400
--- /dev/null
+++ b/lint.py
@@ -0,0 +1,38 @@
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "pandas",
+# ]
+# ///
+
+from pathlib import Path
+
+import pandas as pd
+
+HERE = Path(__file__).parent.resolve()
+
+
+def main():
+    tsv_path = HERE.joinpath("abbreviations.csv")
+    df1 = pd.read_csv(tsv_path)
+
+    smi_path = HERE.joinpath("abbreviations.smi")
+    df2 = pd.read_csv(smi_path, sep="\t")
+
+    if not (df1.columns == df2.columns).all():
+        raise
+
+    s1 = set(df1['Abbreviation'])
+    s2 = set(df2['Abbreviation'])
+
+    d1 = s1 - s2
+    if d1:
+        raise ValueError(f"abbreviations in CSV but not SMI: {d1}")
+
+    d2 = s2 - s1
+    if d2:
+        raise ValueError(f"abbreviations in SMI but not CSV: {d2}")
+
+
+if __name__ == '__main__':
+    main()

From 4c85dedc6660d7dea1243a1c97ed3e6f7c1b2db0 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt <cthoyt@gmail.com>
Date: Wed, 5 Nov 2025 15:40:59 +0100
Subject: [PATCH 5/5] Replace CSV/SMI sync check in lint.py with SMI format check

---
 lint.py | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/lint.py b/lint.py
index 705d400..5641edc 100644
--- a/lint.py
+++ b/lint.py
@@ -13,25 +13,11 @@
 
 
 def main():
-    tsv_path = HERE.joinpath("abbreviations.csv")
-    df1 = pd.read_csv(tsv_path)
-
     smi_path = HERE.joinpath("abbreviations.smi")
     df2 = pd.read_csv(smi_path, sep="\t")
 
-    if not (df1.columns == df2.columns).all():
-        raise
-
-    s1 = set(df1['Abbreviation'])
-    s2 = set(df2['Abbreviation'])
-
-    d1 = s1 - s2
-    if d1:
-        raise ValueError(f"abbreviations in CSV but not SMI: {d1}")
-
-    d2 = s2 - s1
-    if d2:
-        raise ValueError(f"abbreviations in SMI but not CSV: {d2}")
+    if smi_path.read_text(encoding="utf-8") != df2.to_csv(sep='\t', index=False):
+        raise ValueError(f"{smi_path} is not formatted properly.")
 
 
 if __name__ == '__main__':