diff --git a/.github/workflows/check_abbreviations.yml b/.github/workflows/check_abbreviations.yml index b97a264..d6ef172 100644 --- a/.github/workflows/check_abbreviations.yml +++ b/.github/workflows/check_abbreviations.yml @@ -15,3 +15,8 @@ jobs: - uses: actions/checkout@v3 - name: Run duplicate check run: ./scripts/check_duplicates_abbreviations.sh + + - name: Install uv + uses: astral-sh/setup-uv@v3 + - name: Check CSV/SMI sync + run: uv run lint.py diff --git a/abbreviations.smi b/abbreviations.smi index 10b80e5..90a9078 100644 --- a/abbreviations.smi +++ b/abbreviations.smi @@ -1,14 +1,14 @@ -Abbreviation SMILES -C6H4F *c1ccc(F)cc1 -Mes *c1c(C)cc(C)cc1(C) -S S -N N -NH2 N -–OCH2O– *OCO* -s-Bu *C(C)CC -i-Pr *C(C)C -2,6-(CH3)2C6H3 *C1=C(C)C=CC=C1(C) -PMB *OCC1=CC=C(C=C1)OC +Abbreviation SMILES +C6H4F *c1ccc(F)cc1 +Mes *c1c(C)cc(C)cc1(C) +S S +N N +NH2 N +–OCH2O– *OCO* +s-Bu *C(C)CC +i-Pr *C(C)C +2,6-(CH3)2C6H3 *C1=C(C)C=CC=C1(C) +PMB *OCC1=CC=C(C=C1)OC Bpin CC1(C)OB([*])OC1(C)C PMP COC1=CC=C([*])C=C1 OPP O=P(OP(O[*])(O)=O)(O)O @@ -268,6 +268,6 @@ Ad [*]C12CC3CC(C2)CC(C3)C1 ADMB O=C(O[*])C(C)(C)CCOC(C)=O Adoc O=C([*])OC12CC3CC(C2)CC(C3)C1 Adpoc O=C([*])OC(C)(C12CC3CC(C2)CC(C3)C1)C -Alloc O=C([*])OCC=C +Alloc O=C([*])OCC=C AOC O=C([*])OCC=C -Bns O=S(CC1=CC=CC=C1)([O*])=O +Bns O=S(CC1=CC=CC=C1)([O*])=O diff --git a/lint.py b/lint.py new file mode 100644 index 0000000..5641edc --- /dev/null +++ b/lint.py @@ -0,0 +1,24 @@ +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "pandas", +# ] +# /// + +from pathlib import Path + +import pandas as pd + +HERE = Path(__file__).parent.resolve() + + +def main(): + smi_path = HERE.joinpath("abbreviations.smi") + df2 = pd.read_csv(smi_path, sep="\t") + + if smi_path.read_text() != df2.to_csv(sep='\t', index=False): + raise ValueError(f"{smi_path} is not formatted properly.") + + +if __name__ == '__main__': + main()