Skip to content

Commit 4d9163e

Browse files
committed
Add start of a CLAUDE.md
1 parent b0c2503 commit 4d9163e

6 files changed

Lines changed: 9207 additions & 0 deletions

File tree

CLAUDE.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# ComptoxAI - A platform for knowledge-driven AI research in computational toxicology
2+
ComptoxAI is a suite of tools for computational toxicology research built on top of a large multimodal knowledge graph (also called ComptoxAI). The graph contains environmental toxicants, components of the human body, interactions, entities related to Adverse Outcome Pathways (AOPs), and other entities that together describe the complex network of effects following human exposure to toxicants.
3+
4+
## Structure of ComptoxAI
5+
ComptoxAI is a "monorepo" containing several components, each capable of functioning on its own:
6+
-
7+
8+
9+
## Coding conventions
10+
- Avoid creation of new markdown documents or other non-code files unless they are explicitly part of ComptoxAI's documentation.
11+
- Prefer Memgraph for graph databases, Python for data science and machine learning (PyG for graph neural networks), and JavaScript/Node.js for all website components and the REST API.

pfas.ipynb

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "4d2423d0",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"from comptox_ai.db.graph_db import GraphDB\n",
11+
"import pandas as pd\n",
12+
"from tqdm import tqdm_notebook"
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": 2,
18+
"id": "b1ffc150",
19+
"metadata": {},
20+
"outputs": [],
21+
"source": [
22+
"db = GraphDB(hostname=\"neo4j.comptox.ai\")\n",
23+
"pf_db = pd.read_excel(\"/Users/jdr2160/Downloads/perfluorome database.xlsx\")"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": 3,
29+
"id": "b126d43c",
30+
"metadata": {},
31+
"outputs": [],
32+
"source": [
33+
"casrns = list(set([x.split(';')[0] for x in list(pf_db['CAS.RN'])]))"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": 4,
39+
"id": "d21cad0f",
40+
"metadata": {},
41+
"outputs": [
42+
{
43+
"data": {
44+
"text/plain": [
45+
"401"
46+
]
47+
},
48+
"execution_count": 4,
49+
"metadata": {},
50+
"output_type": "execute_result"
51+
}
52+
],
53+
"source": [
54+
"len(casrns)"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": 5,
60+
"id": "6cbc3a53",
61+
"metadata": {},
62+
"outputs": [
63+
{
64+
"name": "stderr",
65+
"output_type": "stream",
66+
"text": [
67+
"/var/folders/mb/jkjn3zh97jz9dbxlswr4mj8c0000gn/T/ipykernel_18198/3870895096.py:5: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n",
68+
"Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n",
69+
" for crn in tqdm_notebook(casrns):\n"
70+
]
71+
},
72+
{
73+
"data": {
74+
"application/vnd.jupyter.widget-view+json": {
75+
"model_id": "879a1596678f42518aa12127da8998eb",
76+
"version_major": 2,
77+
"version_minor": 0
78+
},
79+
"text/plain": [
80+
" 0%| | 0/401 [00:00<?, ?it/s]"
81+
]
82+
},
83+
"metadata": {},
84+
"output_type": "display_data"
85+
},
86+
{
87+
"name": "stdout",
88+
"output_type": "stream",
89+
"text": [
90+
"281\n"
91+
]
92+
}
93+
],
94+
"source": [
95+
"count = 0\n",
96+
"matches = []\n",
97+
"unmatches = []\n",
98+
"\n",
99+
"for crn in tqdm_notebook(casrns):\n",
100+
" res = db.run_cypher(f\"MATCH (c:Chemical {{ xrefCasRN: \\\"{crn}\\\" }} ) RETURN c;\")\n",
101+
" if len(res) > 0:\n",
102+
" count += 1\n",
103+
" matches.append(res[0]['c'])\n",
104+
" else:\n",
105+
" unmatches.append(crn)\n",
106+
" \n",
107+
"print(count)"
108+
]
109+
},
110+
{
111+
"cell_type": "code",
112+
"execution_count": 6,
113+
"id": "78c472c1",
114+
"metadata": {},
115+
"outputs": [
116+
{
117+
"data": {
118+
"text/plain": [
119+
"['147916-78-7',\n",
120+
" '32848-21-8',\n",
121+
" '2127-74-4',\n",
122+
" '476304-39-9',\n",
123+
" '133310-68-6',\n",
124+
" '38012-79-2',\n",
125+
" '215094-37-4',\n",
126+
" '39492-88-1',\n",
127+
" '94720-20-4',\n",
128+
" '102489-65-6',\n",
129+
" '57325-43-6',\n",
130+
" '69492-70-2',\n",
131+
" '19932-26-4',\n",
132+
" '1274722-59-6',\n",
133+
" '75668-27-8',\n",
134+
" '76848-59-4',\n",
135+
" '220469-13-6',\n",
136+
" '250738-42-2',\n",
137+
" '89109-69-3',\n",
138+
" '678-98-8',\n",
139+
" '355-88-4',\n",
140+
" '371771-07-2',\n",
141+
" '131851-20-2',\n",
142+
" '106873-68-1',\n",
143+
" '1207481-10-4',\n",
144+
" 'NOCAS_893419',\n",
145+
" '355-93-1',\n",
146+
" '164792-01-2',\n",
147+
" '1513864-12-4',\n",
148+
" '879881-65-9',\n",
149+
" '355-30-6',\n",
150+
" '240129-40-2',\n",
151+
" '68541-02-6',\n",
152+
" '70501-47-2',\n",
153+
" '1426894-99-6',\n",
154+
" '1946796-71-9',\n",
155+
" '496805-64-2',\n",
156+
" '314053-71-9',\n",
157+
" '2707-72-4',\n",
158+
" '1444474-83-2',\n",
159+
" '1391033-22-9',\n",
160+
" '123665-83-8',\n",
161+
" '4089-61-6',\n",
162+
" '147545-41-3',\n",
163+
" '213681-67-5',\n",
164+
" '171182-86-8',\n",
165+
" '1433216-51-3',\n",
166+
" '1030606-42-8',\n",
167+
" '136909-85-8',\n",
168+
" '103831-29-4',\n",
169+
" '94333-56-9',\n",
170+
" '755-89-5',\n",
171+
" '377-37-7',\n",
172+
" '918-32-1',\n",
173+
" '78755-31-4',\n",
174+
" '313366-93-7',\n",
175+
" '1207727-04-5',\n",
176+
" '102489-67-8',\n",
177+
" '144862-38-4',\n",
178+
" '1268707-97-6',\n",
179+
" '1612778-34-3',\n",
180+
" '312-81-2',\n",
181+
" '1980063-68-0',\n",
182+
" '2708-54-5',\n",
183+
" '1355553-99-9',\n",
184+
" '2251-83-4',\n",
185+
" '72828-80-9',\n",
186+
" '82721-69-5',\n",
187+
" '1220100-43-5',\n",
188+
" '431-63-0',\n",
189+
" '121633-31-6',\n",
190+
" '200337-06-0',\n",
191+
" '36390-03-1',\n",
192+
" '176702-71-9',\n",
193+
" '1257261-91-8',\n",
194+
" '1161941-02-1',\n",
195+
" '2212-77-3',\n",
196+
" '146304-76-9',\n",
197+
" '94158-67-5',\n",
198+
" '83650-67-3',\n",
199+
" '867373-18-0',\n",
200+
" '714975-29-8',\n",
201+
" '312943-34-3',\n",
202+
" '113584-32-0',\n",
203+
" '119206-62-1',\n",
204+
" '102061-82-5',\n",
205+
" '100427-76-7',\n",
206+
" '681443-29-8',\n",
207+
" '125640-21-3',\n",
208+
" '1190430-20-6',\n",
209+
" '307-96-0',\n",
210+
" '872672-61-2',\n",
211+
" '238098-38-9',\n",
212+
" '129846-67-9',\n",
213+
" '71623-69-3',\n",
214+
" '213207-95-5',\n",
215+
" '93393-77-2',\n",
216+
" '355-99-7',\n",
217+
" '123613-18-3',\n",
218+
" '1355554-66-3',\n",
219+
" '188034-84-6',\n",
220+
" '679-25-4',\n",
221+
" '13050-20-9',\n",
222+
" '90851-71-1',\n",
223+
" '377-52-6',\n",
224+
" '355-98-6',\n",
225+
" '64790-29-0',\n",
226+
" '1355555-21-3',\n",
227+
" '375-83-7',\n",
228+
" '1456734-51-2',\n",
229+
" '1426840-85-8',\n",
230+
" '97388-28-8',\n",
231+
" '1463530-19-9',\n",
232+
" '17425-25-1',\n",
233+
" '755-53-3',\n",
234+
" '53638-09-8',\n",
235+
" '755-76-0',\n",
236+
" '424-01-1',\n",
237+
" '120219-46-7',\n",
238+
" '152718-74-6']"
239+
]
240+
},
241+
"execution_count": 6,
242+
"metadata": {},
243+
"output_type": "execute_result"
244+
}
245+
],
246+
"source": [
247+
"unmatches"
248+
]
249+
},
250+
{
251+
"cell_type": "code",
252+
"execution_count": 7,
253+
"id": "fc098451",
254+
"metadata": {},
255+
"outputs": [
256+
{
257+
"data": {
258+
"text/plain": [
259+
"{'commonName': 'Propanamide, N-(2,5-dichloro-4-nitrophenyl)-2,3,3,3-tetrafluoro-2-(trifluoromethyl)-',\n",
260+
" 'maccs': '0000000000000000000000010000000000000000010000001000000100000010010001100000000000000010000101000000011001100101000010100101000001001111000001010101011001010110111110',\n",
261+
" 'synonyms': '',\n",
262+
" 'sMILES': '[O-][N+](=O)C1=CC(Cl)=C(NC(=O)C(F)(C(F)(F)F)C(F)(F)F)C=C1Cl',\n",
263+
" 'xrefPubchemSID': '315701706',\n",
264+
" 'xrefDTXSID': 'DTXSID2073772',\n",
265+
" 'xrefPubchemCID': '176014',\n",
266+
" 'xrefCasRN': '105923-43-1',\n",
267+
" 'uri': 'http://jdr.bio/ontologies/comptox.owl#chemical_dtxsid2073772'}"
268+
]
269+
},
270+
"execution_count": 7,
271+
"metadata": {},
272+
"output_type": "execute_result"
273+
}
274+
],
275+
"source": [
276+
"matches[0]"
277+
]
278+
},
279+
{
280+
"cell_type": "code",
281+
"execution_count": null,
282+
"id": "5aa9ed06",
283+
"metadata": {},
284+
"outputs": [],
285+
"source": []
286+
}
287+
],
288+
"metadata": {
289+
"kernelspec": {
290+
"display_name": "Python 3 (ipykernel)",
291+
"language": "python",
292+
"name": "python3"
293+
},
294+
"language_info": {
295+
"codemirror_mode": {
296+
"name": "ipython",
297+
"version": 3
298+
},
299+
"file_extension": ".py",
300+
"mimetype": "text/x-python",
301+
"name": "python",
302+
"nbconvert_exporter": "python",
303+
"pygments_lexer": "ipython3",
304+
"version": "3.9.6"
305+
}
306+
},
307+
"nbformat": 4,
308+
"nbformat_minor": 5
309+
}

0 commit comments

Comments
 (0)