# llm2graph/gen_rules.py at main · danielfrees/llm2graph · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
import json
import networkx as nx
import requests
from dotenv import load_dotenv
import os
import predictionguard as pg
import re
from matplotlib import pyplot as plt
import textwrap
import random

load_dotenv()

# Refresh key into env.
# SECURITY: the Prediction Guard key is read only from the environment (or the
# .env file loaded above) via PREDICTION_GUARD_API. A literal key used to be
# assigned here, overriding the environment value; any key that was committed
# should be treated as leaked and rotated.
INTEL_KEY = os.getenv("PREDICTION_GUARD_API")
if INTEL_KEY:
    # os.environ only accepts strings, so export the token only when one was found.
    os.environ["PREDICTIONGUARD_TOKEN"] = INTEL_KEY
# Fixed seed so anything using `random` is reproducible between runs.
random.seed(123)


def query_prediction_guard(query: str, model: str = "sqlcoder-34b-alpha", max_tokens: int = 1000):
    """
    Wrapper for querying prediction guard

    Args:
        query: Prompt text sent to the completion endpoint.
        model: Name of the hosted model to use. Other models previously tried
            in this file: "Yi-34B-Chat", "Nous-Hermes-Llama2-13B".
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The object returned by ``pg.Completion.create``. Callers in this file
        read the generated text from ``result['choices'][0]['text']``.
    """
    return pg.Completion.create(
        model=model,
        prompt=query,
        max_tokens=max_tokens,
    )


def is_valid_json(data):
    """
    Return True if `data` can be parsed by ``json.loads``, False otherwise.

    Non-string input such as ``None`` is reported as invalid instead of
    raising, so callers can use this as a plain predicate.
    """
    try:
        json.loads(data)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass; TypeError covers
        # inputs that are not str/bytes/bytearray (e.g. None).
        return False
    return True

def truncate(s):
    """
    Extract the first balanced ``{...}`` object from `s`.

    Scanning starts at the first "{" so leading whitespace or chatter in the
    model output does not end the scan early, and braces that appear inside
    double-quoted JSON strings are ignored when matching.

    Returns the substring from the first "{" through its matching "}". If `s`
    contains no "{" or the object is never closed, `s` is returned unchanged.
    """
    start = s.find("{")
    if start == -1:
        return s

    depth = 0
    in_string = False
    escaped = False
    for offset, ch in enumerate(s[start:]):
        if in_string:
            # Inside a string literal only an unescaped quote is significant.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue

        if ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return s[start:start + offset + 1]

    # Unbalanced input: nothing sensible to cut, hand it back as-is.
    return s


def main():
    """
    Run LLM knowledge graph extraction experiment.

    Steps:
      1. Build a few-shot prompt around a hard-coded example rule.
      2. Query the model (a bounded number of times) until the truncated
         reply parses as a JSON object.
      3. Read the previous 'output.json' if one exists, overwrite the file
         with the new rules, and draw the combined graph.

    Returns None. If no valid JSON object is obtained within the attempt
    limit, a message is printed and nothing is written or drawn.
    """

    print("=== Querying Intel Prediction Guard ===\n")
    # Named `user_rule` rather than `input` so the builtin is not shadowed.
    user_rule = "Apples and cinnamon go well together."

    JSON_PROMPT = f"""
        ### Instruction
        You are an expert in designing nutritious, affordable meals.
        Users will input rules, such as 'I don't like bananas', and you
        will extract a json of graph information based on this rule, or
        rules.

        `ALLOWED ENTITY TYPES`: Fruit, Vegetable, Carbohydrate, Protein, Fat, Spice

        `ALLOWED ENTITY FEATURES:` Calories (integer), Protein (integer, unit: grams), Carbs (integer, unit: grams), Fat (integer, unit: grams), Cost (float, unit: US Dollars), Extra (string, brief extra information)

        `ALLOWED RELATION TYPES`: pairsWith (ie. A pairs well with B), badWith (ie. A does not taste good with B), isFavorite (ie. A is always preferred if possible), leastFavorite (ie. A is never desired)

        ie.

        Input: I don't like bananas.

        Rules:
        {{
            "items": {{
                "banana" : {{
                    "serving": "1 banana",
                    "type": "fruit",
                    "calories": "110",
                    "protein": "1",
                    "carbs": "28",
                    "fat": "0",
                    "extra": "Bananas are rich in potassium and high in sugar."
                }}
            }},
            "relations": {{
                "from": "banana",
                "to": null,
                "relation": "leastFavorite"
            }}
        }}

        Input: I like bananas.

        Rules:
        {{
            "items": {{
                "banana" : {{
                    "serving": "1 banana",
                    "type": "fruit",
                    "calories": "110",
                    "protein": "1",
                    "carbs": "28",
                    "fat": "0",
                    "extra": "Bananas are rich in potassium and high in sugar."
                }}
            }},
            "relations": {{
                "from": "banana",
                "to": null,
                "relation": "isFavorite"
            }}
        }}


        Input: Garlic pairs really well with salmon.

        Rules:
        {{
            "items": {{
                "garlic" : {{
                    "serving": "1 clove",
                    "type": "spice",
                    "calories": "4",
                    "protein": "0",
                    "carbs": "1",
                    "fat": "0",
                    "extra": "Garlic has a strong odor and taste, and is excellent in sauces."
                }},
                "salmon" : {{
                    "serving": "100 grams",
                    "type": "protein",
                    "calories": "136",
                    "protein": "19",
                    "carbs": "0",
                    "fat": "7",
                    "extra": "Salmon is an excellent source of Omega 3s."
                }}
            }},
            "relations": {{
                "1": {{
                    "from": "garlic",
                    "to": "salmon",
                    "relation": "pairsWith"
                }},
                "2": {{
                    "from": "salmon",
                    "to": "garlic",
                    "relation": "pairsWith"
                }}
            }}
        }}

        Input: Garlic pairs really poorly with salmon.

        Rules:
        {{
            "items": {{
                "garlic" : {{
                    "serving": "1 clove",
                    "type": "spice",
                    "calories": "4",
                    "protein": "0",
                    "carbs": "1",
                    "fat": "0",
                    "extra": "Garlic has a strong odor and taste, and is excellent in sauces."
                }},
                "salmon" : {{
                    "serving": "100 grams",
                    "type": "protein",
                    "calories": "136",
                    "protein": "19",
                    "carbs": "0",
                    "fat": "7",
                    "extra": "Salmon is an excellent source of Omega 3s."
                }}
            }},
            "relations": {{
                "1": {{
                    "from": "garlic",
                    "to": "salmon",
                    "relation": "badWith"
                }},
                "2": {{
                    "from": "salmon",
                    "to": "garlic",
                    "relation": "badWith"
                }}
            }}
        }}

    ### INPUT:
    {user_rule}

    ### RULES:
    Make sure that under any circumstances your output is in JSON format.
    """

    # Bound the retries: every attempt is a paid API call, and a model that
    # never emits valid JSON would otherwise loop forever.
    max_attempts = 5
    json_data = None
    output = ""
    for attempt in range(1, max_attempts + 1):
        result = query_prediction_guard(query=JSON_PROMPT)
        print(result)
        json_rules = result['choices'][0]['text']
        print(f"JSON Rules: \n{json_rules}")
        output = truncate(json_rules)

        if is_valid_json(output):
            parsed = json.loads(output)
            # The graph code needs a JSON object; a bare number, string or
            # list is valid JSON but unusable here.
            if isinstance(parsed, dict):
                json_data = parsed
                break

        print(output)
        if attempt < max_attempts:
            print("Invalid JSON output, retrying...")

    if json_data is None:
        print(f"No valid JSON rules after {max_attempts} attempts; giving up.")
        return

    print("\n=== Extracted Recipe Rule Information ===\n")
    print(output)

    # Load the existing JSON data from a file. On the first run the file does
    # not exist yet (or may be empty/corrupt), so start from an empty graph.
    try:
        with open('output.json', 'r') as file:
            base_graph = json.load(file)
    except (FileNotFoundError, json.JSONDecodeError):
        base_graph = {}
    if not isinstance(base_graph, dict):
        base_graph = {}

    # Persist the freshly extracted rules.
    with open('output.json', 'w') as json_file:
        json.dump(json_data, json_file, indent=4)

    print("The JSON data has been saved to 'output.json'.")

    # NOTE(review): dict.update is shallow, so the new "items"/"relations"
    # replace the ones loaded from the file rather than being merged into
    # them. Kept as-is; confirm whether accumulation was intended.
    base_graph.update(json_data)

    create_graph(base_graph)


def _normalize_relations(relations):
    """Return the relation dicts in `relations` as a flat list.

    Accepts the keyed form ({"1": {...}, "2": {...}}), a single flat relation
    ({"from": ..., "to": ..., "relation": ...}) as shown in the prompt's
    one-item examples, or a list of relation dicts. Anything else yields [].
    """
    if isinstance(relations, dict):
        if "from" in relations and not isinstance(relations["from"], dict):
            return [relations]
        candidates = relations.values()
    elif isinstance(relations, list):
        candidates = relations
    else:
        return []
    return [rel for rel in candidates if isinstance(rel, dict)]


def create_graph(json_data):
    """
    Draw the rule graph described by `json_data` with networkx/matplotlib.

    Args:
        json_data: Either a dict or a JSON string with an "items" mapping
            (node name -> attribute dict) and a "relations" entry (see
            `_normalize_relations` for the accepted shapes).

    Nodes are annotated with their attributes. Relations with both "from" and
    "to" become labelled directed edges; relations without a "to" (e.g.
    isFavorite / leastFavorite) are stored on the source node under the
    "relation" attribute so they show up in its annotation.

    Returns None. Prints a message and returns early if `json_data` is not
    valid JSON or is not a JSON object. Blocks in ``plt.show()`` otherwise.
    """
    # If json_data is already a dictionary, no need to parse it
    if isinstance(json_data, dict):
        data = json_data
    else:
        # If json_data is a string, parse it
        try:
            data = json.loads(json_data)
        except (TypeError, ValueError):
            print("json_data is not valid JSON.")
            return
        if not isinstance(data, dict):
            print("json_data is not valid JSON.")
            return

    # Create a directed graph
    G = nx.DiGraph()

    # Add nodes to the graph with attributes
    items = data.get('items')
    if isinstance(items, dict):
        for item, attributes in items.items():
            if isinstance(attributes, dict):
                G.add_node(item, **attributes)
            else:
                G.add_node(item)

    # Add edges to the graph
    edge_labels = {}
    for relation in _normalize_relations(data.get('relations')):
        source = relation.get('from')
        target = relation.get('to')
        label = relation.get('relation')
        if not isinstance(source, str):
            continue
        if not isinstance(target, str):
            # One-sided preference ("to": null): networkx rejects None as a
            # node, so record the relation on the source node instead.
            G.add_node(source)
            G.nodes[source]['relation'] = label
            continue
        G.add_edge(source, target)
        edge_labels[(source, target)] = label

    # Create a larger figure
    _, ax = plt.subplots(figsize=(10, 10))

    # Draw the graph
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=1500)

    # Draw edge labels
    nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

    # Add node attributes as annotations under the nodes
    for node, attributes in G.nodes(data=True):
        if not attributes:
            # Nothing to show; avoid drawing an empty annotation box.
            continue
        # One "key: value" entry per line, each wrapped at 40 characters
        lines = "\n".join(f"{key}: {value}" for key, value in attributes.items()).split("\n")
        wrapped_text = "".join(
            '\n'.join(textwrap.wrap(line, width=40)) + "\n" for line in lines
        )

        plt.annotate(wrapped_text, (pos[node][0], pos[node][1]-0.15), ha='center', va='top',
                     bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5))

    # Get current axes limits
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # Add margins to the axes limits so annotations are not clipped
    x_margin = (xlim[1] - xlim[0]) * 0.1  # 10% margin
    y_margin = (ylim[1] - ylim[0]) * 0.2  # 20% margin

    ax.set_xlim(xlim[0] - x_margin, xlim[1] + x_margin)
    ax.set_ylim(ylim[0] - y_margin, ylim[1] + y_margin)

    plt.show()

# Run the experiment only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()