Skip to content

Commit 6b68800

Browse files
authored
Merge pull request #3 from Libertech-FR/dataWeaver
Data weaver in internal
2 parents 330dfcd + c1690be commit 6b68800

17 files changed

+518
-48
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ logs/
44
.env
55
src/__pycache__/*
66
.idea/
7-
lib/
87
bin/
98
pyvenv.cfg
9+
config.yml

config.yml

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,12 @@
11
---
22
taiga_etd.json:
33
mapping:
4-
inetOrgPerson.cn:
5-
- "nom"
6-
- "prenom"
7-
inetOrgPerson.displayName:
8-
- "prenom"
9-
- "nom"
10-
inetOrgPerson.sn: "nom"
11-
inetOrgPerson.givenName: "prenom"
12-
additionalFields.attributes.supannPerson.supannPrenomsEtatCivil: "prenom"
134
inetOrgPerson.employeeNumber: "id_coord"
145
customFields.photo: "photo_nom"
15-
$setOnInsert.inetOrgPerson.uid:
16-
- "prenom"
17-
- "nom"
18-
additionalFields.attributes.supannPerson.supannRefId: "id_coord"
19-
$setOnInsert.inetOrgPerson.mail:
20-
- "prenom"
21-
- "nom"
22-
inetOrgPerson.mobile: "tel_mob"
236
inetOrgPerson.postalAddress:
247
- "adresse"
258
- "CP"
269
- "Ville"
27-
additionalFields.attributes.supannPerson.supanncivilite: "civilite"
28-
additionalFields.attributes.supannPerson.supannOIDCGenre: "civilite"
29-
additionalFields.attributes.supannPerson.supannNomdeNaissance: "nom_marital"
30-
additionalFields.attributes.supannPerson.supannOIDCDatedeNaissance: "nss_date"
31-
additionalFields.attributes.supannPerson.supannAutreMail: "email2"
3210
additionalFields:
3311
additionalFields.objectClasses:
3412
- "supannPerson"
@@ -41,21 +19,8 @@ taiga_etd.json:
4119
- "etd"
4220
additionalFields.attributes.supannPerson.supannEtablissement: "{UAI}A123456"
4321
transforms:
44-
inetOrgPerson.cn: "join(delimiter=' ')"
45-
inetOrgPerson.displayName: "join(delimiter=' ')"
46-
inetOrgPerson.postalAddress: "join(delimiter=',')"
47-
$setOnInsert.inetOrgPerson.uid:
48-
- "join(delimiter='.')"
49-
- "remove_accents"
50-
- "lower"
51-
- replace(old=' ',new='-')
52-
- "regex(pattern='(?<=\\b\\w)([a-zA-Z0-9_\\-]+\\.)', replace='')"
53-
$setOnInsert.inetOrgPerson.mail:
54-
- "join(delimiter='.')"
55-
- "lower"
56-
- "remove_accents"
57-
- "replace(old=' ',new='-')"
58-
- "suffix(string='@lyon.archi.fr')"
22+
inetOrgPerson.postalAddress: "join(delimiter=',',default:'')"
23+
5924
taiga_adm.json:
6025
mapping:
6126
inetOrgPerson.cn:

lib/__init__.py

Whitespace-only changes.
150 Bytes
Binary file not shown.

lib/data_weaver3/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from .main import weave_entries, weave_entry
2+
3+
from .utils import crush, construct
4+
5+
# Names re-exported as the public API of this package.
__all__ = ['weave_entries', 'weave_entry', 'crush', 'construct']
331 Bytes
Binary file not shown.
11.3 KB
Binary file not shown.
9.81 KB
Binary file not shown.
2.69 KB
Binary file not shown.

lib/data_weaver3/main.py

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
import json
2+
import asyncio
3+
import aiofiles
4+
from yaml import CLoader as Loader
5+
from lib.data_weaver3.utils import crush, construct
6+
from lib.data_weaver3.transforms import parse_transform
7+
import csv
8+
import yaml
9+
import os
10+
11+
# Module-level configuration shared by the helpers in this file: load_config()
# fills it in, and the mapping/transform helpers read from it.
config = {}
12+
13+
def handle_value(data, source_key, target_key, default=False):
    """
    Resolves the value to store under ``target_key`` from the data dictionary.

    Args:
        data (dict): The data dictionary.
        source_key (str | dict | list): The source key(s) to retrieve the value from.
            A dict or list is resolved key by key; anything else is looked up as a
            single key.
        target_key (str): The key to store the value in the final result.
        default (bool, optional): Whether to fall back to the configured default
            (see ``handle_default_value``) when a source value is missing or falsy.
            Defaults to False.

    Returns:
        Any: A dict (for a dict ``source_key``), a list (for a list ``source_key``)
        or a single value. The result is passed through ``transform_value`` unless
        it was built from default values.
    """
    def get_value_with_default(src_key):
        """
        Retrieves the value of the given key from the data dictionary.

        Args:
            src_key (str): The key to retrieve the value for.

        Returns:
            tuple: ``(value, is_default)`` where ``is_default`` tells whether the
            value came from ``handle_default_value``.
        """
        value = data.get(src_key)
        # Any falsy value (None, '', 0, ...) counts as missing when defaults are enabled.
        if not value and default:
            return handle_default_value(data, target_key), True
        return value, False

    def handle_dict(source_key: dict):
        """
        Resolves every key of ``source_key`` into a ``{sub_key: value}`` dict.

        Args:
            source_key (dict): The keys to retrieve the values from.

        Returns:
            dict: The resolved dict, returned untransformed as soon as any entry
            came from a default, transformed otherwise.
        """
        # Resolve each sub-key exactly once; the default lookup may itself run transforms.
        resolved = {sub_key: get_value_with_default(sub_key) for sub_key in source_key}
        handled_dict = {sub_key: value for sub_key, (value, _) in resolved.items()}
        if any(is_default for _, is_default in resolved.values()):
            return handled_dict
        return transform_value(handled_dict, target_key)

    def handle_list(source_key: list):
        """
        Resolves every key of ``source_key`` into a list of values.

        Args:
            source_key (list): The keys to retrieve the values from.

        Returns:
            list: The resolved list, returned untransformed only when every entry
            came from a default (an empty list counts as such), transformed otherwise.
        """
        # Resolve each sub-key exactly once; the default lookup may itself run transforms.
        resolved = [get_value_with_default(sub_key) for sub_key in source_key]
        handled_list = [value for value, _ in resolved]
        if all(is_default for _, is_default in resolved):
            return handled_list
        return transform_value(handled_list, target_key)

    def handle_default(source_key):
        """
        Resolves a single (non dict, non list) source key.

        Args:
            source_key (str): The key to retrieve the value from.

        Returns:
            Any: The default value as is, or the transformed source value.
        """
        handled_value, is_default = get_value_with_default(source_key)
        if is_default:
            return handled_value
        return transform_value(handled_value, target_key)

    # isinstance() rather than an exact type() match so that dict/list subclasses
    # (e.g. ordered mappings produced by a config loader) are resolved key by key
    # instead of being used as a single, unhashable lookup key.
    if isinstance(source_key, dict):
        return handle_dict(source_key)
    if isinstance(source_key, list):
        return handle_list(source_key)
    return handle_default(source_key)
96+
97+
def handle_default_value(data, target_key):
    """
    Looks up the configured default for ``target_key``.

    A ``default.dynamic`` entry names the source key(s) to read from ``data``; the
    value found is then run through the ``default.transforms`` rules. Without a
    dynamic entry, the literal ``default.static`` entry is returned.

    Args:
        data (dict): The data dictionary.
        target_key (str): The key to retrieve the default value for.

    Returns:
        Any: The default value, or None when no default is configured.
    """
    defaults = config.get('default', {})

    dynamic_source = defaults.get('dynamic', {}).get(target_key)
    if dynamic_source is not None:
        # Defaults are disabled for this lookup, so it cannot recurse back here.
        resolved = handle_value(data, dynamic_source, target_key, False)
        return transform_value(resolved, target_key, True)

    # ``dict.get`` already yields None when no static default exists.
    return defaults.get('static', {}).get(target_key)
115+
116+
117+
def transform_value(value, field, default=False):
    """
    Applies the transforms configured for ``field`` to ``value``.

    Args:
        value (Any): The value to transform.
        field (str): The field name used to look up the transform rule(s).
        default (bool, optional): When truthy, the rules are read from
            ``default.transforms`` instead of ``transforms``. Defaults to False.

    Returns:
        Any: The transformed value, or ``value`` unchanged when it is None or
        when no transform is configured for the field.
    """
    rule_source = config.get('default', {}) if default else config
    rules = rule_source.get('transforms', {}).get(field)

    # Nothing to apply, or nothing to apply it to.
    if not rules or value is None:
        return value

    # A single rule is treated as a one-step pipeline; a list is applied in order.
    steps = rules if isinstance(rules, list) else [rules]
    for step in steps:
        value = parse_transform(step, value)
    return value
142+
143+
async def map_fields(data: dict, final_result):
    """
    Fills ``final_result`` from ``data`` according to the loaded configuration.

    Args:
        data (dict): The input data dictionary.
        final_result (dict): The dictionary receiving the mapped key-value pairs.

    Returns:
        None
    """
    # Mapped fields first: each target key is resolved from its source key(s).
    for target_key, source_key in config.get('mapping').items():
        final_result[target_key] = handle_value(data, source_key, target_key)

    # Constant fields are copied last, so they override a mapped field of the same name.
    final_result.update(config.get('additionalFields').items())
160+
161+
async def process_entry(entry):
    """
    Processes a single entry.

    Args:
        entry (dict): The entry to process.

    Returns:
        Any: The object built by ``construct`` from the mapped fields.
    """
    mapped = {}
    # map_fields works on the output of crush() and writes into ``mapped``.
    await map_fields(crush(entry), mapped)
    return construct(mapped)
166+
167+
async def process_entries(entries):
    """
    Process a list of entries asynchronously.

    Args:
        entries (list): A list of entries to process.

    Returns:
        list: The constructed objects, in the same order as ``entries``.
    """
    pending = (process_entry(entry) for entry in entries)
    return await asyncio.gather(*pending)
182+
183+
async def load_config(configContent=None):
    """
    Loads the configuration into the module-level ``config`` dictionary.

    Args:
        configContent (dict, optional): An already parsed configuration. When it is
            None, ``./config/config.yml`` is read and parsed as YAML instead.

    Raises:
        ValueError: If the configuration is empty or has no ``mapping`` section.

    Returns:
        None
    """
    if configContent is None:
        async with aiofiles.open('./config/config.yml', 'r', encoding='utf8') as file:
            content = await file.read()
        # NOTE(review): ``Loader`` is yaml.CLoader, which is not one of PyYAML's safe
        # loaders. Fine for a trusted local file; consider CSafeLoader otherwise.
        configContent = yaml.load(content, Loader=Loader)

    # Copy first: this tolerates an empty YAML document (None) and keeps the
    # clear() below harmless if the caller passed the global ``config`` itself.
    new_config = dict(configContent or {})
    if new_config.get('mapping') is None:
        raise ValueError('Invalid config file!')

    # Replace instead of merging, so that sections from a previously loaded
    # configuration (transforms, default, additionalFields, ...) cannot leak
    # into this one.
    config.clear()
    config.update(new_config)
    if config.get('additionalFields') is None:
        config['additionalFields'] = {}
201+
202+
async def save_result_to_file(result, file_path):
    """
    Serializes ``result`` and writes it to ``file_path``.

    The format follows the file extension: ``.csv``, ``.yml``/``.yaml`` or
    ``.json``. Any other extension is written as JSON (the path is kept as given).

    Args:
        result (dict | list[dict]): A single weaved entry or a list of them.
        file_path (str): The destination path.

    Returns:
        None
    """
    import io

    # Determine the file extension
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()

    if ext not in ['.csv', '.json', '.yml', '.yaml']:
        print('Invalid file extension. Defaulting to JSON.')
        ext = '.json'

    # Serialize in memory first, so a serialization error does not leave a
    # truncated file behind and the csv module only sees a regular text buffer.
    if ext == '.csv':
        # weave_entry passes a single dict, weave_entries a list of dicts.
        entries = [result] if isinstance(result, dict) else result
        rows = [crush(row) for row in entries]
        # Union of all columns, in first-seen order, so rows may have differing keys.
        fieldnames = list(dict.fromkeys(key for row in rows for key in row))
        buffer = io.StringIO(newline='')
        writer = csv.DictWriter(buffer, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
        payload = buffer.getvalue()
    elif ext in ('.yml', '.yaml'):
        payload = yaml.dump(result, allow_unicode=True)
    else:  # Default to JSON
        payload = json.dumps(result, ensure_ascii=False)

    # csv emits its own line terminators, so newline translation is disabled for it.
    newline = '' if ext == '.csv' else None
    async with aiofiles.open(file_path, 'w', encoding='utf-8', newline=newline) as file:
        await file.write(payload)
227+
228+
async def weave_entry(data, config, *args, **kwargs):
    """
    Weaves a single entry with the given configuration.

    Args:
        data (dict): The input entry.
        config (dict): The configuration.
        **kwargs: ``file_path`` (str), when given, also saves the result to that file.

    Returns:
        Any: The weaved entry.
    """
    await load_config(config)
    woven = await process_entry(data)

    # Only a string path triggers the export; anything else is ignored.
    target_path = kwargs.get('file_path')
    if isinstance(target_path, str):
        await save_result_to_file(woven, target_path)

    return woven
246+
247+
async def weave_entries(data: list[dict], config: dict, *args, **kwargs):
    """
    Weaves a list of entries with the given configuration.

    Args:
        data (list[dict]): The input entries.
        config (dict): The configuration.
        **kwargs: ``file_path`` (str), when given, also saves the result to that file.

    Returns:
        list: The weaved entries.
    """
    await load_config(config)
    woven = await process_entries(data)

    # Only a string path triggers the export; anything else is ignored.
    target_path = kwargs.get('file_path')
    if isinstance(target_path, str):
        await save_result_to_file(woven, target_path)

    return woven

0 commit comments

Comments
 (0)