Skip to content

Commit 3a5e42f

Browse files
committed
Full migration from ORM to SQL
1 parent c3b1022 commit 3a5e42f

5 files changed

Lines changed: 182 additions & 159 deletions

File tree

common/utilities.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,32 +29,32 @@ def load_DB_in_model(npages=0, nsearch_pages=None, algorithm=None, current_model
2929

3030
print(f"[*] Getting modules from DB (with {algorithm.__name__}) ...")
3131
start = time.time_ns()
32-
all_pages, _ = db_manager.get_winmodules(algorithm, npages + nsearch_pages if nsearch_pages else npages)
32+
all_pages = db_manager.get_winmodules(algorithm, npages + nsearch_pages if nsearch_pages else npages)
3333
end = time.time_ns() # in nanoseconds
3434
db_time = (end - start)/1e6 # ms
3535
print(f"[*] {len(all_pages)} pages recovered from DB in {db_time} ms.")
3636

3737
page_list = []
3838
insert_times = []
39-
for i in range(0, len(all_pages[:npages])):
39+
for i, winmodule in enumerate(0, all_pages):
4040
if i % BATCH_PRINT == 0 and i != 0 and printlog:
4141
print(f"{int(i/BATCH_PRINT)}*{BATCH_PRINT} pages already inserted ({datetime.datetime.now()}) ...")
4242

4343
try:
4444
start = time.time_ns()
45-
current_model.insert(all_pages[i]) # can raise exception
45+
current_model.insert(winmodule) # can raise exception
4646
end = time.time_ns() # in nanoseconds
4747
insert_times.append((end - start)/(1e6)) # convert to ms
48-
page_list.append(all_pages[i].get_id())
48+
page_list.append(winmodule.get_id())
4949
except NodeAlreadyExistsError: # it should never occur...
5050
# get module already in DB, and print it to compare with the other one
51-
exact, node = current_model.search_exact_match_only(all_pages[i].get_id())
51+
exact, node = current_model.search_exact_match_only(winmodule.get_id())
5252
if not exact: # infeasible path. If you see this, something weird happened
5353
raise Exception # db was modified in the backend, don't worry ...
5454

5555
# check they are _really_ the same
5656
existing_page = node.get_page()
57-
new_page = all_pages[i].get_page()
57+
new_page = winmodule.get_page()
5858
equal, equal_test = pages_are_equal(existing_page, new_page)
5959
if equal:
6060
logging.warning(f"Node \"{node.get_id()}\" already exists (different page id, same hashes)!")

datalayer/database/module.py

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,25 @@
1-
from sqlalchemy import Column, Integer, String, BigInteger, ForeignKey
2-
from sqlalchemy.orm import relationship, declarative_base
3-
from .base import Base
41

5-
class Module(Base):
6-
__tablename__ = 'modules'
2+
class Module:
3+
def __init__(self, os, id, file_version, original_filename,
4+
internal_filename, product_name,company_name, legal_copyright,
5+
classification, size, base_address
6+
):
77

8-
# April 05, 2024: Updated for dataset DB
9-
id = Column(BigInteger, primary_key=True)
10-
file_version = Column(String)
11-
original_filename = Column(String)
12-
internal_filename = Column(String)
13-
product_name = Column(String)
14-
company_name = Column(String)
15-
legal_copyright = Column(String)
16-
classification = Column(String)
17-
size = Column(Integer)
18-
base_address = Column(BigInteger)
19-
cpu = Column(String)
20-
os_id = Column(BigInteger, ForeignKey('os.id'))
8+
self.id = id
9+
self.file_version = file_version
10+
self.original_filename = original_filename
11+
self.internal_filename = internal_filename
12+
self.product_name = product_name
13+
self.company_name = company_name
14+
self.legal_copyright = legal_copyright
15+
self.classification = classification
16+
self.size = size
17+
self.base_address = base_address
18+
self.os = os
2119

22-
pages = relationship("Page")
23-
os = relationship("OS", back_populates="modules")
24-
25-
def as_dict(self):
26-
return {c.name: getattr(self, c.name) for c in self.__table__.columns} #| self.os.as_dict()
27-
28-
def __str__(self):
29-
return str(self.as_dict())
20+
def __eq__(self, module):
21+
return self.id == module.id
3022

31-
def __repr__(self):
32-
return str(self.as_dict())
23+
def __hash__(self):
24+
return hash(self.id)
25+
Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
1-
from sqlalchemy.orm import relationship, declarative_base
2-
from sqlalchemy import create_engine, Column, Integer, String, BigInteger
3-
from .base import Base
41

5-
class OS(Base):
6-
__tablename__ = 'os'
7-
8-
# April 05, 2024: Updated for dataset DB
9-
id = Column(BigInteger, primary_key=True)
10-
name = Column(String)
11-
version = Column(String)
12-
cpu = Column(String)
13-
modules = relationship("Module")
2+
class OS:
3+
def __init__(self, id, name, version):
4+
self.id = id
5+
self.name = name
6+
self.version = version
147

158
def __str__(self):
169
return f"Name: {self.name}, Version: {self.version}"
1710

1811
def as_dict(self):
19-
return {str('os_' + c.name): getattr(self, c.name) for c in self.__table__.columns}
12+
return {f"os_{key}": value for key, value in self.__dict__.items()}
13+
14+
def __eq__(self, other):
15+
if isinstance(other, OS):
16+
return self.id == other.id and self.name == other.name and self.version == other.version
17+
return False
18+
19+
def __hash__(self):
20+
return hash((self.id, self.name, self.version))

0 commit comments

Comments
 (0)