-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathPredicter.py
More file actions
70 lines (62 loc) · 2.04 KB
/
Predicter.py
File metadata and controls
70 lines (62 loc) · 2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from tensorflow import keras
from urllib.parse import urlparse
import os.path
# Cutoff, in percent, used by get_prediction: the model output is scaled by 100
# and any value at or above this cutoff is reported as "Malicious".
threshold = 50.0
def fd_length(url):
    """Return the length of the first directory segment of the URL's path.

    The path component reported by ``urlparse`` is split on ``/`` and the
    piece that follows the first slash is measured.

    Args:
        url: URL string to inspect.

    Returns:
        The number of characters in the segment after the first ``/`` of the
        path, or 0 when the path contains no ``/`` at all.
    """
    urlpath = urlparse(url).path
    try:
        return len(urlpath.split('/')[1])
    except IndexError:
        # The path has no "/" (e.g. an empty path), so there is no segment
        # after the first slash to measure.
        return 0
def letter_count(url):
    """Count the characters of ``url`` for which ``str.isalpha`` is true."""
    return sum(1 for ch in url if ch.isalpha())
def no_of_dir(url):
    """Return how many ``/`` characters occur in the path component of ``url``."""
    parsed = urlparse(url)
    slash_total = parsed.path.count('/')
    return slash_total
import re
def having_ip_address(url):
    """Report whether ``url`` contains a literal IP address.

    Three forms are searched for anywhere in the string: a dotted-decimal
    IPv4 address followed by ``/``, a dotted hexadecimal IPv4 address (each
    part written as ``0x..``) followed by ``/``, and a full eight-group IPv6
    address.

    Args:
        url: URL string to inspect.

    Returns:
        -1 if any of the three address forms is found, otherwise 1.
    """
    # The three forms must be separate alternatives. Without the "|" after
    # the hexadecimal part, the hexadecimal and IPv6 patterns merge into one
    # alternative that requires both back to back, so neither is ever
    # detected on its own.
    # NOTE(review): this feature feeds the model in get_prediction; if the
    # model was trained with the merged pattern, confirm the corrected
    # detection is acceptable for it.
    match = re.search(
        '(([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\.([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\.([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\.'
        '([01]?\\d\\d?|2[0-4]\\d|25[0-5])\\/)|'  # IPv4
        '((0x[0-9a-fA-F]{1,2})\\.(0x[0-9a-fA-F]{1,2})\\.(0x[0-9a-fA-F]{1,2})\\.(0x[0-9a-fA-F]{1,2})\\/)|'  # IPv4 in hexadecimal
        '(?:[a-fA-F0-9]{1,4}:){7}[a-fA-F0-9]{1,4}',  # IPv6
        url)
    return -1 if match else 1
# Keras models already loaded, keyed by file path, so that repeated
# predictions reuse one in-memory model instead of re-reading the file.
_loaded_models = {}


def _load_model_once(model_path):
    """Return the Keras model stored at ``model_path``, loading it on first use only."""
    if model_path not in _loaded_models:
        _loaded_models[model_path] = keras.models.load_model(model_path)
    return _loaded_models[model_path]


def get_prediction(url):
    """Classify ``url`` as "Malicious" or "Not Malicious" with the saved Keras model.

    Fifteen numeric features are derived from the URL string (component
    lengths, character and substring counts, directory count, IP-address
    flag) and passed to the model loaded from ``Caffeine_Prediction.h5``.
    The first output value is scaled to a percentage, rounded to three
    decimals and compared against the module-level ``threshold``.

    Args:
        url: URL string to classify.

    Returns:
        "Malicious" when the rounded percentage is at or above ``threshold``,
        otherwise "Not Malicious".
    """
    model = _load_model_once("Caffeine_Prediction.h5")

    parsed = urlparse(url)
    # NOTE(review): the order of these features presumably has to match the
    # order used when the model was trained -- do not reorder without
    # confirming.
    url_features = [
        len(parsed.netloc),
        len(parsed.path),
        fd_length(url),
        url.count('-'),
        url.count('@'),
        url.count('?'),
        url.count('%'),
        url.count('.'),
        url.count('='),
        url.count('http'),   # substring count, so every "https" is counted here too
        url.count('https'),
        url.count('www'),
        letter_count(url),
        no_of_dir(url),
        having_ip_address(url),
    ]

    prediction = model.predict([url_features])
    probability = round(prediction[0][0] * 100, 3)
    if probability >= threshold:
        return "Malicious"
    return "Not Malicious"
# Example usage: classify a handful of sample URLs and print each verdict.
# Guarded so that the samples run only when this file is executed as a
# script; importing the module no longer triggers predictions or output.
if __name__ == "__main__":
    sample_urls = ["https://www.facebook.com/","https://metumaskilogin.godaddysites.com/","aladel.net","http://gaup.of.com","https://www.youtube.com/","https://www.amazon.in/","https://www.google.co.in/"]
    for sample in sample_urls:
        result = get_prediction(sample)
        print(sample, " is:", result)