Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@ report-file: "report" # where to write PDF report (.pdf will be added
# these parameters can also be specified on the command line using --param key=value
# command-line parameters override those from the configuration file

urls:
regexes:
-
url: "http://mj.ucw.cz/vyuka/.+"
regex: "http://ksp.mff.cuni.cz/(?!sksp|profil|forum|auth).*"
plugins: # which plugins are allowed for given URL
- linksFinder
- tidyHtmlValidator
Expand All @@ -90,7 +90,7 @@ urls:
- dupdetect
- non_semantic_html
-
url: "http://mj.ucw.cz/" #test links (HEAD request) only
regex: "https?://(?!ksp.mff.cuni.cz/(sksp|profil|forum|auth)).+" #test links (HEAD request) only
plugins:

filters: #Filters (plugins of category header and filter) that can be used
Expand Down Expand Up @@ -123,7 +123,7 @@ entryPoints: # where to start
# start from this entry point)
#
# Entry points can also be specified via command line parameter --entry=url
- "http://mj.ucw.cz/vyuka/"
- "http://ksp.mff.cuni.cz/"

#additional content-type rules can still be specified and take precedence over plugin-defined rules
content-types:
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ python-dateutil==2.6.1
python-magic==0.4.15
pytidylib==0.3.2
reppy==0.4.9
requests==2.18.4
requests>=2.20.0
rfc3987==1.3.7
ruamel.yaml==0.15.35
six==1.11.0
tinycss==0.4
url-normalize==1.3.3
urllib3==1.22
urllib3==1.24.2
validate-email==1.3
Yapsy==1.11.223
1 change: 1 addition & 0 deletions src/checker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* configLoader - Parses the configuration file.
* core
- Core class with the main loop.
* transaction
- Transaction class containing information about a web page with its factory
method.
- TransactionQueue and Journal. TransactionQueue is a wrapper over Python's
Expand Down
2 changes: 1 addition & 1 deletion src/checker/plugin/checkers/seo_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class MetaTagValidator(IPlugin):
"seo:nodsc": "No description meta tag found",
"seo:multikeys": "Multiple keywords meta tags found",
"seo:nokeys": "No keywords meta tags found"}
__severity = 0.8
__severity = 0.4

def __init__(self):
self.__journal = None
Expand Down
2 changes: 1 addition & 1 deletion src/checker/plugin/checkers/seo_meta.yapsy-plugin
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ Module = seo_meta

[Documentation]
Author = Alexandr Mansurov
Version = 0.1
Version = 0.2
Description = Check meta tags for SEO
29 changes: 24 additions & 5 deletions src/checker/plugin/checkers/tidy_html_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(self):
self.__max_err = 0
self.__max_warn = 0
self.__max_inf = 0
self.__max_unk = 0
self.__severity = dict()
self.__severity['Warning'] = 0.5
self.__severity['Error'] = 1.0
Expand All @@ -26,7 +27,8 @@ def setJournal(self, journal):

maxes = {'W': self.__max_warn,
'E': self.__max_err,
'I': self.__max_inf}
'I': self.__max_inf,
'X': self.__max_unk}

for dt in journal.getKnownDefectTypes():
# dt[0] type, dt[1] description
Expand All @@ -52,9 +54,24 @@ def check(self, transaction):
# lines is a list of strings that looks like:
# line 54 column 37 - Warning: replacing invalid character code 153
for line in lines:
loc, desc = line.split(' - ', 1)
err_warn, msg = desc.split(': ', 1)
self.__record(transaction, loc, err_warn, msg)
if not '-' in line:
err_warn, msg = line.split(':', 1)
self.__record(transaction, None, err_warn.strip(), msg.strip())
else:
try:
loc, desc = line.split(' - ', 1)
err_warn, msg = desc.split(': ', 1)
self.__record(transaction, loc, err_warn.strip(), msg.strip())
except:
try:
loc, desc = line.split('-')
err_warn, msg = desc.split(':', 1)
if len(msg.strip()) == 0:
logging.getLogger(__name__).warning("No description! Line was: %s" % line)
msg = "Generic HTML syntax " + err_warn.strip().lower()
self.__record(transaction, loc, err_warn.strip(), msg.strip())
except ValueError:
logging.getLogger(__name__).exception("Failed to parse result! Line was: %s" % line)

def __record(self, transaction, loc, cat, desc):
code = self.__get_code(cat, desc)
Expand Down Expand Up @@ -87,6 +104,8 @@ def __get_code(self, cat, desc):
else:
log = logging.getLogger(__name__)
log.error("Unknown category: " + cat)
return None
cat = 'X'
num = self.__max_unk
self.__max_unk = self.__max_unk + 1
code = self.__generate_code(cat[0], num, desc)
return code
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ Module = tidy_html_validator

[Documentation]
Author = Alexandr Mansurov
Version = 0.2
Version = 0.3
Description = Validate HTML using libtidy