diff --git a/README.md b/README.md index 4bf5f9c..9ec41e7 100644 --- a/README.md +++ b/README.md @@ -76,9 +76,9 @@ report-file: "report" # where to write PDF report (.pdf will be added # these parameters can be also specified on command line using --param key=value # command line parameters override configuration ones -urls: +regexes: - - url: "http://mj.ucw.cz/vyuka/.+" + regex: "http://ksp.mff.cuni.cz/(?!sksp|profil|forum|auth).*" plugins: # which plugins are allowed for given URL - linksFinder - tidyHtmlValidator @@ -90,7 +90,7 @@ urls: - dupdeteict - non_semantic_html - - url: "http://mj.ucw.cz/" #test links (HEAD request) only + regex: "https?://(?!ksp.mff.cuni.cz/(sksp|profil|forum|auth)).+" #test links (HEAD request) only plugins: filters: #Filters (plugins of category header and filter) that can be used @@ -123,7 +123,7 @@ entryPoints: # where to start # start from this entry point) # # Entry points can also be specified via command line parameter --entry=url - - "http://mj.ucw.cz/vyuka/" + - "http://ksp.mff.cuni.cz/" #additional content type rules can be still specified and take precedence over plugin defined rules content-types: diff --git a/requirements.txt b/requirements.txt index 6eec93b..b706915 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,12 +15,12 @@ python-dateutil==2.6.1 python-magic==0.4.15 pytidylib==0.3.2 reppy==0.4.9 -requests==2.18.4 +requests>=2.20.0 rfc3987==1.3.7 ruamel.yaml==0.15.35 six==1.11.0 tinycss==0.4 url-normalize==1.3.3 -urllib3==1.22 +urllib3==1.24.2 validate-email==1.3 Yapsy==1.11.223 diff --git a/src/checker/README.md b/src/checker/README.md index 36918b1..27513e0 100644 --- a/src/checker/README.md +++ b/src/checker/README.md @@ -4,6 +4,7 @@ * configLoader - Parses the configuration file. * core - Core class with the main loop. +* transaction - Transaction class containing information about a web page with its factory method. - TransactionQueue and Journal. 
TransactionQueue is a wrapper over Python's diff --git a/src/checker/plugin/checkers/seo_meta.py b/src/checker/plugin/checkers/seo_meta.py index b134cfa..b564d05 100644 --- a/src/checker/plugin/checkers/seo_meta.py +++ b/src/checker/plugin/checkers/seo_meta.py @@ -13,7 +13,7 @@ class MetaTagValidator(IPlugin): "seo:nodsc": "No description meta tag found", "seo:multikeys": "Multiple keywords meta tags found", "seo:nokeys": "No keywords meta tags found"} - __severity = 0.8 + __severity = 0.4 def __init__(self): self.__journal = None diff --git a/src/checker/plugin/checkers/seo_meta.yapsy-plugin b/src/checker/plugin/checkers/seo_meta.yapsy-plugin index 09ddb26..9afa506 100644 --- a/src/checker/plugin/checkers/seo_meta.yapsy-plugin +++ b/src/checker/plugin/checkers/seo_meta.yapsy-plugin @@ -4,5 +4,5 @@ Module = seo_meta [Documentation] Author = Alexandr Mansurov -Version = 0.1 +Version = 0.2 Description = Check meta tags for SEO diff --git a/src/checker/plugin/checkers/tidy_html_validator.py b/src/checker/plugin/checkers/tidy_html_validator.py index d3c0e15..3d2d804 100644 --- a/src/checker/plugin/checkers/tidy_html_validator.py +++ b/src/checker/plugin/checkers/tidy_html_validator.py @@ -16,6 +16,7 @@ def __init__(self): self.__max_err = 0 self.__max_warn = 0 self.__max_inf = 0 + self.__max_unk = 0 self.__severity = dict() self.__severity['Warning'] = 0.5 self.__severity['Error'] = 1.0 @@ -26,7 +27,8 @@ def setJournal(self, journal): maxes = {'W': self.__max_warn, 'E': self.__max_err, - 'I': self.__max_inf} + 'I': self.__max_inf, + 'X': self.__max_unk} for dt in journal.getKnownDefectTypes(): # dt[0] type, dt[1] description @@ -52,9 +54,24 @@ def check(self, transaction): # lines is a list of strings that looks like: # line 54 column 37 - Warning: replacing invalid character code 153 for line in lines: - loc, desc = line.split(' - ', 1) - err_warn, msg = desc.split(': ', 1) - self.__record(transaction, loc, err_warn, msg) + if not '-' in line: + err_warn, msg = 
line.split(':', 1) + self.__record(transaction, None, err_warn.strip(), msg.strip()) + else: + try: + loc, desc = line.split(' - ', 1) + err_warn, msg = desc.split(': ', 1) + self.__record(transaction, loc, err_warn.strip(), msg.strip()) + except: + try: + loc, desc = line.split('-') + err_warn, msg = desc.split(':', 1) + if len(msg.strip()) == 0: + logging.getLogger(__name__).warning("No description! Line was: %s" % line) + msg = "Generic HTML syntax " + err_warn.strip().lower() + self.__record(transaction, loc, err_warn.strip(), msg.strip()) + except ValueError: + logging.getLogger(__name__).exception("Failed to parse result! Line was: %s" % line) def __record(self, transaction, loc, cat, desc): code = self.__get_code(cat, desc) @@ -87,6 +104,8 @@ def __get_code(self, cat, desc): else: log = logging.getLogger(__name__) log.error("Unknown category: " + cat) - return None + cat = 'X' + num = self.__max_unk + self.__max_unk = self.__max_unk + 1 code = self.__generate_code(cat[0], num, desc) return code diff --git a/src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin b/src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin index c878b6a..54b20ec 100644 --- a/src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin +++ b/src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin @@ -4,5 +4,5 @@ Module = tidy_html_validator [Documentation] Author = Alexandr Mansurov -Version = 0.2 +Version = 0.3 Description = Validate HTML using libtidy