From 1101361f943b213bbe70f554610d74d29da57d85 Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Fri, 6 Jul 2018 01:18:01 +0200 Subject: [PATCH 01/10] Update README.md --- src/checker/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/src/checker/README.md b/src/checker/README.md index 36918b1..27513e0 100644 --- a/src/checker/README.md +++ b/src/checker/README.md @@ -4,6 +4,7 @@ * configLoader - Parses the configuration file. * core - Core class with the main loop. +* transaction - Transaction class containing information about a web page with its factory method. - TransactionQueue and Journal. TransactionQueue is a wrapper over Python's From 318e5721c341ef7893a9d7c4fa33eb4cf1f570a4 Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Fri, 6 Jul 2018 18:26:20 +0200 Subject: [PATCH 02/10] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4bf5f9c..9ec41e7 100644 --- a/README.md +++ b/README.md @@ -76,9 +76,9 @@ report-file: "report" # where to write PDF report (.pdf will be added # these parameters can be also specified on command line using --param key=value # command line parameters override configuration ones -urls: +regexes: - - url: "http://mj.ucw.cz/vyuka/.+" + regex: "http://ksp.mff.cuni.cz/(?!sksp|profil|forum|auth).*" plugins: # which plugins are allowed for given URL - linksFinder - tidyHtmlValidator @@ -90,7 +90,7 @@ urls: - dupdeteict - non_semantic_html - - url: "http://mj.ucw.cz/" #test links (HEAD request) only + regex: "https?://(?!ksp.mff.cuni.cz/(sksp|profil|forum|auth)).+" #test links (HEAD request) only plugins: filters: #Filters (plugins of category header and filter) that can be used @@ -123,7 +123,7 @@ entryPoints: # where to start # start from this entry point) # # Entry points can also be specified via command line parameter --entry=url - - "http://mj.ucw.cz/vyuka/" + - "http://ksp.mff.cuni.cz/" #additional content type rules can be still 
specified and take precedence over plugin defined rules content-types: From f850df5278bb2c4ab2a24e81c689a14769c8d56c Mon Sep 17 00:00:00 2001 From: pyup-bot Date: Wed, 10 Oct 2018 21:23:20 +0200 Subject: [PATCH 03/10] Update requests from 2.18.4 to 2.19.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6eec93b..aaf54c1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ python-dateutil==2.6.1 python-magic==0.4.15 pytidylib==0.3.2 reppy==0.4.9 -requests==2.18.4 +requests==2.19.1 rfc3987==1.3.7 ruamel.yaml==0.15.35 six==1.11.0 From 603264f3860128b7faf6f976ccd385d280615f11 Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Fri, 19 Oct 2018 20:03:02 +0200 Subject: [PATCH 04/10] Update tidy_html_validator.py --- .../plugin/checkers/tidy_html_validator.py | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/checker/plugin/checkers/tidy_html_validator.py b/src/checker/plugin/checkers/tidy_html_validator.py index d3c0e15..3d2d804 100644 --- a/src/checker/plugin/checkers/tidy_html_validator.py +++ b/src/checker/plugin/checkers/tidy_html_validator.py @@ -16,6 +16,7 @@ def __init__(self): self.__max_err = 0 self.__max_warn = 0 self.__max_inf = 0 + self.__max_unk = 0 self.__severity = dict() self.__severity['Warning'] = 0.5 self.__severity['Error'] = 1.0 @@ -26,7 +27,8 @@ def setJournal(self, journal): maxes = {'W': self.__max_warn, 'E': self.__max_err, - 'I': self.__max_inf} + 'I': self.__max_inf, + 'X': self.__max_unk} for dt in journal.getKnownDefectTypes(): # dt[0] type, dt[1] description @@ -52,9 +54,24 @@ def check(self, transaction): # lines is a list of strings that looks like: # line 54 column 37 - Warning: replacing invalid character code 153 for line in lines: - loc, desc = line.split(' - ', 1) - err_warn, msg = desc.split(': ', 1) - self.__record(transaction, loc, err_warn, msg) + if not '-' in line: + err_warn, msg = 
line.split(':', 1) + self.__record(transaction, None, err_warn.strip(), msg.strip()) + else: + try: + loc, desc = line.split(' - ', 1) + err_warn, msg = desc.split(': ', 1) + self.__record(transaction, loc, err_warn.strip(), msg.strip()) + except: + try: + loc, desc = line.split('-') + err_warn, msg = desc.split(':', 1) + if len(msg.strip()) == 0: + logging.getLogger(__name__).warning("No description! Line was: %s" % line) + msg = "Generic HTML syntax " + err_warn.strip().lower() + self.__record(transaction, loc, err_warn.strip(), msg.strip()) + except ValueError: + logging.getLogger(__name__).exception("Failed to parse result! Line was: %s" % line) def __record(self, transaction, loc, cat, desc): code = self.__get_code(cat, desc) @@ -87,6 +104,8 @@ def __get_code(self, cat, desc): else: log = logging.getLogger(__name__) log.error("Unknown category: " + cat) - return None + cat = 'X' + num = self.__max_unk + self.__max_unk = self.__max_unk + 1 code = self.__generate_code(cat[0], num, desc) return code From 3e80a696bf41ed21b3bb51acc3c86a078dcdec15 Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Fri, 19 Oct 2018 20:05:39 +0200 Subject: [PATCH 05/10] Bump version --- src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin b/src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin index c878b6a..54b20ec 100644 --- a/src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin +++ b/src/checker/plugin/checkers/tidy_html_validator.yapsy-plugin @@ -4,5 +4,5 @@ Module = tidy_html_validator [Documentation] Author = Alexandr Mansurov -Version = 0.2 +Version = 0.3 Description = Validate HTML using libtidy From 6f421c97a0b5d08c5d8d76330ddda89a4bb78a73 Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Fri, 19 Oct 2018 20:06:41 +0200 Subject: [PATCH 06/10] Update seo_meta.py --- src/checker/plugin/checkers/seo_meta.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/checker/plugin/checkers/seo_meta.py b/src/checker/plugin/checkers/seo_meta.py index b134cfa..b564d05 100644 --- a/src/checker/plugin/checkers/seo_meta.py +++ b/src/checker/plugin/checkers/seo_meta.py @@ -13,7 +13,7 @@ class MetaTagValidator(IPlugin): "seo:nodsc": "No description meta tag found", "seo:multikeys": "Multiple keywords meta tags found", "seo:nokeys": "No keywords meta tags found"} - __severity = 0.8 + __severity = 0.4 def __init__(self): self.__journal = None From 18899031238a1295c472cf07894f780d7a9f28c2 Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Fri, 19 Oct 2018 20:07:18 +0200 Subject: [PATCH 07/10] Bump version --- src/checker/plugin/checkers/seo_meta.yapsy-plugin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/checker/plugin/checkers/seo_meta.yapsy-plugin b/src/checker/plugin/checkers/seo_meta.yapsy-plugin index 09ddb26..9afa506 100644 --- a/src/checker/plugin/checkers/seo_meta.yapsy-plugin +++ b/src/checker/plugin/checkers/seo_meta.yapsy-plugin @@ -4,5 +4,5 @@ Module = seo_meta [Documentation] Author = Alexandr Mansurov -Version = 0.1 +Version = 0.2 Description = Check meta tags for SEO From eb95796c8614158fc28031af02d4cbcd5d148da8 Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Mon, 29 Oct 2018 23:51:41 +0100 Subject: [PATCH 08/10] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index aaf54c1..f0aab56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ python-dateutil==2.6.1 python-magic==0.4.15 pytidylib==0.3.2 reppy==0.4.9 -requests==2.19.1 +requests>=2.20.0 rfc3987==1.3.7 ruamel.yaml==0.15.35 six==1.11.0 From 1618a2087fff2ad1e7ed42256d2c945f77cdfa72 Mon Sep 17 00:00:00 2001 From: pyup-bot Date: Tue, 18 Dec 2018 17:16:46 +0100 Subject: [PATCH 09/10] Update urllib3 from 1.22 to 1.24.1 --- requirements.txt | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f0aab56..fe91d44 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,6 @@ ruamel.yaml==0.15.35 six==1.11.0 tinycss==0.4 url-normalize==1.3.3 -urllib3==1.22 +urllib3==1.24.1 validate-email==1.3 Yapsy==1.11.223 From 7eb0000509e2deef5e79f484a327ff42a68a530d Mon Sep 17 00:00:00 2001 From: Alexandr Mansurov Date: Thu, 25 Apr 2019 11:38:57 +0200 Subject: [PATCH 10/10] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index fe91d44..b706915 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,6 @@ ruamel.yaml==0.15.35 six==1.11.0 tinycss==0.4 url-normalize==1.3.3 -urllib3==1.24.1 +urllib3==1.24.2 validate-email==1.3 Yapsy==1.11.223