changeset 223:36610ddfbc7a

replaced mean_lens by w or wo bogon
author Henry S. Thompson <ht@inf.ed.ac.uk>
date Wed, 28 Feb 2024 14:49:45 +0000
parents ee34498c6762
children 8cc73836f2d9
files results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens.tsv results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens_w_bogon.tsv results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens_wo_bogon.tsv
diffstat 3 files changed, 60 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens.tsv	Wed Feb 28 14:44:59 2024 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-year	whole	scheme	netloc	path	query	idna	path%	query%
-1995	64.0538	4.5914	15.8065	40.5699	0.0753	0.0000	0.0000	0.0000
-1996	67.0588	4.8778	16.5520	42.6290	0.0000	0.0000	0.0000	nan
-1997	65.2948	4.6107	18.3217	28.0831	11.1213	0.0000	0.0049	0.0103
-1998	139.7477	4.5046	17.6685	30.7127	83.6943	0.0000	0.0000	37.1099
-1999	55.1920	4.5429	15.6377	31.9246	0.0846	0.0000	0.5538	0.0000
-2000	72.0602	4.4138	15.7897	42.2448	6.5341	0.0000	3.4043	0.7652
-2001	71.4869	4.4844	16.8443	43.7399	3.3488	0.0000	0.0111	0.0725
-2002	55.7478	4.4091	16.1407	31.7059	0.4630	0.0000	0.0304	0.0000
-2003	62.4248	4.4310	16.5919	37.8228	0.5477	0.0000	0.0250	0.0000
-2004	59.0838	4.4207	17.2578	33.8296	0.5351	0.0000	0.0468	0.0000
-2005	82.1663	4.4181	15.8059	42.2251	16.6627	0.0174	5.6736	96.9459
-2006	70.4486	4.4856	16.1607	32.6421	13.9758	0.0000	0.0717	0.1952
-2007	57.7082	4.4147	16.1119	30.5502	3.5205	0.0001	0.1417	0.0254
-2008	64.7185	4.4100	17.1071	37.4465	2.6071	0.0002	0.2443	2.0432
-2009	60.4626	4.4359	17.1835	33.0398	2.2653	0.0031	0.1064	0.2108
-2010	64.5384	4.4665	17.6995	35.6201	3.6596	0.0023	0.1486	0.2209
-2011	65.0858	4.5002	16.6578	38.8332	1.9981	0.0027	0.4988	1.2341
-2012	68.6867	4.5287	17.0146	41.6315	2.4117	0.0033	0.2491	2.1740
-2013	66.5549	4.5153	17.0192	38.5983	3.3069	0.0024	0.2505	9.9787
-2014	66.7782	4.5144	17.3245	40.2967	1.5214	0.0028	0.3300	0.5895
-2015	74.2247	4.5569	17.8608	43.7823	4.9151	0.0027	1.0769	6.6130
-2016	71.2424	4.5961	17.4750	43.1658	2.9193	0.0042	0.3927	0.8015
-2017	75.3486	4.6373	17.2166	46.3566	3.9142	0.0046	0.4782	4.3998
-2018	76.0403	4.7014	17.4297	46.5429	4.2563	0.0076	0.7060	3.5561
-2019	78.6182	4.6973	17.6892	47.6701	5.4318	0.0064	0.5489	8.5467
-2020	75.8811	4.7543	17.8009	46.8946	3.3265	0.0034	0.8290	1.9837
-2021	73.4178	4.7923	18.4032	44.3126	2.8127	0.0028	0.7542	2.6176
-2022	74.4483	4.7630	17.6105	45.8976	3.0232	0.0026	0.5909	0.9474
-2023	78.9357	4.8710	18.0149	43.1891	9.6796	0.0031	1.2491	4.4247
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens_w_bogon.tsv	Wed Feb 28 14:49:45 2024 +0000
@@ -0,0 +1,30 @@
+year	whole	scheme	netloc	path	query	idna	path%	query%
+1995	64.0538	4.5914	15.8065	40.5699	0.0753	0.0000	0.0000	0.0000
+1996	67.0588	4.8778	16.5520	42.6290	0.0000	0.0000	0.0000	nan
+1997	65.2948	4.6107	18.3217	28.0831	11.1213	0.0000	0.0049	0.0103
+1998	139.7477	4.5046	17.6685	30.7127	83.6943	0.0000	0.0000	37.1099
+1999	55.1920	4.5429	15.6377	31.9246	0.0846	0.0000	0.5538	0.0000
+2000	72.0602	4.4138	15.7897	42.2448	6.5341	0.0000	3.4043	0.7652
+2001	71.4869	4.4844	16.8443	43.7399	3.3488	0.0000	0.0111	0.0725
+2002	55.7478	4.4091	16.1407	31.7059	0.4630	0.0000	0.0304	0.0000
+2003	62.4248	4.4310	16.5919	37.8228	0.5477	0.0000	0.0250	0.0000
+2004	59.0838	4.4207	17.2578	33.8296	0.5351	0.0000	0.0468	0.0000
+2005	82.1663	4.4181	15.8059	42.2251	16.6627	0.0174	5.6736	96.9459
+2006	70.4486	4.4856	16.1607	32.6421	13.9758	0.0000	0.0717	0.1952
+2007	57.7082	4.4147	16.1119	30.5502	3.5205	0.0001	0.1417	0.0254
+2008	64.7185	4.4100	17.1071	37.4465	2.6071	0.0002	0.2443	2.0432
+2009	60.4626	4.4359	17.1835	33.0398	2.2653	0.0031	0.1064	0.2108
+2010	64.5384	4.4665	17.6995	35.6201	3.6596	0.0023	0.1486	0.2209
+2011	65.0858	4.5002	16.6578	38.8332	1.9981	0.0027	0.4988	1.2341
+2012	68.6867	4.5287	17.0146	41.6315	2.4117	0.0033	0.2491	2.1740
+2013	66.5549	4.5153	17.0192	38.5983	3.3069	0.0024	0.2505	9.9787
+2014	66.7782	4.5144	17.3245	40.2967	1.5214	0.0028	0.3300	0.5895
+2015	74.2247	4.5569	17.8608	43.7823	4.9151	0.0027	1.0769	6.6130
+2016	71.2424	4.5961	17.4750	43.1658	2.9193	0.0042	0.3927	0.8015
+2017	75.3486	4.6373	17.2166	46.3566	3.9142	0.0046	0.4782	4.3998
+2018	76.0403	4.7014	17.4297	46.5429	4.2563	0.0076	0.7060	3.5561
+2019	78.6182	4.6973	17.6892	47.6701	5.4318	0.0064	0.5489	8.5467
+2020	75.8811	4.7543	17.8009	46.8946	3.3265	0.0034	0.8290	1.9837
+2021	73.4178	4.7923	18.4032	44.3126	2.8127	0.0028	0.7542	2.6176
+2022	74.4483	4.7630	17.6105	45.8976	3.0232	0.0026	0.5909	0.9474
+2023	78.9357	4.8710	18.0149	43.1891	9.6796	0.0031	1.2491	4.4247
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens_wo_bogon.tsv	Wed Feb 28 14:49:45 2024 +0000
@@ -0,0 +1,30 @@
+year	whole	scheme	netloc	path	query	idna	path%	query%
+1995	64.0538	4.5914	15.8065	40.5699	0.0753	0.0000	0.0000	0.0000
+1996	67.0588	4.8778	16.5520	42.6290	0.0000	0.0000	0.0000	nan
+1997	65.2948	4.6107	18.3217	28.0831	11.1213	0.0000	0.0049	0.0103
+1998	139.7477	4.5046	17.6685	30.7127	83.6943	0.0000	0.0000	37.1099
+1999	55.1920	4.5429	15.6377	31.9246	0.0846	0.0000	0.5538	0.0000
+2000	72.0602	4.4138	15.7897	42.2448	6.5341	0.0000	3.4043	0.7652
+2001	71.4869	4.4844	16.8443	43.7399	3.3488	0.0000	0.0111	0.0725
+2002	55.7478	4.4091	16.1407	31.7059	0.4630	0.0000	0.0304	0.0000
+2003	62.4248	4.4310	16.5919	37.8228	0.5477	0.0000	0.0250	0.0000
+2004	59.0838	4.4207	17.2578	33.8296	0.5351	0.0000	0.0468	0.0000
+2005	53.9336	4.4117	16.0306	29.7504	0.7044	0.0000	0.0586	0.0155
+2006	70.4486	4.4856	16.1607	32.6421	13.9758	0.0000	0.0717	0.1952
+2007	57.7082	4.4147	16.1119	30.5502	3.5205	0.0001	0.1417	0.0254
+2008	64.7185	4.4100	17.1071	37.4465	2.6071	0.0002	0.2443	2.0432
+2009	60.4626	4.4359	17.1835	33.0398	2.2653	0.0031	0.1064	0.2108
+2010	64.5384	4.4665	17.6995	35.6201	3.6596	0.0023	0.1486	0.2209
+2011	65.0858	4.5002	16.6578	38.8332	1.9981	0.0027	0.4988	1.2341
+2012	68.6867	4.5287	17.0146	41.6315	2.4117	0.0033	0.2491	2.1740
+2013	66.5549	4.5153	17.0192	38.5983	3.3069	0.0024	0.2505	9.9787
+2014	66.7782	4.5144	17.3245	40.2967	1.5214	0.0028	0.3300	0.5895
+2015	74.2247	4.5569	17.8608	43.7823	4.9151	0.0027	1.0769	6.6130
+2016	71.2424	4.5961	17.4750	43.1658	2.9193	0.0042	0.3927	0.8015
+2017	75.3486	4.6373	17.2166	46.3566	3.9142	0.0046	0.4782	4.3998
+2018	76.0403	4.7014	17.4297	46.5429	4.2563	0.0076	0.7060	3.5561
+2019	78.6182	4.6973	17.6892	47.6701	5.4318	0.0064	0.5489	8.5467
+2020	75.8811	4.7543	17.8009	46.8946	3.3265	0.0034	0.8290	1.9837
+2021	73.4178	4.7923	18.4032	44.3126	2.8127	0.0028	0.7542	2.6176
+2022	74.4483	4.7630	17.6105	45.8976	3.0232	0.0026	0.5909	0.9474
+2023	78.9357	4.8710	18.0149	43.1891	9.6796	0.0031	1.2491	4.4247