Mercurial > hg > cc > cirrus_work
changeset 223:36610ddfbc7a
replaced mean_lens by w or wo bogon
author | Henry S. Thompson <ht@inf.ed.ac.uk> |
---|---|
date | Wed, 28 Feb 2024 14:49:45 +0000 |
parents | ee34498c6762 |
children | 8cc73836f2d9 |
files | results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens.tsv results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens_w_bogon.tsv results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens_wo_bogon.tsv |
diffstat | 3 files changed, 60 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- a/results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens.tsv Wed Feb 28 14:44:59 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ -year whole scheme netloc path query idna path% query% -1995 64.0538 4.5914 15.8065 40.5699 0.0753 0.0000 0.0000 0.0000 -1996 67.0588 4.8778 16.5520 42.6290 0.0000 0.0000 0.0000 nan -1997 65.2948 4.6107 18.3217 28.0831 11.1213 0.0000 0.0049 0.0103 -1998 139.7477 4.5046 17.6685 30.7127 83.6943 0.0000 0.0000 37.1099 -1999 55.1920 4.5429 15.6377 31.9246 0.0846 0.0000 0.5538 0.0000 -2000 72.0602 4.4138 15.7897 42.2448 6.5341 0.0000 3.4043 0.7652 -2001 71.4869 4.4844 16.8443 43.7399 3.3488 0.0000 0.0111 0.0725 -2002 55.7478 4.4091 16.1407 31.7059 0.4630 0.0000 0.0304 0.0000 -2003 62.4248 4.4310 16.5919 37.8228 0.5477 0.0000 0.0250 0.0000 -2004 59.0838 4.4207 17.2578 33.8296 0.5351 0.0000 0.0468 0.0000 -2005 82.1663 4.4181 15.8059 42.2251 16.6627 0.0174 5.6736 96.9459 -2006 70.4486 4.4856 16.1607 32.6421 13.9758 0.0000 0.0717 0.1952 -2007 57.7082 4.4147 16.1119 30.5502 3.5205 0.0001 0.1417 0.0254 -2008 64.7185 4.4100 17.1071 37.4465 2.6071 0.0002 0.2443 2.0432 -2009 60.4626 4.4359 17.1835 33.0398 2.2653 0.0031 0.1064 0.2108 -2010 64.5384 4.4665 17.6995 35.6201 3.6596 0.0023 0.1486 0.2209 -2011 65.0858 4.5002 16.6578 38.8332 1.9981 0.0027 0.4988 1.2341 -2012 68.6867 4.5287 17.0146 41.6315 2.4117 0.0033 0.2491 2.1740 -2013 66.5549 4.5153 17.0192 38.5983 3.3069 0.0024 0.2505 9.9787 -2014 66.7782 4.5144 17.3245 40.2967 1.5214 0.0028 0.3300 0.5895 -2015 74.2247 4.5569 17.8608 43.7823 4.9151 0.0027 1.0769 6.6130 -2016 71.2424 4.5961 17.4750 43.1658 2.9193 0.0042 0.3927 0.8015 -2017 75.3486 4.6373 17.2166 46.3566 3.9142 0.0046 0.4782 4.3998 -2018 76.0403 4.7014 17.4297 46.5429 4.2563 0.0076 0.7060 3.5561 -2019 78.6182 4.6973 17.6892 47.6701 5.4318 0.0064 0.5489 8.5467 -2020 75.8811 4.7543 17.8009 46.8946 3.3265 0.0034 0.8290 1.9837 -2021 73.4178 4.7923 18.4032 44.3126 2.8127 0.0028 0.7542 2.6176 -2022 74.4483 4.7630 17.6105 45.8976 3.0232 0.0026 0.5909 0.9474 -2023 78.9357 4.8710 18.0149 43.1891 9.6796 0.0031 1.2491 4.4247
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens_w_bogon.tsv Wed Feb 28 14:49:45 2024 +0000 @@ -0,0 +1,30 @@ +year whole scheme netloc path query idna path% query% +1995 64.0538 4.5914 15.8065 40.5699 0.0753 0.0000 0.0000 0.0000 +1996 67.0588 4.8778 16.5520 42.6290 0.0000 0.0000 0.0000 nan +1997 65.2948 4.6107 18.3217 28.0831 11.1213 0.0000 0.0049 0.0103 +1998 139.7477 4.5046 17.6685 30.7127 83.6943 0.0000 0.0000 37.1099 +1999 55.1920 4.5429 15.6377 31.9246 0.0846 0.0000 0.5538 0.0000 +2000 72.0602 4.4138 15.7897 42.2448 6.5341 0.0000 3.4043 0.7652 +2001 71.4869 4.4844 16.8443 43.7399 3.3488 0.0000 0.0111 0.0725 +2002 55.7478 4.4091 16.1407 31.7059 0.4630 0.0000 0.0304 0.0000 +2003 62.4248 4.4310 16.5919 37.8228 0.5477 0.0000 0.0250 0.0000 +2004 59.0838 4.4207 17.2578 33.8296 0.5351 0.0000 0.0468 0.0000 +2005 82.1663 4.4181 15.8059 42.2251 16.6627 0.0174 5.6736 96.9459 +2006 70.4486 4.4856 16.1607 32.6421 13.9758 0.0000 0.0717 0.1952 +2007 57.7082 4.4147 16.1119 30.5502 3.5205 0.0001 0.1417 0.0254 +2008 64.7185 4.4100 17.1071 37.4465 2.6071 0.0002 0.2443 2.0432 +2009 60.4626 4.4359 17.1835 33.0398 2.2653 0.0031 0.1064 0.2108 +2010 64.5384 4.4665 17.6995 35.6201 3.6596 0.0023 0.1486 0.2209 +2011 65.0858 4.5002 16.6578 38.8332 1.9981 0.0027 0.4988 1.2341 +2012 68.6867 4.5287 17.0146 41.6315 2.4117 0.0033 0.2491 2.1740 +2013 66.5549 4.5153 17.0192 38.5983 3.3069 0.0024 0.2505 9.9787 +2014 66.7782 4.5144 17.3245 40.2967 1.5214 0.0028 0.3300 0.5895 +2015 74.2247 4.5569 17.8608 43.7823 4.9151 0.0027 1.0769 6.6130 +2016 71.2424 4.5961 17.4750 43.1658 2.9193 0.0042 0.3927 0.8015 +2017 75.3486 4.6373 17.2166 46.3566 3.9142 0.0046 0.4782 4.3998 +2018 76.0403 4.7014 17.4297 46.5429 4.2563 0.0076 0.7060 3.5561 +2019 78.6182 4.6973 17.6892 47.6701 5.4318 0.0064 0.5489 8.5467 +2020 75.8811 4.7543 17.8009 46.8946 3.3265 0.0034 0.8290 1.9837 +2021 73.4178 4.7923 18.4032 44.3126 2.8127 0.0028 0.7542 2.6176 +2022 74.4483 4.7630 17.6105 45.8976 3.0232 0.0026 0.5909 0.9474 +2023 78.9357 4.8710 18.0149 43.1891 9.6796 0.0031 1.2491 4.4247
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/results/CC-MAIN-2023-40/warc_lmhx/best_two_by_nl1/years/mean_lens_wo_bogon.tsv Wed Feb 28 14:49:45 2024 +0000 @@ -0,0 +1,30 @@ +year whole scheme netloc path query idna path% query% +1995 64.0538 4.5914 15.8065 40.5699 0.0753 0.0000 0.0000 0.0000 +1996 67.0588 4.8778 16.5520 42.6290 0.0000 0.0000 0.0000 nan +1997 65.2948 4.6107 18.3217 28.0831 11.1213 0.0000 0.0049 0.0103 +1998 139.7477 4.5046 17.6685 30.7127 83.6943 0.0000 0.0000 37.1099 +1999 55.1920 4.5429 15.6377 31.9246 0.0846 0.0000 0.5538 0.0000 +2000 72.0602 4.4138 15.7897 42.2448 6.5341 0.0000 3.4043 0.7652 +2001 71.4869 4.4844 16.8443 43.7399 3.3488 0.0000 0.0111 0.0725 +2002 55.7478 4.4091 16.1407 31.7059 0.4630 0.0000 0.0304 0.0000 +2003 62.4248 4.4310 16.5919 37.8228 0.5477 0.0000 0.0250 0.0000 +2004 59.0838 4.4207 17.2578 33.8296 0.5351 0.0000 0.0468 0.0000 +2005 53.9336 4.4117 16.0306 29.7504 0.7044 0.0000 0.0586 0.0155 +2006 70.4486 4.4856 16.1607 32.6421 13.9758 0.0000 0.0717 0.1952 +2007 57.7082 4.4147 16.1119 30.5502 3.5205 0.0001 0.1417 0.0254 +2008 64.7185 4.4100 17.1071 37.4465 2.6071 0.0002 0.2443 2.0432 +2009 60.4626 4.4359 17.1835 33.0398 2.2653 0.0031 0.1064 0.2108 +2010 64.5384 4.4665 17.6995 35.6201 3.6596 0.0023 0.1486 0.2209 +2011 65.0858 4.5002 16.6578 38.8332 1.9981 0.0027 0.4988 1.2341 +2012 68.6867 4.5287 17.0146 41.6315 2.4117 0.0033 0.2491 2.1740 +2013 66.5549 4.5153 17.0192 38.5983 3.3069 0.0024 0.2505 9.9787 +2014 66.7782 4.5144 17.3245 40.2967 1.5214 0.0028 0.3300 0.5895 +2015 74.2247 4.5569 17.8608 43.7823 4.9151 0.0027 1.0769 6.6130 +2016 71.2424 4.5961 17.4750 43.1658 2.9193 0.0042 0.3927 0.8015 +2017 75.3486 4.6373 17.2166 46.3566 3.9142 0.0046 0.4782 4.3998 +2018 76.0403 4.7014 17.4297 46.5429 4.2563 0.0076 0.7060 3.5561 +2019 78.6182 4.6973 17.6892 47.6701 5.4318 0.0064 0.5489 8.5467 +2020 75.8811 4.7543 17.8009 46.8946 3.3265 0.0034 0.8290 1.9837 +2021 73.4178 4.7923 18.4032 44.3126 2.8127 0.0028 0.7542 2.6176 +2022 74.4483 4.7630 17.6105 45.8976 3.0232 0.0026 0.5909 0.9474 +2023 78.9357 4.8710 18.0149 43.1891 9.6796 0.0031 1.2491 4.4247