fairseq/data/encoders/byte_utils.py

Killed 2 out of 25 mutants

Survived

The mutants listed below survived mutation testing. These mutants show holes in your test suite.

Mutant 3286

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -5,7 +5,7 @@
 
 import re
 
-WHITESPACE_NORMALIZER = re.compile(r'\s+')
+WHITESPACE_NORMALIZER = re.compile(r'XX\s+XX')
 SPACE = chr(32)
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here

Mutant 3287

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -5,7 +5,7 @@
 
 import re
 
-WHITESPACE_NORMALIZER = re.compile(r'\s+')
+WHITESPACE_NORMALIZER = None
 SPACE = chr(32)
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here

Mutant 3288

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -6,7 +6,7 @@
 import re
 
 WHITESPACE_NORMALIZER = re.compile(r'\s+')
-SPACE = chr(32)
+SPACE = chr(33)
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(

Mutant 3289

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -6,7 +6,7 @@
 import re
 
 WHITESPACE_NORMALIZER = re.compile(r'\s+')
-SPACE = chr(32)
+SPACE = None
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(

Mutant 3290

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -7,7 +7,7 @@
 
 WHITESPACE_NORMALIZER = re.compile(r'\s+')
 SPACE = chr(32)
-SPACE_ESCAPE = chr(9601)
+SPACE_ESCAPE = chr(9602)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
     list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +

Mutant 3291

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -7,7 +7,7 @@
 
 WHITESPACE_NORMALIZER = re.compile(r'\s+')
 SPACE = chr(32)
-SPACE_ESCAPE = chr(9601)
+SPACE_ESCAPE = None
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
     list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +

Mutant 3292

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
-    list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+    list(range(33, 126 + 1)) + list(range(161, 172 + 1)) +
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {

Mutant 3293

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
-    list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+    list(range(32, 127 + 1)) + list(range(161, 172 + 1)) +
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {

Mutant 3294

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
-    list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+    list(range(32, 126 - 1)) + list(range(161, 172 + 1)) +
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {

Mutant 3295

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
-    list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+    list(range(32, 126 + 2)) + list(range(161, 172 + 1)) +
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {

Mutant 3297

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
-    list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+    list(range(32, 126 + 1)) + list(range(162, 172 + 1)) +
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {

Mutant 3298

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
-    list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+    list(range(32, 126 + 1)) + list(range(161, 173 + 1)) +
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {

Mutant 3299

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
-    list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+    list(range(32, 126 + 1)) + list(range(161, 172 - 1)) +
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {

Mutant 3300

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
 SPACE_ESCAPE = chr(9601)
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
-    list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+    list(range(32, 126 + 1)) + list(range(161, 172 + 2)) +
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {

Mutant 3302

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -11,7 +11,7 @@
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
     list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
-    list(range(174, 255 + 1))
+    list(range(175, 255 + 1))
 )
 BYTE_TO_BCHAR = {
     b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)

Mutant 3303

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -11,7 +11,7 @@
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
     list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
-    list(range(174, 255 + 1))
+    list(range(174, 256 + 1))
 )
 BYTE_TO_BCHAR = {
     b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)

Mutant 3304

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -11,7 +11,7 @@
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
     list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
-    list(range(174, 255 + 1))
+    list(range(174, 255 - 1))
 )
 BYTE_TO_BCHAR = {
     b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)

Mutant 3305

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -11,7 +11,7 @@
 # excluding non-breaking space (160) here
 PRINTABLE_LATIN = set(
     list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
-    list(range(174, 255 + 1))
+    list(range(174, 255 + 2))
 )
 BYTE_TO_BCHAR = {
     b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)

Mutant 3306

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -14,7 +14,7 @@
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {
-    b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
+    b: chr(b) if b not in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
 }
 BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
 

Mutant 3307

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -14,7 +14,7 @@
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {
-    b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
+    b: chr(b) if b in PRINTABLE_LATIN else chr(257 + b) for b in range(256)
 }
 BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
 

Mutant 3308

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -14,7 +14,7 @@
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {
-    b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
+    b: chr(b) if b in PRINTABLE_LATIN else chr(256 - b) for b in range(256)
 }
 BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
 

Mutant 3309

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -14,7 +14,7 @@
     list(range(174, 255 + 1))
 )
 BYTE_TO_BCHAR = {
-    b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
+    b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(257)
 }
 BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
 

Mutant 3310

--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -16,7 +16,7 @@
 BYTE_TO_BCHAR = {
     b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
 }
-BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
+BCHAR_TO_BYTE = None
 
 
 def byte_encode(x: str) -> str: