fairseq/data/encoders/byte_utils.py
Killed 2 out of 25 mutants
Survived
Survived mutation testing. These mutants show holes in your test suite.
Mutant 1155
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -5,7 +5,7 @@
import re
-WHITESPACE_NORMALIZER = re.compile(r'\s+')
+WHITESPACE_NORMALIZER = re.compile(r'XX\s+XX')
SPACE = chr(32)
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
Mutant 1156
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -5,7 +5,7 @@
import re
-WHITESPACE_NORMALIZER = re.compile(r'\s+')
+WHITESPACE_NORMALIZER = None
SPACE = chr(32)
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
Mutant 1157
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -6,7 +6,7 @@
import re
WHITESPACE_NORMALIZER = re.compile(r'\s+')
-SPACE = chr(32)
+SPACE = chr(33)
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
Mutant 1158
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -6,7 +6,7 @@
import re
WHITESPACE_NORMALIZER = re.compile(r'\s+')
-SPACE = chr(32)
+SPACE = None
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
Mutant 1159
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -7,7 +7,7 @@
WHITESPACE_NORMALIZER = re.compile(r'\s+')
SPACE = chr(32)
-SPACE_ESCAPE = chr(9601)
+SPACE_ESCAPE = chr(9602)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
Mutant 1160
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -7,7 +7,7 @@
WHITESPACE_NORMALIZER = re.compile(r'\s+')
SPACE = chr(32)
-SPACE_ESCAPE = chr(9601)
+SPACE_ESCAPE = None
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
Mutant 1161
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
- list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+ list(range(33, 126 + 1)) + list(range(161, 172 + 1)) +
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
Mutant 1162
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
- list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+ list(range(32, 127 + 1)) + list(range(161, 172 + 1)) +
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
Mutant 1163
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
- list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+ list(range(32, 126 - 1)) + list(range(161, 172 + 1)) +
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
Mutant 1164
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
- list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+ list(range(32, 126 + 2)) + list(range(161, 172 + 1)) +
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
Mutant 1166
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
- list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+ list(range(32, 126 + 1)) + list(range(162, 172 + 1)) +
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
Mutant 1167
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
- list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+ list(range(32, 126 + 1)) + list(range(161, 173 + 1)) +
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
Mutant 1168
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
- list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+ list(range(32, 126 + 1)) + list(range(161, 172 - 1)) +
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
Mutant 1169
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -10,7 +10,7 @@
SPACE_ESCAPE = chr(9601)
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
- list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
+ list(range(32, 126 + 1)) + list(range(161, 172 + 2)) +
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
Mutant 1171
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -11,7 +11,7 @@
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
- list(range(174, 255 + 1))
+ list(range(175, 255 + 1))
)
BYTE_TO_BCHAR = {
b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
Mutant 1172
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -11,7 +11,7 @@
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
- list(range(174, 255 + 1))
+ list(range(174, 256 + 1))
)
BYTE_TO_BCHAR = {
b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
Mutant 1173
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -11,7 +11,7 @@
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
- list(range(174, 255 + 1))
+ list(range(174, 255 - 1))
)
BYTE_TO_BCHAR = {
b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
Mutant 1174
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -11,7 +11,7 @@
# excluding non-breaking space (160) here
PRINTABLE_LATIN = set(
list(range(32, 126 + 1)) + list(range(161, 172 + 1)) +
- list(range(174, 255 + 1))
+ list(range(174, 255 + 2))
)
BYTE_TO_BCHAR = {
b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
Mutant 1175
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -14,7 +14,7 @@
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
- b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
+ b: chr(b) if b not in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
}
BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
Mutant 1176
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -14,7 +14,7 @@
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
- b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
+ b: chr(b) if b in PRINTABLE_LATIN else chr(257 + b) for b in range(256)
}
BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
Mutant 1177
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -14,7 +14,7 @@
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
- b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
+ b: chr(b) if b in PRINTABLE_LATIN else chr(256 - b) for b in range(256)
}
BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
Mutant 1178
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -14,7 +14,7 @@
list(range(174, 255 + 1))
)
BYTE_TO_BCHAR = {
- b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
+ b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(257)
}
BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
Mutant 1179
--- fairseq/data/encoders/byte_utils.py
+++ fairseq/data/encoders/byte_utils.py
@@ -16,7 +16,7 @@
BYTE_TO_BCHAR = {
b: chr(b) if b in PRINTABLE_LATIN else chr(256 + b) for b in range(256)
}
-BCHAR_TO_BYTE = {bc: b for b, bc in BYTE_TO_BCHAR.items()}
+BCHAR_TO_BYTE = None
def byte_encode(x: str) -> str: