// Copyright (c), 2023, Armin Biere, University of Freiburg.

// This is a tool to normalize CNFs in DIMACS format by removing all
// comments and white-space (it also checks for syntax issues).

// clang-format off

static const char * usage =
"usage: normalize [ -h ] [ <input> [ <output> ] ]\n"
"\n"
"  -h        print this command line option summary\n"
"  <input>   input file expected to be in DIMACS format\n"
"  <output>  output file produced in DIMACS format\n"
"\n"
"The file arguments can be '-' to denote '<stdin>' respectively '<stdout>'\n"
"which are also the default files if not specified.\n"
;

// clang-format on

#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Paths as given on the command line ('input_path' is replaced by a
// display name when reading from standard input).
static const char *input_path, *output_path;

// Streams actually read from and written to.
static FILE *input_file, *output_file;

// Set only if the corresponding stream was opened with 'fopen' and thus
// has to be closed with 'fclose' before returning from 'main'.
static bool close_input, close_output;

// Print a 'printf'-style error message to 'stderr', prefixed by the
// program name and the current input path, then exit with status 1.
// This function does not return.
static void die(const char *fmt, ...) {
  fprintf(stderr, "normalize: error in '%s': ", input_path);
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);
  fputc('\n', stderr);
  exit(1);
}

// Parse the command line, read and check the DIMACS input, and write the
// normalized CNF: the 'p cnf <variables> <clauses>' header on the first
// line followed by one clause per line, literals separated by a single
// space and terminated by '0'.  Returns 0 on success; every error is
// reported through 'die' which exits with status 1.
int main(int argc, char **argv) {

  // The first non-option argument is the input, the second the output.
  for (int i = 1; i != argc; i++) {
    const char *arg = argv[i];
    if (!strcmp(arg, "-h")) {
      fputs(usage, stdout);
      exit(0);
    } else if (output_path)
      die("too many files");
    else if (input_path)
      output_path = arg;
    else
      input_path = arg;
  }

  if (!input_path || !strcmp(input_path, "-"))
    input_file = stdin, assert(!close_input), input_path = "<stdin>";
  else if (!(input_file = fopen(input_path, "r")))
    die("can not read input file '%s'", input_path);
  else
    close_input = true;

  // Skip comment lines (starting with 'c') in front of the header.  The
  // labelled 'die' call is shared with the comment loops further down.
  int ch;
  while ((ch = getc(input_file)) == 'c')
    while ((ch = getc(input_file)) != '\n')
      if (ch == EOF)
      END_OF_FILE_IN_COMMENT:
        die("end-of-file in comment");

  if (ch != 'p')
    die("expected 'p cnf ...' header or 'c' comment");

  for (const char *p = " cnf "; *p; p++)
    if (*p != getc(input_file))
      die("invalid 'p cnf ...' header");

  // Parse the number of variables, checking for overflow before each
  // multiplication and addition.
  ch = getc(input_file);
  if (!isdigit(ch))
  INVALID_VARIABLES:
    die("invalid number of variables");

  int variables = ch - '0';
  while (isdigit(ch = getc(input_file))) {
    if (INT_MAX / 10 < variables)
      goto INVALID_VARIABLES;
    variables *= 10;
    int digit = ch - '0';
    if (INT_MAX - digit < variables)
      goto INVALID_VARIABLES;
    variables += digit;
  }

  if (ch != ' ')
    die("expected space in header after variables");

  // Parse the number of clauses in the same overflow-checked way.
  ch = getc(input_file);
  if (!isdigit(ch))
  INVALID_CLAUSES:
    die("invalid number of clauses");

  int clauses = ch - '0';
  while (isdigit(ch = getc(input_file))) {
    if (INT_MAX / 10 < clauses)
      goto INVALID_CLAUSES;
    clauses *= 10;
    int digit = ch - '0';
    if (INT_MAX - digit < clauses)
      goto INVALID_CLAUSES;
    clauses += digit;
  }

  // Only spaces, tabs and carriage-returns may follow on the header line.
  if (ch == '\r')
    ch = getc(input_file);
  if (ch == ' ' || ch == '\t') {
    while ((ch = getc(input_file)) != '\n')
      if (ch != ' ' && ch != '\t' && ch != '\r')
      EXPECTED_NEW_LINE_AFTER_HEADER:
        die("expected white-space and a new-line after clauses");
  } else if (ch != '\n')
    goto EXPECTED_NEW_LINE_AFTER_HEADER;

  // The output is opened only after the header was parsed successfully.
  if (!output_path || !strcmp(output_path, "-"))
    output_file = stdout, assert(!close_output);
  else if (!(output_file = fopen(output_path, "w")))
    die("can not write output file '%s'", output_path);
  else
    close_output = true;

  fprintf(output_file, "p cnf %d %d\n", variables, clauses);

  // 'parsed' counts the clauses completed so far.  'lit' holds the
  // magnitude of the last literal parsed and is therefore non-zero exactly
  // when a clause has been started but not yet terminated by '0'.
  int parsed = 0, lit = 0;
  for (;;) {
    ch = getc(input_file);
    if (ch == EOF) {
      if (lit)
        die("zero at end of last clause missing");
      if (parsed < clauses)
        die("clause missing");
      break;
    }
    if (ch == 'c') {
      while ((ch = getc(input_file)) != '\n')
        if (ch == EOF)
          goto END_OF_FILE_IN_COMMENT;
      continue;
    }
    if (ch == '\r')
      ch = getc(input_file);
    if (ch == ' ' || ch == '\n' || ch == '\t')
      continue;

    int sign = 1;
    if (ch == '-') {
      ch = getc(input_file);
      sign = -1;
    }

    if (!isdigit(ch))
    INVALID_LITERAL:
      die("invalid literal");

    lit = ch - '0';
    while (isdigit(ch = getc(input_file))) {
      if (INT_MAX / 10 < lit)
        goto INVALID_LITERAL;
      lit *= 10;
      int digit = ch - '0';
      if (INT_MAX - digit < lit)
        goto INVALID_LITERAL;
      lit += digit;
    }

    // The variable index must not exceed the number given in the header.
    if (lit > variables)
      goto INVALID_LITERAL;

    if (ch == '\r')
      ch = getc(input_file);
    if (ch != ' ' && ch != '\n' && ch != '\t' && ch != 'c' && ch != EOF)
      die("expected white-space after literal");

    // A comment may start directly after a literal.
    if (ch == 'c') {
      while ((ch = getc(input_file)) != '\n')
        if (ch == EOF)
          goto END_OF_FILE_IN_COMMENT;
    }

    if (lit)
      fprintf(output_file, "%d ", sign * lit);
    else if (parsed++ == clauses)
      die("too many clauses");
    else
      fputs("0\n", output_file);
  }

  if (close_input)
    fclose(input_file);

  // Closing flushes buffered output, so a failure here means that the
  // written file might be incomplete.
  if (close_output && fclose(output_file))
    die("can not close output file '%s'", output_path);

  return 0;
}