From b79e0a8bf378602e9df72410dc711532c40115b1 Mon Sep 17 00:00:00 2001 From: Steven Baltakatei Sandoval Date: Sun, 11 Sep 2022 00:05:56 +0000 Subject: [PATCH] feat(src/kr/ch1/s1.5):Finish exercises e1-11/e1-12 --- .../ch1/s1.5/e1-11..word_count_test_strat.org | 40 +++++++++++++++++++ .../ch1/s1.5/e1-12..word_by_word.c | 24 +++++++++++ .../ch1/s1.5/s1.5.4-1..barebones_wc.c | 24 +++++++++++ src/notes.tm | 25 ++++++++++-- 4 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 src/kr_exercises/ch1/s1.5/e1-11..word_count_test_strat.org create mode 100644 src/kr_exercises/ch1/s1.5/e1-12..word_by_word.c create mode 100644 src/kr_exercises/ch1/s1.5/s1.5.4-1..barebones_wc.c diff --git a/src/kr_exercises/ch1/s1.5/e1-11..word_count_test_strat.org b/src/kr_exercises/ch1/s1.5/e1-11..word_count_test_strat.org new file mode 100644 index 0000000..7aedf3c --- /dev/null +++ b/src/kr_exercises/ch1/s1.5/e1-11..word_count_test_strat.org @@ -0,0 +1,40 @@ +* Word Count Program Test Strategy +#+AUTHOR:Steven Baltakatei Sandoval +#+DATE: 2022-09-10 +#+EMAIL: baltakatei@gmail.com + +** Question 1 +*How would you test the word count program?* + +For the word count program provided on page 20 of K&R 2nd Edition, I +would first create a program to automatically feed it input and record the output. + +Then, I would feed the word count program a corpus of ASCII text that +has already had its counts of newlines, words, and characters verified +manually. + +Then, I would repeat the previous step but with a corpus containing +multibyte glyphs such as a UTF-8 formatted Japanese news article. + +Then, I would feed the word count program a small (e.g. 10 bytes) +amount of random binary noise and record the output. Then, I would +manually review how the word count program parsed the random bytes. I +would repeat this step several times, noting how the program reacts to +bytes that could not represent text. 
+ +** Question 2 +*What kinds of input are most likely to uncover bugs if there are +any?* Multibyte glyphs. A single-byte character +encoding assigns one byte to each character, whereas a +multi-byte encoding may assign several bytes to one character. Therefore, a +single multi-byte character fed into the word count program may +require multiple calls to ~getchar()~ to process completely, +especially if ~getchar()~ were written assuming single-byte encoded +data that may not even contain text at all. See [[https://www.gnu.org/software/libc/manual/html_node/Extended-Char-Intro.html][Introduction to +Extended Characters]] in the [[https://www.gnu.org/software/libc/manual/html_node/index.html][GNU C Library Reference Manual]]. + +Another kind of input that could reveal bugs would simply be random +bytes. If ~getchar()~ is written expecting to receive only bytes from +certain ranges, then unexpected bytes may reveal unexpected +behavior. The expected range of bytes might not be intended by the +programmer. See [[https://en.wikipedia.org/wiki/Fuzzing][Fuzzing]]. diff --git a/src/kr_exercises/ch1/s1.5/e1-12..word_by_word.c b/src/kr_exercises/ch1/s1.5/e1-12..word_by_word.c new file mode 100644 index 0000000..f8d37eb --- /dev/null +++ b/src/kr_exercises/ch1/s1.5/e1-12..word_by_word.c @@ -0,0 +1,24 @@ +#include <stdio.h> + +#define IN 1 +#define OUT 0 + +/* Prints input one word per line. 
*/ + +int main() { + int c, state = OUT; /* start outside a word: state is tested on the first whitespace byte, before any assignment in the loop */ + + while( (c = getchar()) != EOF ) { + if ( c == ' ' || c == '\n' || c == '\t' ) { + if ( state == IN ) + printf("\n"); + state = OUT; + } else { + putchar(c); + state = IN; + }; + }; + + printf("\n"); + return 0; +}; diff --git a/src/kr_exercises/ch1/s1.5/s1.5.4-1..barebones_wc.c b/src/kr_exercises/ch1/s1.5/s1.5.4-1..barebones_wc.c new file mode 100644 index 0000000..ff062d6 --- /dev/null +++ b/src/kr_exercises/ch1/s1.5/s1.5.4-1..barebones_wc.c @@ -0,0 +1,24 @@ +#include <stdio.h> + +#define IN 1 /* inside a word */ +#define OUT 0 /* outside a word */ + +/* count lines, words, and characters in input */ +int main() { + int c, nl, nw, nc, state; + + state = OUT; + nl = nw = nc = 0; + while( (c = getchar()) != EOF) { + ++nc; + if (c == '\n') + ++nl; + if (c == ' ' || c == '\n' || c == '\t') + state = OUT; + else if (state == OUT) { + state = IN; + ++nw; + }; + }; + printf("%d %d %d\n", nl, nw, nc); +}; diff --git a/src/notes.tm b/src/notes.tm index 0d4cac6..03d208a 100644 --- a/src/notes.tm +++ b/src/notes.tm @@ -60,6 +60,9 @@ same associations as a declaration but also causes storage to be allocated for the variable. (See ). + A source code analysis program + designed to detect common syntactic errors. + > (or ) is a user defined data type in . It is mainly used to assign names to integral constants. For example, the declaration @@ -228,7 +231,20 @@ \; - \; + + + + + <\itemize> + - GNU text editor. See + . + + - text editor See . + + + + + (TODO: Insert C language linter here) \; @@ -245,7 +261,9 @@ > > > - > + > + > + > > > > @@ -257,7 +275,7 @@ > > |?>> - |?>> + > |?>> |prog-language||font-family||\\x>|hh>|?>> @@ -270,6 +288,7 @@ stack|?>> struct|?>> > + > -- 2.39.5