From d8d899f84a4d4b8580fd13f096c070ff6452718d Mon Sep 17 00:00:00 2001
From: Steven Baltakatei Sandoval
Date: Fri, 27 Oct 2023 06:50:53 +0000
Subject: [PATCH] feat(user/c/src):Add beginnings of Navajo character char counter

---
 user/c/src/count_char_nv.c   | 60 ++++++++++++++++++++++++++++++++++++
 user/c/src/count_char_nv.org | 23 ++++++++++++++
 user/c/src/hello_world.c     |  6 ++++
 3 files changed, 89 insertions(+)
 create mode 100644 user/c/src/count_char_nv.c
 create mode 100644 user/c/src/count_char_nv.org
 create mode 100644 user/c/src/hello_world.c

diff --git a/user/c/src/count_char_nv.c b/user/c/src/count_char_nv.c
new file mode 100644
index 0000000..0995fb4
--- /dev/null
+++ b/user/c/src/count_char_nv.c
@@ -0,0 +1,60 @@
+#include <stdio.h>
+#include <unistd.h>
+
+/*
+Desc: A program to print the frequency of different UTF-8 characters
+      taking into account the presence of diacritical marks.
+Depends: glibc >=2.35 (first release with the printf "%b" conversion)
+Info: Diacritical marks are found in Unicode blocks:
+      - U+0300-036F: Combining Diacritical Marks
+      - U+1AB0-1AFF: Combining Diacritical Marks Extended
+      - U+1DC0-1DFF: Combining Diacritical Marks Supplement
+      - U+20D0-20FF: Combining Diacritical Marks for Symbols
+      - U+FE20-FE2F: Combining Half Marks
+Ref/Attrib: UTF-8 byte mechanics: https://www.johndcook.com/blog/2019/09/09/how-utf-8-works/
+*/
+/* Dump each stdin byte as a dec/hex/bin row, then print the code point count. */
+int main(void) {
+  int c;       /* current input byte, or EOF */
+  long nc = 0; /* non-continuation bytes seen, i.e. code points in valid UTF-8 */
+
+  usleep(10000); /* sleep 10000 us (10 ms) before reading */
+  printf("%5s,%5s,%9s\n","dec","hex","bin");
+  /* "%b" (binary) is a C23 conversion; see the Depends note above. */
+  while ( (c = getchar()) != EOF) {
+    printf("%5d,%5x,%9b\n",c,(unsigned)c,(unsigned)c);
+    if ((c & 0xC0) != 0x80) { ++nc; } /* 10xxxxxx is a UTF-8 continuation byte */
+  }
+
+  printf("Character count:%ld\n",nc);
+  return 0;
+}
+
+/* Strategy
+
+- Define table of valid Navajo graphemes
+
+- Define map of precomposed characters and combining mark permutations
+  to graphemes in the table.
+
+- Read input sequentially, incrementing a list of integer counts of
+  graphemes detected.
+
+- Print grapheme totals.
+
+*/
+
+/* Process
+
+1. Read char int into c via c = getchar()
+2. Detect if int c is ASCII (c within [32-126]) or multibyte (first bit 1)
+   a. If ASCII, increment nc, continue to next loop.
+   b. If multibyte, then calculate Unicode code point.
+3. Detect if code point falls into known combining mark ranges.
+   a. If comark, continue to next loop.
+   b. If not comark, increment nc, continue to next loop.
+
+*/
+
+// Author: Steven Baltakatei Sandoval
+// License: GPLv3+
diff --git a/user/c/src/count_char_nv.org b/user/c/src/count_char_nv.org
new file mode 100644
index 0000000..3a98ded
--- /dev/null
+++ b/user/c/src/count_char_nv.org
@@ -0,0 +1,23 @@
+* Count Navajo Graphemes
+
+** Strategy
+- Define table of valid Navajo graphemes
+- Define map of precomposed characters and combining mark permutations
+  to graphemes in the table.
+- Read input sequentially, incrementing a list of integer counts of
+  graphemes detected.
+- Print grapheme totals.
+
+** Process
+
+** Valid Navajo graphemes
+| UC Range  | Desc                |   |   |   |   |   |   |
+|-----------+---------------------+---+---+---+---+---+---|
+| 0021-007E | Basic Latin (ASCII) |   |   |   |   |   |   |
+|           | Latin-1 Supplement  |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+
diff --git a/user/c/src/hello_world.c b/user/c/src/hello_world.c
new file mode 100644
index 0000000..1a3b084
--- /dev/null
+++ b/user/c/src/hello_world.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+/* Print a greeting to stdout and exit successfully. */
+int main(void) {
+  printf("Hello world!\n");
+  return 0;
+}
-- 
2.30.2