feat(user/c/src): Add beginnings of Navajo character counter bktei/feat/navajo-char
author Steven Baltakatei Sandoval <baltakatei@gmail.com>
Fri, 27 Oct 2023 06:50:53 +0000 (06:50 +0000)
committer Steven Baltakatei Sandoval <baltakatei@gmail.com>
Fri, 27 Oct 2023 06:50:53 +0000 (06:50 +0000)
user/c/src/count_char_nv.c [new file with mode: 0644]
user/c/src/count_char_nv.org [new file with mode: 0644]
user/c/src/hello_world.c [new file with mode: 0644]

diff --git a/user/c/src/count_char_nv.c b/user/c/src/count_char_nv.c
new file mode 100644 (file)
index 0000000..0995fb4
--- /dev/null
@@ -0,0 +1,60 @@
+#include <unistd.h>
+#include <stdio.h>
+
+/*
+Desc: A program to print the frequency of different UTF-8 characters
+      taking into account the presence of diacritical marks.
+Depends: glibc >=2.35 (first release whose printf supports the %b binary conversion)
+Info: Diacritical marks are found in Unicode blocks:
+  - U+0300-036F: Combining Diacritical Marks
+  - U+1AB0-1AFF: Combining Diacritical Marks Extended
+  - U+1DC0-1DFF: Combining Diacritical Marks Supplement
+  - U+20D0-20FF: Combining Diacritical Marks for Symbols
+  - U+FE20-FE2F: Combining Half Marks
+Ref/Attrib: UTF-8 byte mechanics: https://www.johndcook.com/blog/2019/09/09/how-utf-8-works/
+*/
+
+/* Dump every stdin byte as decimal, hex and binary; then print the byte total. */
+int main(void) {
+  long nc = 0; /* number of bytes read so far (not yet grapheme-aware) */
+  int c;       /* getchar() result: a byte value, or EOF */
+
+  usleep(10000); /* 10 ms pause before reading; its purpose is not stated here */
+  printf("%5s,%5s,%9s\n","dec","hex","bin");
+  while ((c = getchar()) != EOF) {
+    /* %x and %b take unsigned int; %b is a C23 conversion (libc must support it). */
+    printf("%5d,%5x,%9b\n",c,(unsigned)c,(unsigned)c);
+    ++nc;
+  }
+  printf("Character count:%ld\n",nc);
+  return 0;
+}
+
+/* Strategy
+
+- Define table of valid Navajo graphemes
+
+- Define map of precomposed characters and combining mark permutations
+  to graphemes in the table.
+
+- Read input sequentially, incrementing a list of integer counts of
+  graphemes detected.
+
+- Print grapheme totals.
+
+*/
+
+/* Process
+
+1. Read char int into c via c = getchar()
+2. Detect if int c is printable ASCII (c within [32-126]) or multibyte (high bit set, c >= 128)
+  a. If ASCII, increment nc, continue to next loop.
+  b. If multibyte, then calculate Unicode code point.
+3. Detect if code point falls into known combining mark ranges.
+  a. If comark, continue to next loop.
+  b. If not comark increment nc, continue to next loop.
+
+*/
+
+// Author: Steven Baltakatei Sandoval
+// License: GPLv3+
diff --git a/user/c/src/count_char_nv.org b/user/c/src/count_char_nv.org
new file mode 100644 (file)
index 0000000..3a98ded
--- /dev/null
@@ -0,0 +1,23 @@
+* Count Navajo Graphemes
+
+** Strategy
+- Define table of valid Navajo graphemes
+- Define map of precomposed characters and combining mark permutations
+  to graphemes in the table.
+- Read input sequentially, incrementing a list of integer counts of
+  graphemes detected.
+- Print grapheme totals.
+
+** Process
+
+** Valid Navajo graphemes
+|  UC Range | Desc                |   |   |   |   |   |   |
+|-----------+---------------------+---+---+---+---+---+---|
+| 0021-007E | Basic Latin (ASCII) |   |   |   |   |   |   |
+|           | Latin-1 Supplement  |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+|           |                     |   |   |   |   |   |   |
+
diff --git a/user/c/src/hello_world.c b/user/c/src/hello_world.c
new file mode 100644 (file)
index 0000000..1a3b084
--- /dev/null
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void) { /* Print a fixed greeting to stdout and exit with status 0. */
+  printf("Hello world!\n");
+  return 0;
+}