Switch to side-by-side view

--- a/src/common/unacpp.cpp
+++ b/src/common/unacpp.cpp
@@ -63,26 +63,57 @@
     return true;
 }
 
+// Functions to determine upper-case or accented status could be implemented
+// hugely more efficiently inside the unac C code, but they're only used for
+// testing user-entered terms, so we don't really care.
 bool unaciscapital(const string& in)
 {
+    LOGDEB2(("unaciscapital: [%s]\n", in.c_str()));
     if (in.empty())
 	return false;
     Utf8Iter it(in);
     string shorter;
     it.appendchartostring(shorter);
 
-    string noacterm, noaclowterm;
-    if (!unacmaybefold(shorter, noacterm, "UTF-8", UNACOP_UNAC)) {
-	LOGINFO(("unaciscapital: unac failed for [%s]\n", in.c_str()));
+    string lower; // receives the case-folded form of 'shorter'
+    if (!unacmaybefold(shorter, lower, "UTF-8", UNACOP_FOLD)) { // case folding only
+	LOGINFO(("unaciscapital: unac/fold failed for [%s]\n", in.c_str()));
 	return false;
     } 
-    if (!unacmaybefold(noacterm, noaclowterm, "UTF-8", UNACOP_UNACFOLD)) {
-	LOGINFO(("unaciscapital: unacfold failed for [%s]\n", in.c_str()));
-	return false;
-    }
-    Utf8Iter it1(noacterm);
-    Utf8Iter it2(noaclowterm);
-    if (*it1 != *it2)
+    Utf8Iter it1(lower); // iterator positioned on the folded text
+    if (*it != *it1) // the char at 'it' differs from its folded counterpart
+	return true;
+    else
+	return false;
+}
+bool unachasuppercase(const string& in) // true when case folding alters 'in'
+{
+    LOGDEB2(("unachasuppercase: [%s]\n", in.c_str()));
+    if (in.empty())
+	return false;
+
+    string lower; // receives the case-folded copy of 'in'
+    if (!unacmaybefold(in, lower, "UTF-8", UNACOP_FOLD)) { // case folding only
+	LOGINFO(("unachasuppercase: unac/fold failed for [%s]\n", in.c_str()));
+	return false; // a failed conversion is reported the same as "no upper-case"
+    } 
+    if (lower != in) // folding changed the text
+	return true;
+    else
+	return false;
+}
+bool unachasaccents(const string& in)
+{
+    LOGDEB2(("unachasaccents: [%s]\n", in.c_str()));
+    if (in.empty())
+	return false;
+
+    string noac;
+    if (!unacmaybefold(in, noac, "UTF-8", UNACOP_UNAC)) {
+	LOGINFO(("unachasaccents: unac/unac failed for [%s]\n", in.c_str()));
+	return false;
+    } 
+    if (noac != in)
 	return true;
     else
 	return false;
@@ -107,12 +138,15 @@
 
 static char usage [] = "\n"
     "[-c|-C] <encoding> <infile> <outfile>\n"
-    " Default : unaccent\n"
-    " -c : unaccent and casefold\n"
-    " -C : casefold only\n"
+    "   Default : unaccent\n"
+    "   -c : unaccent and casefold\n"
+    "   -C : casefold only\n"
+    "-t <string> test string as capitalized, upper-case anywhere, accents\n"
+    "   the parameter is supposedly utf-8 so this can only work in an utf-8\n"
+    "   locale\n"
     "\n";
-
 ;
+
 static void
 Usage(void)
 {
@@ -123,6 +157,7 @@
 static int     op_flags;
 #define OPT_c	  0x2 
 #define OPT_C	  0x4 
+#define OPT_t     0x8
 
 int main(int argc, char **argv)
 {
@@ -140,58 +175,73 @@
 	    switch (*(*argv)++) {
 	    case 'c':	op_flags |= OPT_c; break;
 	    case 'C':	op_flags |= OPT_C; break;
+	    case 't':	op_flags |= OPT_t; break;
 	    default: Usage();	break;
 	    }
 	argc--; argv++;
     }
 
-    if (op_flags & OPT_c) {
-	op = UNACOP_UNACFOLD;
-    } else if (op_flags & OPT_C) {
-	op = UNACOP_FOLD;
-    }
-
-    if (argc != 3) {
-	Usage();
-    }
-
-    const char *encoding = *argv++; argc--;
-    string ifn = *argv++; argc--;
-    if (!ifn.compare("stdin"))
-	ifn.clear();
-    const char *ofn = *argv++; argc--;
-
-    string reason;
-    (void)recollinit(RCLINIT_NONE, 0, 0, reason, 0);
-
-    string odata;
-    if (!file_to_string(ifn, odata)) {
-	cerr << "file_to_string " << ifn << " : " << odata << endl;
-	return 1;
-    }
-    string ndata;
-    if (!unacmaybefold(odata, ndata, encoding, op)) {
-	cerr << "unac: " << ndata << endl;
-	return 1;
-    }
+    if (op_flags & OPT_t) {
+	if (argc != 1)
+	    Usage();
+	string in = *argv++;argc--;
+	bool capital, upper, accent;
+	capital = unaciscapital(in);
+	upper = unachasuppercase(in);
+	accent = unachasaccents(in);
+	cout << "[" << in << "] : " << 
+	    "capitalized: " << (capital ? "Yes. " : "No. ") <<
+	    "has uppercase: " << (upper ? "Yes. " : "No. ") <<
+	    "has accents: " << (accent ? "Yes. " : "No. ") << 
+	    endl;
+	return 0;
+    } else {
+	if (argc != 3)
+	    Usage();
+	if (op_flags & OPT_c) {
+	    op = UNACOP_UNACFOLD;
+	} else if (op_flags & OPT_C) {
+	    op = UNACOP_FOLD;
+	}
+
+	const char *encoding = *argv++; argc--;
+	string ifn = *argv++; argc--;
+	if (!ifn.compare("stdin"))
+	    ifn.clear();
+	const char *ofn = *argv++; argc--;
+
+	string reason;
+	(void)recollinit(RCLINIT_NONE, 0, 0, reason, 0);
+
+	string odata;
+	if (!file_to_string(ifn, odata)) {
+	    cerr << "file_to_string " << ifn << " : " << odata << endl;
+	    return 1;
+	}
+	string ndata;
+	if (!unacmaybefold(odata, ndata, encoding, op)) {
+	    cerr << "unac: " << ndata << endl;
+	    return 1;
+	}
     
-    int fd;
-    if (strcmp(ofn, "stdout")) {
-	fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666);
-    } else {
-	fd = 1;
-    }
-    if (fd < 0) {
-	cerr << "Open/Create " << ofn << " failed: " << strerror(errno) 
-	     << endl;
-	return 1;
-    }
-    if (write(fd, ndata.c_str(), ndata.length()) != (int)ndata.length()) {
-	cerr << "Write(2) failed: " << strerror(errno)  << endl;
-	return 1;
-    }
-    close(fd);
-    return 0;
+	int fd;
+	if (strcmp(ofn, "stdout")) {
+	    fd = open(ofn, O_CREAT|O_EXCL|O_WRONLY, 0666);
+	} else {
+	    fd = 1;
+	}
+	if (fd < 0) {
+	    cerr << "Open/Create " << ofn << " failed: " << strerror(errno) 
+		 << endl;
+	    return 1;
+	}
+	if (write(fd, ndata.c_str(), ndata.length()) != (int)ndata.length()) {
+	    cerr << "Write(2) failed: " << strerror(errno)  << endl;
+	    return 1;
+	}
+	close(fd);
+	return 0;
+    }
 }
 
 #endif