--- a
+++ b/unac/unactest1.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Loic Dachary <loic@senga.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Run unac_string on an input large enough to trigger re-allocation.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "unac.h"
+
+static char* longstr_expected = "
+
+Senga - Catalog software
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ senga.org
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+December 28, 2000
+
+ January 27, 2000
+ Catalog-1.02
+ is available.
+
+ The dmoz loading process has been dramatically simplified. It is
+ now only available as a command. No more fancy web interface that
+ confuses everyone. In addition the convert_dmoz script now generates
+ text files that can be directly loaded into Catalog instead of the
+ intermediate XML file. The whole loading process now takes from
+ one to two hours depending on your machine. It took around 10 hours
+ with the previous version.
+ The -exclude option was added to convert_dmoz to get rid of
+ a whole branch of the catalog at load time. Typical usage would
+ be convert_dmoz -exclude '^/Adult' -what content content.rdf.gz.
+ A lot more sanity checks and repair have been added to deal with
+ duplicates, category id conflicts and the like.
+ Hopefully this new method will also be more understandable and
+ generate less traffic on the mailing list. There is room for
+ improvements and contributors are welcome.
+
+ A new set of software is available in the
+ download directory under the RedHat-6.1 section. These
+ are the most up to date versions on which Catalog depends. Although the
+ binaries depend on RedHat-6.1 the perl modules are source and can be
+ used on any platform.
+
+ September 7, 1999
+ Catalog-1.01
+ is available.
+ This is a maintainance release.
+
+ Various bug fixes. All easy
+ to fix bugs have been fixed. Take a look at Bug Track to see what hasn't been fixed.
+ The _PATHTEXT_ and _PATHFILE_
+ tags syntax has been extended to specify a range of path component.
+
+ Graham Barr added a recursive
+ template feature for a catalog root page. This allows to show sub-categories
+ of the root categories in the root page of a catalog.
+
+
+ Don't hesitate to submit bugs
+ or ideas to bug track. Hopefully the next version of Catalog will have
+ a fast full text indexing mechanism and I'll be able to implement new
+ functionalities.
+
+ Have fun !
+ July 3, 1999
+ Catalog-1.00
+ is available.
+ This release includes PHP3
+ code to display a catalog. The author is Weston Bustraan (weston@infinityteldata.net).
+ The main motivation to jump directly to version 1.00 is to avoid version
+ number problems on CPAN.
+ July 2, 1999
+ Catalog-0.19
+ is available.
+ This is a minor release. The
+ most noticeable addition is the new search mechanism.
+
+ Searching : two search modes
+ are now available. AltaVista simple syntax and AltaVista advanced syntax.
+ Both use the Text-Query and Text-Query-SQL perl modules.
+ Dmoz loading is much more
+ fault tolerant. In addition it can handle compressed versions of content.rdf
+ and structure.rdf. The comments are now stored in text fields instead
+ of char(255).
+ The template system was
+ extended with the pre_fill and post_fill parameters.
+ Searching associated to
+ a catalog dumped to static pages is now possible using the 'static'
+ mode.
+ Fixed two security weakness
+ in confedit and recursive cgi handling.
+ Many sql queries have been
+ optimized.
+ The configuration was changed
+ a bit to fix bugs and to isolate database dependencies.
+ The tests were updated to
+ isolate database dependencies.
+ Fixed numerous minor bugs,
+ check ChangeLog if you're interested in details.
+
+ Many thanks to Tim Bunce for
+ his numerous contributions and ideas. He is the architect of the Text-Query
+ and Text-Query-SQL modules, Eric Bohlman and Loic Dachary did the programming.
+
+ Thanks to Eric Bohlman for
+ his help on the Text-Query module. He was very busy but managed to spend
+ the time needed to release it.
+ There is not yet anything usable
+ for full text indexing but we keep working on it. The storage management
+ is now handled by the reiserfs file system thanks to Hans Reiser who is
+ working full time on this. Loic Dachary does his best to get something
+ working, if you're interested go to http://www.senga.org/mifluz/.
+ For some mysterious reason
+ CPAN lost track of Catalog name. In order to install catalog you should
+ use perl -MCPAN -e 'install Catalog::db'. Weird but temporary.
+ Have fun !
+ The Senga Team
+ Ecila
+ 100 Av. du General Leclerc
+ 93 500 Pantin
+ Tel: 33 1 56 96 09 80
+ Fax: 33 1 56 96 09 81
+ WEB: http://www.senga.org/
+ Mail: senga@senga.org
+
+
+
+
+
+
+
+
+[
+Catalog |
+webbase |
+mifluz |
+unac |
+Search-Mifluz |
+Text-Query |
+uri |
+Statistics |
+News
+]
+
+
+
+
+
+
+
+";
+
+static char* longstr = "
+
+Senga - Catalog software
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ �
+
+
+
+ senga.org
+
+
+�
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+December 28, 2000
+
+ January 27, 2000
+ Catalog-1.02
+ is available.
+
+ The dmoz loading process has been dramatically simplified. It is
+ now only available as a command. No more fancy web interface that
+ confuses everyone. In addition the convert_dmoz script now generates
+ text files that can be directly loaded into Catalog instead of the
+ intermediate XML file. The whole loading process now takes from
+ one to two hours depending on your machine. It took around 10 hours
+ with the previous version.
+ The -exclude option was added to convert_dmoz to get rid of
+ a whole branch of the catalog at load time. Typical usage would
+ be convert_dmoz -exclude '^/Adult' -what content content.rdf.gz.
+ A lot more sanity checks and repair have been added to deal with
+ duplicates, category id conflicts and the like.
+ Hopefully this new method will also be more understandable and
+ generate less traffic on the mailing list. There is room for
+ improvements and contributors are welcome.
+
+ A new set of software is available in the
+ download directory under the RedHat-6.1 section. These
+ are the most up to date versions on which Catalog depends. Although the
+ binaries depend on RedHat-6.1 the perl modules are source and can be
+ used on any platform.
+
+ September 7, 1999
+ Catalog-1.01
+ is available.
+ This is a maintainance release.
+
+ Various bug fixes. All easy
+ to fix bugs have been fixed. Take a look at Bug Track to see what hasn't been fixed.
+ The _PATHTEXT_ and _PATHFILE_
+ tags syntax has been extended to specify a range of path component.
+
+ Graham Barr added a recursive
+ template feature for a catalog root page. This allows to show sub-categories
+ of the root categories in the root page of a catalog.
+
+
+ Don't hesitate to submit bugs
+ or ideas to bug track. Hopefully the next version of Catalog will have
+ a fast full text indexing mechanism and I'll be able to implement new
+ functionalities.
+
+ Have fun !
+ July 3, 1999
+ Catalog-1.00
+ is available.
+ This release includes PHP3
+ code to display a catalog. The author is Weston Bustraan (weston@infinityteldata.net).
+ The main motivation to jump directly to version 1.00 is to avoid version
+ number problems on CPAN.
+ July 2, 1999
+ Catalog-0.19
+ is available.
+ This is a minor release. The
+ most noticeable addition is the new search mechanism.
+
+ Searching : two search modes
+ are now available. AltaVista simple syntax and AltaVista advanced syntax.
+ Both use the Text-Query and Text-Query-SQL perl modules.
+ Dmoz loading is much more
+ fault tolerant. In addition it can handle compressed versions of content.rdf
+ and structure.rdf. The comments are now stored in text fields instead
+ of char(255).
+ The template system was
+ extended with the pre_fill and post_fill parameters.
+ Searching associated to
+ a catalog dumped to static pages is now possible using the 'static'
+ mode.
+ Fixed two security weakness
+ in confedit and recursive cgi handling.
+ Many sql queries have been
+ optimized.
+ The configuration was changed
+ a bit to fix bugs and to isolate database dependencies.
+ The tests were updated to
+ isolate database dependencies.
+ Fixed numerous minor bugs,
+ check ChangeLog if you're interested in details.
+
+ Many thanks to Tim Bunce for
+ his numerous contributions and ideas. He is the architect of the Text-Query
+ and Text-Query-SQL modules, Eric Bohlman and Loic Dachary did the programming.
+
+ Thanks to Eric Bohlman for
+ his help on the Text-Query module. He was very busy but managed to spend
+ the time needed to release it.
+ There is not yet anything usable
+ for full text indexing but we keep working on it. The storage management
+ is now handled by the reiserfs file system thanks to Hans Reiser who is
+ working full time on this. Loic Dachary does his best to get something
+ working, if you're interested go to http://www.senga.org/mifluz/.
+ For some mysterious reason
+ CPAN lost track of Catalog name. In order to install catalog you should
+ use perl -MCPAN -e 'install Catalog::db'. Weird but temporary.
+ Have fun !
+ The Senga Team
+ Ecila
+ 100 Av. du G�n�ral Leclerc
+ 93 500 Pantin
+ Tel: 33 1 56 96 09 80
+ Fax: 33 1 56 96 09 81
+ WEB: http://www.senga.org/
+ Mail: senga@senga.org
+
+
+
+
+ �
+
+
+
+[
+Catalog |
+webbase |
+mifluz |
+unac |
+Search-Mifluz |
+Text-Query |
+uri |
+Statistics |
+News
+]
+
+
+
+
+
+
+
+";
+
+int main() {
+ int i;
+ char* out = 0;
+ size_t out_length = 0;
+ {
+ if(unac_string("ISO-8859-1", "�t�", 3, &out, &out_length) < 0) {
+ perror("unac �t�");
+ exit(1);
+ }
+ if(out_length != 3) {
+ fprintf(stderr, "out_length == %d instead of 3\n", (int)out_length);
+ exit(1);
+ }
+ if(memcmp("ete", out, out_length)) {
+ fprintf(stderr, "out == %.*s instead of ete\n", (int)out_length, out);
+ exit(1);
+ }
+
+ }
+
+ {
+ char tmp[10];
+ sprintf(tmp, "%c", 0xBC);
+ if(unac_string("ISO-8859-1", tmp, 1, &out, &out_length) < 0) {
+ perror("unac 0xBC (1/4)");
+ exit(1);
+ }
+ if(out_length != 3) {
+ fprintf(stderr, "out_length == %d instead of 3\n", (int)out_length);
+ exit(1);
+ }
+ if(memcmp("1 4", out, out_length)) {
+ fprintf(stderr, "out == %.*s instead of '1 4'\n", (int)out_length, out);
+ exit(1);
+ }
+
+ }
+
+ for(i = 0; i < 3; i++) {
+ int longstr_length = strlen(longstr);
+ if(unac_string("ISO-8859-1", longstr, longstr_length, &out, &out_length) == -1) {
+ perror("unac_string longstr failed");
+ exit(1);
+ }
+ if(out_length != longstr_length) {
+ fprintf(stderr, "out_length == %d instead of %d\n", (int)out_length, longstr_length);
+ exit(1);
+ }
+ if(memcmp(longstr_expected, out, out_length)) {
+ fprintf(stderr, "out == %.*s instead of ete\n", (int)out_length, out);
+ exit(1);
+ }
+
+ }
+
+ free(out);
+
+ return 0;
+}