diff --git a/.gitignore b/.gitignore
index 7f4a73c..d9e8ed4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,8 +2,14 @@
 r?*/
 V?*
 
+# Build files
+/wordfreq
+cmake*
+lib*.h
+
 # tmp files
 *.DMP
 
 # OS caching
 .DS_Store
+.idea
diff --git a/Dockerfile b/Dockerfile
index 1e724c6..87ea1cc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,10 @@
-FROM yottadb/yottadb:r1.20
+FROM yottadb/yottadb:r1.23
 LABEL maintainer="Bart Moelans "
 
 ##################################
 # Define global environment vars #
 ##################################
-ENV SRC_DIR=/data \
+ENV SRC_DIR=/root \
     ydb_dist=/opt/yottadb/current
 
 
@@ -14,11 +14,14 @@ ENV SRC_DIR=/data \
 #################################################
 
 RUN apt-get -y update \
-    && apt-get -y install vim \
+    && apt-get -y install vim make gcc gdb\
     && ln -s ${ydb_dist}/ydb /usr/local/bin/M \
     && echo "source ${ydb_dist}/ydb_env_set" >> /root/.bashrc \
     && echo "export ydb_gbldir=/data/g/sandbox.gld" >> /root/.bashrc
 
+WORKDIR ${SRC_DIR}/cmumps
+
 # by default yottadb entrypoint is ydb itself
-ENTRYPOINT ["/bin/bash"]
-WORKDIR ${SRC_DIR}
+ENTRYPOINT []
+
+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..59d1a4c
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+CC=gcc
+CFLAGS=-g -pedantic -Wall -Wno-unknown-pragmas -std=c99
+YOTTA_INCLUDES=-I $(ydb_dist) -L $(ydb_dist)
+
+wordfreq.so: wordfreq.c
+	$(CC) $(CFLAGS) $(YOTTA_INCLUDES) -shared -fPIC -o $@ $^ -lyottadb
+
+wordfreq: wordfreq.c
+	$(CC) $(CFLAGS) $(YOTTA_INCLUDES) -DTEST -o $@ $^ -lyottadb
\ No newline at end of file
diff --git a/data/r/testc.m b/data/r/testc.m
new file mode 100644
index 0000000..1443f6b
--- /dev/null
+++ b/data/r/testc.m
@@ -0,0 +1,6 @@
+; make wordfreq.so && mumps -run "^testc"
+WFREQ
+ n length,word
+ s word="Antwerpen"
+ d &wordfreq.len(.length,word)
+ w !,length
diff --git a/data/r/wfreq.m b/data/r/wfreq.m
new file mode 100644
index 0000000..847884f
--- /dev/null
+++ b/data/r/wfreq.m
@@ -0,0 +1,8 @@
+; make wordfreq.so && mumps -run "^wfreq"
+WFREQ
+ k ^Test,^index
+ s ^Test("ab")=1
+ s ^Test("ba")=2
+ w $&wordfreq.windex("^Test","^index")
+ w !
+ zwr ^index
diff --git a/docker-compose.yml b/docker-compose.yml
index e5cf669..c3036ad 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,14 @@ version: "3"
 services:
   mumps:
     build: .
+    environment:
+      - GTMXC_wordfreq=/root/cmumps/wordfreq.xc
     ports:
       - 8080:8080
     volumes:
+      - .:/root/cmumps
       - ./data/:/data
+    security_opt:
+      - seccomp:unconfined
+    cap_add:
+      - SYS_PTRACE
diff --git a/wordfreq.c b/wordfreq.c
new file mode 100644
index 0000000..4af22b0
--- /dev/null
+++ b/wordfreq.c
@@ -0,0 +1,192 @@
+/****************************************************************
+ *                                                              *
+ * Copyright (c) 2017-2018 YottaDB LLC. and/or its subsidiaries.*
+ * All rights reserved.                                         *
+ *                                                              *
+ *      This source code contains the intellectual property     *
+ *      of its copyright holder(s), and is made available       *
+ *      under a license. If you do not know the terms of        *
+ *      the license, please stop and do not read further.       *
+ *                                                              *
+ ****************************************************************/
+
+#include "libyottadb.h" /* for ydb_* macros/prototypes/typedefs */
+
+#include <sys/types.h> /* needed for "getpid" */
+#include <unistd.h> /* needed for "getpid" */
+#include <stdio.h> /* for "fgets", "printf" */
+#include <string.h> /* for "strtok" */
+#include <ctype.h> /* for "tolower" */
+
+/**
+ * Make BUFFERP describe the bytes of STRING without copying them.
+ *
+ * The buffer aliases the string's storage, so it is only valid for as long as
+ * the string is. This is a function rather than a macro so that the arguments
+ * are type-checked and evaluated exactly once.
+ */
+static void string_to_buffer(const ydb_string_t *string, ydb_buffer_t *bufferp) {
+    bufferp->buf_addr = string->address;
+    bufferp->len_used = bufferp->len_alloc = (unsigned int) string->length;
+}
+
+/**
+ * Test function for the M-to-C call interface: reports the length of a string.
+ *
+ * @param argc   argument count supplied by the caller (unused)
+ * @param length output, receives the length of s in bytes
+ * @param s      input string
+ * @return YDB_OK
+ */
+ydb_status_t len(const int argc, ydb_long_t *length, const ydb_string_t *s) {
+    (void) argc;
+    *length = (ydb_long_t) s->length;
+    return YDB_OK;
+}
+
+/**
+ * Read words from stdin and count how often each one occurs.
+ *
+ * Words are separated by single spaces and lower-cased before counting. For
+ * each word the node var_name(word) is incremented, which in M would read
+ * "set value=$incr(var_name(word))". When var_name names a global (leading
+ * '^') its existing tree is deleted first.
+ *
+ * @param var_name name of the variable that receives the counts
+ */
+void read_input(ydb_string_t *var_name) {
+    ydb_buffer_t words, word, value;
+    char valuebuff[64], linebuff[1024], *ptr, *end;
+    int status;
+
+    string_to_buffer(var_name, &words);
+    if (var_name->length > 0 && '^' == var_name->address[0]) {
+        status = ydb_delete_s(&words, 0, NULL, YDB_DEL_TREE);
+        YDB_ASSERT(YDB_OK == status);
+    }
+    value.buf_addr = valuebuff;
+    value.len_used = 0;
+    value.len_alloc = sizeof(valuebuff);
+    while (NULL != fgets(linebuff, sizeof(linebuff), stdin)) {
+        for (ptr = strtok(linebuff, " "); NULL != ptr; ptr = strtok(NULL, " ")) {
+            /* convert word pointed to by "ptr" to lowercase */
+            for (end = ptr; *end; end++)
+                *end = (char) tolower((unsigned char) *end);
+            word.buf_addr = ptr;
+            word.len_used = (unsigned int) (end - ptr);
+            if ('\n' == end[-1])
+                word.len_used--; /* - 1 to remove trailing newline */
+            if (0 == word.len_used)
+                continue; /* the token was nothing but the newline */
+            word.len_alloc = word.len_used;
+            status = ydb_incr_s(&words, 1, &word, NULL, &value); /* M line : set value=$incr(words(word)) */
+            YDB_ASSERT(YDB_OK == status);
+        }
+    }
+}
+
+/**
+ * Build a frequency index from a tree of word counts.
+ *
+ * For every node var_name(word)=count this sets index_name(count,word)="".
+ * wordfreq.xc exposes this function to M under the name "windex".
+ *
+ * @param argc       argument count supplied by the caller (unused)
+ * @param var_name   name of the variable holding the counts
+ * @param index_name name of the variable that receives the index
+ * @return YDB_OK on success, otherwise the status of the ydb_*_s call that
+ *         failed, so an M caller gets an error instead of an aborted process
+ */
+ydb_status_t create_word_index(const int argc, ydb_string_t *var_name, ydb_string_t *index_name) {
+    ydb_buffer_t word, count, index, words, null;
+    ydb_buffer_t index_subscr[2];
+    char wordbuff[64], countbuff[64];
+    int status;
+
+    (void) argc;
+    string_to_buffer(var_name, &words);
+    string_to_buffer(index_name, &index);
+    word.buf_addr = wordbuff; /* M line : set word="" */
+    word.len_used = 0;
+    word.len_alloc = sizeof(wordbuff);
+    count.buf_addr = countbuff;
+    count.len_used = 0;
+    count.len_alloc = sizeof(countbuff);
+    null.buf_addr = NULL;
+    null.len_used = 0;
+    null.len_alloc = 0;
+    for (;;) {
+        status = ydb_subscript_next_s(&words, 1, &word, &word); /* M line : set word=$order(words(word)) */
+        if (YDB_ERR_NODEEND == status)
+            return YDB_OK;
+        if (YDB_OK != status)
+            return status;
+        if (0 == word.len_used)
+            return YDB_OK; /* an empty subscript also means "no more nodes" for $order */
+        status = ydb_get_s(&words, 1, &word, &count); /* M line : set count=words(word) */
+        if (YDB_OK != status)
+            return status;
+        index_subscr[0] = count;
+        index_subscr[1] = word;
+        status = ydb_set_s(&index, 2, index_subscr, &null); /* M line : set index(count,word)="" */
+        if (YDB_OK != status)
+            return status;
+    }
+}
+
+#ifdef TEST
+/**
+ * Stand-alone driver: count and report word frequencies for
+ * http://www.cs.duke.edu/csed/code/code2007/
+ *
+ * Reads text from stdin and prints "count<TAB>word" lines, visiting the counts
+ * in reverse subscript order. Only compiled when TEST is defined, which
+ * "make wordfreq" does; "make wordfreq.so" therefore builds no main().
+ */
+int main(void) {
+    /* Change these to "^words" and "^index" to keep the data in globals. */
+    char words_name[] = "words";
+    char index_name_buf[] = "index";
+    ydb_string_t var_name, index_name;
+    ydb_buffer_t count, word, index;
+    ydb_buffer_t index_subscr[2];
+    char countbuff[64], wordbuff[64];
+    int status;
+
+    var_name.address = words_name;
+    var_name.length = sizeof(words_name) - 1;
+    index_name.address = index_name_buf;
+    index_name.length = sizeof(index_name_buf) - 1;
+
+    read_input(&var_name);
+    status = create_word_index(2, &var_name, &index_name);
+    YDB_ASSERT(YDB_OK == status);
+
+    string_to_buffer(&index_name, &index);
+    count.buf_addr = countbuff; /* M line : set count="" */
+    count.len_used = 0;
+    count.len_alloc = sizeof(countbuff);
+    for (;;) {
+        status = ydb_subscript_previous_s(&index, 1, &count, &count); /* M line : set count=$order(index(count),-1) */
+        if (YDB_ERR_NODEEND == status)
+            break;
+        YDB_ASSERT(YDB_OK == status);
+        YDB_ASSERT(0 != count.len_used);
+        word.buf_addr = wordbuff; /* M line : set word="" */
+        word.len_used = 0;
+        word.len_alloc = sizeof(wordbuff);
+        index_subscr[0] = count;
+        for (;;) {
+            index_subscr[1] = word;
+            status = ydb_subscript_next_s(&index, 2, index_subscr, &word); /* M line : set word=$order(index(count,word)) */
+            if (YDB_ERR_NODEEND == status)
+                break;
+            YDB_ASSERT(YDB_OK == status);
+            YDB_ASSERT(0 != word.len_used);
+            /* Buffers are not NUL-terminated, so print exactly len_used bytes. */
+            printf("%.*s\t%.*s\n", (int) count.len_used, count.buf_addr, (int) word.len_used, word.buf_addr);
+        }
+    }
+    return 0;
+}
+#endif /* TEST */
diff --git a/wordfreq.so b/wordfreq.so
new file mode 100755
index 0000000..28b18d3
Binary files /dev/null and b/wordfreq.so differ
diff --git a/wordfreq.xc b/wordfreq.xc
new file mode 100644
index 0000000..4ed5663
--- /dev/null
+++ b/wordfreq.xc
@@ -0,0 +1,3 @@
+/root/cmumps/wordfreq.so
+len: ydb_status_t len(O:ydb_long_t *, I:ydb_string_t *)
+windex: ydb_status_t create_word_index(I:ydb_string_t *, I:ydb_string_t *)
diff --git a/wordfreq_input.txt b/wordfreq_input.txt
new file mode 100644
index 0000000..e5d3534
--- /dev/null
+++ b/wordfreq_input.txt
@@ -0,0 +1 @@
+Lorem ipsum
\ No newline at end of file