# Patch summary (text before the first "diff --git" header is skipped by git apply / patch).
#
# Dockerfile:
#   - switches the DAEMON_URL and MANTICORE_CONFIG ENV instructions to the "ENV key value" form;
#   - adds manticore-lemmatizer-uk to the packages installed from the dev repo;
#   - installs Python 3.9 (deadsnakes PPA), pip, pymorphy2 and pymorphy2-dicts-uk;
#   - sets PYTHONWARNINGS to silence UserWarning from pymorphy2.analyzer.
# clt_tests/tests/test-ukrainian-morphology.rec:
#   - new test that creates an RT table with morphology = 'lemmatize_uk' and checks
#     CALL KEYWORDS normalization and a MATCH() lookup by lemma.
#
# NOTE(review): this patch was recovered from a whitespace-collapsed copy. Leading
# indentation of Dockerfile context lines and the padding inside the expected MySQL
# tables were reconstructed (padding derived from the border widths) - verify against
# the target tree, or apply with `git apply --ignore-whitespace`. The `index` blob ids
# predate the two review fixes below and are informational only.
# Review fixes: the startup-failure message now matches `timeout 5`; get-pip.py is
# fetched with `curl -fsSL` so an HTTP error is not piped into the interpreter.
diff --git a/Dockerfile b/Dockerfile
index fbfcc16..1e593c8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,7 +10,7 @@ RUN groupadd -r manticore && useradd -r -g manticore manticore
 
 ENV GOSU_VERSION 1.11
 
-ENV DAEMON_URL=${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server_13.13.0-25100704-e5465fe44__ARCH_64.deb \
+ENV DAEMON_URL ${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server_13.13.0-25100704-e5465fe44__ARCH_64.deb \
 https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-server-core_13.13.0-25100704-e5465fe44__ARCH_64.deb \
 https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-backup_1.9.6+25070510-5247d066_all.deb \
 https://repo.manticoresearch.com/repository/manticoresearch_jammy/dists/jammy/main/binary-_ARCH_64/manticore-buddy_3.35.1+25090418-41d9811f_all.deb \
@@ -70,7 +70,7 @@ RUN if [ "$TARGETPLATFORM" = "linux/arm64" ] ; then export ARCH="arm"; else expo
       && wget -q https://repo.manticoresearch.com/manticore-dev-repo.noarch.deb \
       && dpkg -i manticore-dev-repo.noarch.deb \
       && apt-key adv --fetch-keys 'https://repo.manticoresearch.com/GPG-KEY-manticore' && apt-get -y update \
-      && apt-get -y install manticore manticore-extra manticore-load manticore-language-packs;\
+      && apt-get -y install manticore manticore-extra manticore-load manticore-lemmatizer-uk manticore-language-packs;\
     elif [ ! -z "$DAEMON_URL" ]; then \
       echo "2nd step of building release image for linux/${ARCH}64 architecture" \
       && echo "ARCH: ${ARCH}" \
@@ -98,6 +98,15 @@ RUN if [ -d "/packages/" ]; then apt -y install /packages/*deb; fi \
     && tar -xf /tmp/ru.pak.tgz -C /usr/share/manticore/ \
     && rm /tmp/*.pak.tgz
 
+# Installing the Ukrainian Lemmatizer using the working Jammy approach
+RUN apt-get update && apt-get install -y software-properties-common curl && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y python3.9 python3.9-dev python3.9-distutils && \
+    curl -fsSL https://bootstrap.pypa.io/get-pip.py | python3.9 && \
+    python3.9 -m pip install pymorphy2 pymorphy2-dicts-uk && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
 COPY manticore.conf.sh /etc/manticoresearch/
 RUN sed -i '/log = \/var\/log\/manticore\/searchd.log/d;/query_log = \/var\/log\/manticore\/query.log/d' /etc/manticoresearch/manticore.conf
 RUN md5sum /etc/manticoresearch/manticore.conf | awk '{print $1}' > /manticore.conf.md5
@@ -122,7 +131,8 @@ EXPOSE 9308
 EXPOSE 9312
 ENV LANG C.UTF-8
 ENV LC_ALL C.UTF-8
-ENV MANTICORE_CONFIG="/etc/manticoresearch/manticore.conf.sh|/etc/manticoresearch/manticore.conf"
+ENV PYTHONWARNINGS "ignore::UserWarning:pymorphy2.analyzer"
+ENV MANTICORE_CONFIG "/etc/manticoresearch/manticore.conf.sh|/etc/manticoresearch/manticore.conf"
 CMD ["searchd", "-c", "/etc/manticoresearch/manticore.conf.sh", "--nodetach"]
 
 # How to build manually:
diff --git a/clt_tests/tests/test-ukrainian-morphology.rec b/clt_tests/tests/test-ukrainian-morphology.rec
new file mode 100644
index 0000000..948fc50
--- /dev/null
+++ b/clt_tests/tests/test-ukrainian-morphology.rec
@@ -0,0 +1,43 @@
+––– block: ./base/init –––
+––– input –––
+docker run -d --name manticore manticoresoftware/manticore:current
+––– output –––
+#!/[0-9a-z]+/!#
+––– input –––
+if timeout 5 grep -qm1 'accepting connections' <(docker logs -f manticore); then echo 'accepting connections'; else echo 'Manticore failed to start properly in 5 seconds'; fi
+––– output –––
+accepting connections
+––– input –––
+docker exec manticore mysql -h0 -P9306 -e "CREATE TABLE test_uk (id bigint, content text) rt_mem_limit = '256M' morphology = 'lemmatize_uk' charset_table = '0..9, A..Z->a..z, _, a..z, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+0454, U+0456, U+0457, U+0491';"
+––– output –––
+––– input –––
+docker exec manticore mysql -h0 -P9306 -e "INSERT INTO test_uk (id, content) VALUES (1, 'бігаю'), (2, 'муркотіти'), (3, 'їжа');"
+––– output –––
+––– input –––
+docker exec manticore mysql -h0 -P9306 -e "CALL KEYWORDS('бігаю', 'test_uk');"
+––– output –––
++------+------------+--------------+
+| qpos | tokenized  | normalized   |
++------+------------+--------------+
+| 1    | бігаю | бігати |
++------+------------+--------------+
+––– input –––
+docker exec manticore mysql -h0 -P9306 -e "CALL KEYWORDS('їжа', 'test_uk');"
+––– output –––
++------+-----------+------------+
+| qpos | tokenized | normalized |
++------+-----------+------------+
+| 1    | їжа    | їжа     |
++------+-----------+------------+
+––– input –––
+docker exec manticore mysql -h0 -P9306 -e "SELECT * FROM test_uk WHERE MATCH('бігати');"
+––– output –––
++------+------------+
+| id   | content    |
++------+------------+
+|    1 | бігаю |
++------+------------+
+––– input –––
+docker stop manticore
+––– output –––
+#!/[0-9a-z]+/!#