LCOV - code coverage report
Current view: top level - source3/rpc_server/mdssvc - es_mapping.c (source / functions) Hit Total Coverage
Test: coverage report for master 2f515e9b Lines: 66 80 82.5 %
Date: 2024-04-21 15:09:00 Functions: 4 4 100.0 %

          Line data    Source code
       1             : /*
       2             :    Unix SMB/CIFS implementation.
       3             :    Main metadata server / Spotlight routines / Elasticsearch backend
       4             : 
       5             :    Copyright (C) Ralph Boehme                   2019
       6             : 
       7             :    This program is free software; you can redistribute it and/or modify
       8             :    it under the terms of the GNU General Public License as published by
       9             :    the Free Software Foundation; either version 3 of the License, or
      10             :    (at your option) any later version.
      11             : 
      12             :    This program is distributed in the hope that it will be useful,
      13             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15             :    GNU General Public License for more details.
      16             : 
      17             :    You should have received a copy of the GNU General Public License
      18             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      19             : */
      20             : 
      21             : #include "includes.h"
      22             : #include "es_mapping.h"
      23             : 
      24             : /*
      25             :  * Escaping of special characters in Lucene query syntax across HTTP and JSON
      26             :  * ==========================================================================
      27             :  *
      28             :  * These characters in Lucene queries need escaping [1]:
      29             :  *
      30             :  *   + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
      31             :  *
      32             :  * Additionally JSON requires escaping of:
      33             :  *
      34             :  *   " \
      35             :  *
      36             :  * Characters already escaped by the mdssvc client:
      37             :  *
      38             :  *   * " \
      39             :  *
      40             :  * The following table contains the resulting escaped strings, beginning with the
      41             :  * search term, the corresponding Spotlight query and the final string that gets
      42             :  * sent to the target Elasticsearch server.
      43             :  *
      44             :  * string | mdfind | http
      45             :  * -------+--------+------
      46             :  * x!x     x!x      x\\!x
      47             :  * x&x     x&x      x\\&x
      48             :  * x+x     x+x      x\\+x
      49             :  * x-x     x-x      x\\-x
      50             :  * x.x     x.x      x\\.x
      51             :  * x<x     x<x      x\\<x
      52             :  * x>x     x>x      x\\>x
      53             :  * x=x     x=x      x\\=x
      54             :  * x?x     x?x      x\\?x
      55             :  * x[x     x[x      x\\[x
      56             :  * x]x     x]x      x\\]x
      57             :  * x^x     x^x      x\\^x
      58             :  * x{x     x{x      x\\{x
      59             :  * x}x     x}x      x\\}x
      60             :  * x|x     x|x      x\\|x
      61             :  * x x     x x      x\\ x
      62             :  * x*x     x\*x     x\\*x
      63             :  * x\x     x\\x     x\\\\x
      64             :  * x"x     x\"x     x\\\"x
      65             :  *
      66             :  * Special cases:
      67             :  * x y    It's not possible to search for terms including spaces; Spotlight
      68             :  *        will search for x OR y instead.
      69             :  * x(x    Search for terms including ( and ) does not work with Spotlight.
      70             :  *
      71             :  * [1] <http://lucene.apache.org/core/8_2_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters>
      72             :  */
      73             : 
      74         354 : static char *escape_str(TALLOC_CTX *mem_ctx,
      75             :                         const char *in,
      76             :                         const char *escape_list,
      77             :                         const char *escape_exceptions)
      78             : {
      79         354 :         char *out = NULL;
      80         346 :         size_t in_len;
      81         346 :         size_t new_len;
      82         346 :         size_t in_pos;
      83         354 :         size_t out_pos = 0;
      84             : 
      85         354 :         if (in == NULL) {
      86           0 :                 return NULL;
      87             :         }
      88         354 :         in_len = strlen(in);
      89             : 
      90         354 :         if (escape_list == NULL) {
      91           0 :                 escape_list = "";
      92             :         }
      93         354 :         if (escape_exceptions == NULL) {
      94         301 :                 escape_exceptions = "";
      95             :         }
      96             : 
      97             :         /*
      98             :          * Allocate enough space for the worst case: every char needs to be
      99             :          * escaped and requires an additional char.
     100             :          */
     101         354 :         new_len = (in_len * 2) + 1;
     102         354 :         if (new_len <= in_len) {
     103           0 :                 return NULL;
     104             :         }
     105             : 
     106         354 :         out = talloc_zero_array(mem_ctx, char, new_len);
     107         354 :         if (out == NULL) {
     108           0 :                 return NULL;
     109             :         }
     110             : 
     111        5052 :         for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) {
     112        4698 :                 if (strchr(escape_list, in[in_pos]) != NULL &&
     113         276 :                     strchr(escape_exceptions, in[in_pos]) == NULL)
     114             :                 {
     115         260 :                         out[out_pos++] = '\\';
     116             :                 }
     117        4698 :                 out[out_pos] = in[in_pos];
     118             :         }
     119             : 
     120           8 :         return out;
     121             : }
     122             : 
     123         177 : char *es_escape_str(TALLOC_CTX *mem_ctx,
     124             :                     const char *in,
     125             :                     const char *exceptions)
     126             : {
     127         177 :         const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ ";
     128         177 :         const char *json_escape_list = "\\\"";
     129         177 :         char *lucene_escaped = NULL;
     130         177 :         char *full_escaped = NULL;
     131             : 
     132         177 :         lucene_escaped =  escape_str(mem_ctx,
     133             :                                      in,
     134             :                                      lucene_escape_list,
     135             :                                      exceptions);
     136         177 :         if (lucene_escaped == NULL) {
     137           0 :                 return NULL;
     138             :         }
     139             : 
     140         177 :         full_escaped = escape_str(mem_ctx,
     141             :                                   lucene_escaped,
     142             :                                   json_escape_list,
     143             :                                   NULL);
     144         177 :         TALLOC_FREE(lucene_escaped);
     145         177 :         return full_escaped;
     146             : }
     147             : 
     148         110 : struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
     149             :                                    json_t *kmd_map,
     150             :                                    const char *sl_attr)
     151             : {
     152         110 :         struct es_attr_map *es_map = NULL;
     153         110 :         const char *typestr = NULL;
     154         110 :         enum ssm_type type = ssmt_bool;
     155         110 :         char *es_attr = NULL;
     156         108 :         size_t i;
     157         108 :         int cmp;
     158         108 :         int ret;
     159             : 
     160         108 :         static struct {
     161             :                 const char *typestr;
     162             :                 enum ssm_type typeval;
     163             :         } ssmt_type_map[] = {
     164             :                 {"bool", ssmt_bool},
     165             :                 {"num", ssmt_num},
     166             :                 {"str", ssmt_str},
     167             :                 {"fts", ssmt_fts},
     168             :                 {"date", ssmt_date},
     169             :                 {"type", ssmt_type},
     170             :         };
     171             : 
     172         110 :         if (sl_attr == NULL) {
     173           0 :                 return NULL;
     174             :         }
     175             : 
     176         110 :         ret = json_unpack(kmd_map,
     177             :                           "{s: {s: s}}",
     178             :                           sl_attr,
     179             :                           "type",
     180             :                           &typestr);
     181         110 :         if (ret != 0) {
     182           8 :                 DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr);
     183           8 :                 return NULL;
     184             :         }
     185             : 
     186         102 :         ret = json_unpack(kmd_map,
     187             :                           "{s: {s: s}}",
     188             :                           sl_attr,
     189             :                           "attribute",
     190             :                           &es_attr);
     191         102 :         if (ret != 0) {
     192           0 :                 DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr);
     193           0 :                 return NULL;
     194             :         }
     195             : 
     196         376 :         for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) {
     197         376 :                 cmp = strcmp(typestr, ssmt_type_map[i].typestr);
     198         376 :                 if (cmp == 0) {
     199         102 :                         type = ssmt_type_map[i].typeval;
     200         102 :                         break;
     201             :                 }
     202             :         }
     203         102 :         if (i == ARRAY_SIZE(ssmt_type_map)) {
     204           0 :                 return NULL;
     205             :         }
     206             : 
     207         102 :         es_map = talloc_zero(mem_ctx, struct es_attr_map);
     208         102 :         if (es_map == NULL) {
     209           0 :                 return NULL;
     210             :         }
     211         102 :         es_map->type = type;
     212             : 
     213         102 :         es_map->name = es_escape_str(es_map, es_attr, NULL);
     214         102 :         if (es_map->name == NULL) {
     215           0 :                 TALLOC_FREE(es_map);
     216           0 :                 return false;
     217             :         }
     218             : 
     219           2 :         return es_map;
     220             : }
     221             : 
     222          13 : const char *es_map_sl_type(json_t *mime_map,
     223             :                            const char *sl_type)
     224             : {
     225          13 :         const char *mime_type = NULL;
     226          13 :         int ret;
     227             : 
     228          13 :         if (sl_type == NULL) {
     229           0 :                 return NULL;
     230             :         }
     231             : 
     232          13 :         ret = json_unpack(mime_map,
     233             :                           "{s: s}",
     234             :                           sl_type,
     235             :                           &mime_type);
     236          13 :         if (ret != 0) {
     237           0 :                 return NULL;
     238             :         }
     239             : 
     240           8 :         return mime_type;
     241             : }

Generated by: LCOV version 1.14