Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Main metadata server / Spotlight routines / Elasticsearch backend
4 :
5 : Copyright (C) Ralph Boehme 2019
6 :
7 : This program is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3 of the License, or
10 : (at your option) any later version.
11 :
12 : This program is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with this program. If not, see <http://www.gnu.org/licenses/>.
19 : */
20 :
21 : #include "includes.h"
22 : #include "es_mapping.h"
23 :
24 : /*
25 : * Escaping of special characters in Lucene query syntax across HTTP and JSON
26 : * ==========================================================================
27 : *
28 : * These characters in Lucene queries need escaping [1]:
29 : *
30 : * + - & | ! ( ) { } [ ] ^ " ~ * ? : \ /
31 : *
32 : * Additionally JSON requires escaping of:
33 : *
34 : * " \
35 : *
36 : * Characters already escaped by the mdssvc client:
37 : *
38 : * * " \
39 : *
40 : * The following table contains the resulting escaped strings, beginning with the
41 : * search term, the corresponding Spotlight query and the final string that gets
42 : * sent to the target Elasticsearch server.
43 : *
44 : * string | mdfind | http
45 : * -------+--------+------
46 : * x!x x!x x\\!x
47 : * x&x x&x x\\&x
48 : * x+x x+x x\\+x
49 : * x-x x-x x\\-x
50 : * x.x x.x x\\.x
51 : * x<x x<x x\\<x
52 : * x>x x>x x\\>x
53 : * x=x x=x x\\=x
54 : * x?x x?x x\\?x
55 : * x[x x[x x\\[x
56 : * x]x x]x x\\]x
57 : * x^x x^x x\\^x
58 : * x{x x{x x\\{x
59 : * x}x x}x x\\}x
60 : * x|x x|x x\\|x
61 : * x x x x x\\ x
62 : * x*x x\*x x\\*x
63 : * x\x x\\x x\\\\x
64 : * x"x x\"x x\\\"x
65 : *
66 : * Special cases:
67 : * x y It's not possible to search for terms including spaces, Spotlight
68 : * will search for x OR y.
69 : * x(x Search for terms including ( and ) does not work with Spotlight.
70 : *
71 : * [1] <http://lucene.apache.org/core/8_2_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters>
72 : */
73 :
74 354 : static char *escape_str(TALLOC_CTX *mem_ctx,
75 : const char *in,
76 : const char *escape_list,
77 : const char *escape_exceptions)
78 : {
79 354 : char *out = NULL;
80 346 : size_t in_len;
81 346 : size_t new_len;
82 346 : size_t in_pos;
83 354 : size_t out_pos = 0;
84 :
85 354 : if (in == NULL) {
86 0 : return NULL;
87 : }
88 354 : in_len = strlen(in);
89 :
90 354 : if (escape_list == NULL) {
91 0 : escape_list = "";
92 : }
93 354 : if (escape_exceptions == NULL) {
94 301 : escape_exceptions = "";
95 : }
96 :
97 : /*
98 : * Allocate enough space for the worst case: every char needs to be
99 : * escaped and requires an additional char.
100 : */
101 354 : new_len = (in_len * 2) + 1;
102 354 : if (new_len <= in_len) {
103 0 : return NULL;
104 : }
105 :
106 354 : out = talloc_zero_array(mem_ctx, char, new_len);
107 354 : if (out == NULL) {
108 0 : return NULL;
109 : }
110 :
111 5052 : for (in_pos = 0, out_pos = 0; in_pos < in_len; in_pos++, out_pos++) {
112 4698 : if (strchr(escape_list, in[in_pos]) != NULL &&
113 276 : strchr(escape_exceptions, in[in_pos]) == NULL)
114 : {
115 260 : out[out_pos++] = '\\';
116 : }
117 4698 : out[out_pos] = in[in_pos];
118 : }
119 :
120 8 : return out;
121 : }
122 :
123 177 : char *es_escape_str(TALLOC_CTX *mem_ctx,
124 : const char *in,
125 : const char *exceptions)
126 : {
127 177 : const char *lucene_escape_list = "+-&|!(){}[]^\"~*?:\\/ ";
128 177 : const char *json_escape_list = "\\\"";
129 177 : char *lucene_escaped = NULL;
130 177 : char *full_escaped = NULL;
131 :
132 177 : lucene_escaped = escape_str(mem_ctx,
133 : in,
134 : lucene_escape_list,
135 : exceptions);
136 177 : if (lucene_escaped == NULL) {
137 0 : return NULL;
138 : }
139 :
140 177 : full_escaped = escape_str(mem_ctx,
141 : lucene_escaped,
142 : json_escape_list,
143 : NULL);
144 177 : TALLOC_FREE(lucene_escaped);
145 177 : return full_escaped;
146 : }
147 :
148 110 : struct es_attr_map *es_map_sl_attr(TALLOC_CTX *mem_ctx,
149 : json_t *kmd_map,
150 : const char *sl_attr)
151 : {
152 110 : struct es_attr_map *es_map = NULL;
153 110 : const char *typestr = NULL;
154 110 : enum ssm_type type = ssmt_bool;
155 110 : char *es_attr = NULL;
156 108 : size_t i;
157 108 : int cmp;
158 108 : int ret;
159 :
160 108 : static struct {
161 : const char *typestr;
162 : enum ssm_type typeval;
163 : } ssmt_type_map[] = {
164 : {"bool", ssmt_bool},
165 : {"num", ssmt_num},
166 : {"str", ssmt_str},
167 : {"fts", ssmt_fts},
168 : {"date", ssmt_date},
169 : {"type", ssmt_type},
170 : };
171 :
172 110 : if (sl_attr == NULL) {
173 0 : return NULL;
174 : }
175 :
176 110 : ret = json_unpack(kmd_map,
177 : "{s: {s: s}}",
178 : sl_attr,
179 : "type",
180 : &typestr);
181 110 : if (ret != 0) {
182 8 : DBG_DEBUG("No JSON type mapping for [%s]\n", sl_attr);
183 8 : return NULL;
184 : }
185 :
186 102 : ret = json_unpack(kmd_map,
187 : "{s: {s: s}}",
188 : sl_attr,
189 : "attribute",
190 : &es_attr);
191 102 : if (ret != 0) {
192 0 : DBG_ERR("No JSON attribute mapping for [%s]\n", sl_attr);
193 0 : return NULL;
194 : }
195 :
196 376 : for (i = 0; i < ARRAY_SIZE(ssmt_type_map); i++) {
197 376 : cmp = strcmp(typestr, ssmt_type_map[i].typestr);
198 376 : if (cmp == 0) {
199 102 : type = ssmt_type_map[i].typeval;
200 102 : break;
201 : }
202 : }
203 102 : if (i == ARRAY_SIZE(ssmt_type_map)) {
204 0 : return NULL;
205 : }
206 :
207 102 : es_map = talloc_zero(mem_ctx, struct es_attr_map);
208 102 : if (es_map == NULL) {
209 0 : return NULL;
210 : }
211 102 : es_map->type = type;
212 :
213 102 : es_map->name = es_escape_str(es_map, es_attr, NULL);
214 102 : if (es_map->name == NULL) {
215 0 : TALLOC_FREE(es_map);
216 0 : return false;
217 : }
218 :
219 2 : return es_map;
220 : }
221 :
222 13 : const char *es_map_sl_type(json_t *mime_map,
223 : const char *sl_type)
224 : {
225 13 : const char *mime_type = NULL;
226 13 : int ret;
227 :
228 13 : if (sl_type == NULL) {
229 0 : return NULL;
230 : }
231 :
232 13 : ret = json_unpack(mime_map,
233 : "{s: s}",
234 : sl_type,
235 : &mime_type);
236 13 : if (ret != 0) {
237 0 : return NULL;
238 : }
239 :
240 8 : return mime_type;
241 : }
|