forked from mirrors/gecko-dev
		
	
		
			
				
	
	
		
			369 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			369 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
 | |
|  * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
 | |
|  * This Source Code Form is subject to the terms of the Mozilla Public
 | |
|  * License, v. 2.0. If a copy of the MPL was not distributed with this
 | |
|  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 | |
| 
 | |
| #include "mozilla/ArrayUtils.h"
 | |
| 
 | |
| #include "mozStorageSQLFunctions.h"
 | |
| #include "nsTArray.h"
 | |
| #include "nsUnicharUtils.h"
 | |
| #include <algorithm>
 | |
| #include "sqlite3.h"
 | |
| 
 | |
| namespace mozilla {
 | |
| namespace storage {
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| //// Local Helper Functions
 | |
| 
 | |
| namespace {
 | |
| 
 | |
| /**
 | |
|  * Performs the LIKE comparison of a string against a pattern.  For more detail
 | |
|  * see http://www.sqlite.org/lang_expr.html#like.
 | |
|  *
 | |
|  * @param aPatternItr
 | |
|  *        An iterator at the start of the pattern to check for.
 | |
|  * @param aPatternEnd
 | |
|  *        An iterator at the end of the pattern to check for.
 | |
|  * @param aStringItr
 | |
|  *        An iterator at the start of the string to check for the pattern.
 | |
|  * @param aStringEnd
 | |
|  *        An iterator at the end of the string to check for the pattern.
 | |
|  * @param aEscapeChar
 | |
|  *        The character to use for escaping symbols in the pattern.
 | |
|  * @return 1 if the pattern is found, 0 otherwise.
 | |
|  */
 | |
| int likeCompare(nsAString::const_iterator aPatternItr,
 | |
|                 nsAString::const_iterator aPatternEnd,
 | |
|                 nsAString::const_iterator aStringItr,
 | |
|                 nsAString::const_iterator aStringEnd, char16_t aEscapeChar) {
 | |
|   const char16_t MATCH_ALL('%');
 | |
|   const char16_t MATCH_ONE('_');
 | |
| 
 | |
|   bool lastWasEscape = false;
 | |
|   while (aPatternItr != aPatternEnd) {
 | |
|     /**
 | |
|      * What we do in here is take a look at each character from the input
 | |
|      * pattern, and do something with it.  There are 4 possibilities:
 | |
|      * 1) character is an un-escaped match-all character
 | |
|      * 2) character is an un-escaped match-one character
 | |
|      * 3) character is an un-escaped escape character
 | |
|      * 4) character is not any of the above
 | |
|      */
 | |
|     if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
 | |
|       // CASE 1
 | |
|       /**
 | |
|        * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
 | |
|        * MATCH_ALL character.  For each MATCH_ONE character, skip one character
 | |
|        * in the pattern string.
 | |
|        */
 | |
|       while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
 | |
|         if (*aPatternItr == MATCH_ONE) {
 | |
|           // If we've hit the end of the string we are testing, no match
 | |
|           if (aStringItr == aStringEnd) return 0;
 | |
|           aStringItr++;
 | |
|         }
 | |
|         aPatternItr++;
 | |
|       }
 | |
| 
 | |
|       // If we've hit the end of the pattern string, match
 | |
|       if (aPatternItr == aPatternEnd) return 1;
 | |
| 
 | |
|       while (aStringItr != aStringEnd) {
 | |
|         if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
 | |
|                         aEscapeChar)) {
 | |
|           // we've hit a match, so indicate this
 | |
|           return 1;
 | |
|         }
 | |
|         aStringItr++;
 | |
|       }
 | |
| 
 | |
|       // No match
 | |
|       return 0;
 | |
|     }
 | |
|     if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
 | |
|       // CASE 2
 | |
|       if (aStringItr == aStringEnd) {
 | |
|         // If we've hit the end of the string we are testing, no match
 | |
|         return 0;
 | |
|       }
 | |
|       aStringItr++;
 | |
|       lastWasEscape = false;
 | |
|     } else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
 | |
|       // CASE 3
 | |
|       lastWasEscape = true;
 | |
|     } else {
 | |
|       // CASE 4
 | |
|       if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
 | |
|         // If we've hit a point where the strings don't match, there is no match
 | |
|         return 0;
 | |
|       }
 | |
|       aStringItr++;
 | |
|       lastWasEscape = false;
 | |
|     }
 | |
| 
 | |
|     aPatternItr++;
 | |
|   }
 | |
| 
 | |
|   return aStringItr == aStringEnd;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Compute the Levenshtein Edit Distance between two strings.
 | |
|  *
 | |
|  * @param aStringS
 | |
|  *        a string
 | |
|  * @param aStringT
 | |
|  *        another string
 | |
|  * @param _result
 | |
|  *        an outparam that will receive the edit distance between the arguments
 | |
|  * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
 | |
|  */
 | |
| int levenshteinDistance(const nsAString& aStringS, const nsAString& aStringT,
 | |
|                         int* _result) {
 | |
|   // Set the result to a non-sensical value in case we encounter an error.
 | |
|   *_result = -1;
 | |
| 
 | |
|   const uint32_t sLen = aStringS.Length();
 | |
|   const uint32_t tLen = aStringT.Length();
 | |
| 
 | |
|   if (sLen == 0) {
 | |
|     *_result = tLen;
 | |
|     return SQLITE_OK;
 | |
|   }
 | |
|   if (tLen == 0) {
 | |
|     *_result = sLen;
 | |
|     return SQLITE_OK;
 | |
|   }
 | |
| 
 | |
|   // Notionally, Levenshtein Distance is computed in a matrix.  If we
 | |
|   // assume s = "span" and t = "spam", the matrix would look like this:
 | |
|   //    s -->
 | |
|   //  t          s   p   a   n
 | |
|   //  |      0   1   2   3   4
 | |
|   //  V  s   1   *   *   *   *
 | |
|   //     p   2   *   *   *   *
 | |
|   //     a   3   *   *   *   *
 | |
|   //     m   4   *   *   *   *
 | |
|   //
 | |
|   // Note that the row width is sLen + 1 and the column height is tLen + 1,
 | |
|   // where sLen is the length of the string "s" and tLen is the length of "t".
 | |
|   // The first row and the first column are initialized as shown, and
 | |
|   // the algorithm computes the remaining cells row-by-row, and
 | |
|   // left-to-right within each row.  The computation only requires that
 | |
|   // we be able to see the current row and the previous one.
 | |
| 
 | |
|   // Allocate memory for two rows.
 | |
|   AutoTArray<int, nsAutoString::kStorageSize> row1;
 | |
|   AutoTArray<int, nsAutoString::kStorageSize> row2;
 | |
| 
 | |
|   // Declare the raw pointers that will actually be used to access the memory.
 | |
|   int* prevRow = row1.AppendElements(sLen + 1);
 | |
|   int* currRow = row2.AppendElements(sLen + 1);
 | |
| 
 | |
|   // Initialize the first row.
 | |
|   for (uint32_t i = 0; i <= sLen; i++) prevRow[i] = i;
 | |
| 
 | |
|   const char16_t* s = aStringS.BeginReading();
 | |
|   const char16_t* t = aStringT.BeginReading();
 | |
| 
 | |
|   // Compute the empty cells in the "matrix" row-by-row, starting with
 | |
|   // the second row.
 | |
|   for (uint32_t ti = 1; ti <= tLen; ti++) {
 | |
|     // Initialize the first cell in this row.
 | |
|     currRow[0] = ti;
 | |
| 
 | |
|     // Get the character from "t" that corresponds to this row.
 | |
|     const char16_t tch = t[ti - 1];
 | |
| 
 | |
|     // Compute the remaining cells in this row, left-to-right,
 | |
|     // starting at the second column (and first character of "s").
 | |
|     for (uint32_t si = 1; si <= sLen; si++) {
 | |
|       // Get the character from "s" that corresponds to this column,
 | |
|       // compare it to the t-character, and compute the "cost".
 | |
|       const char16_t sch = s[si - 1];
 | |
|       int cost = (sch == tch) ? 0 : 1;
 | |
| 
 | |
|       // ............ We want to calculate the value of cell "d" from
 | |
|       // ...ab....... the previously calculated (or initialized) cells
 | |
|       // ...cd....... "a", "b", and "c", where d = min(a', b', c').
 | |
|       // ............
 | |
|       int aPrime = prevRow[si - 1] + cost;
 | |
|       int bPrime = prevRow[si] + 1;
 | |
|       int cPrime = currRow[si - 1] + 1;
 | |
|       currRow[si] = std::min(aPrime, std::min(bPrime, cPrime));
 | |
|     }
 | |
| 
 | |
|     // Advance to the next row.  The current row becomes the previous
 | |
|     // row and we recycle the old previous row as the new current row.
 | |
|     // We don't need to re-initialize the new current row since we will
 | |
|     // rewrite all of its cells anyway.
 | |
|     int* oldPrevRow = prevRow;
 | |
|     prevRow = currRow;
 | |
|     currRow = oldPrevRow;
 | |
|   }
 | |
| 
 | |
|   // The final result is the value of the last cell in the last row.
 | |
|   // Note that that's now in the "previous" row, since we just swapped them.
 | |
|   *_result = prevRow[sLen];
 | |
|   return SQLITE_OK;
 | |
| }
 | |
| 
 | |
| // This struct is used only by registerFunctions below, but ISO C++98 forbids
 | |
| // instantiating a template dependent on a locally-defined type.  Boo-urns!
 | |
| struct Functions {
 | |
|   const char* zName;
 | |
|   int nArg;
 | |
|   int enc;
 | |
|   void* pContext;
 | |
|   void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
 | |
| };
 | |
| 
 | |
| }  // namespace
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| //// Exposed Functions
 | |
| 
 | |
| int registerFunctions(sqlite3* aDB) {
 | |
|   Functions functions[] = {
 | |
|       {"lower", 1, SQLITE_UTF16, 0, caseFunction},
 | |
|       {"lower", 1, SQLITE_UTF8, 0, caseFunction},
 | |
|       {"upper", 1, SQLITE_UTF16, (void*)1, caseFunction},
 | |
|       {"upper", 1, SQLITE_UTF8, (void*)1, caseFunction},
 | |
| 
 | |
|       {"like", 2, SQLITE_UTF16, 0, likeFunction},
 | |
|       {"like", 2, SQLITE_UTF8, 0, likeFunction},
 | |
|       {"like", 3, SQLITE_UTF16, 0, likeFunction},
 | |
|       {"like", 3, SQLITE_UTF8, 0, likeFunction},
 | |
| 
 | |
|       {"levenshteinDistance", 2, SQLITE_UTF16, 0, levenshteinDistanceFunction},
 | |
|       {"levenshteinDistance", 2, SQLITE_UTF8, 0, levenshteinDistanceFunction},
 | |
| 
 | |
|       {"utf16Length", 1, SQLITE_UTF16, 0, utf16LengthFunction},
 | |
|       {"utf16Length", 1, SQLITE_UTF8, 0, utf16LengthFunction},
 | |
|   };
 | |
| 
 | |
|   int rv = SQLITE_OK;
 | |
|   for (size_t i = 0; SQLITE_OK == rv && i < ArrayLength(functions); ++i) {
 | |
|     struct Functions* p = &functions[i];
 | |
|     rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
 | |
|                                    p->xFunc, nullptr, nullptr);
 | |
|   }
 | |
| 
 | |
|   return rv;
 | |
| }
 | |
| 
 | |
| ////////////////////////////////////////////////////////////////////////////////
 | |
| //// SQL Functions
 | |
| 
 | |
| void caseFunction(sqlite3_context* aCtx, int aArgc, sqlite3_value** aArgv) {
 | |
|   NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
 | |
| 
 | |
|   const char16_t* value =
 | |
|       static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
 | |
|   nsAutoString data(value,
 | |
|                     ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t));
 | |
|   bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
 | |
| 
 | |
|   if (toUpper)
 | |
|     ::ToUpperCase(data);
 | |
|   else
 | |
|     ::ToLowerCase(data);
 | |
| 
 | |
|   // Set the result.
 | |
|   ::sqlite3_result_text16(aCtx, data.get(), data.Length() * sizeof(char16_t),
 | |
|                           SQLITE_TRANSIENT);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * This implements the like() SQL function.  This is used by the LIKE operator.
 | |
|  * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
 | |
|  * an escape character, say E, it is implemented as 'like(B, A, E)'.
 | |
|  */
 | |
| void likeFunction(sqlite3_context* aCtx, int aArgc, sqlite3_value** aArgv) {
 | |
|   NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
 | |
| 
 | |
|   if (::sqlite3_value_bytes(aArgv[0]) >
 | |
|       ::sqlite3_limit(::sqlite3_context_db_handle(aCtx),
 | |
|                       SQLITE_LIMIT_LIKE_PATTERN_LENGTH, -1)) {
 | |
|     ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
 | |
|                            SQLITE_TOOBIG);
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
 | |
|     return;
 | |
| 
 | |
|   const char16_t* a =
 | |
|       static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[1]));
 | |
|   int aLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
 | |
|   nsDependentString A(a, aLen);
 | |
| 
 | |
|   const char16_t* b =
 | |
|       static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
 | |
|   int bLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
 | |
|   nsDependentString B(b, bLen);
 | |
|   NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
 | |
| 
 | |
|   char16_t E = 0;
 | |
|   if (3 == aArgc)
 | |
|     E = static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[2]))[0];
 | |
| 
 | |
|   nsAString::const_iterator itrString, endString;
 | |
|   A.BeginReading(itrString);
 | |
|   A.EndReading(endString);
 | |
|   nsAString::const_iterator itrPattern, endPattern;
 | |
|   B.BeginReading(itrPattern);
 | |
|   B.EndReading(endPattern);
 | |
|   ::sqlite3_result_int(
 | |
|       aCtx, likeCompare(itrPattern, endPattern, itrString, endString, E));
 | |
| }
 | |
| 
 | |
| void levenshteinDistanceFunction(sqlite3_context* aCtx, int aArgc,
 | |
|                                  sqlite3_value** aArgv) {
 | |
|   NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
 | |
| 
 | |
|   // If either argument is a SQL NULL, then return SQL NULL.
 | |
|   if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
 | |
|       ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
 | |
|     ::sqlite3_result_null(aCtx);
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   const char16_t* a =
 | |
|       static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
 | |
|   int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
 | |
| 
 | |
|   const char16_t* b =
 | |
|       static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[1]));
 | |
|   int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
 | |
| 
 | |
|   // Compute the Levenshtein Distance, and return the result (or error).
 | |
|   int distance = -1;
 | |
|   const nsDependentString A(a, aLen);
 | |
|   const nsDependentString B(b, bLen);
 | |
|   int status = levenshteinDistance(A, B, &distance);
 | |
|   if (status == SQLITE_OK) {
 | |
|     ::sqlite3_result_int(aCtx, distance);
 | |
|   } else if (status == SQLITE_NOMEM) {
 | |
|     ::sqlite3_result_error_nomem(aCtx);
 | |
|   } else {
 | |
|     ::sqlite3_result_error(aCtx, "User function returned error code", -1);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void utf16LengthFunction(sqlite3_context* aCtx, int aArgc,
 | |
|                          sqlite3_value** aArgv) {
 | |
|   NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
 | |
| 
 | |
|   int len = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
 | |
| 
 | |
|   // Set the result.
 | |
|   ::sqlite3_result_int(aCtx, len);
 | |
| }
 | |
| 
 | |
| }  // namespace storage
 | |
| }  // namespace mozilla
 | 
