00001 /** 00002 * @copyright 00003 * ==================================================================== 00004 * Licensed to the Apache Software Foundation (ASF) under one 00005 * or more contributor license agreements. See the NOTICE file 00006 * distributed with this work for additional information 00007 * regarding copyright ownership. The ASF licenses this file 00008 * to you under the Apache License, Version 2.0 (the 00009 * "License"); you may not use this file except in compliance 00010 * with the License. You may obtain a copy of the License at 00011 * 00012 * http://www.apache.org/licenses/LICENSE-2.0 00013 * 00014 * Unless required by applicable law or agreed to in writing, 00015 * software distributed under the License is distributed on an 00016 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 00017 * KIND, either express or implied. See the License for the 00018 * specific language governing permissions and limitations 00019 * under the License. 00020 * ==================================================================== 00021 * @endcopyright 00022 * 00023 * @file svn_utf.h 00024 * @brief UTF-8 conversion routines 00025 * 00026 * Whenever a conversion routine cannot convert to or from UTF-8, the 00027 * error returned has code @c APR_EINVAL. 00028 */ 00029 00030 00031 00032 #ifndef SVN_UTF_H 00033 #define SVN_UTF_H 00034 00035 #include <apr_pools.h> 00036 #include <apr_xlate.h> /* for APR_*_CHARSET */ 00037 00038 #include "svn_types.h" 00039 #include "svn_string.h" 00040 00041 #ifdef __cplusplus 00042 extern "C" { 00043 #endif /* __cplusplus */ 00044 00045 #define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET 00046 #define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET 00047 00048 /** 00049 * Initialize the UTF-8 encoding/decoding routines. 00050 * Allocate cached translation handles in a subpool of @a pool. 00051 * 00052 * If @a assume_native_utf8 is TRUE, the native character set is 00053 * assumed to be UTF-8, i.e. conversion is a no-op. This is useful 00054 * in contexts where the native character set is ASCII but UTF-8 00055 * should be used regardless (e.g. for mod_dav_svn which runs within 00056 * httpd and always uses the "C" locale). 00057 * 00058 * @note It is optional to call this function, but if it is used, no other 00059 * svn function may be in use in other threads during the call of this 00060 * function or when @a pool is cleared or destroyed. 00061 * Initializing the UTF-8 routines will improve performance. 00062 * 00063 * @since New in 1.8. 00064 */ 00065 void 00066 svn_utf_initialize2(svn_boolean_t assume_native_utf8, 00067 apr_pool_t *pool); 00068 00069 /** 00070 * Like svn_utf_initialize2() but without the ability to force the 00071 * native encoding to UTF-8. 00072 * 00073 * @deprecated Provided for backward compatibility with the 1.7 API. 00074 */ 00075 SVN_DEPRECATED 00076 void 00077 svn_utf_initialize(apr_pool_t *pool); 00078 00079 /** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src; 00080 * allocate @a *dest in @a pool. 00081 */ 00082 svn_error_t * 00083 svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest, 00084 const svn_stringbuf_t *src, 00085 apr_pool_t *pool); 00086 00087 00088 /** Set @a *dest to a utf8-encoded string from native string @a src; allocate 00089 * @a *dest in @a pool. 00090 */ 00091 svn_error_t * 00092 svn_utf_string_to_utf8(const svn_string_t **dest, 00093 const svn_string_t *src, 00094 apr_pool_t *pool); 00095 00096 00097 /** Set @a *dest to a utf8-encoded C string from native C string @a src; 00098 * allocate @a *dest in @a pool. 00099 */ 00100 svn_error_t * 00101 svn_utf_cstring_to_utf8(const char **dest, 00102 const char *src, 00103 apr_pool_t *pool); 00104 00105 00106 /** Set @a *dest to a utf8 encoded C string from @a frompage encoded C 00107 * string @a src; allocate @a *dest in @a pool. 00108 * 00109 * @since New in 1.4. 00110 */ 00111 svn_error_t * 00112 svn_utf_cstring_to_utf8_ex2(const char **dest, 00113 const char *src, 00114 const char *frompage, 00115 apr_pool_t *pool); 00116 00117 00118 /** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is 00119 * ignored. 00120 * 00121 * @deprecated Provided for backward compatibility with the 1.3 API. 00122 */ 00123 SVN_DEPRECATED 00124 svn_error_t * 00125 svn_utf_cstring_to_utf8_ex(const char **dest, 00126 const char *src, 00127 const char *frompage, 00128 const char *convset_key, 00129 apr_pool_t *pool); 00130 00131 00132 /** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src; 00133 * allocate @a *dest in @a pool. 00134 */ 00135 svn_error_t * 00136 svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest, 00137 const svn_stringbuf_t *src, 00138 apr_pool_t *pool); 00139 00140 00141 /** Set @a *dest to a natively-encoded string from utf8 string @a src; 00142 * allocate @a *dest in @a pool. 00143 */ 00144 svn_error_t * 00145 svn_utf_string_from_utf8(const svn_string_t **dest, 00146 const svn_string_t *src, 00147 apr_pool_t *pool); 00148 00149 00150 /** Set @a *dest to a natively-encoded C string from utf8 C string @a src; 00151 * allocate @a *dest in @a pool. 00152 */ 00153 svn_error_t * 00154 svn_utf_cstring_from_utf8(const char **dest, 00155 const char *src, 00156 apr_pool_t *pool); 00157 00158 00159 /** Set @a *dest to a @a topage encoded C string from utf8 encoded C string 00160 * @a src; allocate @a *dest in @a pool. 00161 * 00162 * @since New in 1.4. 00163 */ 00164 svn_error_t * 00165 svn_utf_cstring_from_utf8_ex2(const char **dest, 00166 const char *src, 00167 const char *topage, 00168 apr_pool_t *pool); 00169 00170 00171 /** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is 00172 * ignored. 00173 * 00174 * @deprecated Provided for backward compatibility with the 1.3 API. 00175 */ 00176 SVN_DEPRECATED 00177 svn_error_t * 00178 svn_utf_cstring_from_utf8_ex(const char **dest, 00179 const char *src, 00180 const char *topage, 00181 const char *convset_key, 00182 apr_pool_t *pool); 00183 00184 00185 /** Return a fuzzily native-encoded C string from utf8 C string @a src, 00186 * allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii 00187 * characters the same, and substitutes "?\\XXX" for others, where XXX 00188 * is the unsigned decimal code for that character. 00189 * 00190 * This function cannot error; it is guaranteed to return something. 00191 * First it will recode as described above and then attempt to convert 00192 * the (new) 7-bit UTF-8 string to native encoding. If that fails, it 00193 * will return the raw fuzzily recoded string, which may or may not be 00194 * meaningful in the client's locale, but is (presumably) better than 00195 * nothing. 00196 * 00197 * ### Notes: 00198 * 00199 * Improvement is possible, even imminent. The original problem was 00200 * that if you converted a UTF-8 string (say, a log message) into a 00201 * locale that couldn't represent all the characters, you'd just get a 00202 * static placeholder saying "[unconvertible log message]". Then 00203 * Justin Erenkrantz pointed out how on platforms that didn't support 00204 * conversion at all, "svn log" would still fail completely when it 00205 * encountered unconvertible data. 00206 * 00207 * Now for both cases, the caller can at least fall back on this 00208 * function, which converts the message as best it can, substituting 00209 * "?\\XXX" escape codes for the non-ascii characters. 00210 * 00211 * Ultimately, some callers may prefer the iconv "//TRANSLIT" option, 00212 * so when we can detect that at configure time, things will change. 00213 * Also, this should (?) be moved to apr/apu eventually. 00214 * 00215 * See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for 00216 * details. 00217 */ 00218 const char * 00219 svn_utf_cstring_from_utf8_fuzzy(const char *src, 00220 apr_pool_t *pool); 00221 00222 00223 /** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src; 00224 * allocate @a *dest in @a pool. 00225 */ 00226 svn_error_t * 00227 svn_utf_cstring_from_utf8_stringbuf(const char **dest, 00228 const svn_stringbuf_t *src, 00229 apr_pool_t *pool); 00230 00231 00232 /** Set @a *dest to a natively-encoded C string from utf8 string @a src; 00233 * allocate @a *dest in @a pool. 00234 */ 00235 svn_error_t * 00236 svn_utf_cstring_from_utf8_string(const char **dest, 00237 const svn_string_t *src, 00238 apr_pool_t *pool); 00239 00240 /** Return the display width of UTF-8-encoded C string @a cstr. 00241 * If the string is not printable or invalid UTF-8, return -1. 00242 * 00243 * @since New in 1.8. 00244 */ 00245 int 00246 svn_utf_cstring_utf8_width(const char *cstr); 00247 00248 #ifdef __cplusplus 00249 } 00250 #endif /* __cplusplus */ 00251 00252 #endif /* SVN_UTF_H */