253 lines
8.1 KiB
C
253 lines
8.1 KiB
C
/**
 * @copyright
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 * @endcopyright
 *
 * @file svn_utf.h
 * @brief UTF-8 conversion routines
 *
 * Whenever a conversion routine cannot convert to or from UTF-8, the
 * error returned has code @c APR_EINVAL.
 */



#ifndef SVN_UTF_H
|
||
#define SVN_UTF_H
|
||
|
||
#include <apr_pools.h>
|
||
#include <apr_xlate.h> /* for APR_*_CHARSET */
|
||
|
||
#include "svn_types.h"
|
||
#include "svn_string.h"
|
||
|
||
#ifdef __cplusplus
|
||
extern "C" {
|
||
#endif /* __cplusplus */
|
||
|
||
/* Subversion-prefixed aliases for the APR charset selector constants
 * APR_LOCALE_CHARSET and APR_DEFAULT_CHARSET.  Both originals come from
 * <apr_xlate.h>, which this header includes for exactly that purpose.
 */
#define SVN_APR_LOCALE_CHARSET APR_LOCALE_CHARSET
#define SVN_APR_DEFAULT_CHARSET APR_DEFAULT_CHARSET

/**
|
||
* Initialize the UTF-8 encoding/decoding routines.
|
||
* Allocate cached translation handles in a subpool of @a pool.
|
||
*
|
||
* If @a assume_native_utf8 is TRUE, the native character set is
|
||
* assumed to be UTF-8, i.e. conversion is a no-op. This is useful
|
||
* in contexts where the native character set is ASCII but UTF-8
|
||
* should be used regardless (e.g. for mod_dav_svn which runs within
|
||
* httpd and always uses the "C" locale).
|
||
*
|
||
* @note It is optional to call this function, but if it is used, no other
|
||
* svn function may be in use in other threads during the call of this
|
||
* function or when @a pool is cleared or destroyed.
|
||
* Initializing the UTF-8 routines will improve performance.
|
||
*
|
||
* @since New in 1.8.
|
||
*/
|
||
void
|
||
svn_utf_initialize2(svn_boolean_t assume_native_utf8,
|
||
apr_pool_t *pool);
|
||
|
||
/**
|
||
* Like svn_utf_initialize2() but without the ability to force the
|
||
* native encoding to UTF-8.
|
||
*
|
||
* @deprecated Provided for backward compatibility with the 1.7 API.
|
||
*/
|
||
SVN_DEPRECATED
|
||
void
|
||
svn_utf_initialize(apr_pool_t *pool);
|
||
|
||
/** Set @a *dest to a utf8-encoded stringbuf from native stringbuf @a src;
|
||
* allocate @a *dest in @a pool.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_stringbuf_to_utf8(svn_stringbuf_t **dest,
|
||
const svn_stringbuf_t *src,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a utf8-encoded string from native string @a src; allocate
|
||
* @a *dest in @a pool.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_string_to_utf8(const svn_string_t **dest,
|
||
const svn_string_t *src,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a utf8-encoded C string from native C string @a src;
|
||
* allocate @a *dest in @a pool.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_cstring_to_utf8(const char **dest,
|
||
const char *src,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a utf8 encoded C string from @a frompage encoded C
|
||
* string @a src; allocate @a *dest in @a pool.
|
||
*
|
||
* @since New in 1.4.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_cstring_to_utf8_ex2(const char **dest,
|
||
const char *src,
|
||
const char *frompage,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Like svn_utf_cstring_to_utf8_ex2() but with @a convset_key which is
|
||
* ignored.
|
||
*
|
||
* @deprecated Provided for backward compatibility with the 1.3 API.
|
||
*/
|
||
SVN_DEPRECATED
|
||
svn_error_t *
|
||
svn_utf_cstring_to_utf8_ex(const char **dest,
|
||
const char *src,
|
||
const char *frompage,
|
||
const char *convset_key,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a natively-encoded stringbuf from utf8 stringbuf @a src;
|
||
* allocate @a *dest in @a pool.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_stringbuf_from_utf8(svn_stringbuf_t **dest,
|
||
const svn_stringbuf_t *src,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a natively-encoded string from utf8 string @a src;
|
||
* allocate @a *dest in @a pool.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_string_from_utf8(const svn_string_t **dest,
|
||
const svn_string_t *src,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a natively-encoded C string from utf8 C string @a src;
|
||
* allocate @a *dest in @a pool.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_cstring_from_utf8(const char **dest,
|
||
const char *src,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a @a topage encoded C string from utf8 encoded C string
|
||
* @a src; allocate @a *dest in @a pool.
|
||
*
|
||
* @since New in 1.4.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_cstring_from_utf8_ex2(const char **dest,
|
||
const char *src,
|
||
const char *topage,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Like svn_utf_cstring_from_utf8_ex2() but with @a convset_key which is
|
||
* ignored.
|
||
*
|
||
* @deprecated Provided for backward compatibility with the 1.3 API.
|
||
*/
|
||
SVN_DEPRECATED
|
||
svn_error_t *
|
||
svn_utf_cstring_from_utf8_ex(const char **dest,
|
||
const char *src,
|
||
const char *topage,
|
||
const char *convset_key,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Return a fuzzily native-encoded C string from utf8 C string @a src,
|
||
* allocated in @a pool. A fuzzy recoding leaves all 7-bit ascii
|
||
* characters the same, and substitutes "?\\XXX" for others, where XXX
|
||
* is the unsigned decimal code for that character.
|
||
*
|
||
* This function cannot error; it is guaranteed to return something.
|
||
* First it will recode as described above and then attempt to convert
|
||
* the (new) 7-bit UTF-8 string to native encoding. If that fails, it
|
||
* will return the raw fuzzily recoded string, which may or may not be
|
||
* meaningful in the client's locale, but is (presumably) better than
|
||
* nothing.
|
||
*
|
||
* ### Notes:
|
||
*
|
||
* Improvement is possible, even imminent. The original problem was
|
||
* that if you converted a UTF-8 string (say, a log message) into a
|
||
* locale that couldn't represent all the characters, you'd just get a
|
||
* static placeholder saying "[unconvertible log message]". Then
|
||
* Justin Erenkrantz pointed out how on platforms that didn't support
|
||
* conversion at all, "svn log" would still fail completely when it
|
||
* encountered unconvertible data.
|
||
*
|
||
* Now for both cases, the caller can at least fall back on this
|
||
* function, which converts the message as best it can, substituting
|
||
* "?\\XXX" escape codes for the non-ascii characters.
|
||
*
|
||
* Ultimately, some callers may prefer the iconv "//TRANSLIT" option,
|
||
* so when we can detect that at configure time, things will change.
|
||
* Also, this should (?) be moved to apr/apu eventually.
|
||
*
|
||
* See http://subversion.tigris.org/issues/show_bug.cgi?id=807 for
|
||
* details.
|
||
*/
|
||
const char *
|
||
svn_utf_cstring_from_utf8_fuzzy(const char *src,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a natively-encoded C string from utf8 stringbuf @a src;
|
||
* allocate @a *dest in @a pool.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_cstring_from_utf8_stringbuf(const char **dest,
|
||
const svn_stringbuf_t *src,
|
||
apr_pool_t *pool);
|
||
|
||
|
||
/** Set @a *dest to a natively-encoded C string from utf8 string @a src;
|
||
* allocate @a *dest in @a pool.
|
||
*/
|
||
svn_error_t *
|
||
svn_utf_cstring_from_utf8_string(const char **dest,
|
||
const svn_string_t *src,
|
||
apr_pool_t *pool);
|
||
|
||
/** Return the display width of the UTF-8-encoded C string @a cstr.
 *
 * If the string is not printable, or is not valid UTF-8, return -1
 * instead.
 *
 * @since New in 1.8.
 */
int
svn_utf_cstring_utf8_width(const char *cstr);

#ifdef __cplusplus
|
||
}
|
||
#endif /* __cplusplus */
|
||
|
||
#endif /* SVN_UTF_H */
|