diff options
Diffstat (limited to 'ext/metadata/metadataparsejpeg.c')
-rw-r--r-- | ext/metadata/metadataparsejpeg.c | 307 |
1 files changed, 288 insertions, 19 deletions
diff --git a/ext/metadata/metadataparsejpeg.c b/ext/metadata/metadataparsejpeg.c index 3c2719da..ce6a161c 100644 --- a/ext/metadata/metadataparsejpeg.c +++ b/ext/metadata/metadataparsejpeg.c @@ -41,14 +41,62 @@ * Boston, MA 02111-1307, USA. */ -#include "metadataparsejpeg.h" +/* + * SECTION: metadataparsejpeg + * @short_description: This module provides functions to parse JPEG files + * + * This module parses a JPEG stream finding metadata chunks, and marking them + * to be removed from the stream and saving them in a adapter. + * + * <refsect2> + * <para> + * #metadataparse_jpeg_init must be called before any other function in this + * module and must be paired with a call to #metadataparse_jpeg_dispose. + * #metadataparse_jpeg_parse is used to parse the stream (find the metadata + * chunks and the place it should be written to. + * #metadataparse_jpeg_lazy_update do nothing. + * </para> + * <para> + * This module tries to find metadata chunk until it reaches the "start of scan + * image". So if the metadata chunk, which could be EXIF, XMP or IPTC (inside + * Photoshop), is after the "start of scan image" it will not be found. This is + * 'cause of performance reason and 'cause we believe that files with metadata + * chunk after the "scan of image" chunk are very bad practice, so we don't + * worry about them. + * </para> + * <para> + * If it is working in non-parse_only mode, and the first chunk is a EXIF + * instead of a JFIF chunk, the EXIF chunk will be marked for removal and a new + * JFIF chunk will be create and marked to be injected as the first chunk. + * </para> + * </refsect2> + * + * Last reviewed on 2008-01-24 (0.10.15) + */ + +/* + * includes + */ #include <string.h> +#include "metadataparsejpeg.h" + #ifdef HAVE_IPTC #include <libiptcdata/iptc-jpeg.h> #endif +/* + * defines and macros + */ + +/* returns the current byte, advance to the next one and decrease the size */ +#define READ(buf, size) ( (size)--, *((buf)++) ) + +/* + * static helper functions declaration + */ + static MetadataParsingReturn metadataparse_jpeg_reading (JpegParseData * jpeg_data, guint8 ** buf, guint32 * bufsize, const guint32 offset, const guint8 * step_buf, @@ -72,13 +120,30 @@ static MetadataParsingReturn metadataparse_jpeg_jump (JpegParseData * jpeg_data, guint8 ** buf, guint32 * bufsize, guint8 ** next_start, guint32 * next_size); -#define READ(buf, size) ( (size)--, *((buf)++) ) +/* + * extern functions implementations + */ -void -metadataparse_jpeg_lazy_update (JpegParseData * jpeg_data) -{ - /* nothing to do */ -} +/* + * metadataparse_jpeg_init: + * @jpeg_data: [in] jpeg data handler to be inited + * @exif_adpt: where to create/write an adapter to hold the EXIF chunk found + * @iptc_adpt: where to create/write an adapter to hold the IPTC chunk found + * @xmp_adpt: where to create/write an adapter to hold the XMP chunk found + * @strip_chunks: Array of chunks (offset and size) marked for removal + * @inject_chunks: Array of chunks (offset, data, size) marked for injection + * @parse_only: TRUE if it should only find the chunks and write then to the + * adapter (@exif_adpt, @iptc_adpt, @xmp_adpt). Or FALSE if should also put + * them on @strip_chunks. + * + * Init jpeg data handle. + * This function must be called before any other function from this module. + * This function must not be called twice without call to + * #metadataparse_jpeg_dispose beteween them. + * @see_also: #metadataparse_jpeg_dispose #metadataparse_jpeg_parse + * + * Returns: nothing + */ void metadataparse_jpeg_init (JpegParseData * jpeg_data, GstAdapter ** exif_adpt, @@ -100,6 +165,17 @@ metadataparse_jpeg_init (JpegParseData * jpeg_data, GstAdapter ** exif_adpt, } +/* + * metadataparse_jpeg_dispose: + * @jpeg_data: [in] jpeg data handler to be freed + * + * Call this function to free any resource allocated by + * #metadataparse_jpeg_init + * @see_also: #metadataparse_jpeg_init + * + * Returns: nothing + */ + void metadataparse_jpeg_dispose (JpegParseData * jpeg_data) { @@ -108,6 +184,42 @@ metadataparse_jpeg_dispose (JpegParseData * jpeg_data) jpeg_data->xmp_adapter = NULL; } +/* + * metadata_parse: + * @jpeg_data: [in] jpeg data handle + * @buf: [in] data to be parsed + * @bufsize: [in] size of @buf in bytes + * @offset: is the offset where @buf starts from the beginnig of the whole + * stream + * @next_start: is a pointer after @buf which indicates where @buf should start + * on the next call to this function. It means, that after returning, this + * function has consumed *@next_start - @buf bytes. Which also means + * that @offset should also be incremanted by (*@next_start - @buf) for the + * next time. + * @next_size: [out] number of minimal bytes in @buf for the next call to this + * function + * + * This function is used to parse a JPEG stream step-by-step incrementally. + * Basically this function works like a state machine, that will run in a loop + * while there is still bytes in @buf to be read or it has finished parsing. + * If the it hasn't parsed yet and there is no more data in @buf, then the + * current state is saved and a indication will be make about the buffer to + * be passed by the caller function. + * @see_also: #metadataparse_jpeg_init + * + * Returns: + * <itemizedlist> + * <listitem><para>%META_PARSING_ERROR + * </para></listitem> + * <listitem><para>%META_PARSING_DONE if parse has finished. Now strip and + * inject chunks has been found + * </para></listitem> + * <listitem><para>%META_PARSING_NEED_MORE_DATA if this function should be + * called again (look @next_start and @next_size) + * </para></listitem> + * </itemizedlist> + */ + MetadataParsingReturn metadataparse_jpeg_parse (JpegParseData * jpeg_data, guint8 * buf, guint32 * bufsize, const guint32 offset, guint8 ** next_start, @@ -118,10 +230,19 @@ metadataparse_jpeg_parse (JpegParseData * jpeg_data, guint8 * buf, guint8 mark[2] = { 0x00, 0x00 }; const guint8 *step_buf = buf; + /* step_buf holds where buf starts. this const value will be passed to + the nested parsing function, so those function knows how far they from + the initial buffer. This is not related to the beginning of the whole + stream, it is just related to the buf passed in this step to this + function */ + *next_start = buf; if (jpeg_data->state == JPEG_PARSE_NULL) { + /* only the first time this function is called it will verify the stream + type to be sure it is a JPEG */ + if (*bufsize < 2) { *next_size = (buf - *next_start) + 2; ret = META_PARSING_NEED_MORE_DATA; @@ -184,6 +305,68 @@ done: } +/* + * metadataparse_jpeg_lazy_update: + * @jpeg_data: [in] jpeg data handle + * + * This function do nothing + * @see_also: metadata_lazy_update + * + * Returns: nothing + */ + +void +metadataparse_jpeg_lazy_update (JpegParseData * jpeg_data) +{ + /* nothing to do */ +} + +/* + * static helper functions implementation + */ + +/* + * metadataparse_jpeg_reading: + * @jpeg_data: [in] jpeg data handle + * @buf: [in] data to be parsed. @buf will increment during the parsing step. + * So it will hold the next byte to be read inside a parsing function or on + * the next nested parsing function. And so, @bufsize will decrement. + * @bufsize: [in] size of @buf in bytes. This value will decrement during the + * parsing for the same reason that @buf will advance. + * @offset: is the offset where @step_buf starts from the beginnig of the + * stream + * @step_buf: holds the pointer to the buffer passed to + * #metadataparse_jpeg_parse. It means that any point inside this function + * the offset (related to the beginning of the whole stream) after the last + * byte read so far is "(*buf - step_buf) + offset" + * @next_start: is a pointer after @step_buf which indicates where the next + * call to #metadataparse_jpeg_parse should start on the next call to this + * function. It means, that after return, this function has + * consumed *@next_start - @buf bytes. Which also means that @offset should + * also be incremanted by (*@next_start - @buf) for the next time. + * @next_size: [out] number of minimal bytes in @buf for the next call to this + * function + * + * This function is used to parse a JPEG stream step-by-step incrementally. + * If this function finds a EXIF, IPTC or XMP chunk (or a chunk that should be + * jumped), then it changes the state of the parsing process so that the + * remaing parsing can be done by another more specialized function. + * @see_also: #metadataparse_jpeg_init #metadataparse_jpeg_exif + * #metadataparse_jpeg_iptc #metadataparse_jpeg_xmp #metadataparse_jpeg_jump + * + * Returns: + * <itemizedlist> + * <listitem><para>%META_PARSING_ERROR + * </para></listitem> + * <listitem><para>%META_PARSING_DONE if parse has finished. Now strip and + * inject chunks has been found. Or some chunk has been found and should be + * held or jumped. + * </para></listitem> + * <listitem><para>%META_PARSING_NEED_MORE_DATA if this function should be + * called again (look @next_start and @next_size) + * </para></listitem> + * </itemizedlist> + */ /* look for markers */ static MetadataParsingReturn @@ -218,7 +401,8 @@ metadataparse_jpeg_reading (JpegParseData * jpeg_data, guint8 ** buf, ret = META_PARSING_DONE; jpeg_data->state = JPEG_PARSE_DONE; goto done; - } else if (mark[1] == 0xDA) { /* start of scan, lets not look behinf of this */ + } else if (mark[1] == 0xDA) { + /* start of scan image, lets not look behind of this */ ret = META_PARSING_DONE; jpeg_data->state = JPEG_PARSE_DONE; goto done; @@ -264,7 +448,9 @@ metadataparse_jpeg_reading (JpegParseData * jpeg_data, guint8 ** buf, if (!jpeg_data->parse_only) { memset (&chunk, 0x00, sizeof (MetadataChunk)); - chunk.offset_orig = (*buf - step_buf) + offset - 4; /* maker + size */ + + chunk.offset_orig = (*buf - step_buf) + offset - 4; /* 4 == maker + size */ + chunk.size = chunk_size + 2; /* chunk size plus app marker */ chunk.type = MD_CHUNK_EXIF; @@ -276,15 +462,15 @@ metadataparse_jpeg_reading (JpegParseData * jpeg_data, guint8 ** buf, if (!jpeg_data->jfif_found) { /* only inject if no JFIF has been found */ - static const guint8 segment[] = { 0xff, 0xe0, 0x00, 0x10, - 0x4a, 0x46, 0x49, 0x46, 0x00, - 0x01, 0x02, - 0x00, 0x00, 0x01, 0x00, 0x01, - 0x00, 0x00 - }; - if (!jpeg_data->parse_only) { + static const guint8 segment[] = { 0xff, 0xe0, 0x00, 0x10, + 0x4a, 0x46, 0x49, 0x46, 0x00, + 0x01, 0x02, + 0x00, 0x00, 0x01, 0x00, 0x01, + 0x00, 0x00 + }; + memset (&chunk, 0x00, sizeof (MetadataChunk)); chunk.offset_orig = 2; chunk.size = 18; @@ -321,7 +507,7 @@ metadataparse_jpeg_reading (JpegParseData * jpeg_data, guint8 ** buf, MetadataChunk chunk; memset (&chunk, 0x00, sizeof (MetadataChunk)); - chunk.offset_orig = (*buf - step_buf) + offset - 4; /* maker + size */ + chunk.offset_orig = (*buf - step_buf) + offset - 4; /* 4 == maker + size */ chunk.size = chunk_size + 2; /* chunk size plus app marker */ chunk.type = MD_CHUNK_XMP; @@ -344,7 +530,9 @@ metadataparse_jpeg_reading (JpegParseData * jpeg_data, guint8 ** buf, } } #ifdef HAVE_IPTC - else if (mark[1] == 0xED) { /* may be it is photoshop and may be there is iptc */ + else if (mark[1] == 0xED) { + /* may be it is photoshop and may be there is iptc */ + if (chunk_size >= 16) { /* size2 "Photoshop 3.0" */ if (*bufsize < 14) { @@ -361,7 +549,7 @@ metadataparse_jpeg_reading (JpegParseData * jpeg_data, guint8 ** buf, MetadataChunk chunk; memset (&chunk, 0x00, sizeof (MetadataChunk)); - chunk.offset_orig = (*buf - step_buf) + offset - 4; /* maker + size */ + chunk.offset_orig = (*buf - step_buf) + offset - 4; /* 4 == maker + size */ chunk.size = chunk_size + 2; /* chunk size plus app marker */ chunk.type = MD_CHUNK_IPTC; @@ -400,6 +588,43 @@ done: } +/* + * metadataparse_jpeg_exif: + * @jpeg_data: [in] jpeg data handle + * @buf: [in] data to be parsed + * @bufsize: [in] size of @buf in bytes + * @next_start: look at #metadataparse_jpeg_reading + * @next_size: look at #metadataparse_jpeg_reading + * NOTE: To have a explanation of each parameters of this function look at + * the documentation of #metadataparse_jpeg_reading + * + * This function saves the EXIF chunk to @jpeg_data->exif_adapter and makes the + * parsing process point to the next buffer after the EXIF chunk. + * This function will be called by the parsing process 'cause at some point + * #metadataparse_jpeg_reading found out the EXIF chunk, skipped the JPEG + * wrapper bytes and changed the state of parsing process to JPEG_PARSE_EXIF. + * Which just happens if @jpeg_data->parse_only is FALSE and there is a EXIF + * chunk into the stream and @jpeg_data->exif_adapter is not NULL. + * This function will just be called once even if there is more than one EXIF + * chunk in the stream. This function do it by setting @jpeg_data->exif_adapter + * to NULL. + * After this function has completely parsed (hold) the chunk, it changes the + * parsing state back to JPEG_PARSE_READING which makes + * #metadataparse_jpeg_reading to be called again + * @see_also: #metadataparse_util_hold_chunk #metadataparse_jpeg_reading + * + * Returns: + * <itemizedlist> + * <listitem><para>%META_PARSING_ERROR + * </para></listitem> + * <listitem><para>%META_PARSING_DONE if the chunk bas been completely hold + * </para></listitem> + * <listitem><para>%META_PARSING_NEED_MORE_DATA if this function should be + * called again (look @next_start and @next_size) + * </para></listitem> + * </itemizedlist> + */ + static MetadataParsingReturn metadataparse_jpeg_exif (JpegParseData * jpeg_data, guint8 ** buf, guint32 * bufsize, guint8 ** next_start, guint32 * next_size) @@ -419,6 +644,15 @@ metadataparse_jpeg_exif (JpegParseData * jpeg_data, guint8 ** buf, } +/* + * metadataparse_jpeg_iptc: + * + * Look at #metadataparse_jpeg_exif. This function has the same behavior as + * that. The only difference is that this function also cut out others + * PhotoShop data and only holds IPTC data in it. + * + */ + #ifdef HAVE_IPTC static MetadataParsingReturn metadataparse_jpeg_iptc (JpegParseData * jpeg_data, guint8 ** buf, @@ -443,6 +677,7 @@ metadataparse_jpeg_iptc (JpegParseData * jpeg_data, guint8 ** buf, size = gst_adapter_available (*jpeg_data->iptc_adapter); buf = gst_adapter_peek (*jpeg_data->iptc_adapter, size); + /* FIXME: currently we are trhowing away others PhotoShop data */ res = iptc_jpeg_ps3_find_iptc (buf, size, &iptc_len); if (res < 0) { @@ -473,6 +708,14 @@ metadataparse_jpeg_iptc (JpegParseData * jpeg_data, guint8 ** buf, } #endif +/* + * metadataparse_jpeg_xmp: + * + * Look at #metadataparse_jpeg_exif. This function has the same behavior as + * that. + * + */ + static MetadataParsingReturn metadataparse_jpeg_xmp (JpegParseData * jpeg_data, guint8 ** buf, guint32 * bufsize, guint8 ** next_start, guint32 * next_size) @@ -490,6 +733,32 @@ metadataparse_jpeg_xmp (JpegParseData * jpeg_data, guint8 ** buf, return ret; } +/* + * metadataparse_jpeg_jump: + * @jpeg_data: [in] jpeg data handle + * @buf: [in] data to be parsed + * @bufsize: [in] size of @buf in bytes + * @next_start: look at #metadataparse_jpeg_reading + * @next_size: look at #metadataparse_jpeg_reading + * NOTE: To have a explanation of each parameters of this function look at + * the documentation of #metadataparse_jpeg_reading + * + * This function just makes a chunk we are not interested in to be jumped. + * This is done basically by incrementing @next_start and @buf, + * and decreasing @bufsize and setting the next parsing state properly. + * @see_also: #metadataparse_jpeg_reading #metadataparse_util_jump_chunk + * + * Returns: + * <itemizedlist> + * <listitem><para>%META_PARSING_DONE if bytes has been skiped and there is + * still data in @buf + * </para></listitem> + * <listitem><para>%META_PARSING_NEED_MORE_DATA if the skiped bytes end at + * some point after @buf + @bufsize + * </para></listitem> + * </itemizedlist> + */ + static MetadataParsingReturn metadataparse_jpeg_jump (JpegParseData * jpeg_data, guint8 ** buf, guint32 * bufsize, guint8 ** next_start, guint32 * next_size) |