summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristophe Grenier <grenier@cgsecurity.org>2018-04-10 16:37:01 +0200
committerChristophe Grenier <grenier@cgsecurity.org>2018-04-10 16:37:01 +0200
commitcdde95797ee04b258a7fd29fd4ebcb69f32da74b (patch)
tree835bc513bbf46d6dd2de74b2e237403ddfbf318c
parent9eb2aa12ce051d8d3d31e6b440389d5abf5efaf1 (diff)
PhotoRec: try to handle Blocked GNU Zip Format (BGZF) data
-rw-r--r--src/file_gz.c53
1 files changed, 49 insertions, 4 deletions
diff --git a/src/file_gz.c b/src/file_gz.c
index 1b97079..bacc414 100644
--- a/src/file_gz.c
+++ b/src/file_gz.c
@@ -36,7 +36,6 @@
#include "file_gz.h"
static void register_header_check_gz(file_stat_t *file_stat);
-static int header_check_gz(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
static void file_rename_gz(file_recovery_t *file_recovery);
extern const file_hint_t file_hint_doc;
@@ -59,7 +58,6 @@ struct gzip_header
uint8_t os;
} __attribute__ ((gcc_struct, __packed__));
-static const unsigned char gz_header_magic[3]= {0x1F, 0x8B, 0x08};
/* flags:
bit 0 FTEXT
bit 1 FHCRC
@@ -76,9 +74,38 @@ static const unsigned char gz_header_magic[3]= {0x1F, 0x8B, 0x08};
#define GZ_FNAME 8
#define GZ_FCOMMENT 0x10
-static void register_header_check_gz(file_stat_t *file_stat)
+static void file_check_bgzf(file_recovery_t *file_recovery)
{
- register_header_check(0, gz_header_magic,sizeof(gz_header_magic), &header_check_gz, file_stat);
+}
+
+static int header_check_bgzf(const unsigned char *buffer, const unsigned char *buffer_uncompr, const unsigned int buffer_size, file_recovery_t *file_recovery_new)
+{
+ const struct gzip_header *gz=(const struct gzip_header *)buffer;
+ reset_file_recovery(file_recovery_new);
+ file_recovery_new->min_filesize=22;
+ file_recovery_new->time=le32(gz->mtime);
+ file_recovery_new->file_rename=&file_rename_gz;
+ file_recovery_new->file_check=&file_check_bgzf;
+ if(memcmp(buffer_uncompr, "BAI\1", 4)==0)
+ {
+ /* https://github.com/samtools/hts-specs SAM/BAM and related high-throughput sequencing file formats */
+ file_recovery_new->extension="bai";
+ return 1;
+ }
+ if(memcmp(buffer_uncompr, "BAM\1", 4)==0)
+ {
+ /* https://github.com/samtools/hts-specs SAM/BAM and related high-throughput sequencing file formats */
+ file_recovery_new->extension="bam";
+ return 1;
+ }
+ if(memcmp(buffer_uncompr, "CSI\1", 4)==0)
+ {
+ /* https://github.com/samtools/hts-specs SAM/BAM and related high-throughput sequencing file formats */
+ file_recovery_new->extension="csi";
+ return 1;
+ }
+ file_recovery_new->extension=file_hint_gz.extension;
+ return 1;
}
static int header_check_gz(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
@@ -86,6 +113,7 @@ static int header_check_gz(const unsigned char *buffer, const unsigned int buffe
unsigned int off=10;
const unsigned int flags=buffer[3];
const struct gzip_header *gz=(const struct gzip_header *)buffer;
+ int bgzf=0;
/* gzip file format:
* a 10-byte header, containing a magic number, a version number and a timestamp
* optional extra headers, such as the original file name,
@@ -106,6 +134,8 @@ static int header_check_gz(const unsigned char *buffer, const unsigned int buffe
{
off+=2;
off+=buffer[10]|(buffer[11]<<8);
+ if(buffer[12]=='B' && buffer[13]=='C' && buffer[14]==2 && buffer[15]==0)
+ bgzf=1;
}
if((flags&GZ_FNAME)!=0)
{
@@ -133,6 +163,11 @@ static int header_check_gz(const unsigned char *buffer, const unsigned int buffe
if(header_ignored_adv(file_recovery, file_recovery_new)==0)
return 0;
}
+ if(file_recovery->file_check==&file_check_bgzf)
+ {
+ header_ignored(file_recovery_new);
+ return 0;
+ }
#if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
{
static const unsigned char schematic_header[12]={ 0x0a, 0x00, 0x09,
@@ -173,6 +208,10 @@ static int header_check_gz(const unsigned char *buffer, const unsigned int buffe
if(d_stream.total_out < 16)
return 0;
buffer_uncompr[d_stream.total_out]='\0';
+ if(bgzf!=0)
+ {
+ return header_check_bgzf(buffer, buffer_uncompr, d_stream.total_out, file_recovery_new);
+ }
reset_file_recovery(file_recovery_new);
file_recovery_new->min_filesize=22;
file_recovery_new->time=le32(gz->mtime);
@@ -291,3 +330,9 @@ const char*td_zlib_version(void)
return "none";
#endif
}
+
+static void register_header_check_gz(file_stat_t *file_stat)
+{
+ static const unsigned char gz_header_magic[3]= {0x1F, 0x8B, 0x08};
+ register_header_check(0, gz_header_magic,sizeof(gz_header_magic), &header_check_gz, file_stat);
+}