From ff772f706f98a917ef77115483138a600bbc20e6 Mon Sep 17 00:00:00 2001 From: nicm Date: Fri, 24 Apr 2015 16:24:11 +0000 Subject: [PATCH] New implementation of the file(1) utility. This is a simplified, modernised version with a nearly complete magic(5) parser but omits some of the complex builtin tests (notably ELF) and has a reduced set of options. ok deraadt --- usr.bin/file/LEGAL.NOTICE | 29 - usr.bin/file/MAINT | 44 - usr.bin/file/Makefile | 41 +- usr.bin/file/README | 108 -- usr.bin/file/apprentice.c | 2129 ----------------------------------- usr.bin/file/ascmagic.c | 789 ------------- usr.bin/file/compress.c | 479 -------- usr.bin/file/config.h | 38 - usr.bin/file/elfclass.h | 68 -- usr.bin/file/file.1 | 490 +------- usr.bin/file/file.c | 822 +++++++------- usr.bin/file/file.h | 404 +------ usr.bin/file/file_opts.h | 49 - usr.bin/file/fsmagic.c | 309 ----- usr.bin/file/funcs.c | 332 ------ usr.bin/file/is_tar.c | 153 --- usr.bin/file/magic-common.c | 83 ++ usr.bin/file/magic-dump.c | 53 + usr.bin/file/magic-load.c | 1020 +++++++++++++++++ usr.bin/file/magic-test.c | 1121 ++++++++++++++++++ usr.bin/file/magic.c | 395 ------- usr.bin/file/magic.h | 255 +++-- usr.bin/file/names.h | 174 --- usr.bin/file/patchlevel.h | 348 ------ usr.bin/file/print.c | 233 ---- usr.bin/file/readelf.c | 1020 ----------------- usr.bin/file/readelf.h | 237 ---- usr.bin/file/softmagic.c | 1821 ------------------------------ usr.bin/file/tar.h | 74 -- usr.bin/file/text.c | 168 +++ usr.bin/file/xmalloc.c | 103 ++ usr.bin/file/xmalloc.h | 31 + 32 files changed, 3287 insertions(+), 10133 deletions(-) delete mode 100644 usr.bin/file/LEGAL.NOTICE delete mode 100644 usr.bin/file/MAINT delete mode 100644 usr.bin/file/README delete mode 100644 usr.bin/file/apprentice.c delete mode 100644 usr.bin/file/ascmagic.c delete mode 100644 usr.bin/file/compress.c delete mode 100644 usr.bin/file/config.h delete mode 100644 usr.bin/file/elfclass.h delete mode 100644 usr.bin/file/file_opts.h delete mode 
100644 usr.bin/file/fsmagic.c delete mode 100644 usr.bin/file/funcs.c delete mode 100644 usr.bin/file/is_tar.c create mode 100644 usr.bin/file/magic-common.c create mode 100644 usr.bin/file/magic-dump.c create mode 100644 usr.bin/file/magic-load.c create mode 100644 usr.bin/file/magic-test.c delete mode 100644 usr.bin/file/magic.c delete mode 100644 usr.bin/file/names.h delete mode 100644 usr.bin/file/patchlevel.h delete mode 100644 usr.bin/file/print.c delete mode 100644 usr.bin/file/readelf.c delete mode 100644 usr.bin/file/readelf.h delete mode 100644 usr.bin/file/softmagic.c delete mode 100644 usr.bin/file/tar.h create mode 100644 usr.bin/file/text.c create mode 100644 usr.bin/file/xmalloc.c create mode 100644 usr.bin/file/xmalloc.h diff --git a/usr.bin/file/LEGAL.NOTICE b/usr.bin/file/LEGAL.NOTICE deleted file mode 100644 index 630960a1411..00000000000 --- a/usr.bin/file/LEGAL.NOTICE +++ /dev/null @@ -1,29 +0,0 @@ -$OpenBSD: LEGAL.NOTICE,v 1.8 2008/05/08 01:40:56 chl Exp $ -Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995. -Software written by Ian F. Darwin and others; -maintained 1994- Christos Zoulas. - -This software is not subject to any export provision of the United States -Department of Commerce, and may be exported to any country or planet. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: -1. Redistributions of source code must retain the above copyright - notice immediately at the beginning of the file, without modification, - this list of conditions, and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -SUCH DAMAGE. diff --git a/usr.bin/file/MAINT b/usr.bin/file/MAINT deleted file mode 100644 index a7079c4b41f..00000000000 --- a/usr.bin/file/MAINT +++ /dev/null @@ -1,44 +0,0 @@ -$OpenBSD: MAINT,v 1.5 2009/04/24 18:54:34 chl Exp $ - -Maintenance notes: - -I am continuing to maintain the file command. I welcome your help, -but to make my life easier I'd like to request the following: - -- Do not distribute changed versions. - -People trying to be helpful occasionally put up their hacked versions -of the file command for anonymous FTP, and people all over the -world get copies of the hacked versions. Within a day or two I am -getting email from around the world asking me why "my" file command -won't compile!!! Needless to say this detracts from the limited -time I have available to work on the actual software. Therefore I -ask you again to please NOT distribute your changed version. If -you need to make changes, please add a patch file next to the -distribution tar, and a README file that clearly explains what you -are trying to fix. - -Thank you for your assistance and cooperation. 
- -Code Overview - -This is a rough idea of the control flow from the main program: - -file.c main() -file.c process (called for each file) - printf file name -magic.c magic_file() -fsmagic.c file_fsmagic() - (handles statbuf modes for DEV) - (handles statbuf modes for executable &c. - reads data from file. -funcs.c: file_buffer() -compress.c file_zmagic() -is_tar.c file_is_tar() -softmagic.c file_softmagic() - match() - looks for match against main magic database -ascmagic.c file_ascmagic() -readelf.c file_tryelf() - "unknown" - -Christos Zoulas (see README for email address) diff --git a/usr.bin/file/Makefile b/usr.bin/file/Makefile index 28cf145e4f1..5c65f930493 100644 --- a/usr.bin/file/Makefile +++ b/usr.bin/file/Makefile @@ -1,36 +1,39 @@ -# $OpenBSD: Makefile,v 1.12 2009/04/14 21:28:10 chl Exp $ +# $OpenBSD: Makefile,v 1.13 2015/04/24 16:24:11 nicm Exp $ + +PROG= file +SRCS= file.c magic-dump.c magic-load.c magic-test.c magic-common.c text.c \ + xmalloc.c +MAN= file.1 magic.5 + +CDIAGFLAGS+= -Wno-long-long -Wall -W -Wnested-externs -Wformat=2 +CDIAGFLAGS+= -Wmissing-prototypes -Wstrict-prototypes -Wmissing-declarations +CDIAGFLAGS+= -Wwrite-strings -Wshadow -Wpointer-arith -Wsign-compare +CDIAGFLAGS+= -Wundef -Wbad-function-cast -Winline -Wcast-align MAGIC= /etc/magic MAGICOWN= root MAGICGRP= bin MAGICMODE= 444 -PROG= file -SRCS= file.c apprentice.c fsmagic.c softmagic.c ascmagic.c is_tar.c \ - print.c compress.c readelf.c magic.c funcs.c -CFLAGS+= -DMAGIC='"$(MAGIC)"' -DUSE_UTIMES -DHAVE_CONFIG_H -MAN= file.1 magic.5 - CLEANFILES+= magic post-magic -all: file magic -MAG1= $(.CURDIR)/magdir/Header\ - $(.CURDIR)/magdir/Localstuff\ +MAG1= $(.CURDIR)/magdir/Header \ + $(.CURDIR)/magdir/Localstuff \ $(.CURDIR)/magdir/OpenBSD MAGFILES= $(.CURDIR)/magdir/[0-9a-z]* -post-magic: $(MAGFILES) - for i in ${.ALLSRC:N*.orig}; \ - do \ +post-magic: $(MAGFILES) + for i in ${.ALLSRC:N*.orig}; do \ echo $$i; \ - done|sort|xargs -n 1024 cat > $(.TARGET) - -magic: $(MAG1) 
post-magic - cat ${MAG1} post-magic > $(.TARGET) + done|sort|xargs -n 1024 cat >$(.TARGET) +magic: $(MAG1) post-magic + cat ${MAG1} post-magic >$(.TARGET) afterinstall: - ${INSTALL} ${INSTALL_COPY} -o $(MAGICOWN) -g $(MAGICGRP) -m $(MAGICMODE) magic \ - $(DESTDIR)$(MAGIC) + ${INSTALL} ${INSTALL_COPY} -o $(MAGICOWN) -g $(MAGICGRP) \ + -m $(MAGICMODE) magic $(DESTDIR)$(MAGIC) + +all: file magic .include <bsd.prog.mk> diff --git a/usr.bin/file/README b/usr.bin/file/README deleted file mode 100644 index 0fada4f9f4f..00000000000 --- a/usr.bin/file/README +++ /dev/null @@ -1,108 +0,0 @@ -** README for file(1) Command ** -@(#) $OpenBSD: README,v 1.5 2009/04/24 18:54:34 chl Exp $ - -E-mail: christos@astron.com -Mailing List: file@mx.gw.com - -Phone: Do not even think of telephoning me about this program. Send cash first! - -This is Release 4.x of Ian Darwin's (copyright but distributable) -file(1) command. This version is the standard "file" command for Linux, -*BSD, and other systems. (See "patchlevel.h" for the exact release number). - -The major feature of 4.x is the refactoring of the code into a library, -and the re-write of the file command in terms of that library. The library -itself, libmagic can be used by 3rd party programs that wish to identify -file types without having to fork() and exec() file. The prime contributor -for 4.0 was Måns Rullgård. - -UNIX is a trademark of UNIX System Laboratories. - -The prime contributor to Release 3.8 was Guy Harris, who put in megachanges -including byte-order independence. - -The prime contributor to Release 3.0 was Christos Zoulas, who put -in hundreds of lines of source code changes, including his own -ANSIfication of the code (I liked my own ANSIfication better, but -his (__P()) is the "Berkeley standard" way of doing it, and I wanted UCB -to include the code...), his HP-like "indirection" (a feature of -the HP file command, I think), and his mods that finally got the -uncompress (-z) mode finished and working.
- -This release has compiled in numerous environments; see PORTING -for a list and problems. - -This fine freeware file(1) follows the USG (System V) model of the file -command, rather than the Research (V7) version or the V7-derived 4.[23] -Berkeley one. That is, the file /etc/magic contains much of the ritual -information that is the source of this program's power. My version -knows a little more magic (including tar archives) than System V; the -/etc/magic parsing seems to be compatible with the (poorly documented) -System V /etc/magic format (with one exception; see the man page). - -In addition, the /etc/magic file is built from a subdirectory -for easier(?) maintenance. I will act as a clearinghouse for -magic numbers assigned to all sorts of data files that -are in reasonable circulation. Send your magic numbers, -in magic(5) format please, to the maintainer, Christos Zoulas. - -LEGAL.NOTICE - read this first. -README - read this second (you are currently reading this file). -PORTING - read this only if the program won't compile. -Makefile - read this next, adapt it as needed (particularly - the location of the old existing file command and - the man page layouts), type "make" to compile, - "make try" to try it out against your old version. - Expect some diffs, particularly since your original - file(1) may not grok the embedded-space ("\ ") in - the current magic file, or may even not use the - magic file. -apprentice.c - parses /etc/magic to learn magic -ascmagic.c - third & last set of tests, based on hardwired assumptions. -core - not included in distribution due to mailer limitations. -debug.c - includes -c printout routine -file.1 - man page for the command -magic.4 - man page for the magic file, courtesy Guy Harris. - Install as magic.4 on USG and magic.5 on V7 or Berkeley; cf Makefile. 
-file.c - main program -file.h - header file -fsmagic.c - first set of tests the program runs, based on filesystem info -is_tar.c, tar.h - knows about tarchives (courtesy John Gilmore). -magdir - directory of /etc/magic pieces - magdir/Makefile - ADJUST THIS FOR YOUR CONFIGURATION -names.h - header file for ascmagic.c -softmagic.c - 2nd set of tests, based on /etc/magic -readelf.[ch] - Stand-alone elf parsing code. -compress.c - on-the-fly decompression. -print.c - print results, errors, warnings. - -You can download the latest version of file from: - - ftp://ftp.astron.com/pub/file/ - -If your gzip sometimes fails to decompress things complaining about a short -file, apply this patch [which is going to be in the next version of gzip]: -*** - Tue Oct 29 02:06:35 1996 ---- util.c Sun Jul 21 21:51:38 1996 -*** 106,111 **** ---- 108,114 ---- - - if (insize == 0) { - if (eof_ok) return EOF; -+ flush_window(); - read_error(); - } - bytes_in += (ulg)insize; - -Parts of this software were developed at SoftQuad Inc., developers -of SGML/HTML/XML publishing software, in Toronto, Canada. -SoftQuad was swallowed up by Corel in 2002 -and does not exist any longer. - -From: Kees Zeelenberg - -An MS-Windows (Win32) port of File-4.17 is available from -http://gnuwin32.sourceforge.net/ - -File is an implementation of the Unix File(1) command. -It knows the 'magic number' of several thousands of file types. diff --git a/usr.bin/file/apprentice.c b/usr.bin/file/apprentice.c deleted file mode 100644 index 295c899abf4..00000000000 --- a/usr.bin/file/apprentice.c +++ /dev/null @@ -1,2129 +0,0 @@ -/* $OpenBSD: apprentice.c,v 1.34 2015/01/16 18:08:15 millert Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * apprentice - make one pass through /etc/magic, learning its secrets. - */ - - -#include -#include - -#include "file.h" -#include "magic.h" -#include "patchlevel.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include -#include -#include -#include -#ifdef QUICK -#include -#endif -#include - -#define EATAB {while (isascii((unsigned char) *l) && \ - isspace((unsigned char) *l)) ++l;} -#define LOWCASE(l) (isupper((unsigned char) (l)) ? \ - tolower((unsigned char) (l)) : (l)) -/* - * Work around a bug in headers on Digital Unix. 
- * At least confirmed for: OSF1 V4.0 878 - */ -#if defined(__osf__) && defined(__DECC) -#ifdef MAP_FAILED -#undef MAP_FAILED -#endif -#endif - -#ifndef MAP_FAILED -#define MAP_FAILED (void *) -1 -#endif - -#ifndef MAP_FILE -#define MAP_FILE 0 -#endif - -struct magic_entry { - struct magic *mp; - uint32_t cont_count; - uint32_t max_count; -}; - -int file_formats[FILE_NAMES_SIZE]; -const size_t file_nformats = FILE_NAMES_SIZE; -const char *file_names[FILE_NAMES_SIZE]; -const size_t file_nnames = FILE_NAMES_SIZE; - -private int getvalue(struct magic_set *ms, struct magic *, const char **, int); -private int hextoint(int); -private const char *getstr(struct magic_set *, const char *, char *, int, - int *, int); -private int parse(struct magic_set *, struct magic_entry **, uint32_t *, - const char *, size_t, int); -private int parse_mime(struct magic_set *, struct magic_entry **, uint32_t *, - const char *); -private void eatsize(const char **); -private int apprentice_1(struct magic_set *, const char *, int, struct mlist *); -private size_t apprentice_magic_strength(const struct magic *); -private int apprentice_sort(const void *, const void *); -private int apprentice_load(struct magic_set *, struct magic **, uint32_t *, - const char *, int); -private void byteswap(struct magic *, uint32_t); -private void bs1(struct magic *); -private uint16_t swap2(uint16_t); -private uint32_t swap4(uint32_t); -private uint64_t swap8(uint64_t); -private void mkdbname(const char *, char **, int); -private int apprentice_map(struct magic_set *, struct magic **, uint32_t *, - const char *); -private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *, - const char *); -private int check_format_type(const char *, int); -private int check_format(struct magic_set *, struct magic *); -private int get_op(char); - -private size_t maxmagic = 0; -private size_t magicsize = sizeof(struct magic); - -private const char usg_hdr[] = 
"cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; -private const char mime_marker[] = "!:mime"; -private const size_t mime_marker_len = sizeof(mime_marker) - 1; - -#ifdef COMPILE_ONLY - -int main(int, char *[]); - -int -main(int argc, char *argv[]) -{ - int ret; - struct magic_set *ms; - char *progname; - - if ((progname = strrchr(argv[0], '/')) != NULL) - progname++; - else - progname = argv[0]; - - if (argc != 2) { - (void)fprintf(stderr, "Usage: %s file\n", progname); - return 1; - } - - if ((ms = magic_open(MAGIC_CHECK)) == NULL) { - (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno)); - return 1; - } - ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0; - if (ret == 1) - (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms)); - magic_close(ms); - return ret; -} -#endif /* COMPILE_ONLY */ - -static const struct type_tbl_s { - const char name[16]; - const size_t len; - const int type; - const int format; -} type_tbl[] = { -# define XX(s) s, (sizeof(s) - 1) -# define XX_NULL "", 0 - { XX("byte"), FILE_BYTE, FILE_FMT_NUM }, - { XX("short"), FILE_SHORT, FILE_FMT_NUM }, - { XX("default"), FILE_DEFAULT, FILE_FMT_STR }, - { XX("long"), FILE_LONG, FILE_FMT_NUM }, - { XX("string"), FILE_STRING, FILE_FMT_STR }, - { XX("date"), FILE_DATE, FILE_FMT_STR }, - { XX("beshort"), FILE_BESHORT, FILE_FMT_NUM }, - { XX("belong"), FILE_BELONG, FILE_FMT_NUM }, - { XX("bedate"), FILE_BEDATE, FILE_FMT_STR }, - { XX("leshort"), FILE_LESHORT, FILE_FMT_NUM }, - { XX("lelong"), FILE_LELONG, FILE_FMT_NUM }, - { XX("ledate"), FILE_LEDATE, FILE_FMT_STR }, - { XX("pstring"), FILE_PSTRING, FILE_FMT_STR }, - { XX("ldate"), FILE_LDATE, FILE_FMT_STR }, - { XX("beldate"), FILE_BELDATE, FILE_FMT_STR }, - { XX("leldate"), FILE_LELDATE, FILE_FMT_STR }, - { XX("regex"), FILE_REGEX, FILE_FMT_STR }, - { XX("bestring16"), FILE_BESTRING16, FILE_FMT_STR }, - { XX("lestring16"), FILE_LESTRING16, FILE_FMT_STR }, - { XX("search"), FILE_SEARCH, FILE_FMT_STR }, - { XX("medate"), FILE_MEDATE, 
FILE_FMT_STR }, - { XX("meldate"), FILE_MELDATE, FILE_FMT_STR }, - { XX("melong"), FILE_MELONG, FILE_FMT_NUM }, - { XX("quad"), FILE_QUAD, FILE_FMT_QUAD }, - { XX("lequad"), FILE_LEQUAD, FILE_FMT_QUAD }, - { XX("bequad"), FILE_BEQUAD, FILE_FMT_QUAD }, - { XX("qdate"), FILE_QDATE, FILE_FMT_STR }, - { XX("leqdate"), FILE_LEQDATE, FILE_FMT_STR }, - { XX("beqdate"), FILE_BEQDATE, FILE_FMT_STR }, - { XX("qldate"), FILE_QLDATE, FILE_FMT_STR }, - { XX("leqldate"), FILE_LEQLDATE, FILE_FMT_STR }, - { XX("beqldate"), FILE_BEQLDATE, FILE_FMT_STR }, - { XX("float"), FILE_FLOAT, FILE_FMT_FLOAT }, - { XX("befloat"), FILE_BEFLOAT, FILE_FMT_FLOAT }, - { XX("lefloat"), FILE_LEFLOAT, FILE_FMT_FLOAT }, - { XX("double"), FILE_DOUBLE, FILE_FMT_DOUBLE }, - { XX("bedouble"), FILE_BEDOUBLE, FILE_FMT_DOUBLE }, - { XX("ledouble"), FILE_LEDOUBLE, FILE_FMT_DOUBLE }, - { XX_NULL, FILE_INVALID, FILE_FMT_NONE }, -# undef XX -# undef XX_NULL -}; - -private int -get_type(const char *l, const char **t) -{ - const struct type_tbl_s *p; - - for (p = type_tbl; p->len; p++) { - if (strncmp(l, p->name, p->len) == 0) { - if (t) - *t = l + p->len; - break; - } - } - return p->type; -} - -private void -init_file_tables(void) -{ - static int done = 0; - const struct type_tbl_s *p; - - if (done) - return; - done++; - - for (p = type_tbl; p->len; p++) { - assert(p->type < FILE_NAMES_SIZE); - file_names[p->type] = p->name; - file_formats[p->type] = p->format; - } -} - -/* - * Handle one file or directory. 
- */ -private int -apprentice_1(struct magic_set *ms, const char *fn, int action, - struct mlist *mlist) -{ - struct magic *magic = NULL; - uint32_t nmagic = 0; - struct mlist *ml; - int rv = -1; - int mapped; - - if (magicsize != FILE_MAGICSIZE) { - file_error(ms, 0, "magic element size %lu != %lu", - (unsigned long)sizeof(*magic), - (unsigned long)FILE_MAGICSIZE); - return -1; - } - - if (action == FILE_COMPILE) { - rv = apprentice_load(ms, &magic, &nmagic, fn, action); - if (rv != 0) - return -1; - rv = apprentice_compile(ms, &magic, &nmagic, fn); - free(magic); - return rv; - } - -#ifndef COMPILE_ONLY - if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) { - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "using regular magic file `%s'", fn); - rv = apprentice_load(ms, &magic, &nmagic, fn, action); - if (rv != 0) - return -1; - } - - mapped = rv; - - if (magic == NULL) { - file_delmagic(magic, mapped, nmagic); - return -1; - } - - if ((ml = malloc(sizeof(*ml))) == NULL) { - file_delmagic(magic, mapped, nmagic); - file_oomem(ms, sizeof(*ml)); - return -1; - } - - ml->magic = magic; - ml->nmagic = nmagic; - ml->mapped = mapped; - - mlist->prev->next = ml; - ml->prev = mlist->prev; - ml->next = mlist; - mlist->prev = ml; - - return 0; -#endif /* COMPILE_ONLY */ -} - -protected void -file_delmagic(struct magic *p, int type, size_t entries) -{ - if (p == NULL) - return; - switch (type) { -#ifdef QUICK - case 2: - p--; - (void)munmap((void *)p, sizeof(*p) * (entries + 1)); - break; -#endif - case 1: - p--; - /*FALLTHROUGH*/ - case 0: - free(p); - break; - default: - abort(); - } -} - -/* const char *fn: list of magic files and directories */ -protected struct mlist * -file_apprentice(struct magic_set *ms, const char *fn, int action) -{ - char *p, *mfn; - int file_err, errs = -1; - struct mlist *mlist; - - init_file_tables(); - - if (fn == NULL) - fn = getenv("MAGIC"); - if (fn == NULL) - fn = MAGIC; - - if ((mfn = strdup(fn)) == NULL) { - file_oomem(ms, 
strlen(fn)); - return NULL; - } - fn = mfn; - - if ((mlist = malloc(sizeof(*mlist))) == NULL) { - free(mfn); - file_oomem(ms, sizeof(*mlist)); - return NULL; - } - mlist->next = mlist->prev = mlist; - - while (fn) { - p = strchr(fn, PATHSEP); - if (p) - *p++ = '\0'; - if (*fn == '\0') - break; - file_err = apprentice_1(ms, fn, action, mlist); - errs = MAX(errs, file_err); - fn = p; - } - if (errs == -1) { - free(mfn); - free(mlist); - mlist = NULL; - file_error(ms, 0, "could not find any magic files!"); - return NULL; - } - free(mfn); - return mlist; -} - -/* - * Get weight of this magic entry, for sorting purposes. - */ -private size_t -apprentice_magic_strength(const struct magic *m) -{ -#define MULT 10 - size_t val = 2 * MULT; /* baseline strength */ - - switch (m->type) { - case FILE_DEFAULT: /* make sure this sorts last */ - return 0; - - case FILE_BYTE: - val += 1 * MULT; - break; - - case FILE_SHORT: - case FILE_LESHORT: - case FILE_BESHORT: - val += 2 * MULT; - break; - - case FILE_LONG: - case FILE_LELONG: - case FILE_BELONG: - case FILE_MELONG: - val += 4 * MULT; - break; - - case FILE_PSTRING: - case FILE_STRING: - val += m->vallen * MULT; - break; - - case FILE_BESTRING16: - case FILE_LESTRING16: - val += m->vallen * MULT / 2; - break; - - case FILE_SEARCH: - case FILE_REGEX: - val += m->vallen * MAX(MULT / m->vallen, 1); - break; - - case FILE_DATE: - case FILE_LEDATE: - case FILE_BEDATE: - case FILE_MEDATE: - case FILE_LDATE: - case FILE_LELDATE: - case FILE_BELDATE: - case FILE_MELDATE: - case FILE_FLOAT: - case FILE_BEFLOAT: - case FILE_LEFLOAT: - val += 4 * MULT; - break; - - case FILE_QUAD: - case FILE_BEQUAD: - case FILE_LEQUAD: - case FILE_QDATE: - case FILE_LEQDATE: - case FILE_BEQDATE: - case FILE_QLDATE: - case FILE_LEQLDATE: - case FILE_BEQLDATE: - case FILE_DOUBLE: - case FILE_BEDOUBLE: - case FILE_LEDOUBLE: - val += 8 * MULT; - break; - - default: - val = 0; - (void)fprintf(stderr, "Bad type %d\n", m->type); - abort(); - } - - switch 
(m->reln) { - case 'x': /* matches anything penalize */ - case '!': /* matches almost anything penalize */ - val = 0; - break; - - case '=': /* Exact match, prefer */ - val += MULT; - break; - - case '>': - case '<': /* comparison match reduce strength */ - val -= 2 * MULT; - break; - - case '^': - case '&': /* masking bits, we could count them too */ - val -= MULT; - break; - - default: - (void)fprintf(stderr, "Bad relation %c\n", m->reln); - abort(); - } - - if (val == 0) /* ensure we only return 0 for FILE_DEFAULT */ - val = 1; - - return val; -} - -/* - * Sort callback for sorting entries by "strength" (basically length) - */ -private int -apprentice_sort(const void *a, const void *b) -{ - const struct magic_entry *ma = a; - const struct magic_entry *mb = b; - size_t sa = apprentice_magic_strength(ma->mp); - size_t sb = apprentice_magic_strength(mb->mp); - if (sa == sb) - return 0; - else if (sa > sb) - return -1; - else - return 1; -} - -private void -set_test_type(struct magic *mstart, struct magic *m) -{ - switch (m->type) { - case FILE_BYTE: - case FILE_SHORT: - case FILE_LONG: - case FILE_DATE: - case FILE_BESHORT: - case FILE_BELONG: - case FILE_BEDATE: - case FILE_LESHORT: - case FILE_LELONG: - case FILE_LEDATE: - case FILE_LDATE: - case FILE_BELDATE: - case FILE_LELDATE: - case FILE_MEDATE: - case FILE_MELDATE: - case FILE_MELONG: - case FILE_QUAD: - case FILE_LEQUAD: - case FILE_BEQUAD: - case FILE_QDATE: - case FILE_LEQDATE: - case FILE_BEQDATE: - case FILE_QLDATE: - case FILE_LEQLDATE: - case FILE_BEQLDATE: - case FILE_FLOAT: - case FILE_BEFLOAT: - case FILE_LEFLOAT: - case FILE_DOUBLE: - case FILE_BEDOUBLE: - case FILE_LEDOUBLE: - case FILE_STRING: - case FILE_PSTRING: - case FILE_BESTRING16: - case FILE_LESTRING16: - /* binary test, set flag */ - mstart->flag |= BINTEST; - break; - case FILE_REGEX: - case FILE_SEARCH: - /* binary test if pattern is not text */ - if (file_looks_utf8(m->value.s, m->vallen, NULL, NULL) == 0) - mstart->flag |= BINTEST; 
- break; - case FILE_DEFAULT: - /* can't deduce anything; we shouldn't see this at the - top level anyway */ - break; - case FILE_INVALID: - default: - /* invalid search type, but no need to complain here */ - break; - } -} - -/* - * Load and parse one file. - */ -private void -load_1(struct magic_set *ms, int action, const char *fn, int *errs, - struct magic_entry **marray, uint32_t *marraycount) -{ - char line[BUFSIZ]; - size_t lineno = 0; - FILE *f = fopen(ms->file = fn, "r"); - if (f == NULL) { - if (errno != ENOENT) - file_error(ms, errno, "cannot read magic file `%s'", - fn); - (*errs)++; - } else { - /* read and parse this file */ - for (ms->line = 1; fgets(line, sizeof(line), f) != NULL; ms->line++) { - size_t len; - len = strlen(line); - if (len == 0) /* null line, garbage, etc */ - continue; - if (line[len - 1] == '\n') { - lineno++; - line[len - 1] = '\0'; /* delete newline */ - } - if (line[0] == '\0') /* empty, do not parse */ - continue; - if (line[0] == '#') /* comment, do not parse */ - continue; - if (len > mime_marker_len && - memcmp(line, mime_marker, mime_marker_len) == 0) { - /* MIME type */ - if (parse_mime(ms, marray, marraycount, - line + mime_marker_len) != 0) - (*errs)++; - continue; - } - if (parse(ms, marray, marraycount, line, lineno, action) != 0) - (*errs)++; - } - - (void)fclose(f); - } -} - -/* - * parse a file or directory of files - * const char *fn: name of magic file or directory - */ -private int -apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, - const char *fn, int action) -{ - int errs = 0; - struct magic_entry *marray; - uint32_t marraycount, i, mentrycount = 0, starttest; - char subfn[PATH_MAX]; - struct stat st; - DIR *dir; - struct dirent *d; - - ms->flags |= MAGIC_CHECK; /* Enable checks for parsed files */ - - maxmagic = MAXMAGIS; - if ((marray = calloc(maxmagic, sizeof(*marray))) == NULL) { - file_oomem2(ms, maxmagic, sizeof(*marray)); - return -1; - } - marraycount = 0; - - /* print 
silly verbose header for USG compat. */ - if (action == FILE_CHECK) - (void)fprintf(stderr, "%s\n", usg_hdr); - - /* load directory or file */ - if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { - dir = opendir(fn); - if (dir) { - while ((d = readdir(dir)) != NULL) { - snprintf(subfn, sizeof(subfn), "%s/%s", - fn, d->d_name); - if (stat(subfn, &st) == 0 && S_ISREG(st.st_mode)) { - load_1(ms, action, subfn, &errs, - &marray, &marraycount); - } - } - closedir(dir); - } else - errs++; - } else - load_1(ms, action, fn, &errs, &marray, &marraycount); - if (errs) - goto out; - - /* Set types of tests */ - for (i = 0; i < marraycount; ) { - if (marray[i].mp->cont_level != 0) { - i++; - continue; - } - - starttest = i; - do { - set_test_type(marray[starttest].mp, marray[i].mp); - if (ms->flags & MAGIC_DEBUG) { - (void)fprintf(stderr, "%s%s%s: %s\n", - marray[i].mp->mimetype, - marray[i].mp->mimetype[0] == '\0' ? "" : "; ", - marray[i].mp->desc[0] ? marray[i].mp->desc : "(no description)", - marray[i].mp->flag & BINTEST ? "binary" : "text"); - if (marray[i].mp->flag & BINTEST) { -#define SYMBOL "text" -#define SYMLEN sizeof(SYMBOL) - char *p = strstr(marray[i].mp->desc, "text"); - if (p && (p == marray[i].mp->desc || isspace(p[-1])) && - (p + SYMLEN - marray[i].mp->desc == MAXstring || - (p[SYMLEN] == '\0' || isspace(p[SYMLEN])))) { - (void)fprintf(stderr, - "*** Possible binary test for text type\n"); - } -#undef SYMBOL -#undef SYMLEN - } - } - } while (++i < marraycount && marray[i].mp->cont_level != 0); - } - - qsort(marray, marraycount, sizeof(*marray), apprentice_sort); - - /* - * Make sure that any level 0 "default" line is last (if one exists). - */ - for (i = 0; i < marraycount; i++) { - if (marray[i].mp->cont_level == 0 && - marray[i].mp->type == FILE_DEFAULT) { - while (++i < marraycount) - if (marray[i].mp->cont_level == 0) - break; - if (i != marraycount) { - ms->line = marray[i].mp->lineno; /* XXX - Ugh! 
*/ - file_magwarn(ms, - "level 0 \"default\" did not sort last"); - } - break; - } - } - - for (i = 0; i < marraycount; i++) - mentrycount += marray[i].cont_count; - - if ((*magicp = reallocarray(NULL, mentrycount, sizeof(**magicp))) == NULL) { - file_oomem2(ms, mentrycount, sizeof(**magicp)); - errs++; - goto out; - } - - mentrycount = 0; - for (i = 0; i < marraycount; i++) { - (void)memcpy(*magicp + mentrycount, marray[i].mp, - marray[i].cont_count * sizeof(**magicp)); - mentrycount += marray[i].cont_count; - } -out: - for (i = 0; i < marraycount; i++) - free(marray[i].mp); - free(marray); - if (errs) { - *magicp = NULL; - *nmagicp = 0; - return errs; - } else { - *nmagicp = mentrycount; - return 0; - } - -} - -/* - * extend the sign bit if the comparison is to be signed - */ -protected uint64_t -file_signextend(struct magic_set *ms, struct magic *m, uint64_t v) -{ - if (!(m->flag & UNSIGNED)) { - switch(m->type) { - /* - * Do not remove the casts below. They are - * vital. When later compared with the data, - * the sign extension must have happened. 
- */ - case FILE_BYTE: - v = (char) v; - break; - case FILE_SHORT: - case FILE_BESHORT: - case FILE_LESHORT: - v = (short) v; - break; - case FILE_DATE: - case FILE_BEDATE: - case FILE_LEDATE: - case FILE_MEDATE: - case FILE_LDATE: - case FILE_BELDATE: - case FILE_LELDATE: - case FILE_MELDATE: - case FILE_LONG: - case FILE_BELONG: - case FILE_LELONG: - case FILE_MELONG: - case FILE_FLOAT: - case FILE_BEFLOAT: - case FILE_LEFLOAT: - v = (int32_t) v; - break; - case FILE_QUAD: - case FILE_BEQUAD: - case FILE_LEQUAD: - case FILE_QDATE: - case FILE_QLDATE: - case FILE_BEQDATE: - case FILE_BEQLDATE: - case FILE_LEQDATE: - case FILE_LEQLDATE: - case FILE_DOUBLE: - case FILE_BEDOUBLE: - case FILE_LEDOUBLE: - v = (int64_t) v; - break; - case FILE_STRING: - case FILE_PSTRING: - case FILE_BESTRING16: - case FILE_LESTRING16: - case FILE_REGEX: - case FILE_SEARCH: - case FILE_DEFAULT: - break; - default: - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "cannot happen: m->type=%d\n", - m->type); - return ~0U; - } - } - return v; -} - -private int -string_modifier_check(struct magic_set *ms, struct magic *m) -{ - if ((ms->flags & MAGIC_CHECK) == 0) - return 0; - - switch (m->type) { - case FILE_BESTRING16: - case FILE_LESTRING16: - if (m->str_flags != 0) { - file_magwarn(ms, - "no modifiers allowed for 16-bit strings\n"); - return -1; - } - break; - case FILE_STRING: - case FILE_PSTRING: - if ((m->str_flags & REGEX_OFFSET_START) != 0) { - file_magwarn(ms, - "'/%c' only allowed on regex and search\n", - CHAR_REGEX_OFFSET_START); - return -1; - } - break; - case FILE_SEARCH: - if (m->str_range == 0) { - file_magwarn(ms, - "missing range; defaulting to %d\n", - STRING_DEFAULT_RANGE); - m->str_range = STRING_DEFAULT_RANGE; - return -1; - } - break; - case FILE_REGEX: - if ((m->str_flags & STRING_COMPACT_BLANK) != 0) { - file_magwarn(ms, "'/%c' not allowed on regex\n", - CHAR_COMPACT_BLANK); - return -1; - } - if ((m->str_flags & STRING_COMPACT_OPTIONAL_BLANK) != 0) { - 
file_magwarn(ms, "'/%c' not allowed on regex\n", - CHAR_COMPACT_OPTIONAL_BLANK); - return -1; - } - break; - default: - file_magwarn(ms, "coding error: m->type=%d\n", - m->type); - return -1; - } - return 0; -} - -private int -get_op(char c) -{ - switch (c) { - case '&': - return FILE_OPAND; - case '|': - return FILE_OPOR; - case '^': - return FILE_OPXOR; - case '+': - return FILE_OPADD; - case '-': - return FILE_OPMINUS; - case '*': - return FILE_OPMULTIPLY; - case '/': - return FILE_OPDIVIDE; - case '%': - return FILE_OPMODULO; - default: - return -1; - } -} - -#ifdef ENABLE_CONDITIONALS -private int -get_cond(const char *l, const char **t) -{ - static const struct cond_tbl_s { - char name[8]; - size_t len; - int cond; - } cond_tbl[] = { - { "if", 2, COND_IF }, - { "elif", 4, COND_ELIF }, - { "else", 4, COND_ELSE }, - { "", 0, COND_NONE }, - }; - const struct cond_tbl_s *p; - - for (p = cond_tbl; p->len; p++) { - if (strncmp(l, p->name, p->len) == 0 && - isspace((unsigned char)l[p->len])) { - if (t) - *t = l + p->len; - break; - } - } - return p->cond; -} - -private int -check_cond(struct magic_set *ms, int cond, uint32_t cont_level) -{ - int last_cond; - last_cond = ms->c.li[cont_level].last_cond; - - switch (cond) { - case COND_IF: - if (last_cond != COND_NONE && last_cond != COND_ELIF) { - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "syntax error: `if'"); - return -1; - } - last_cond = COND_IF; - break; - - case COND_ELIF: - if (last_cond != COND_IF && last_cond != COND_ELIF) { - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "syntax error: `elif'"); - return -1; - } - last_cond = COND_ELIF; - break; - - case COND_ELSE: - if (last_cond != COND_IF && last_cond != COND_ELIF) { - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "syntax error: `else'"); - return -1; - } - last_cond = COND_NONE; - break; - - case COND_NONE: - last_cond = COND_NONE; - break; - } - - ms->c.li[cont_level].last_cond = last_cond; - return 0; -} -#endif /* ENABLE_CONDITIONALS */ - 
-/* - * parse one line from magic file, put into magic[index++] if valid - */ -private int -parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, - const char *line, size_t lineno, int action) -{ -#ifdef ENABLE_CONDITIONALS - static uint32_t last_cont_level = 0; -#endif - size_t i; - struct magic_entry *me; - struct magic *m; - const char *l = line; - char *t; - int op; - uint32_t cont_level; - - cont_level = 0; - - while (*l == '>') { - ++l; /* step over */ - cont_level++; - } -#ifdef ENABLE_CONDITIONALS - if (cont_level == 0 || cont_level > last_cont_level) - if (file_check_mem(ms, cont_level) == -1) - return -1; - last_cont_level = cont_level; -#endif - -#define ALLOC_CHUNK (size_t)10 -#define ALLOC_INCR (size_t)200 - - if (cont_level != 0) { - if (*nmentryp == 0) { - file_error(ms, 0, "No current entry for continuation"); - return -1; - } - me = &(*mentryp)[*nmentryp - 1]; - if (me->cont_count == me->max_count) { - struct magic *nm; - size_t cnt = me->max_count + ALLOC_CHUNK; - if ((nm = reallocarray(me->mp, cnt, sizeof(*nm))) - == NULL) { - file_oomem2(ms, cnt, sizeof(*nm)); - return -1; - } - me->mp = m = nm; - me->max_count = cnt; - } - m = &me->mp[me->cont_count++]; - (void)memset(m, 0, sizeof(*m)); - m->cont_level = cont_level; - } else { - if (*nmentryp == maxmagic) { - struct magic_entry *mp; - - maxmagic += ALLOC_INCR; - if ((mp = reallocarray(*mentryp, maxmagic, - sizeof(*mp))) == NULL) { - file_oomem2(ms, maxmagic, sizeof(*mp)); - return -1; - } - (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * - ALLOC_INCR); - *mentryp = mp; - } - me = &(*mentryp)[*nmentryp]; - if (me->mp == NULL) { - if ((m = reallocarray(NULL, ALLOC_CHUNK, sizeof(*m))) == NULL) { - file_oomem2(ms, ALLOC_CHUNK, sizeof(*m)); - return -1; - } - me->mp = m; - me->max_count = ALLOC_CHUNK; - } else - m = me->mp; - (void)memset(m, 0, sizeof(*m)); - m->cont_level = 0; - me->cont_count = 1; - } - m->lineno = lineno; - - if (*l == '&') { /* m->cont_level == 0 checked 
below. */ - ++l; /* step over */ - m->flag |= OFFADD; - } - if (*l == '(') { - ++l; /* step over */ - m->flag |= INDIR; - if (m->flag & OFFADD) - m->flag = (m->flag & ~OFFADD) | INDIROFFADD; - - if (*l == '&') { /* m->cont_level == 0 checked below */ - ++l; /* step over */ - m->flag |= OFFADD; - } - } - /* Indirect offsets are not valid at level 0. */ - if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "relative offset at level 0"); - - /* get offset, then skip over it */ - m->offset = (uint32_t)strtoul(l, &t, 0); - if (l == t) - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "offset `%s' invalid", l); - l = t; - - if (m->flag & INDIR) { - m->in_type = FILE_LONG; - m->in_offset = 0; - /* - * read [.lbs][+-]nnnnn) - */ - if (*l == '.') { - l++; - switch (*l) { - case 'l': - m->in_type = FILE_LELONG; - break; - case 'L': - m->in_type = FILE_BELONG; - break; - case 'm': - m->in_type = FILE_MELONG; - break; - case 'h': - case 's': - m->in_type = FILE_LESHORT; - break; - case 'H': - case 'S': - m->in_type = FILE_BESHORT; - break; - case 'c': - case 'b': - case 'C': - case 'B': - m->in_type = FILE_BYTE; - break; - case 'e': - case 'f': - case 'g': - m->in_type = FILE_LEDOUBLE; - break; - case 'E': - case 'F': - case 'G': - m->in_type = FILE_BEDOUBLE; - break; - default: - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, - "indirect offset type `%c' invalid", - *l); - break; - } - l++; - } - - m->in_op = 0; - if (*l == '~') { - m->in_op |= FILE_OPINVERSE; - l++; - } - if ((op = get_op(*l)) != -1) { - m->in_op |= op; - l++; - } - if (*l == '(') { - m->in_op |= FILE_OPINDIRECT; - l++; - } - if (isdigit((unsigned char)*l) || *l == '-') { - m->in_offset = (int32_t)strtol(l, &t, 0); - if (l == t) - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, - "in_offset `%s' invalid", l); - l = t; - } - if (*l++ != ')' || - ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, - 
"missing ')' in indirect offset"); - } - EATAB; - -#ifdef ENABLE_CONDITIONALS - m->cond = get_cond(l, &l); - if (check_cond(ms, m->cond, cont_level) == -1) - return -1; - - EATAB; -#endif - - if (*l == 'u') { - ++l; - m->flag |= UNSIGNED; - } - - m->type = get_type(l, &l); - if (m->type == FILE_INVALID) { - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "type `%s' invalid", l); - return -1; - } - - /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ - /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ - - m->mask_op = 0; - if (*l == '~') { - if (!IS_STRING(m->type)) - m->mask_op |= FILE_OPINVERSE; - else if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "'~' invalid for string types"); - ++l; - } - m->str_range = 0; - m->str_flags = 0; - m->num_mask = 0; - if ((op = get_op(*l)) != -1) { - if (!IS_STRING(m->type)) { - uint64_t val; - ++l; - m->mask_op |= op; - val = (uint64_t)strtoull(l, &t, 0); - l = t; - m->num_mask = file_signextend(ms, m, val); - eatsize(&l); - } - else if (op == FILE_OPDIVIDE) { - int have_range = 0; - while (!isspace((unsigned char)*++l)) { - switch (*l) { - case '0': case '1': case '2': - case '3': case '4': case '5': - case '6': case '7': case '8': - case '9': - if (have_range && - (ms->flags & MAGIC_CHECK)) - file_magwarn(ms, - "multiple ranges"); - have_range = 1; - m->str_range = strtoul(l, &t, 0); - if (m->str_range == 0) - file_magwarn(ms, - "zero range"); - l = t - 1; - break; - case CHAR_COMPACT_BLANK: - m->str_flags |= STRING_COMPACT_BLANK; - break; - case CHAR_COMPACT_OPTIONAL_BLANK: - m->str_flags |= - STRING_COMPACT_OPTIONAL_BLANK; - break; - case CHAR_IGNORE_LOWERCASE: - m->str_flags |= STRING_IGNORE_LOWERCASE; - break; - case CHAR_IGNORE_UPPERCASE: - m->str_flags |= STRING_IGNORE_UPPERCASE; - break; - case CHAR_REGEX_OFFSET_START: - m->str_flags |= REGEX_OFFSET_START; - break; - default: - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, - "string extension `%c' invalid", - *l); - return -1; - } - /* allow 
multiple '/' for readability */ - if (l[1] == '/' && - !isspace((unsigned char)l[2])) - l++; - } - if (string_modifier_check(ms, m) == -1) - return -1; - } - else { - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "invalid string op: %c", *t); - return -1; - } - } - /* - * We used to set mask to all 1's here, instead let's just not do - * anything if mask = 0 (unless you have a better idea) - */ - EATAB; - - switch (*l) { - case '>': - case '<': - /* Old-style anding: "0 byte &0x80 dynamically linked" */ - case '&': - case '^': - case '=': - m->reln = *l; - ++l; - if (*l == '=') { - /* HP compat: ignore &= etc. */ - ++l; - } - break; - case '!': - m->reln = *l; - ++l; - break; - default: - m->reln = '='; /* the default relation */ - if (*l == 'x' && ((isascii((unsigned char)l[1]) && - isspace((unsigned char)l[1])) || !l[1])) { - m->reln = *l; - ++l; - } - break; - } - /* - * Grab the value part, except for an 'x' reln. - */ - if (m->reln != 'x' && getvalue(ms, m, &l, action)) - return -1; - - /* - * TODO finish this macro and start using it! - * #define offsetcheck {if (offset > HOWMANY-1) - * magwarn("offset too big"); } - */ - - /* - * Now get last part - the description - */ - EATAB; - if (l[0] == '\b') { - ++l; - m->flag |= NOSPACE; - } else if ((l[0] == '\\') && (l[1] == 'b')) { - ++l; - ++l; - m->flag |= NOSPACE; - } - for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); ) - continue; - if (i == sizeof(m->desc)) { - m->desc[sizeof(m->desc) - 1] = '\0'; - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "description `%s' truncated", m->desc); - } - - /* - * We only do this check while compiling, or if any of the magic - * files were not compiled. 
- */ - if (ms->flags & MAGIC_CHECK) { - if (check_format(ms, m) == -1) - return -1; - } -#ifndef COMPILE_ONLY - if (action == FILE_CHECK) { - file_mdump(m); - } -#endif - m->mimetype[0] = '\0'; /* initialise MIME type to none */ - if (m->cont_level == 0) - ++(*nmentryp); /* make room for next */ - return 0; -} - -/* - * parse a MIME annotation line from magic file, put into magic[index - 1] - * if valid - */ -private int -parse_mime(struct magic_set *ms, struct magic_entry **mentryp, - uint32_t *nmentryp, const char *line) -{ - size_t i; - const char *l = line; - struct magic *m; - struct magic_entry *me; - - if (*nmentryp == 0) { - file_error(ms, 0, "No current entry for MIME type"); - return -1; - } - - me = &(*mentryp)[*nmentryp - 1]; - m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; - - if (m->mimetype[0] != '\0') { - file_error(ms, 0, "Current entry already has a MIME type: %s\n" - "Description: %s\nNew type: %s", m->mimetype, m->desc, l); - return -1; - } - - EATAB; - for (i = 0; - *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l)) - || strchr("-+/.", *l)) && i < sizeof(m->mimetype); - m->mimetype[i++] = *l++) - continue; - if (i == sizeof(m->mimetype)) { - m->desc[sizeof(m->mimetype) - 1] = '\0'; - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "MIME type `%s' truncated %zu", - m->mimetype, i); - } else - m->mimetype[i] = '\0'; - - if (i > 0) - return 0; - else - return -1; -} - -private int -check_format_type(const char *ptr, int type) -{ - int quad = 0; - if (*ptr == '\0') { - /* Missing format string; bad */ - return -1; - } - - switch (type) { - case FILE_FMT_QUAD: - quad = 1; - /*FALLTHROUGH*/ - case FILE_FMT_NUM: - if (*ptr == '-') - ptr++; - if (*ptr == '.') - ptr++; - while (isdigit((unsigned char)*ptr)) ptr++; - if (*ptr == '.') - ptr++; - while (isdigit((unsigned char)*ptr)) ptr++; - if (quad) { - if (*ptr++ != 'l') - return -1; - if (*ptr++ != 'l') - return -1; - } - - switch (*ptr++) { - case 'l': - switch (*ptr++) { - 
case 'i': - case 'd': - case 'u': - case 'x': - case 'X': - return 0; - default: - return -1; - } - - case 'h': - switch (*ptr++) { - case 'h': - switch (*ptr++) { - case 'i': - case 'd': - case 'u': - case 'x': - case 'X': - return 0; - default: - return -1; - } - case 'd': - return 0; - default: - return -1; - } - - case 'i': - case 'c': - case 'd': - case 'u': - case 'x': - case 'X': - return 0; - - default: - return -1; - } - - case FILE_FMT_FLOAT: - case FILE_FMT_DOUBLE: - if (*ptr == '-') - ptr++; - if (*ptr == '.') - ptr++; - while (isdigit((unsigned char)*ptr)) ptr++; - if (*ptr == '.') - ptr++; - while (isdigit((unsigned char)*ptr)) ptr++; - - switch (*ptr++) { - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - return 0; - - default: - return -1; - } - - - case FILE_FMT_STR: - if (*ptr == '-') - ptr++; - while (isdigit((unsigned char )*ptr)) - ptr++; - if (*ptr == '.') { - ptr++; - while (isdigit((unsigned char )*ptr)) - ptr++; - } - - switch (*ptr++) { - case 's': - return 0; - default: - return -1; - } - - default: - /* internal error */ - abort(); - } - /*NOTREACHED*/ - return -1; -} - -/* - * Check that the optional printf format in description matches - * the type of the magic. 
- */ -private int -check_format(struct magic_set *ms, struct magic *m) -{ - char *ptr; - - for (ptr = m->desc; *ptr; ptr++) - if (*ptr == '%') - break; - if (*ptr == '\0') { - /* No format string; ok */ - return 1; - } - - assert(file_nformats == file_nnames); - - if (m->type >= file_nformats) { - file_magwarn(ms, "Internal error inconsistency between " - "m->type and format strings"); - return -1; - } - if (file_formats[m->type] == FILE_FMT_NONE) { - file_magwarn(ms, "No format string for `%s' with description " - "`%s'", m->desc, file_names[m->type]); - return -1; - } - - ptr++; - if (check_format_type(ptr, file_formats[m->type]) == -1) { - /* - * TODO: this error message is unhelpful if the format - * string is not one character long - */ - file_magwarn(ms, "Printf format `%c' is not valid for type " - "`%s' in description `%s'", - ptr && *ptr ? *ptr : '?', - file_names[m->type], m->desc); - return -1; - } - - for (; *ptr; ptr++) { - if (*ptr == '%') { - file_magwarn(ms, - "Too many format strings (should have at most one) " - "for `%s' with description `%s'", - file_names[m->type], m->desc); - return -1; - } - } - return 0; -} - -/* - * Read a numeric value from a pointer, into the value union of a magic - * pointer, according to the magic type. Update the string pointer to point - * just after the number read. Return 0 for success, non-zero for failure. 
- */ -private int -getvalue(struct magic_set *ms, struct magic *m, const char **p, int action) -{ - int slen; - - switch (m->type) { - case FILE_BESTRING16: - case FILE_LESTRING16: - case FILE_STRING: - case FILE_PSTRING: - case FILE_REGEX: - case FILE_SEARCH: - *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen, action); - if (*p == NULL) { - if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "cannot get string from `%s'", - m->value.s); - return -1; - } - m->vallen = slen; - if (m->type == FILE_PSTRING) - m->vallen++; - return 0; - case FILE_FLOAT: - case FILE_BEFLOAT: - case FILE_LEFLOAT: - if (m->reln != 'x') { - char *ep; -#ifdef HAVE_STRTOF - m->value.f = strtof(*p, &ep); -#else - m->value.f = (float)strtod(*p, &ep); -#endif - *p = ep; - } - return 0; - case FILE_DOUBLE: - case FILE_BEDOUBLE: - case FILE_LEDOUBLE: - if (m->reln != 'x') { - char *ep; - m->value.d = strtod(*p, &ep); - *p = ep; - } - return 0; - default: - if (m->reln != 'x') { - char *ep; - m->value.q = file_signextend(ms, m, - (uint64_t)strtoull(*p, &ep, 0)); - *p = ep; - eatsize(p); - } - return 0; - } -} - -/* - * Convert a string containing C character escapes. Stop at an unescaped - * space or tab. - * Copy the converted version to "p", returning its length in *slen. - * Return updated scan pointer as function result. 
- */ -private const char * -getstr(struct magic_set *ms, const char *s, char *p, int plen, int *slen, int action) -{ - const char *origs = s; - char *origp = p; - char *pmax = p + plen - 1; - int c; - int val; - - while ((c = *s++) != '\0') { - if (isspace((unsigned char) c)) - break; - if (p >= pmax) { - file_error(ms, 0, "string too long: `%s'", origs); - return NULL; - } - if (c == '\\') { - switch(c = *s++) { - - case '\0': - if (action == FILE_COMPILE) - file_magwarn(ms, "incomplete escape"); - goto out; - - case '\t': - if (action == FILE_COMPILE) { - file_magwarn(ms, - "escaped tab found, use \\t instead"); - action++; - } - /*FALLTHROUGH*/ - default: - if (action == FILE_COMPILE) { - if (isprint((unsigned char)c)) - file_magwarn(ms, - "no need to escape `%c'", c); - else - file_magwarn(ms, - "unknown escape sequence: \\%03o", c); - } - /*FALLTHROUGH*/ - /* space, perhaps force people to use \040? */ - case ' ': -#if 0 - /* - * Other things people escape, but shouldn't need to, - * so we disallow them - */ - case '\'': - case '"': - case '?': -#endif - /* Relations */ - case '>': - case '<': - case '&': - case '^': - case '=': - case '!': - /* and baskslash itself */ - case '\\': - *p++ = (char) c; - break; - - case 'a': - *p++ = '\a'; - break; - - case 'b': - *p++ = '\b'; - break; - - case 'f': - *p++ = '\f'; - break; - - case 'n': - *p++ = '\n'; - break; - - case 'r': - *p++ = '\r'; - break; - - case 't': - *p++ = '\t'; - break; - - case 'v': - *p++ = '\v'; - break; - - /* \ and up to 3 octal digits */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - val = c - '0'; - c = *s++; /* try for 2 */ - if (c >= '0' && c <= '7') { - val = (val << 3) | (c - '0'); - c = *s++; /* try for 3 */ - if (c >= '0' && c <= '7') - val = (val << 3) | (c-'0'); - else - --s; - } - else - --s; - *p++ = (char)val; - break; - - /* \x and up to 2 hex digits */ - case 'x': - val = 'x'; /* Default if no digits */ - c = hextoint(*s++); /* 
Get next char */ - if (c >= 0) { - val = c; - c = hextoint(*s++); - if (c >= 0) - val = (val << 4) + c; - else - --s; - } else - --s; - *p++ = (char)val; - break; - } - } else - *p++ = (char)c; - } -out: - *p = '\0'; - *slen = p - origp; - return s; -} - - -/* Single hex char to int; -1 if not a hex char. */ -private int -hextoint(int c) -{ - if (!isascii((unsigned char) c)) - return -1; - if (isdigit((unsigned char) c)) - return c - '0'; - if ((c >= 'a') && (c <= 'f')) - return c + 10 - 'a'; - if (( c>= 'A') && (c <= 'F')) - return c + 10 - 'A'; - return -1; -} - - -/* - * Print a string containing C character escapes. - */ -protected void -file_showstr(FILE *fp, const char *s, size_t len) -{ - char c; - - for (;;) { - c = *s++; - if (len == ~0U) { - if (c == '\0') - break; - } - else { - if (len-- == 0) - break; - } - if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ - (void) fputc(c, fp); - else { - (void) fputc('\\', fp); - switch (c) { - case '\a': - (void) fputc('a', fp); - break; - - case '\b': - (void) fputc('b', fp); - break; - - case '\f': - (void) fputc('f', fp); - break; - - case '\n': - (void) fputc('n', fp); - break; - - case '\r': - (void) fputc('r', fp); - break; - - case '\t': - (void) fputc('t', fp); - break; - - case '\v': - (void) fputc('v', fp); - break; - - default: - (void) fprintf(fp, "%.3o", c & 0377); - break; - } - } - } -} - -/* - * eatsize(): Eat the size spec from a number [eg. 10UL] - */ -private void -eatsize(const char **p) -{ - const char *l = *p; - - if (LOWCASE(*l) == 'u') - l++; - - switch (LOWCASE(*l)) { - case 'l': /* long */ - case 's': /* short */ - case 'h': /* short */ - case 'b': /* char/byte */ - case 'c': /* char/byte */ - l++; - /*FALLTHROUGH*/ - default: - break; - } - - *p = l; -} - -/* - * handle a compiled file. 
- */ -private int -apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, - const char *fn) -{ - int fd; - struct stat st; - uint32_t *ptr; - uint32_t version; - int needsbyteswap; - char *dbname = NULL; - void *mm = NULL; - - mkdbname(fn, &dbname, 0); - if (dbname == NULL) - goto error2; - - if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) - goto error2; - - if (fstat(fd, &st) == -1) { - file_error(ms, errno, "cannot stat `%s'", dbname); - goto error1; - } - if (st.st_size < 8 || st.st_size > SIZE_MAX) { - file_error(ms, 0, "file `%s' is too %s", dbname, - st.st_size > SIZE_MAX ? "large" : "small"); - goto error1; - } - -#ifdef QUICK - if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { - file_error(ms, errno, "cannot map `%s'", dbname); - goto error1; - } -#define RET 2 -#else - if ((mm = malloc((size_t)st.st_size)) == NULL) { - file_oomem(ms, (size_t)st.st_size); - goto error1; - } - if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) { - file_badread(ms); - goto error1; - } -#define RET 1 -#endif - *magicp = mm; - (void)close(fd); - fd = -1; - ptr = (uint32_t *)(void *)*magicp; - if (*ptr != MAGICNO) { - if (swap4(*ptr) != MAGICNO) { - file_error(ms, 0, "bad magic in `%s'"); - goto error1; - } - needsbyteswap = 1; - } else - needsbyteswap = 0; - if (needsbyteswap) - version = swap4(ptr[1]); - else - version = ptr[1]; - if (version != VERSIONNO) { - file_error(ms, 0, "File %d.%d supports only %d version magic " - "files. 
`%s' is version %d", FILE_VERSION_MAJOR, patchlevel, - VERSIONNO, dbname, version); - goto error1; - } - *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic)); - if (*nmagicp > 0) - (*nmagicp)--; - (*magicp)++; - if (needsbyteswap) - byteswap(*magicp, *nmagicp); - free(dbname); - return RET; - -error1: - if (fd != -1) - (void)close(fd); - if (mm) { -#ifdef QUICK - (void)munmap((void *)mm, (size_t)st.st_size); -#else - free(mm); -#endif - } else { - *magicp = NULL; - *nmagicp = 0; - } -error2: - free(dbname); - return -1; -} - -private const uint32_t ar[] = { - MAGICNO, VERSIONNO -}; -/* - * handle an mmaped file. - */ -private int -apprentice_compile(struct magic_set *ms, struct magic **magicp, - uint32_t *nmagicp, const char *fn) -{ - int fd; - char *dbname; - int rv = -1; - - mkdbname(fn, &dbname, 1); - - if (dbname == NULL) - goto out; - - if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) { - file_error(ms, errno, "cannot open `%s'", dbname); - goto out; - } - - if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) { - file_error(ms, errno, "error writing `%s'", dbname); - goto out; - } - - if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET) - != sizeof(struct magic)) { - file_error(ms, errno, "error seeking `%s'", dbname); - goto out; - } - - if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp)) - != (ssize_t)(sizeof(struct magic) * *nmagicp)) { - file_error(ms, errno, "error writing `%s'", dbname); - goto out; - } - - (void)close(fd); - rv = 0; -out: - free(dbname); - return rv; -} - -private const char ext[] = ".mgc"; -/* - * make a dbname - */ -private void -mkdbname(const char *fn, char **buf, int strip) -{ - if (strip) { - const char *p; - if ((p = strrchr(fn, '/')) != NULL) - fn = ++p; - } - - (void)asprintf(buf, "%s%s", fn, ext); - if (*buf && strlen(*buf) > PATH_MAX) { - free(*buf); - *buf = NULL; - } -} - -/* - * Byteswap an mmap'ed file if needed - */ -private void -byteswap(struct magic *magic, uint32_t nmagic) -{ - 
uint32_t i; - for (i = 0; i < nmagic; i++) - bs1(&magic[i]); -} - -/* - * swap a short - */ -private uint16_t -swap2(uint16_t sv) -{ - uint16_t rv; - uint8_t *s = (uint8_t *)(void *)&sv; - uint8_t *d = (uint8_t *)(void *)&rv; - d[0] = s[1]; - d[1] = s[0]; - return rv; -} - -/* - * swap an int - */ -private uint32_t -swap4(uint32_t sv) -{ - uint32_t rv; - uint8_t *s = (uint8_t *)(void *)&sv; - uint8_t *d = (uint8_t *)(void *)&rv; - d[0] = s[3]; - d[1] = s[2]; - d[2] = s[1]; - d[3] = s[0]; - return rv; -} - -/* - * swap a quad - */ -private uint64_t -swap8(uint64_t sv) -{ - uint64_t rv; - uint8_t *s = (uint8_t *)(void *)&sv; - uint8_t *d = (uint8_t *)(void *)&rv; -#if 0 - d[0] = s[3]; - d[1] = s[2]; - d[2] = s[1]; - d[3] = s[0]; - d[4] = s[7]; - d[5] = s[6]; - d[6] = s[5]; - d[7] = s[4]; -#else - d[0] = s[7]; - d[1] = s[6]; - d[2] = s[5]; - d[3] = s[4]; - d[4] = s[3]; - d[5] = s[2]; - d[6] = s[1]; - d[7] = s[0]; -#endif - return rv; -} - -/* - * byteswap a single magic entry - */ -private void -bs1(struct magic *m) -{ - m->cont_level = swap2(m->cont_level); - m->offset = swap4((uint32_t)m->offset); - m->in_offset = swap4((uint32_t)m->in_offset); - m->lineno = swap4((uint32_t)m->lineno); - if (IS_STRING(m->type)) { - m->str_range = swap4(m->str_range); - m->str_flags = swap4(m->str_flags); - } - else { - m->value.q = swap8(m->value.q); - m->num_mask = swap8(m->num_mask); - } -} diff --git a/usr.bin/file/ascmagic.c b/usr.bin/file/ascmagic.c deleted file mode 100644 index a5d09dc4931..00000000000 --- a/usr.bin/file/ascmagic.c +++ /dev/null @@ -1,789 +0,0 @@ -/* $OpenBSD: ascmagic.c,v 1.12 2014/05/18 17:50:11 espie Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * ASCII magic -- file types that we know based on keywords - * that can appear anywhere in the file. - * - * Extensively modified by Eric Fischer in July, 2000, - * to handle character codes other than ASCII on a unified basis. - * - * Joerg Wunsch wrote the original support for 8-bit - * international characters, now subsumed into this file. 
- */ - -#include "file.h" -#include "magic.h" -#include -#include -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include "names.h" - -#define MAXLINELEN 300 /* longest sane line length */ -#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \ - || (x) == 0x85 || (x) == '\f') - -private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *); -private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *, - size_t *); -protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *); -private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *); -private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *); -private int looks_extended(const unsigned char *, size_t, unichar *, size_t *); -private void from_ebcdic(const unsigned char *, size_t, unsigned char *); -private int ascmatch(const unsigned char *, const unichar *, size_t); -private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t); - - -protected int -file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) -{ - size_t i; - unsigned char *nbuf = NULL, *utf8_buf = NULL, *utf8_end; - unichar *ubuf = NULL; - size_t ulen, mlen; - const struct names *p; - int rv = -1; - int mime = ms->flags & MAGIC_MIME; - - const char *code = NULL; - const char *code_mime = NULL; - const char *type = NULL; - const char *subtype = NULL; - const char *subtype_mime = NULL; - - int has_escapes = 0; - int has_backspace = 0; - int seen_cr = 0; - - int n_crlf = 0; - int n_lf = 0; - int n_cr = 0; - int n_nel = 0; - - size_t last_line_end = (size_t)-1; - int has_long_lines = 0; - - /* - * Undo the NUL-termination kindly provided by process() - * but leave at least one byte to look at - */ - while (nbytes > 1 && buf[nbytes - 1] == '\0') - nbytes--; - - if ((nbuf = calloc((nbytes + 1), sizeof(nbuf[0]))) == NULL) - goto done; - if ((ubuf = calloc((nbytes + 1), sizeof(ubuf[0]))) == 
NULL) - goto done; - - /* - * Then try to determine whether it's any character code we can - * identify. Each of these tests, if it succeeds, will leave - * the text converted into one-unichar-per-character Unicode in - * ubuf, and the number of characters converted in ulen. - */ - if (looks_ascii(buf, nbytes, ubuf, &ulen)) { - code = "ASCII"; - code_mime = "us-ascii"; - type = "text"; - } else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) { - code = "UTF-8 Unicode (with BOM)"; - code_mime = "utf-8"; - type = "text"; - } else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) { - code = "UTF-8 Unicode"; - code_mime = "utf-8"; - type = "text"; - } else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) { - if (i == 1) - code = "Little-endian UTF-16 Unicode"; - else - code = "Big-endian UTF-16 Unicode"; - - type = "character data"; - code_mime = "utf-16"; /* is this defined? */ - } else if (looks_latin1(buf, nbytes, ubuf, &ulen)) { - code = "ISO-8859"; - type = "text"; - code_mime = "iso-8859-1"; - } else if (looks_extended(buf, nbytes, ubuf, &ulen)) { - code = "Non-ISO extended-ASCII"; - type = "text"; - code_mime = "unknown"; - } else { - from_ebcdic(buf, nbytes, nbuf); - - if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) { - code = "EBCDIC"; - type = "character data"; - code_mime = "ebcdic"; - } else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) { - code = "International EBCDIC"; - type = "character data"; - code_mime = "ebcdic"; - } else { - rv = 0; - goto done; /* doesn't look like text at all */ - } - } - - if (nbytes <= 1) { - rv = 0; - goto done; - } - - /* Convert ubuf to UTF-8 and try text soft magic */ - /* If original was ASCII or UTF-8, could use nbuf instead of - re-converting. */ - /* malloc size is a conservative overestimate; could be - re-converting improved, or at least realloced after - re-converting conversion. 
*/ - mlen = ulen * 6; - if ((utf8_buf = malloc(mlen)) == NULL) { - file_oomem(ms, mlen); - goto done; - } - if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL) - goto done; - if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) { - rv = 1; - goto done; - } - - /* look for tokens from names.h - this is expensive! */ - if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0) - goto subtype_identified; - - i = 0; - while (i < ulen) { - size_t end; - - /* skip past any leading space */ - while (i < ulen && ISSPC(ubuf[i])) - i++; - if (i >= ulen) - break; - - /* find the next whitespace */ - for (end = i + 1; end < nbytes; end++) - if (ISSPC(ubuf[end])) - break; - - /* compare the word thus isolated against the token list */ - for (p = names; p < names + NNAMES; p++) { - if (ascmatch((const unsigned char *)p->name, ubuf + i, - end - i)) { - subtype = types[p->type].human; - subtype_mime = types[p->type].mime; - goto subtype_identified; - } - } - - i = end; - } - -subtype_identified: - - /* Now try to discover other details about the file. */ - for (i = 0; i < ulen; i++) { - if (ubuf[i] == '\n') { - if (seen_cr) - n_crlf++; - else - n_lf++; - last_line_end = i; - } else if (seen_cr) - n_cr++; - - seen_cr = (ubuf[i] == '\r'); - if (seen_cr) - last_line_end = i; - - if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */ - n_nel++; - last_line_end = i; - } - - /* If this line is _longer_ than MAXLINELEN, remember it. */ - if (i > last_line_end + MAXLINELEN) - has_long_lines = 1; - - if (ubuf[i] == '\033') - has_escapes = 1; - if (ubuf[i] == '\b') - has_backspace = 1; - } - - /* Beware, if the data has been truncated, the final CR could have - been followed by a LF. If we have HOWMANY bytes, it indicates - that the data might have been truncated, probably even before - this function was called. 
*/ - if (seen_cr && nbytes < HOWMANY) - n_cr++; - - if (mime) { - if (mime & MAGIC_MIME_TYPE) { - if (subtype_mime) { - if (file_printf(ms, subtype_mime) == -1) - goto done; - } else { - if (file_printf(ms, "text/plain") == -1) - goto done; - } - } - - if ((mime == 0 || mime == MAGIC_MIME) && code_mime) { - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, " charset=") == -1) - goto done; - if (file_printf(ms, code_mime) == -1) - goto done; - } - - if (mime == MAGIC_MIME_ENCODING) - file_printf(ms, "binary"); - } else { - if (file_printf(ms, code) == -1) - goto done; - - if (subtype) { - if (file_printf(ms, " ") == -1) - goto done; - if (file_printf(ms, subtype) == -1) - goto done; - } - - if (file_printf(ms, " ") == -1) - goto done; - if (file_printf(ms, type) == -1) - goto done; - - if (has_long_lines) - if (file_printf(ms, ", with very long lines") == -1) - goto done; - - /* - * Only report line terminators if we find one other than LF, - * or if we find none at all. - */ - if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) || - (n_crlf != 0 || n_cr != 0 || n_nel != 0)) { - if (file_printf(ms, ", with") == -1) - goto done; - - if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) { - if (file_printf(ms, " no") == -1) - goto done; - } else { - if (n_crlf) { - if (file_printf(ms, " CRLF") == -1) - goto done; - if (n_cr || n_lf || n_nel) - if (file_printf(ms, ",") == -1) - goto done; - } - if (n_cr) { - if (file_printf(ms, " CR") == -1) - goto done; - if (n_lf || n_nel) - if (file_printf(ms, ",") == -1) - goto done; - } - if (n_lf) { - if (file_printf(ms, " LF") == -1) - goto done; - if (n_nel) - if (file_printf(ms, ",") == -1) - goto done; - } - if (n_nel) - if (file_printf(ms, " NEL") == -1) - goto done; - } - - if (file_printf(ms, " line terminators") == -1) - goto done; - } - - if (has_escapes) - if (file_printf(ms, ", with escape sequences") == -1) - goto done; - if (has_backspace) - if (file_printf(ms, ", with overstriking") == -1) - goto done; - } 
- rv = 1; -done: - if (nbuf) - free(nbuf); - if (ubuf) - free(ubuf); - if (utf8_buf) - free(utf8_buf); - - return rv; -} - -private int -ascmatch(const unsigned char *s, const unichar *us, size_t ulen) -{ - size_t i; - - for (i = 0; i < ulen; i++) { - if (s[i] != us[i]) - return 0; - } - - if (s[i]) - return 0; - else - return 1; -} - -/* - * This table reflects a particular philosophy about what constitutes - * "text," and there is room for disagreement about it. - * - * Version 3.31 of the file command considered a file to be ASCII if - * each of its characters was approved by either the isascii() or - * isalpha() function. On most systems, this would mean that any - * file consisting only of characters in the range 0x00 ... 0x7F - * would be called ASCII text, but many systems might reasonably - * consider some characters outside this range to be alphabetic, - * so the file command would call such characters ASCII. It might - * have been more accurate to call this "considered textual on the - * local system" than "ASCII." - * - * It considered a file to be "International language text" if each - * of its characters was either an ASCII printing character (according - * to the real ASCII standard, not the above test), a character in - * the range 0x80 ... 0xFF, or one of the following control characters: - * backspace, tab, line feed, vertical tab, form feed, carriage return, - * escape. No attempt was made to determine the language in which files - * of this type were written. - * - * - * The table below considers a file to be ASCII if all of its characters - * are either ASCII printing characters (again, according to the X3.4 - * standard, not isascii()) or any of the following controls: bell, - * backspace, tab, line feed, form feed, carriage return, esc, nextline. - * - * I include bell because some programs (particularly shell scripts) - * use it literally, even though it is rare in normal text. 
I exclude - * vertical tab because it never seems to be used in real text. I also - * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85), - * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline - * character to. It might be more appropriate to include it in the 8859 - * set instead of the ASCII set, but it's got to be included in *something* - * we recognize or EBCDIC files aren't going to be considered textual. - * Some old Unix source files use SO/SI (^N/^O) to shift between Greek - * and Latin characters, so these should possibly be allowed. But they - * make a real mess on VT100-style displays if they're not paired properly, - * so we are probably better off not calling them text. - * - * A file is considered to be ISO-8859 text if its characters are all - * either ASCII, according to the above definition, or printing characters - * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF. - * - * Finally, a file is considered to be international text from some other - * character code if its characters are all either ISO-8859 (according to - * the above definition) or characters in the range 0x80 ... 0x9F, which - * ISO-8859 considers to be control characters but the IBM PC and Macintosh - * consider to be printing characters. 
- */ - -#define F 0 /* character never appears in text */ -#define T 1 /* character appears in plain ASCII text */ -#define I 2 /* character appears in ISO-8859 text */ -#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ - -private char text_chars[256] = { - /* BEL BS HT LF FF CR */ - F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ - /* ESC */ - F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ - T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ - /* NEL */ - X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ - X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ - I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ - I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ - I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ - I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ - I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ - I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ -}; - -private int -looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf, - size_t *ulen) -{ - size_t i; - - *ulen = 0; - - for (i = 0; i < nbytes; i++) { - int t = text_chars[buf[i]]; - - if (t != T) - return 0; - - ubuf[(*ulen)++] = buf[i]; - } - - return 1; -} - -private int -looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) -{ - size_t i; - - *ulen = 0; - - for (i = 0; i < nbytes; i++) { - int t = text_chars[buf[i]]; - - if (t != T && t != I) - return 0; - - ubuf[(*ulen)++] = buf[i]; - } - - return 1; -} - -private int -looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, - size_t *ulen) -{ - size_t i; - - *ulen = 
0; - - for (i = 0; i < nbytes; i++) { - int t = text_chars[buf[i]]; - - if (t != T && t != I && t != X) - return 0; - - ubuf[(*ulen)++] = buf[i]; - } - - return 1; -} - -/* - * Encode Unicode string as UTF-8, returning pointer to character - * after end of string, or NULL if an invalid character is found. - */ -private unsigned char * -encode_utf8(unsigned char *buf, size_t len, unichar *ubuf, size_t ulen) -{ - size_t i; - unsigned char *end = buf + len; - - for (i = 0; i < ulen; i++) { - if (ubuf[i] <= 0x7f) { - if (end - buf < 1) - return NULL; - *buf++ = (unsigned char)ubuf[i]; - } else if (ubuf[i] <= 0x7ff) { - if (end - buf < 2) - return NULL; - *buf++ = (unsigned char)((ubuf[i] >> 6) + 0xc0); - *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80); - } else if (ubuf[i] <= 0xffff) { - if (end - buf < 3) - return NULL; - *buf++ = (unsigned char)((ubuf[i] >> 12) + 0xe0); - *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80); - *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80); - } else if (ubuf[i] <= 0x1fffff) { - if (end - buf < 4) - return NULL; - *buf++ = (unsigned char)((ubuf[i] >> 18) + 0xf0); - *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80); - *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80); - *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80); - } else if (ubuf[i] <= 0x3ffffff) { - if (end - buf < 5) - return NULL; - *buf++ = (unsigned char)((ubuf[i] >> 24) + 0xf8); - *buf++ = (unsigned char)(((ubuf[i] >> 18) & 0x3f) + 0x80); - *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80); - *buf++ = (unsigned char)(((ubuf[i] >> 6) & 0x3f) + 0x80); - *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80); - } else if (ubuf[i] <= 0x7fffffff) { - if (end - buf < 6) - return NULL; - *buf++ = (unsigned char)((ubuf[i] >> 30) + 0xfc); - *buf++ = (unsigned char)(((ubuf[i] >> 24) & 0x3f) + 0x80); - *buf++ = (unsigned char)(((ubuf[i] >> 18) & 0x3f) + 0x80); - *buf++ = (unsigned char)(((ubuf[i] >> 12) & 0x3f) + 0x80); - *buf++ = (unsigned char)(((ubuf[i] 
>> 6) & 0x3f) + 0x80); - *buf++ = (unsigned char)((ubuf[i] & 0x3f) + 0x80); - } else /* Invalid character */ - return NULL; - } - - return buf; -} - -/* - * Decide whether some text looks like UTF-8. Returns: - * - * -1: invalid UTF-8 - * 0: uses odd control characters, so doesn't look like text - * 1: 7-bit text - * 2: definitely UTF-8 text (valid high-bit set bytes) - * - * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen; - * ubuf must be big enough! - */ -protected int -file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) -{ - size_t i; - int n; - unichar c; - int gotone = 0, ctrl = 0; - - if (ubuf) - *ulen = 0; - - for (i = 0; i < nbytes; i++) { - if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ - /* - * Even if the whole file is valid UTF-8 sequences, - * still reject it if it uses weird control characters. - */ - - if (text_chars[buf[i]] != T) - ctrl = 1; - - if (ubuf) - ubuf[(*ulen)++] = buf[i]; - } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */ - return -1; - } else { /* 11xxxxxx begins UTF-8 */ - int following; - - if ((buf[i] & 0x20) == 0) { /* 110xxxxx */ - c = buf[i] & 0x1f; - following = 1; - } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */ - c = buf[i] & 0x0f; - following = 2; - } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */ - c = buf[i] & 0x07; - following = 3; - } else if ((buf[i] & 0x04) == 0) { /* 111110xx */ - c = buf[i] & 0x03; - following = 4; - } else if ((buf[i] & 0x02) == 0) { /* 1111110x */ - c = buf[i] & 0x01; - following = 5; - } else - return -1; - - for (n = 0; n < following; n++) { - i++; - if (i >= nbytes) - goto done; - - if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) - return -1; - - c = (c << 6) + (buf[i] & 0x3f); - } - - if (ubuf) - ubuf[(*ulen)++] = c; - gotone = 1; - } - } -done: - return ctrl ? 0 : (gotone ? 2 : 1); -} - -/* - * Decide whether some text looks like UTF-8 with BOM. 
If there is no - * BOM, return -1; otherwise return the result of looks_utf8 on the - * rest of the text. - */ -private int -looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf, - size_t *ulen) -{ - if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) - return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen); - else - return -1; -} - -private int -looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf, - size_t *ulen) -{ - int bigend; - size_t i; - - if (nbytes < 2) - return 0; - - if (buf[0] == 0xff && buf[1] == 0xfe) - bigend = 0; - else if (buf[0] == 0xfe && buf[1] == 0xff) - bigend = 1; - else - return 0; - - *ulen = 0; - - for (i = 2; i + 1 < nbytes; i += 2) { - /* XXX fix to properly handle chars > 65536 */ - - if (bigend) - ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i]; - else - ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1]; - - if (ubuf[*ulen - 1] == 0xfffe) - return 0; - if (ubuf[*ulen - 1] < 128 && - text_chars[(size_t)ubuf[*ulen - 1]] != T) - return 0; - } - - return 1 + bigend; -} - -#undef F -#undef T -#undef I -#undef X - -/* - * This table maps each EBCDIC character to an (8-bit extended) ASCII - * character, as specified in the rationale for the dd(1) command in - * draft 11.2 (September, 1991) of the POSIX P1003.2 standard. - * - * Unfortunately it does not seem to correspond exactly to any of the - * five variants of EBCDIC documented in IBM's _Enterprise Systems - * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh - * Edition, July, 1999, pp. I-1 - I-4. - * - * Fortunately, though, all versions of EBCDIC, including this one, agree - * on most of the printing characters that also appear in (7-bit) ASCII. - * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all. - * - * Fortunately too, there is general agreement that codes 0x00 through - * 0x3F represent control characters, 0x41 a nonbreaking space, and the - * remainder printing characters. 
- * - * This is sufficient to allow us to identify EBCDIC text and to distinguish - * between old-style and internationalized examples of text. - */ - -private unsigned char ebcdic_to_ascii[] = { - 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, -128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, -144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, -' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|', -'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~', -'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?', -186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"', -195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201, -202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208, -209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215, -216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231, -'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237, -'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243, -'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249, -'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255 -}; - -#ifdef notdef -/* - * The following EBCDIC-to-ASCII table may relate more closely to reality, - * or at least to modern reality. It comes from - * - * http://ftp.s390.ibm.com/products/oe/bpxqp9.html - * - * and maps the characters of EBCDIC code page 1047 (the code used for - * Unix-derived software on IBM's 390 systems) to the corresponding - * characters from ISO 8859-1. - * - * If this table is used instead of the above one, some of the special - * cases for the NEL character can be taken out of the code. 
- */ - -private unsigned char ebcdic_1047_to_8859[] = { -0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, -0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, -0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, -0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, -0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, -0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, -0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, -0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, -0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, -0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, -0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, -0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, -0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, -0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, -0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, -0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F -}; -#endif - -/* - * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII. - */ -private void -from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out) -{ - size_t i; - - for (i = 0; i < nbytes; i++) { - out[i] = ebcdic_to_ascii[buf[i]]; - } -} diff --git a/usr.bin/file/compress.c b/usr.bin/file/compress.c deleted file mode 100644 index 773d49746c1..00000000000 --- a/usr.bin/file/compress.c +++ /dev/null @@ -1,479 +0,0 @@ -/* $OpenBSD: compress.c,v 1.16 2013/04/20 19:02:57 deraadt Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. 
Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* - * compress routines: - * zmagic() - returns 0 if not recognized, uncompresses and prints - * information if recognized - * uncompress(method, old, n, newch) - uncompress old into new, - * using method, return sizeof new - */ -#include "file.h" -#include "magic.h" -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include -#include -#include -#include -#ifdef HAVE_SYS_WAIT_H -#include -#endif -#if defined(HAVE_SYS_TIME_H) -#include -#endif -#if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ) -#define BUILTIN_DECOMPRESS -#include -#endif - - -private const struct { - const char magic[8]; - size_t maglen; - const char *argv[3]; - int silent; -} compr[] = { - { "\037\235", 2, { "gzip", "-cdq", NULL }, 1 }, /* compressed */ - /* Uncompress can get stuck; so use gzip first if we have it - * Idea from Damien Clark, thanks! */ - { "\037\235", 2, { "uncompress", "-c", NULL }, 1 }, /* compressed */ - { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */ - { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */ - { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */ - /* the standard pack utilities do not accept standard input */ - { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */ - { "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */ - /* ...only first file examined */ - { "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */ -}; - -private size_t ncompr = sizeof(compr) / sizeof(compr[0]); - -#define NODATA ((size_t)~0) - - -private ssize_t swrite(int, const void *, size_t); -private size_t uncompressbuf(struct magic_set *, int, size_t, - const unsigned char *, unsigned char **, size_t); -#ifdef BUILTIN_DECOMPRESS -private size_t uncompressgzipped(struct magic_set *, const unsigned char *, - unsigned char **, size_t); -#endif - -protected int -file_zmagic(struct magic_set *ms, int fd, const char *name, - const unsigned char *buf, size_t nbytes) -{ - unsigned char *newbuf = NULL; - size_t i, nsz; - int rv = 
0; - int mime = ms->flags & MAGIC_MIME; - - if ((ms->flags & MAGIC_COMPRESS) == 0) - return 0; - - for (i = 0; i < ncompr; i++) { - if (nbytes < compr[i].maglen) - continue; - if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 && - (nsz = uncompressbuf(ms, fd, i, buf, &newbuf, - nbytes)) != NODATA) { - ms->flags &= ~MAGIC_COMPRESS; - rv = -1; - if (file_buffer(ms, -1, name, newbuf, nsz) == -1) - goto error; - - if (mime == MAGIC_MIME || mime == 0) { - if (file_printf(ms, mime ? - " compressed-encoding=" : " (") == -1) - goto error; - } - - if ((mime == 0 || mime & MAGIC_MIME_ENCODING) && - file_buffer(ms, -1, NULL, buf, nbytes) == -1) - goto error; - - if (!mime && file_printf(ms, ")") == -1) - goto error; - rv = 1; - break; - } - } -error: - if (newbuf) - free(newbuf); - ms->flags |= MAGIC_COMPRESS; - return rv; -} - -/* - * `safe' write for sockets and pipes. - */ -private ssize_t -swrite(int fd, const void *buf, size_t n) -{ - int rv; - size_t rn = n; - - do - switch (rv = write(fd, buf, n)) { - case -1: - if (errno == EINTR) - continue; - return -1; - default: - n -= rv; - buf = ((const char *)buf) + rv; - break; - } - while (n > 0); - return rn; -} - - -/* - * `safe' read for sockets and pipes. - */ -protected ssize_t -sread(int fd, void *buf, size_t n, int canbepipe) -{ - int rv, cnt; -#ifdef FIONREAD - int t = 0; -#endif - size_t rn = n; - - if (fd == STDIN_FILENO) - goto nocheck; - -#ifdef FIONREAD - if ((canbepipe && (ioctl(fd, FIONREAD, &t) == -1)) || (t == 0)) { -#ifdef FD_ZERO - for (cnt = 0;; cnt++) { - struct pollfd pfd[1]; - int rv; - - pfd[0].fd = fd; - pfd[0].events = POLLIN; - - /* - * Avoid soft deadlock: do not read if there - * is nothing to read from sockets and pipes. 
- */ - rv = poll(pfd, 1, 100); - if (rv == -1) { - if (errno == EINTR || errno == EAGAIN) - continue; - } else if (rv == 0 && cnt >= 5) { - return 0; - } else - break; - } -#endif - (void)ioctl(fd, FIONREAD, &t); - } - - if (t > 0 && (size_t)t < n) { - n = t; - rn = n; - } -#endif - -nocheck: - do - switch ((rv = read(fd, buf, n))) { - case -1: - if (errno == EINTR) - continue; - return -1; - case 0: - return rn - n; - default: - n -= rv; - buf = ((char *)buf) + rv; - break; - } - while (n > 0); - return rn; -} - -protected int -file_pipe2file(struct magic_set *ms, int fd, const void *startbuf, - size_t nbytes) -{ - char buf[4096]; - int r, tfd; - - (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf); - tfd = mkstemp(buf); - r = errno; - (void)unlink(buf); - errno = r; - if (tfd == -1) { - file_error(ms, errno, - "cannot create temporary file for pipe copy"); - return -1; - } - - if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes) - r = 1; - else { - while ((r = sread(fd, buf, sizeof(buf), 1)) > 0) - if (swrite(tfd, buf, (size_t)r) != r) - break; - } - - switch (r) { - case -1: - file_error(ms, errno, "error copying from pipe to temp file"); - return -1; - case 0: - break; - default: - file_error(ms, errno, "error while writing to temp file"); - return -1; - } - - /* - * We duplicate the file descriptor, because fclose on a - * tmpfile will delete the file, but any open descriptors - * can still access the phantom inode. 
- */ - if ((fd = dup2(tfd, fd)) == -1) { - file_error(ms, errno, "could not dup descriptor for temp file"); - return -1; - } - (void)close(tfd); - if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) { - file_badseek(ms); - return -1; - } - return fd; -} - -#ifdef BUILTIN_DECOMPRESS - -#define FHCRC (1 << 1) -#define FEXTRA (1 << 2) -#define FNAME (1 << 3) -#define FCOMMENT (1 << 4) - -private size_t -uncompressgzipped(struct magic_set *ms, const unsigned char *old, - unsigned char **newch, size_t n) -{ - unsigned char flg = old[3]; - size_t data_start = 10; - z_stream z; - int rc; - - if (flg & FEXTRA) { - if (data_start+1 >= n) - return 0; - data_start += 2 + old[data_start] + old[data_start + 1] * 256; - } - if (flg & FNAME) { - while(data_start < n && old[data_start]) - data_start++; - data_start++; - } - if(flg & FCOMMENT) { - while(data_start < n && old[data_start]) - data_start++; - data_start++; - } - if(flg & FHCRC) - data_start += 2; - - if (data_start >= n) - return 0; - if ((*newch = (unsigned char *)malloc(HOWMANY + 1)) == NULL) { - return 0; - } - - /* XXX: const castaway, via strchr */ - z.next_in = (Bytef *)strchr((const char *)old + data_start, - old[data_start]); - z.avail_in = n - data_start; - z.next_out = *newch; - z.avail_out = HOWMANY; - z.zalloc = Z_NULL; - z.zfree = Z_NULL; - z.opaque = Z_NULL; - - rc = inflateInit2(&z, -15); - if (rc != Z_OK) { - file_error(ms, 0, "zlib: %s", z.msg); - return 0; - } - - rc = inflate(&z, Z_SYNC_FLUSH); - if (rc != Z_OK && rc != Z_STREAM_END) { - file_error(ms, 0, "zlib: %s", z.msg); - return 0; - } - - n = (size_t)z.total_out; - (void)inflateEnd(&z); - - /* let's keep the nul-terminate tradition */ - (*newch)[n] = '\0'; - - return n; -} -#endif - -private size_t -uncompressbuf(struct magic_set *ms, int fd, size_t method, - const unsigned char *old, unsigned char **newch, size_t n) -{ - int fdin[2], fdout[2]; - int r; - -#ifdef BUILTIN_DECOMPRESS - /* FIXME: This doesn't cope with bzip2 */ - if (method == 2) - 
return uncompressgzipped(ms, old, newch, n); -#endif - (void)fflush(stdout); - (void)fflush(stderr); - - if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) { - file_error(ms, errno, "cannot create pipe"); - return NODATA; - } - switch (fork()) { - case 0: /* child */ - (void) close(0); - if (fd != -1) { - (void) dup(fd); - (void) lseek(0, (off_t)0, SEEK_SET); - } else { - (void) dup(fdin[0]); - (void) close(fdin[0]); - (void) close(fdin[1]); - } - - (void) close(1); - (void) dup(fdout[1]); - (void) close(fdout[0]); - (void) close(fdout[1]); -#ifndef DEBUG - if (compr[method].silent) - (void)close(2); -#endif - - (void)execvp(compr[method].argv[0], - (char *const *)(intptr_t)compr[method].argv); -#ifdef DEBUG - (void)fprintf(stderr, "exec `%s' failed (%s)\n", - compr[method].argv[0], strerror(errno)); -#endif - exit(1); - /*NOTREACHED*/ - case -1: - file_error(ms, errno, "could not fork"); - return NODATA; - - default: /* parent */ - (void) close(fdout[1]); - if (fd == -1) { - (void) close(fdin[0]); - /* - * fork again, to avoid blocking because both - * pipes filled - */ - switch (fork()) { - case 0: /* child */ - (void)close(fdout[0]); - if (swrite(fdin[1], old, n) != (ssize_t)n) { -#ifdef DEBUG - (void)fprintf(stderr, - "Write failed (%s)\n", - strerror(errno)); -#endif - exit(1); - } - exit(0); - /*NOTREACHED*/ - - case -1: -#ifdef DEBUG - (void)fprintf(stderr, "Fork failed (%s)\n", - strerror(errno)); -#endif - exit(1); - /*NOTREACHED*/ - - default: /* parent */ - break; - } - (void) close(fdin[1]); - fdin[1] = -1; - } - - if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) { -#ifdef DEBUG - (void)fprintf(stderr, "Malloc failed (%s)\n", - strerror(errno)); -#endif - n = 0; - goto err; - } - if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) { -#ifdef DEBUG - (void)fprintf(stderr, "Read failed (%s)\n", - strerror(errno)); -#endif - free(*newch); - n = 0; - newch[0] = '\0'; - goto err; - } else { - n = r; - } - /* NUL terminate, as every buffer 
is handled here. */ - (*newch)[n] = '\0'; -err: - if (fdin[1] != -1) - (void) close(fdin[1]); - (void) close(fdout[0]); -#ifdef WNOHANG - while (waitpid(-1, NULL, WNOHANG) != -1) - continue; -#else - (void)wait(NULL); -#endif - return n; - } -} diff --git a/usr.bin/file/config.h b/usr.bin/file/config.h deleted file mode 100644 index 1cf2668d168..00000000000 --- a/usr.bin/file/config.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Hand-made config.h file for OpenBSD, so we don't have to run - * the dratted configure script every time we build this puppy, - * but can still carefully import stuff from Christos' version. - * - * This file is in the public domain. Original Author Ian F. Darwin. - * $OpenBSD: config.h,v 1.7 2011/07/25 16:21:22 martynas Exp $ - */ - -/* header file issues. */ -#define HAVE_UNISTD_H 1 -#define HAVE_FCNTL_H 1 -#define HAVE_SYS_WAIT_H 1 -#define HAVE_LOCALE_H 1 -#define HAVE_SYS_STAT_H 1 -#define HAVE_INTTYPES_H 1 -#define HAVE_GETOPT_H 1 -#define HAVE_LIMITS_H 1 -/* #define HAVE_ZLIB_H 1 DO NOT ENABLE YET -- chl */ -/* #define HAVE_LIBZ 1 DO NOT ENABLE YET -- ian */ - -#define HAVE_STRTOUL -#define HAVE_STRERROR -#define HAVE_VSNPRINTF -#define HAVE_SNPRINTF -#define HAVE_STRNDUP -#define HAVE_STRTOF - -/* Compiler issues */ -#define SIZEOF_LONG_LONG 8 - -/* Library issues */ -#define HAVE_GETOPT_LONG 1 /* in-tree as of 3.2 */ -#define HAVE_ST_RDEV 1 - -/* ELF support */ -#define BUILTIN_ELF 1 -#define ELFCORE 1 diff --git a/usr.bin/file/elfclass.h b/usr.bin/file/elfclass.h deleted file mode 100644 index 87c9813b43b..00000000000 --- a/usr.bin/file/elfclass.h +++ /dev/null @@ -1,68 +0,0 @@ -/* $OpenBSD: elfclass.h,v 1.2 2009/04/26 14:17:45 chl Exp $ */ -/* - * Copyright (c) Christos Zoulas 2008. - * All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - if (nbytes <= sizeof(elfhdr)) - return 0; - - u.l = 1; - (void)memcpy(&elfhdr, buf, sizeof elfhdr); - swap = (u.c[sizeof(int32_t) - 1] + 1) != elfhdr.e_ident[EI_DATA]; - - type = elf_getu16(swap, elfhdr.e_type); - switch (type) { -#ifdef ELFCORE - case ET_CORE: - if (dophn_core(ms, class, swap, fd, - (off_t)elf_getu(swap, elfhdr.e_phoff), - elf_getu16(swap, elfhdr.e_phnum), - (size_t)elf_getu16(swap, elfhdr.e_phentsize), - fsize, &flags) == -1) - return -1; - break; -#endif - case ET_EXEC: - case ET_DYN: - if (dophn_exec(ms, class, swap, fd, - (off_t)elf_getu(swap, elfhdr.e_phoff), - elf_getu16(swap, elfhdr.e_phnum), - (size_t)elf_getu16(swap, elfhdr.e_phentsize), - fsize, &flags) == -1) - return -1; - /*FALLTHROUGH*/ - case ET_REL: - if (doshn(ms, class, swap, fd, - (off_t)elf_getu(swap, elfhdr.e_shoff), - elf_getu16(swap, elfhdr.e_shnum), - (size_t)elf_getu16(swap, elfhdr.e_shentsize), - &flags) == -1) - return -1; - break; - - default: - break; - } - return 1; diff --git a/usr.bin/file/file.1 b/usr.bin/file/file.1 index a92a3ecc6c0..be305b2be69 100644 --- a/usr.bin/file/file.1 +++ b/usr.bin/file/file.1 @@ -1,6 +1,7 @@ -.\" $OpenBSD: file.1,v 1.35 2015/02/15 22:26:45 bentley Exp $ +.\" $OpenBSD: file.1,v 1.36 2015/04/24 16:24:11 nicm Exp $ .\" $FreeBSD: src/usr.bin/file/file.1,v 1.16 2000/03/01 12:19:39 sheldonh Exp $ .\" +.\" Copyright (c) 2015 Nicholas Marriott .\" Copyright (c) Ian F. Darwin 1986-1995. .\" Software written by Ian F. Darwin and others; .\" maintained 1995-present by Christos Zoulas and others. @@ -27,7 +28,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. 
.\" -.Dd $Mdocdate: February 15 2015 $ +.Dd $Mdocdate: April 24 2015 $ .Dt FILE 1 .Os .Sh NAME @@ -36,464 +37,83 @@ .Sh SYNOPSIS .Nm .Bk -words -.Op Fl 0bCcehikLNnprsvz -.Op Fl -help -.Op Fl -mime-encoding -.Op Fl -mime-type -.Op Fl F Ar separator -.Op Fl f Ar namefile -.Op Fl m Ar magicfiles -.Ar file +.Op Fl bciLsW +.Ar .Ek .Sh DESCRIPTION The .Nm -utility tests each argument in an attempt to classify it. -There are three sets of tests, performed in this order: -filesystem tests, magic tests, and language tests. -The first test that succeeds causes the file type to be printed. +utility tests each argument and attempts to determine its type. +Three sets of tests are performed: +.Bl -enum -offset Ds +.It +Filesystem tests, for example if a file is empty, or a special file such as a +socket or named pipe (FIFO). +.It +.Dq Magic +tests for data in particular fixed formats. +These are loaded from the +.Pa /etc/magic +file (or +.Pa ~/.magic +instead if it exists). +The file format is described in +.Xr magic 5 . +.It +Tests for text files such as plain ASCII or C programming language files. +.El .Pp -The type printed will usually contain one of the words +The first test which succeeds causes the file type to be printed. +The type will often contain one of the words .Em text -(the file contains only -printing characters and a few common control -characters and is probably safe to read on an -ASCII terminal), +(contains only printing characters and is probably safe to read on an ASCII +terminal), .Em executable -(the file contains the result of compiling a program -in a form understandable to some -.Ux -kernel or another), +(the file contains a compiled executable program) or .Em data -meaning anything else (data is usually -.Dq binary -or non-printable). -Exceptions are well-known file formats (core files, tar archives) -that are known to contain binary data. -When modifying magic files or the program itself, make sure to -.Em preserve these keywords . 
-Users depend on knowing that all the readable files in a directory -have the word -.Dq text -printed. -Don't do as Berkeley did and change -.Dq shell commands text -to -.Dq shell script . -.Pp -The filesystem tests are based on examining the return from a -.Xr stat 2 -system call. -The program checks to see if the file is empty, -or if it's some sort of special file. -Any known file types, -such as sockets, symbolic links, and named pipes (FIFOs), -are intuited if they are defined in -the system header file -.In sys/stat.h . -.Pp -The magic tests are used to check for files with data in -particular fixed formats. -The canonical example of this is a binary executable (compiled program) -a.out file, whose format is defined in -.In elf.h , -.In a.out.h , -and possibly -.In exec.h -in the standard include directory. -These files have a -.Dq magic number -stored in a particular place -near the beginning of the file that tells the -.Ux -operating system -that the file is a binary executable, and which of several types thereof. -The concept of a -.Dq magic -has been applied by extension to data files. -Any file with some invariant identifier at a small fixed -offset into the file can usually be described in this way. -The information identifying these files is read from the magic file -.Pa /etc/magic . -In addition, if -.Pa $HOME/.magic.mgc -or -.Pa $HOME/.magic -exists, it will be used in preference to the system magic files. -.Pp -If a file does not match any of the entries in the magic file, -it is examined to see if it seems to be a text file. -ASCII, ISO-8859-x, non-ISO 8-bit extended-ASCII character sets -(such as those used on Macintosh and IBM PC systems), -UTF-8-encoded Unicode, UTF-16-encoded Unicode, and EBCDIC -character sets can be distinguished by the different -ranges and sequences of bytes that constitute printable text -in each set. -If a file passes any of these tests, its character set is reported. 
-ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified -as -.Dq text -because they will be mostly readable on nearly any terminal; -UTF-16 and EBCDIC are only -.Dq character data -because, while -they contain text, it is text that will require translation -before it can be read. -In addition, -.Nm -will attempt to determine other characteristics of text-type files. -If the lines of a file are terminated by CR, CRLF, or NEL, instead -of the Unix-standard LF, this will be reported. -Files that contain embedded escape sequences or overstriking -will also be identified. -.Pp -Once -.Nm -has determined the character set used in a text-type file, -it will -attempt to determine in what language the file is written. -The language tests look for particular strings (cf.\& -.In names.h ) -that can appear anywhere in the first few blocks of a file. -For example, the keyword -.Em .br -indicates that the file is most likely a -troff input file, just as the keyword -.Em struct -indicates a C program. -These tests are less reliable than the previous -two groups, so they are performed last. -The language test routines also test for some miscellany -(such as -.Xr tar 1 -archives). -.Pp -Any file that cannot be identified as having been written -in any of the character sets listed above is simply said to be -.Dq data . +meaning anything else. .Sh OPTIONS .Bl -tag -width indent -.It Fl 0 , -print0 -Output a null character -.Sq \e0 -after the end of the filename. -Nice to -.Xr cut 1 -the output. -This does not affect the separator which is still printed. -.It Fl b , -brief -Do not prepend filenames to output lines (brief mode). -.It Fl C , -compile -Write a -.Pa magic.mgc -output file that contains a pre-parsed version of the magic file or directory. -.It Fl c , -checking-printout -Cause a checking printout of the parsed form of the magic file. -This is usually used in conjunction with the -.Fl m -flag to debug a new magic file before installing it. 
-.It Fl e , -exclude Ar testname -Exclude the test named in -.Ar testname -from the list of tests made to determine the file type. -Valid test names are: -.Bl -tag -width compress -.It apptype -Check for -.Dv EMX -application type (only on EMX). -.It ascii -Check for various types of ASCII files. -.It compress -Don't look for, or inside, compressed files. -.It elf -Don't print elf details. -.It fortran -Don't look for fortran sequences inside ASCII files. -.It soft -Don't consult magic files. -.It tar -Don't examine tar files. -.It token -Don't look for known tokens inside ASCII files. -.It troff -Don't look for troff sequences inside ASCII files. -.El -.It Fl F , -separator Ar separator -Use the specified string as the separator between the filename and the -file result returned. -Defaults to -.Sq \&: . -.It Fl f , -files-from Ar namefile -Read the names of the files to be examined from -.Ar namefile -(one per line) -before the argument list. -Either -.Ar namefile -or at least one filename argument must be present; -to test the standard input, use -.Sq - -as a filename argument. -.It Fl h , -no-dereference -Causes symlinks not to be followed. -This is the default if the environment variable -.Dv POSIXLY_CORRECT -is not defined. -.It Fl -help -Print a help message and exit. -.It Fl i , -mime -Causes the file command to output mime type strings rather than the more -traditional human readable ones. +.It Fl b +Do not prepend filenames to output lines. +.It Fl c +Print a summary of the parsed magic file, usually used for debugging. +.It Fl i , -mime , -mime-type +Causes the file command to output MIME type strings rather than the more +traditional human-readable ones. Thus it may say -.Dq text/plain charset=us-ascii +.Dq text/plain rather than .Dq ASCII text . -In order for this option to work, +.It Fl L +Causes symlinks to be followed. 
+.It Fl s +Instructs .Nm -changes the way it handles files recognized by the command itself -(such as many of the text file types, directories etc.), -and makes use of an alternative -.Dq magic -file. -See also -.Sx FILES , -below. -.It Fl -mime-encoding , -mime-type -Like -.Fl i , -but print only the specified element(s). -.It Fl k , -keep-going -Don't stop at the first match, keep going. -Subsequent matches will have the string -.Dq "\[rs]012\- " -prepended. -(If a newline is required, see the -.Fl r -option.) -.It Fl L , -dereference -Causes symlinks to be followed; -analogous to the option of the same name in -.Xr ls 1 . -This is the default if the environment variable -.Dv POSIXLY_CORRECT -is defined. -.It Fl m , -magic-file Ar magicfiles -Specify an alternate list of files and directories containing magic. -This can be a single item, or a colon-separated list. -If a compiled magic file is found alongside a file or directory, -it will be used instead. -.It Fl N , -no-pad -Don't pad filenames so that they align in the output. -.It Fl n , -no-buffer -Force stdout to be flushed after checking each file. -This is only useful if checking a list of files. -It is intended to be used by programs that want filetype output from a pipe. -.It Fl p , -preserve-date -On systems that support -.Xr utime 3 -or -.Xr utimes 2 , -attempt to preserve the access time of files analyzed, to pretend that -.Nm -never read them. -.It Fl r , -raw -Don't translate unprintable characters to \eooo. -Normally -.Nm -translates unprintable characters to their octal representation. -.It Fl s , -special-files -Normally, -.Nm -only attempts to read and determine the type of argument files which +to attempt to read all files, not only those which .Xr stat 2 reports are ordinary files. -This prevents problems, because reading special files may have peculiar -consequences. -Specifying the -.Fl s -option causes -.Nm -to also read argument files which are block or character special files. 
-This is useful for determining the filesystem types of the data in raw -disk partitions, which are block special files. -This option also causes -.Nm -to disregard the file size as reported by -.Xr stat 2 -since on some systems it reports a zero size for raw disk partitions. -.It Fl v , -version -Print the version of the program and exit. -.It Fl z , -uncompress -Try to look inside compressed files. +.It Fl W +Display warnings when parsing the magic file or applying its tests. +Usually used for debugging. .El -.Sh ENVIRONMENT -The environment variable -.Dv MAGIC -can be used to set the default magic file name. -If that variable is set, then -.Nm -will not attempt to open -.Pa $HOME/.magic . -.Nm -adds -.Dq .mgc -to the value of this variable as appropriate. -The environment variable -.Dv POSIXLY_CORRECT -controls whether -.Nm -will attempt to follow symlinks or not. -If set, then -.Nm -follows symlinks; otherwise it does not. -This is also controlled by the -.Fl L -and -.Fl h -options. .Sh FILES .Bl -tag -width /etc/magic -compact .It Pa /etc/magic -default list of magic numbers +default magic file .El .Sh EXIT STATUS .Ex -std file .Sh SEE ALSO -.Xr hexdump 1 , -.Xr od 1 , -.Xr strings 1 , .Xr magic 5 -.Sh STANDARDS CONFORMANCE -This program is believed to exceed the System V Interface Definition -of FILE(CMD), as near as one can determine from the vague language -contained therein. -Its behavior is mostly compatible with the System V program of the same name. -This version knows more magic, however, so it will produce -different (albeit more accurate) output in many cases. -.\" URL: http://www.opengroup.org/onlinepubs/009695399/utilities/file.html -.Pp -The one significant difference -between this version and System V -is that this version treats any whitespace -as a delimiter, so that spaces in pattern strings must be escaped. 
-For example, -.Bd -literal -offset indent -\*(Gt10 string language impress\ (imPRESS data) -.Ed -.Pp -in an existing magic file would have to be changed to -.Bd -literal -offset indent -\*(Gt10 string language\e impress (imPRESS data) -.Ed -.Pp -In addition, in this version, if a pattern string contains a backslash, -it must be escaped. -For example -.Bd -literal -offset indent -0 string \ebegindata Andrew Toolkit document -.Ed -.Pp -in an existing magic file would have to be changed to -.Bd -literal -offset indent -0 string \e\ebegindata Andrew Toolkit document -.Ed -.Pp -SunOS releases 3.2 and later from Sun Microsystems include a +.Sh AUTHORS .Nm -command derived from the System V one, but with some extensions. -This version differs from Sun's only in minor ways. -It includes the extension of the -.Sq & -operator, used as, -for example, -.Bd -literal -offset indent -\*(Gt16 long&0x7fffffff \*(Gt0 not stripped -.Ed -.Sh HISTORY -There has been a -.Nm -command in every -.Ux -since at least Research Version 4 -(man page dated November, 1973). -The System V version introduced one significant major change: -the external list of magic types. -This slowed the program down slightly but made it a lot more flexible. -.Pp -This program, based on the System V version, -was written by Ian Darwin -without looking at anybody else's source code. -.Pp -John Gilmore revised the code extensively, making it better than -the first version. -Geoff Collyer found several inadequacies -and provided some magic file entries. -Contributions by the `&' operator by Rob McMahon, 1989. -.Pp -Guy Harris, made many changes from 1993 to the present. -.Pp -Primary development and maintenance from 1990 to the present by -Christos Zoulas. -.Pp -Altered by Chris Lowth, 2000: -Handle the -.Fl i -option to output mime type strings, using an alternative -magic file and internal logic. 
-.Pp -Altered by Eric Fischer, July, 2000, -to identify character codes and attempt to identify the languages -of non-ASCII files. -.Pp -Altered by Reuben Thomas, 2007 to 2008, to improve MIME -support and merge MIME and non-MIME magic, support directories as well -as files of magic, apply many bug fixes and improve the build system. -.Pp -The list of contributors to the -.Dq magic -directory (magic files) -is too long to include here. -You know who you are; thank you. -Many contributors are listed in the source files. -.Sh BUGS -There must be a better way to automate the construction of the Magic -file from all the glop in Magdir. -What is it? -.Pp -.Nm -uses several algorithms that favor speed over accuracy, -thus it can be misled about the contents of -text -files. -.Pp -The support for text files (primarily for programming languages) -is simplistic, inefficient and requires recompilation to update. -.Pp -The list of keywords in -.Pa ascmagic -probably belongs in the Magic file. -This could be done by using some keyword like -.Sq * -for the offset value. -.Pp -Complain about conflicts in the magic file entries. -Make a rule that the magic entries sort based on file offset rather -than position within the magic file? -.Pp -The program should provide a way to give an estimate -of -.Dq how good -a guess is. -We end up removing guesses (e.g. -.Dq From\ -as first 5 chars of file) because -they are not as good as other guesses (e.g.\& -.Dq Newsgroups: -versus -.Dq Return-Path: ) . -Still, if the others don't pan out, it should be possible to use the -first guess. +commands have appeared in many previous versions of +.Ux . +This version was written by Nicholas Marriott for +.Ox 5.8 +to replace the previous version originally written by Ian Darwin. .Pp -This manual page, and particularly this section, is too long. +There is a large number of contributors to the magic files; many are listed in +the source files. 
diff --git a/usr.bin/file/file.c b/usr.bin/file/file.c index fcd6c34cd5f..d83a1f4515f 100644 --- a/usr.bin/file/file.c +++ b/usr.bin/file/file.c @@ -1,475 +1,513 @@ -/* $OpenBSD: file.c,v 1.26 2015/01/16 18:08:15 millert Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ +/* $OpenBSD: file.c,v 1.27 2015/04/24 16:24:11 nicm Exp $ */ + /* - * file - find type of a file or files - main program. 
+ * Copyright (c) 2015 Nicholas Marriott + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include -#include - -#include "file.h" -#include "magic.h" +#include +#include -#include +#include +#include +#include +#include +#include #include #include -#include -#include -#ifdef RESTORE_TIME -# if (__COHERENT__ >= 0x420) -# include -# else -# ifdef USE_UTIMES -# include -# else -# include -# endif -# endif -#endif -#ifdef HAVE_UNISTD_H -#include /* for read() */ -#endif -#ifdef HAVE_LOCALE_H -#include -#endif -#ifdef HAVE_WCHAR_H -#include -#endif -#include -#ifndef HAVE_GETOPT_LONG -int getopt_long(int argc, char * const *argv, const char *optstring, const struct option *longopts, int *longindex); -#endif +#include "file.h" +#include "magic.h" +#include "xmalloc.h" -#include /* for byte swapping */ +struct input_file +{ + struct magic *m; -#include "patchlevel.h" + const char *path; + const char *label; + int fd; + struct stat sb; + const char *error; -#ifdef S_IFLNK -#define SYMLINKFLAG "Lh" -#else -#define SYMLINKFLAG "" -#endif + void *base; + size_t size; + int mapped; + char *result; -# define USAGE "Usage: %s [-bcik" SYMLINKFLAG "nNprsvz0] [-e test] [-f namefile] [-F separator] [-m magicfiles] file...\n" \ - " %s -C -m magicfiles\n" + char link_path[PATH_MAX]; + const char 
*link_error; + int link_target; +}; -private int /* Global command-line options */ - bflag = 0, /* brief output format */ - nopad = 0, /* Don't pad output */ - nobuffer = 0, /* Do not buffer stdout */ - nulsep = 0; /* Append '\0' to the separator */ +extern char *__progname; -private const char *magicfile = 0; /* where the magic is */ -private const char *default_magicfile = MAGIC; -private const char *separator = ":"; /* Default field separator */ +__dead void usage(void); -extern char *__progname; /* used throughout */ +static void open_file(struct input_file *, const char *, int *); +static void read_link(struct input_file *); +static void test_file(struct magic *, struct input_file *, int); -private struct magic_set *magic; +static int try_stat(struct input_file *); +static int try_empty(struct input_file *); +static int try_access(struct input_file *); +static int try_text(struct input_file *); +static int try_magic(struct input_file *); +static int try_unknown(struct input_file *); -private void unwrap(char *); -private void usage(void); -private void help(void); +static int bflag; +static int cflag; +static int iflag; +static int Lflag; +static int sflag; +static int Wflag; -int main(int, char *[]); -private void process(const char *, int); -private void load(const char *, int); +static struct option longopts[] = { + { "mime", no_argument, NULL, 'i' }, + { "mime-type", no_argument, NULL, 'i' }, + { NULL, 0, NULL, 0 } +}; +__dead void +usage(void) +{ + fprintf(stderr, "usage: %s [-bchiLsW] [file ...]\n", __progname); + exit(1); +} -/* - * main - parse arguments and handle options - */ int -main(int argc, char *argv[]) +main(int argc, char **argv) { - int c; - size_t i; - int action = 0, didsomefiles = 0, errflg = 0; - int flags = 0; - char *home, *usermagic; - struct stat sb; - static const char hmagic[] = "/.magic"; -#define OPTSTRING "bcCde:f:F:hikLm:nNprsvz0" - int longindex; - static const struct option long_options[] = - { -#define OPT(shortname, 
longname, opt, doc) \ - {longname, opt, NULL, shortname}, -#define OPT_LONGONLY(longname, opt, doc) \ - {longname, opt, NULL, 0}, -#include "file_opts.h" -#undef OPT -#undef OPT_LONGONLY - {0, 0, NULL, 0} -}; - - static const struct { - const char *name; - int value; - } nv[] = { - { "apptype", MAGIC_NO_CHECK_APPTYPE }, - { "ascii", MAGIC_NO_CHECK_ASCII }, - { "compress", MAGIC_NO_CHECK_COMPRESS }, - { "elf", MAGIC_NO_CHECK_ELF }, - { "soft", MAGIC_NO_CHECK_SOFT }, - { "tar", MAGIC_NO_CHECK_TAR }, - { "tokens", MAGIC_NO_CHECK_TOKENS }, - }; - - /* makes islower etc work for other langs */ - (void)setlocale(LC_CTYPE, ""); - -#ifdef __EMX__ - /* sh-like wildcard expansion! Shouldn't hurt at least ... */ - _wildcard(&argc, &argv); -#endif - - magicfile = default_magicfile; - if ((usermagic = getenv("MAGIC")) != NULL) - magicfile = usermagic; - else - if ((home = getenv("HOME")) != NULL) { - size_t len = strlen(home) + sizeof(hmagic); - if ((usermagic = malloc(len)) != NULL) { - (void)strlcpy(usermagic, home, len); - (void)strlcat(usermagic, hmagic, len); - if (stat(usermagic, &sb)<0) - free(usermagic); - else - magicfile = usermagic; - } - } - -#ifdef S_IFLNK - flags |= getenv("POSIXLY_CORRECT") ? 
MAGIC_SYMLINK : 0; -#endif - while ((c = getopt_long(argc, argv, OPTSTRING, long_options, - &longindex)) != -1) - switch (c) { - case 0 : - switch (longindex) { - case 0: - help(); - break; - case 10: - flags |= MAGIC_MIME_TYPE; - break; - case 11: - flags |= MAGIC_MIME_ENCODING; - break; - } - break; - case '0': - nulsep = 1; + struct input_file *files = NULL; + int nfiles, opt, i, width = 0; + FILE *f; + struct magic *m; + char *home, *path; + struct passwd *pw; + + for (;;) { + opt = getopt_long(argc, argv, "bchiLsW", longopts, NULL); + if (opt == -1) break; + switch (opt) { case 'b': - bflag++; + bflag = 1; break; case 'c': - action = FILE_CHECK; - break; - case 'C': - action = FILE_COMPILE; - break; - case 'd': - flags |= MAGIC_DEBUG|MAGIC_CHECK; - break; - case 'e': - for (i = 0; i < sizeof(nv) / sizeof(nv[0]); i++) - if (strcmp(nv[i].name, optarg) == 0) - break; - - if (i == sizeof(nv) / sizeof(nv[0])) - errflg++; - else - flags |= nv[i].value; - break; - - case 'f': - if(action) - usage(); - load(magicfile, flags); - unwrap(optarg); - ++didsomefiles; + cflag = 1; break; - case 'F': - separator = optarg; + case 'h': + Lflag = 0; break; case 'i': - flags |= MAGIC_MIME; - break; - case 'k': - flags |= MAGIC_CONTINUE; - break; - case 'm': - magicfile = optarg; - break; - case 'n': - ++nobuffer; + iflag = 1; break; - case 'N': - ++nopad; - break; -#if defined(HAVE_UTIME) || defined(HAVE_UTIMES) - case 'p': - flags |= MAGIC_PRESERVE_ATIME; - break; -#endif - case 'r': - flags |= MAGIC_RAW; + case 'L': + Lflag = 1; break; case 's': - flags |= MAGIC_DEVICES; - break; - case 'v': - (void)fprintf(stderr, "%s-%d.%.2d\n", __progname, - FILE_VERSION_MAJOR, patchlevel); - (void)fprintf(stderr, "magic file from %s\n", - magicfile); - return 1; - case 'z': - flags |= MAGIC_COMPRESS; + sflag = 1; break; -#ifdef S_IFLNK - case 'L': - flags |= MAGIC_SYMLINK; - break; - case 'h': - flags &= ~MAGIC_SYMLINK; + case 'W': + Wflag = 1; break; -#endif - case '?': default: - 
errflg++; - break; + usage(); } - - if (errflg) { - usage(); } + argc -= optind; + argv += optind; + if (cflag) { + if (argc != 0) + usage(); + } else if (argc == 0) + usage(); - switch(action) { - case FILE_CHECK: - case FILE_COMPILE: - magic = magic_open(flags|MAGIC_CHECK); - if (magic == NULL) { - (void)fprintf(stderr, "%s: %s\n", __progname, - strerror(errno)); - return 1; - } - c = action == FILE_CHECK ? magic_check(magic, magicfile) : - magic_compile(magic, magicfile); - if (c == -1) { - (void)fprintf(stderr, "%s: %s\n", __progname, - magic_error(magic)); - return -1; - } - return 0; - default: - load(magicfile, flags); - break; + nfiles = argc; + if (nfiles != 0) { + files = xcalloc(nfiles, sizeof *files); + for (i = 0; i < argc; i++) + open_file(&files[i], argv[i], &width); } - if (optind == argc) { - if (!didsomefiles) { - usage(); - } + home = getenv("HOME"); + if (home == NULL || *home == '\0') { + pw = getpwuid(getuid()); + if (pw != NULL) + home = pw->pw_dir; + else + home = NULL; } - else { - size_t j, wid, nw; - for (wid = 0, j = (size_t)optind; j < (size_t)argc; j++) { - nw = file_mbswidth(argv[j]); - if (nw > wid) - wid = nw; - } - /* - * If bflag is only set twice, set it depending on - * number of files [this is undocumented, and subject to change] - */ - if (bflag == 2) { - bflag = optind >= argc - 1; - } - for (; optind < argc; optind++) - process(argv[optind], wid); + if (home != NULL) { + xasprintf(&path, "%s/.magic", home); + f = fopen(path, "r"); + } else + f = NULL; + if (f == NULL) { + path = xstrdup("/etc/magic"); + f = fopen(path, "r"); + } + if (f == NULL) + err(1, "%s", path); + + if (geteuid() == 0) { + pw = getpwnam(FILE_USER); + if (pw == NULL) + errx(1, "unknown user %s", FILE_USER); + if (setgroups(1, &pw->pw_gid) != 0) + err(1, "setgroups"); + if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) != 0) + err(1, "setresgid"); + if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) != 0) + err(1, "setresuid"); + } + + m = magic_load(f, 
path, cflag || Wflag); + if (cflag) { + magic_dump(m); + exit(0); } - c = magic->haderr ? 1 : 0; - magic_close(magic); - return c; + for (i = 0; i < nfiles; i++) + test_file(m, &files[i], width); + exit(0); } +static void +open_file(struct input_file *inf, const char *path, int *width) +{ + char *label; + int n, retval; + + inf->path = xstrdup(path); -private void -/*ARGSUSED*/ -load(const char *m, int flags) + n = xasprintf(&label, "%s:", inf->path); + if (n > *width) + *width = n; + inf->label = label; + + retval = lstat(inf->path, &inf->sb); + if (retval == -1) { + inf->error = strerror(errno); + return; + } + + if (S_ISLNK(inf->sb.st_mode)) + read_link(inf); + inf->fd = open(inf->path, O_RDONLY|O_NONBLOCK); +} + +static void +read_link(struct input_file *inf) { - if (magic || m == NULL) + struct stat sb; + char path[PATH_MAX]; + char *copy, *root; + int used; + ssize_t size; + + size = readlink(inf->path, path, sizeof path); + if (size == -1) { + inf->link_error = strerror(errno); return; - magic = magic_open(flags); - if (magic == NULL) { - (void)fprintf(stderr, "%s: %s\n", __progname, strerror(errno)); - exit(1); } - if (magic_load(magic, magicfile) == -1) { - (void)fprintf(stderr, "%s: %s\n", - __progname, magic_error(magic)); - exit(1); + path[size] = '\0'; + + if (*path == '/') + strlcpy(inf->link_path, path, sizeof inf->link_path); + else { + copy = xstrdup(inf->path); + + root = dirname(copy); + if (*root == '\0' || strcmp(root, ".") == 0 || + strcmp (root, "/") == 0) + strlcpy(inf->link_path, path, sizeof inf->link_path); + else { + used = snprintf(inf->link_path, sizeof inf->link_path, + "%s/%s", root, path); + if (used < 0 || (size_t)used >= sizeof inf->link_path) { + inf->link_error = strerror(ENAMETOOLONG); + return; + } + } + + free(copy); + } + + if (Lflag) { + if (stat(inf->path, &inf->sb) == -1) + inf->error = strerror(errno); + } else { + if (stat(inf->link_path, &sb) == -1) + inf->link_target = errno; } } -/* - * unwrap -- read a file of 
filenames, do each one. - */ -private void -unwrap(char *fn) +static void * +fill_buffer(struct input_file *inf) { - char buf[PATH_MAX]; - FILE *f; - int wid = 0, cwid; + static void *buffer; + ssize_t got; + size_t left; + void *next; + + if (buffer == NULL) + buffer = xmalloc(FILE_READ_SIZE); + + next = buffer; + left = inf->size; + while (left != 0) { + got = read(inf->fd, next, left); + if (got == -1) { + if (errno == EINTR) + continue; + return NULL; + } + if (got == 0) + break; + next = (char*)next + got; + left -= got; + } - if (strcmp("-", fn) == 0) { - f = stdin; - wid = 1; - } else { - if ((f = fopen(fn, "r")) == NULL) { - (void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n", - __progname, fn, strerror(errno)); - exit(1); + return buffer; +} + +static int +load_file(struct input_file *inf) +{ + int available; + + inf->size = inf->sb.st_size; + if (inf->size > FILE_READ_SIZE) + inf->size = FILE_READ_SIZE; + if (S_ISFIFO(inf->sb.st_mode)) { + if (ioctl(inf->fd, FIONREAD, &available) == -1) { + xasprintf(&inf->result, "cannot read '%s' (%s)", + inf->path, strerror(errno)); + return (1); + } + inf->size = available; + } else if (!S_ISREG(inf->sb.st_mode) && inf->size == 0) + inf->size = FILE_READ_SIZE; + if (inf->size == 0) + return (0); + + inf->base = mmap(NULL, inf->size, PROT_READ, MAP_PRIVATE, inf->fd, 0); + if (inf->base == MAP_FAILED) { + inf->base = fill_buffer(inf); + if (inf->base == NULL) { + xasprintf(&inf->result, "cannot read '%s' (%s)", + inf->path, strerror(errno)); + return (1); } + } else + inf->mapped = 1; + return (0); +} - while (fgets(buf, sizeof(buf), f) != NULL) { - buf[strcspn(buf, "\n")] = '\0'; - cwid = file_mbswidth(buf); - if (cwid > wid) - wid = cwid; +static int +try_stat(struct input_file *inf) +{ + if (inf->error != NULL) { + xasprintf(&inf->result, "cannot stat '%s' (%s)", inf->path, + inf->error); + return (1); + } + if (sflag) { + switch (inf->sb.st_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + case S_IFIFO: + case 
S_IFREG: + return (0); } + } - rewind(f); + if (iflag && (inf->sb.st_mode & S_IFMT) != S_IFREG) { + xasprintf(&inf->result, "application/x-not-regular-file"); + return (1); } - while (fgets(buf, sizeof(buf), f) != NULL) { - buf[strcspn(buf, "\n")] = '\0'; - process(buf, wid); - if(nobuffer) - (void)fflush(stdout); + + switch (inf->sb.st_mode & S_IFMT) { + case S_IFDIR: + xasprintf(&inf->result, "directory"); + return (1); + case S_IFLNK: + if (inf->link_error != NULL) { + xasprintf(&inf->result, "unreadable symlink '%s' (%s)", + inf->path, inf->link_error); + return (1); + } + if (inf->link_target == ELOOP) + xasprintf(&inf->result, "symbolic link in a loop"); + else if (inf->link_target != 0) { + xasprintf(&inf->result, "broken symbolic link to '%s'", + inf->link_path); + } else { + xasprintf(&inf->result, "symbolic link to '%s'", + inf->link_path); + } + return (1); + case S_IFSOCK: + xasprintf(&inf->result, "socket"); + return (1); + case S_IFBLK: + xasprintf(&inf->result, "block special (%ld/%ld)", + (long)major(inf->sb.st_rdev), (long)minor(inf->sb.st_rdev)); + return (1); + case S_IFCHR: + xasprintf(&inf->result, "character special (%ld/%ld)", + (long)major(inf->sb.st_rdev), (long)minor(inf->sb.st_rdev)); + return (1); + case S_IFIFO: + xasprintf(&inf->result, "fifo (named pipe)"); + return (1); } + return (0); +} + +static int +try_empty(struct input_file *inf) +{ + if (inf->size != 0) + return (0); - (void)fclose(f); + if (iflag) + xasprintf(&inf->result, "application/x-empty"); + else + xasprintf(&inf->result, "empty"); + return (1); } -/* - * Called for each input file on the command line (or in a list of files) - */ -private void -process(const char *inname, int wid) +static int +try_access(struct input_file *inf) { - const char *type; - int std_in = strcmp(inname, "-") == 0; + char tmp[256] = ""; + + if (inf->fd != -1) + return (0); + + if (inf->sb.st_mode & 0222) + strlcat(tmp, "writable, ", sizeof tmp); + if (inf->sb.st_mode & 0111) + strlcat(tmp, 
"executable, ", sizeof tmp); + if (S_ISREG(inf->sb.st_mode)) + strlcat(tmp, "regular file, ", sizeof tmp); + strlcat(tmp, "no read permission", sizeof tmp); + + inf->result = xstrdup(tmp); + return (1); +} + +static int +try_text(struct input_file *inf) +{ + const char *type, *s; + int flags; + + flags = MAGIC_TEST_TEXT; + if (iflag) + flags |= MAGIC_TEST_MIME; + + type = text_get_type(inf->base, inf->size); + if (type == NULL) + return (0); + + s = magic_test(inf->m, inf->base, inf->size, flags); + if (s != NULL) { + inf->result = xstrdup(s); + return (1); + } - if (wid > 0 && !bflag) { - (void)printf("%s", std_in ? "/dev/stdin" : inname); - if (nulsep) - (void)putc('\0', stdout); + s = text_try_words(inf->base, inf->size, flags); + if (s != NULL) { + if (iflag) + inf->result = xstrdup(s); else - (void)printf("%s", separator); - (void)printf("%*s ", - (int) (nopad ? 0 : (wid - file_mbswidth(inname))), ""); + xasprintf(&inf->result, "%s %s text", type, s); + return (1); } - type = magic_file(magic, std_in ? 
NULL : inname); - if (type == NULL) - (void)printf("ERROR: %s\n", magic_error(magic)); + if (iflag) + inf->result = xstrdup("text/plain"); else - (void)printf("%s\n", type); + xasprintf(&inf->result, "%s text", type); + return (1); } -size_t -file_mbswidth(const char *s) +static int +try_magic(struct input_file *inf) { -#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) - size_t bytesconsumed, old_n, n, width = 0; - mbstate_t state; - wchar_t nextchar; - (void)memset(&state, 0, sizeof(mbstate_t)); - old_n = n = strlen(s); - int w; - - while (n > 0) { - bytesconsumed = mbrtowc(&nextchar, s, n, &state); - if (bytesconsumed == (size_t)(-1) || - bytesconsumed == (size_t)(-2)) { - /* Something went wrong, return something reasonable */ - return old_n; - } - if (s[0] == '\n') { - /* - * do what strlen() would do, so that caller - * is always right - */ - width++; - } else { - w = wcwidth(nextchar); - if (w > 0) - width += w; - } + const char *s; + int flags; + + flags = 0; + if (iflag) + flags |= MAGIC_TEST_MIME; - s += bytesconsumed, n -= bytesconsumed; + s = magic_test(inf->m, inf->base, inf->size, flags); + if (s != NULL) { + inf->result = xstrdup(s); + return (1); } - return width; -#else - return strlen(s); -#endif + return (0); } -private void -usage(void) +static int +try_unknown(struct input_file *inf) { - (void)fprintf(stderr, USAGE, __progname, __progname); - (void)fputs("Try `file --help' for more information.\n", stderr); - exit(1); + if (iflag) + xasprintf(&inf->result, "application/x-not-regular-file"); + else + xasprintf(&inf->result, "data"); + return (1); } -private void -help(void) +static void +test_file(struct magic *m, struct input_file *inf, int width) { - (void)fputs( -"Usage: file [OPTION...] 
[FILE...]\n" -"Determine type of FILEs.\n" -"\n", stderr); -#define OPT(shortname, longname, opt, doc) \ - fprintf(stderr, " -%c, --" longname doc, shortname); -#define OPT_LONGONLY(longname, opt, doc) \ - fprintf(stderr, " --" longname doc); -#include "file_opts.h" -#undef OPT -#undef OPT_LONGONLY - exit(0); + int stop; + + inf->m = m; + + stop = 0; + if (!stop) + stop = try_stat(inf); + if (!stop) + stop = try_access(inf); + if (!stop) + stop = load_file(inf); + if (!stop) + stop = try_empty(inf); + if (!stop) + stop = try_magic(inf); + if (!stop) + stop = try_text(inf); + if (!stop) + stop = try_unknown(inf); + + if (bflag) + printf("%s\n", inf->result); + else + printf("%-*s %s\n", width, inf->label, inf->result); + + if (inf->mapped && inf->base != NULL) + munmap(inf->base, inf->size); + inf->base = NULL; + + free(inf->result); } diff --git a/usr.bin/file/file.h b/usr.bin/file/file.h index 27229b1a6c3..77b6e85da3c 100644 --- a/usr.bin/file/file.h +++ b/usr.bin/file/file.h @@ -1,386 +1,32 @@ -/* $OpenBSD: file.h,v 1.24 2014/05/18 17:50:11 espie Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ +/* $OpenBSD: file.h,v 1.25 2015/04/24 16:24:11 nicm Exp $ */ + /* - * file.h - definitions for file(1) program - * @(#)$Id: file.h,v 1.24 2014/05/18 17:50:11 espie Exp $ + * Copyright (c) 2015 Nicholas Marriott + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ -#ifndef __file_h__ -#define __file_h__ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include /* Include that here, to make sure __P gets defined */ -#include -#include /* For open and flags */ -#ifdef HAVE_STDINT_H -#include -#endif -#ifdef HAVE_INTTYPES_H -#include -#endif -#include -#include -/* Do this here and now, because struct stat gets re-defined on solaris */ -#include -#include - -#define ENABLE_CONDITIONALS - -#ifndef MAGIC -#define MAGIC "/etc/magic" -#endif - -#ifdef __EMX__ -#define PATHSEP ';' -#else -#define PATHSEP ':' -#endif - -#define private static -#ifndef protected -#define protected -#endif -#define public - -#ifndef __GNUC_PREREQ__ -#ifdef __GNUC__ -#define __GNUC_PREREQ__(x, y) \ - ((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) || \ - (__GNUC__ > (x))) -#else -#define __GNUC_PREREQ__(x, y) 0 -#endif -#endif - -#ifndef MIN -#define MIN(a,b) (((a) < (b)) ? (a) : (b)) -#endif - -#ifndef MAX -#define MAX(a,b) (((a) > (b)) ? (a) : (b)) -#endif - -#ifndef HOWMANY -# define HOWMANY (256 * 1024) /* how much of the file to look at */ -#endif -#define MAXMAGIS 8192 /* max entries in any one magic file - or directory */ -#define MAXDESC 64 /* max leng of text description/MIME type */ -#define MAXstring 32 /* max leng of "string" types */ - -#define MAGICNO 0xF11E041C -#define VERSIONNO 5 -#define FILE_MAGICSIZE (32 * 6) - -#define FILE_LOAD 0 -#define FILE_CHECK 1 -#define FILE_COMPILE 2 - -struct magic { - /* Word 1 */ - uint16_t cont_level; /* level of ">" */ - uint8_t flag; -#define INDIR 0x01 /* if '(...)' appears */ -#define OFFADD 0x02 /* if '>&' or '>...(&' appears */ -#define INDIROFFADD 0x04 /* if '>&(' appears */ -#define UNSIGNED 0x08 /* comparison is unsigned */ -#define NOSPACE 0x10 /* suppress space character before output */ -#define BINTEST 0x20 /* test is for a binary type (set only - for top-level tests) */ -#define TEXTTEST 0 /* for passing to file_softmagic */ - - uint8_t dummy1; - - /* Word 2 */ - uint8_t reln; /* 
relation (0=eq, '>'=gt, etc) */ - uint8_t vallen; /* length of string value, if any */ - uint8_t type; /* comparison type (FILE_*) */ - uint8_t in_type; /* type of indirection */ -#define FILE_INVALID 0 -#define FILE_BYTE 1 -#define FILE_SHORT 2 -#define FILE_DEFAULT 3 -#define FILE_LONG 4 -#define FILE_STRING 5 -#define FILE_DATE 6 -#define FILE_BESHORT 7 -#define FILE_BELONG 8 -#define FILE_BEDATE 9 -#define FILE_LESHORT 10 -#define FILE_LELONG 11 -#define FILE_LEDATE 12 -#define FILE_PSTRING 13 -#define FILE_LDATE 14 -#define FILE_BELDATE 15 -#define FILE_LELDATE 16 -#define FILE_REGEX 17 -#define FILE_BESTRING16 18 -#define FILE_LESTRING16 19 -#define FILE_SEARCH 20 -#define FILE_MEDATE 21 -#define FILE_MELDATE 22 -#define FILE_MELONG 23 -#define FILE_QUAD 24 -#define FILE_LEQUAD 25 -#define FILE_BEQUAD 26 -#define FILE_QDATE 27 -#define FILE_LEQDATE 28 -#define FILE_BEQDATE 29 -#define FILE_QLDATE 30 -#define FILE_LEQLDATE 31 -#define FILE_BEQLDATE 32 -#define FILE_FLOAT 33 -#define FILE_BEFLOAT 34 -#define FILE_LEFLOAT 35 -#define FILE_DOUBLE 36 -#define FILE_BEDOUBLE 37 -#define FILE_LEDOUBLE 38 -#define FILE_NAMES_SIZE 39/* size of array to contain all names */ - -#define IS_STRING(t) \ - ((t) == FILE_STRING || \ - (t) == FILE_PSTRING || \ - (t) == FILE_BESTRING16 || \ - (t) == FILE_LESTRING16 || \ - (t) == FILE_REGEX || \ - (t) == FILE_SEARCH || \ - (t) == FILE_DEFAULT) - -#define FILE_FMT_NONE 0 -#define FILE_FMT_NUM 1 /* "cduxXi" */ -#define FILE_FMT_STR 2 /* "s" */ -#define FILE_FMT_QUAD 3 /* "ll" */ -#define FILE_FMT_FLOAT 4 /* "eEfFgG" */ -#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */ - - /* Word 3 */ - uint8_t in_op; /* operator for indirection */ - uint8_t mask_op; /* operator for mask */ -#ifdef ENABLE_CONDITIONALS - uint8_t cond; /* conditional type */ - uint8_t dummy2; -#else - uint8_t dummy2; - uint8_t dummy3; -#endif - -#define FILE_OPS "&|^+-*/%" -#define FILE_OPAND 0 -#define FILE_OPOR 1 -#define FILE_OPXOR 2 -#define FILE_OPADD 3 -#define 
FILE_OPMINUS 4 -#define FILE_OPMULTIPLY 5 -#define FILE_OPDIVIDE 6 -#define FILE_OPMODULO 7 -#define FILE_OPS_MASK 0x07 /* mask for above ops */ -#define FILE_UNUSED_1 0x08 -#define FILE_UNUSED_2 0x10 -#define FILE_UNUSED_3 0x20 -#define FILE_OPINVERSE 0x40 -#define FILE_OPINDIRECT 0x80 - -#ifdef ENABLE_CONDITIONALS -#define COND_NONE 0 -#define COND_IF 1 -#define COND_ELIF 2 -#define COND_ELSE 3 -#endif /* ENABLE_CONDITIONALS */ - - /* Word 4 */ - uint32_t offset; /* offset to magic number */ - /* Word 5 */ - int32_t in_offset; /* offset from indirection */ - /* Word 6 */ - uint32_t lineno; /* line number in magic file */ - /* Word 7,8 */ - union { - uint64_t _mask; /* for use with numeric and date types */ - struct { - uint32_t _count; /* repeat/line count */ - uint32_t _flags; /* modifier flags */ - } _s; /* for use with string types */ - } _u; -#define num_mask _u._mask -#define str_range _u._s._count -#define str_flags _u._s._flags - - /* Words 9-16 */ - union VALUETYPE { - uint8_t b; - uint16_t h; - uint32_t l; - uint64_t q; - uint8_t hs[2]; /* 2 bytes of a fixed-endian "short" */ - uint8_t hl[4]; /* 4 bytes of a fixed-endian "long" */ - uint8_t hq[8]; /* 8 bytes of a fixed-endian "quad" */ - char s[MAXstring]; /* the search string or regex pattern */ - float f; - double d; - } value; /* either number or string */ - /* Words 17..31 */ - char desc[MAXDESC]; /* description */ - /* Words 32..47 */ - char mimetype[MAXDESC]; /* MIME type */ -}; - -#define BIT(A) (1 << (A)) -#define STRING_COMPACT_BLANK BIT(0) -#define STRING_COMPACT_OPTIONAL_BLANK BIT(1) -#define STRING_IGNORE_LOWERCASE BIT(2) -#define STRING_IGNORE_UPPERCASE BIT(3) -#define REGEX_OFFSET_START BIT(4) -#define CHAR_COMPACT_BLANK 'B' -#define CHAR_COMPACT_OPTIONAL_BLANK 'b' -#define CHAR_IGNORE_LOWERCASE 'c' -#define CHAR_IGNORE_UPPERCASE 'C' -#define CHAR_REGEX_OFFSET_START 's' -#define STRING_IGNORE_CASE (STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE) -#define STRING_DEFAULT_RANGE 100 - - -/* 
list of magic entries */ -struct mlist { - struct magic *magic; /* array of magic entries */ - uint32_t nmagic; /* number of entries in array */ - int mapped; /* allocation type: 0 => apprentice_file - * 1 => apprentice_map + malloc - * 2 => apprentice_map + mmap */ - struct mlist *next, *prev; -}; - -struct magic_set { - struct mlist *mlist; - struct cont { - size_t len; - struct level_info { - int32_t off; - int got_match; -#ifdef ENABLE_CONDITIONALS - int last_match; - int last_cond; /* used for error checking by parse() */ -#endif - } *li; - } c; - struct out { - char *buf; /* Accumulation buffer */ - char *pbuf; /* Printable buffer */ - } o; - uint32_t offset; - int error; - int flags; - int haderr; - const char *file; - size_t line; /* current magic line number */ - - /* data for searches */ - struct { - const char *s; /* start of search in original source */ - size_t s_len; /* length of search region */ - size_t offset; /* starting offset in source: XXX - should this be off_t? */ - size_t rm_len; /* match length */ - } search; - - /* FIXME: Make the string dynamically allocated so that e.g. 
- strings matched in files can be longer than MAXstring */ - union VALUETYPE ms_value; /* either number or string */ -}; - -/* Type for Unicode characters */ -typedef unsigned long unichar; - -struct stat; -protected const char *file_fmttime(uint64_t, int); -protected int file_buffer(struct magic_set *, int, const char *, const void *, - size_t); -protected int file_fsmagic(struct magic_set *, const char *, struct stat *); -protected int file_pipe2file(struct magic_set *, int, const void *, size_t); -protected int file_printf(struct magic_set *, const char *, ...); -protected int file_reset(struct magic_set *); -protected int file_tryelf(struct magic_set *, int, const unsigned char *, - size_t); -protected int file_zmagic(struct magic_set *, int, const char *, - const unsigned char *, size_t); -protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t); -protected int file_is_tar(struct magic_set *, const unsigned char *, size_t); -protected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int); -protected struct mlist *file_apprentice(struct magic_set *, const char *, int); -protected uint64_t file_signextend(struct magic_set *, struct magic *, - uint64_t); -protected void file_delmagic(struct magic *, int type, size_t entries); -protected void file_badread(struct magic_set *); -protected void file_badseek(struct magic_set *); -protected void file_oomem(struct magic_set *, size_t); -protected void file_oomem2(struct magic_set *, size_t, size_t); -protected void file_error(struct magic_set *, int, const char *, ...); -protected void file_magerror(struct magic_set *, const char *, ...); -protected void file_magwarn(struct magic_set *, const char *, ...); -protected void file_mdump(struct magic *); -protected void file_showstr(FILE *, const char *, size_t); -protected size_t file_mbswidth(const char *); -protected const char *file_getbuffer(struct magic_set *); -protected ssize_t sread(int, void *, size_t, int); -protected int 
file_check_mem(struct magic_set *, unsigned int); -protected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *); - -#ifndef COMPILE_ONLY -extern const char *file_names[]; -extern const size_t file_nnames; -#endif - -#ifndef HAVE_STRERROR -extern int sys_nerr; -extern char *sys_errlist[]; -#define strerror(e) \ - (((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error") -#endif - -#ifndef HAVE_STRTOUL -#define strtoul(a, b, c) strtol(a, b, c) -#endif +#ifndef FILE_H +#define FILE_H -#ifndef HAVE_VASPRINTF -int vasprintf(char **ptr, const char *format_string, va_list vargs); -#endif -#ifndef HAVE_ASPRINTF -int asprintf(char **ptr, const char *format_string, ...); -#endif +/* Bytes to read if can't use the whole file. */ +#define FILE_READ_SIZE (256 * 1024) -#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK) -#define QUICK -#endif +/* User to drop to if run as root. */ +#define FILE_USER "nobody" -#ifndef O_BINARY -#define O_BINARY 0 -#endif +/* text.c */ +const char *text_get_type(const void *, size_t); +const char *text_try_words(const void *, size_t, int); -#endif /* __file_h__ */ +#endif /* FILE_H */ diff --git a/usr.bin/file/file_opts.h b/usr.bin/file/file_opts.h deleted file mode 100644 index 593bc2f7661..00000000000 --- a/usr.bin/file/file_opts.h +++ /dev/null @@ -1,49 +0,0 @@ -/* $OpenBSD: file_opts.h,v 1.2 2009/04/26 14:17:45 chl Exp $ */ -/* - * Table of command-line options - * - * The first column specifies the short name, if any, or 0 if none. - * The second column specifies the long name. - * The third column specifies whether it takes a parameter. - * The fourth column is the documentation. - * - * N.B. The long options' order must correspond to the code in file.c, - * and OPTSTRING must be kept up-to-date with the short options. - * Pay particular attention to the numbers of long-only options in the - * switch statement! 
- */ - -OPT_LONGONLY("help", 0, " display this help and exit\n") -OPT('v', "version", 0, " output version information and exit\n") -OPT('m', "magic-file", 1, " LIST use LIST as a colon-separated list of magic\n" - " number files\n") -OPT('z', "uncompress", 0, " try to look inside compressed files\n") -OPT('b', "brief", 0, " do not prepend filenames to output lines\n") -OPT('c', "checking-printout", 0, " print the parsed form of the magic file, use in\n" - " conjunction with -m to debug a new magic file\n" - " before installing it\n") -OPT('e', "exclude", 1, " TEST exclude TEST from the list of test to be\n" - " performed for file. Valid tests are:\n" - " ascii, apptype, compress, elf, soft, tar, tokens, troff\n") -OPT('f', "files-from", 1, " FILE read the filenames to be examined from FILE\n") -OPT('F', "separator", 1, " STRING use string as separator instead of `:'\n") -OPT('i', "mime", 0, " output MIME type strings (--mime-type and\n" - " --mime-encoding)\n") -OPT_LONGONLY("mime-type", 0, " output the MIME type\n") -OPT_LONGONLY("mime-encoding", 0, " output the MIME encoding\n") -OPT('k', "keep-going", 0, " don't stop at the first match\n") -#ifdef S_IFLNK -OPT('L', "dereference", 0, " follow symlinks (default)\n") -OPT('h', "no-dereference", 0, " don't follow symlinks\n") -#endif -OPT('n', "no-buffer", 0, " do not buffer output\n") -OPT('N', "no-pad", 0, " do not pad output\n") -OPT('0', "print0", 0, " terminate filenames with ASCII NUL\n") -#if defined(HAVE_UTIME) || defined(HAVE_UTIMES) -OPT('p', "preserve-date", 0, " preserve access times on files\n") -#endif -OPT('r', "raw", 0, " don't translate unprintable chars to \\ooo\n") -OPT('s', "special-files", 0, " treat special (block/char devices) files as\n" - " ordinary ones\n") -OPT('C', "compile", 0, " compile file specified by -m\n") -OPT('d', "debug", 0, " print debugging messages\n") diff --git a/usr.bin/file/fsmagic.c b/usr.bin/file/fsmagic.c deleted file mode 100644 index 80ccf6e46d0..00000000000 --- 
a/usr.bin/file/fsmagic.c +++ /dev/null @@ -1,309 +0,0 @@ -/* $OpenBSD: fsmagic.c,v 1.14 2009/10/27 23:59:37 deraadt Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * fsmagic - magic based on filesystem info - directory, special files, etc. - */ - -#include "file.h" -#include "magic.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include -/* Since major is a function on SVR4, we cannot use `ifndef major'. 
*/ -#ifdef MAJOR_IN_MKDEV -# include -# define HAVE_MAJOR -#endif -#ifdef MAJOR_IN_SYSMACROS -# include -# define HAVE_MAJOR -#endif -#ifdef major /* Might be defined in sys/types.h. */ -# define HAVE_MAJOR -#endif - -#ifndef HAVE_MAJOR -# define major(dev) (((dev) >> 8) & 0xff) -# define minor(dev) ((dev) & 0xff) -#endif -#undef HAVE_MAJOR - -private int -bad_link(struct magic_set *ms, int err, char *buf) -{ - char *errfmt; - if (err == ELOOP) - errfmt = "symbolic link in a loop"; - else - errfmt = "broken symbolic link to `%s'"; - if (ms->flags & MAGIC_ERROR) { - file_error(ms, err, errfmt, buf); - return -1; - } - if (file_printf(ms, errfmt, buf) == -1) - return -1; - return 1; -} - -protected int -file_fsmagic(struct magic_set *ms, const char *fn, struct stat *sb) -{ - int ret = 0; - int mime = ms->flags & MAGIC_MIME; -#ifdef S_IFLNK - char buf[BUFSIZ+4]; - int nch; - struct stat tstatbuf; -#endif - - if (fn == NULL) - return 0; - - /* - * Fstat is cheaper but fails for files you don't have read perms on. - * On 4.2BSD and similar systems, use lstat() to identify symlinks. 
- */ -#ifdef S_IFLNK - if ((ms->flags & MAGIC_SYMLINK) == 0) - ret = lstat(fn, sb); - else -#endif - ret = stat(fn, sb); /* don't merge into if; see "ret =" above */ - - if (ret) { - if (ms->flags & MAGIC_ERROR) { - file_error(ms, errno, "cannot stat `%s'", fn); - return -1; - } - if (file_printf(ms, "cannot open `%s' (%s)", - fn, strerror(errno)) == -1) - return -1; - return 1; - } - - if (mime) { - if ((sb->st_mode & S_IFMT) != S_IFREG) { - if ((mime & MAGIC_MIME_TYPE) && - file_printf(ms, "application/x-not-regular-file") - == -1) - return -1; - return 1; - } - } - else { -#ifdef S_ISUID - if (sb->st_mode & S_ISUID) - if (file_printf(ms, "setuid ") == -1) - return -1; -#endif -#ifdef S_ISGID - if (sb->st_mode & S_ISGID) - if (file_printf(ms, "setgid ") == -1) - return -1; -#endif -#ifdef S_ISVTX - if (sb->st_mode & S_ISVTX) - if (file_printf(ms, "sticky ") == -1) - return -1; -#endif - } - - switch (sb->st_mode & S_IFMT) { - case S_IFDIR: - if (file_printf(ms, "directory") == -1) - return -1; - return 1; -#ifdef S_IFCHR - case S_IFCHR: - /* - * If -s has been specified, treat character special files - * like ordinary files. Otherwise, just report that they - * are block special files and go on to the next file. - */ - if ((ms->flags & MAGIC_DEVICES) != 0) - break; -#ifdef HAVE_STAT_ST_RDEV -# ifdef dv_unit - if (file_printf(ms, "character special (%d/%d/%d)", - major(sb->st_rdev), dv_unit(sb->st_rdev), - dv_subunit(sb->st_rdev)) == -1) - return -1; -# else - if (file_printf(ms, "character special (%ld/%ld)", - (long) major(sb->st_rdev), (long) minor(sb->st_rdev)) == -1) - return -1; -# endif -#else - if (file_printf(ms, "character special") == -1) - return -1; -#endif - return 1; -#endif -#ifdef S_IFBLK - case S_IFBLK: - /* - * If -s has been specified, treat block special files - * like ordinary files. Otherwise, just report that they - * are block special files and go on to the next file. 
- */ - if ((ms->flags & MAGIC_DEVICES) != 0) - break; -#ifdef HAVE_STAT_ST_RDEV -# ifdef dv_unit - if (file_printf(ms, "block special (%d/%d/%d)", - major(sb->st_rdev), dv_unit(sb->st_rdev), - dv_subunit(sb->st_rdev)) == -1) - return -1; -# else - if (file_printf(ms, "block special (%ld/%ld)", - (long)major(sb->st_rdev), (long)minor(sb->st_rdev)) == -1) - return -1; -# endif -#else - if (file_printf(ms, "block special") == -1) - return -1; -#endif - return 1; -#endif - /* TODO add code to handle V7 MUX and Blit MUX files */ -#ifdef S_IFIFO - case S_IFIFO: - if((ms->flags & MAGIC_DEVICES) != 0) - break; - if (file_printf(ms, "fifo (named pipe)") == -1) - return -1; - return 1; -#endif -#ifdef S_IFDOOR - case S_IFDOOR: - if (file_printf(ms, "door") == -1) - return -1; - return 1; -#endif -#ifdef S_IFLNK - case S_IFLNK: - if ((nch = readlink(fn, buf, BUFSIZ-1)) <= 0) { - if (ms->flags & MAGIC_ERROR) { - file_error(ms, errno, "unreadable symlink `%s'", - fn); - return -1; - } - if (file_printf(ms, - "unreadable symlink `%s' (%s)", fn, - strerror(errno)) == -1) - return -1; - return 1; - } - buf[nch] = '\0'; /* readlink(2) does not do this */ - - /* If broken symlink, say so and quit early. */ - if (*buf == '/') { - if (stat(buf, &tstatbuf) < 0) - return bad_link(ms, errno, buf); - } else { - char *tmp; - char buf2[BUFSIZ+BUFSIZ+4]; - - if ((tmp = strrchr(fn, '/')) == NULL) { - tmp = buf; /* in current directory anyway */ - } else { - if (tmp - fn + 1 > BUFSIZ) { - if (ms->flags & MAGIC_ERROR) { - file_error(ms, 0, - "path too long: `%s'", buf); - return -1; - } - if (file_printf(ms, - "path too long: `%s'", fn) == -1) - return -1; - return 1; - } - (void)strlcpy(buf2, fn, sizeof buf2); /* take dir part */ - buf2[tmp - fn + 1] = '\0'; - (void)strlcat(buf2, buf, sizeof buf2); /* plus (rel) link */ - tmp = buf2; - } - if (stat(tmp, &tstatbuf) < 0) - return bad_link(ms, errno, buf); - } - - /* Otherwise, handle it. 
*/ - if ((ms->flags & MAGIC_SYMLINK) != 0) { - const char *p; - ms->flags &= MAGIC_SYMLINK; - p = magic_file(ms, buf); - ms->flags |= MAGIC_SYMLINK; - return p != NULL ? 1 : -1; - } else { /* just print what it points to */ - if (file_printf(ms, "symbolic link to `%s'", - buf) == -1) - return -1; - } - return 1; -#endif -#ifdef S_IFSOCK -#ifndef __COHERENT__ - case S_IFSOCK: - if (file_printf(ms, "socket") == -1) - return -1; - return 1; -#endif -#endif - case S_IFREG: - break; - default: - file_error(ms, 0, "invalid mode 0%o", sb->st_mode); - return -1; - /*NOTREACHED*/ - } - - /* - * regular file, check next possibility - * - * If stat() tells us the file has zero length, report here that - * the file is empty, so we can skip all the work of opening and - * reading the file. - * But if the -s option has been given, we skip this optimization, - * since on some systems, stat() reports zero size for raw disk - * partitions. (If the block special device really has zero length, - * the fact that it is empty will be detected and reported correctly - * when we read the file.) - */ - if ((ms->flags & MAGIC_DEVICES) == 0 && sb->st_size == 0) { - if ((!mime || (mime & MAGIC_MIME_TYPE)) && - file_printf(ms, mime ? "application/x-empty" : - "empty") == -1) - return -1; - return 1; - } - return 0; -} diff --git a/usr.bin/file/funcs.c b/usr.bin/file/funcs.c deleted file mode 100644 index cdc593ee54f..00000000000 --- a/usr.bin/file/funcs.c +++ /dev/null @@ -1,332 +0,0 @@ -/* $OpenBSD: funcs.c,v 1.8 2014/05/18 17:50:11 espie Exp $ */ -/* - * Copyright (c) Christos Zoulas 2003. - * All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include "file.h" -#include "magic.h" -#include -#include -#include -#include -#if defined(HAVE_WCHAR_H) -#include -#endif -#if defined(HAVE_WCTYPE_H) -#include -#endif - -/* - * Like printf, only we append to a buffer. - */ -protected int -file_printf(struct magic_set *ms, const char *fmt, ...) 
-{ - va_list ap; - int len; - char *buf, *newstr; - - va_start(ap, fmt); - len = vasprintf(&buf, fmt, ap); - if (len < 0) - goto out; - va_end(ap); - - if (ms->o.buf != NULL) { - len = asprintf(&newstr, "%s%s", ms->o.buf, buf); - free(buf); - if (len < 0) - goto out; - free(ms->o.buf); - buf = newstr; - } - ms->o.buf = buf; - return 0; -out: - file_error(ms, errno, "vasprintf failed"); - return -1; -} - -/* - * error - print best error message possible - */ -/*VARARGS*/ -private void -file_error_core(struct magic_set *ms, int error, const char *f, va_list va, - uint32_t lineno) -{ - /* Only the first error is ok */ - if (ms->haderr) - return; - if (lineno != 0) { - free(ms->o.buf); - ms->o.buf = NULL; - file_printf(ms, "line %u: ", lineno); - } - file_printf(ms, f, va); - if (error > 0) - file_printf(ms, " (%s)", strerror(error)); - ms->haderr++; - ms->error = error; -} - -/*VARARGS*/ -protected void -file_error(struct magic_set *ms, int error, const char *f, ...) -{ - va_list va; - va_start(va, f); - file_error_core(ms, error, f, va, 0); - va_end(va); -} - -/* - * Print an error with magic line number. - */ -/*VARARGS*/ -protected void -file_magerror(struct magic_set *ms, const char *f, ...) 
-{ - va_list va; - va_start(va, f); - file_error_core(ms, 0, f, va, ms->line); - va_end(va); -} - -protected void -file_oomem(struct magic_set *ms, size_t len) -{ - file_error(ms, errno, "cannot allocate %zu bytes", len); -} - -protected void -file_oomem2(struct magic_set *ms, size_t len, size_t l2) -{ - file_error(ms, errno, "cannot allocate %zu * %zu bytes", len, l2); -} -protected void -file_badseek(struct magic_set *ms) -{ - file_error(ms, errno, "error seeking"); -} - -protected void -file_badread(struct magic_set *ms) -{ - file_error(ms, errno, "error reading"); -} - -#ifndef COMPILE_ONLY -protected int -file_buffer(struct magic_set *ms, int fd, const char *inname, const void *buf, - size_t nb) -{ - int m; - int mime = ms->flags & MAGIC_MIME; - - if (nb == 0) { - if ((!mime || (mime & MAGIC_MIME_TYPE)) && - file_printf(ms, mime ? "application/x-empty" : - "empty") == -1) - return -1; - return 1; - } else if (nb == 1) { - if ((!mime || (mime & MAGIC_MIME_TYPE)) && - file_printf(ms, mime ? "application/octet-stream" : - "very short file (no magic)") == -1) - return -1; - return 1; - } - -#ifdef __EMX__ - if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) { - switch (file_os2_apptype(ms, inname, buf, nb)) { - case -1: - return -1; - case 0: - break; - default: - return 1; - } - } -#endif - - /* try compression stuff */ - if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) != 0 || - (m = file_zmagic(ms, fd, inname, buf, nb)) == 0) { - /* Check if we have a tar file */ - if ((ms->flags & MAGIC_NO_CHECK_TAR) != 0 || - (m = file_is_tar(ms, buf, nb)) == 0) { - /* try tests in /etc/magic (or surrogate magic file) */ - if ((ms->flags & MAGIC_NO_CHECK_SOFT) != 0 || - (m = file_softmagic(ms, buf, nb, BINTEST)) == 0) { - /* try known keywords, check whether it is ASCII */ - if ((ms->flags & MAGIC_NO_CHECK_ASCII) != 0 || - (m = file_ascmagic(ms, buf, nb)) == 0) { - /* abandon hope, all ye who remain here */ - if ((!mime || (mime & MAGIC_MIME_TYPE)) && - file_printf(ms, mime ? 
"application/octet-stream" : - "data") == -1) - return -1; - m = 1; - } - } - } - } -#ifdef BUILTIN_ELF - if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && m == 1 && - nb > 5 && fd != -1) { - /* - * We matched something in the file, so this *might* - * be an ELF file, and the file is at least 5 bytes - * long, so if it's an ELF file it has at least one - * byte past the ELF magic number - try extracting - * information from the ELF headers that cannot easily - * be extracted with rules in the magic file. - */ - (void)file_tryelf(ms, fd, buf, nb); - } -#endif - return m; -} -#endif - -protected int -file_reset(struct magic_set *ms) -{ - if (ms->mlist == NULL) { - file_error(ms, 0, "no magic files loaded"); - return -1; - } - ms->o.buf = NULL; - ms->haderr = 0; - ms->error = -1; - return 0; -} - -#define OCTALIFY(n, o) \ - /*LINTED*/ \ - (void)(*(n)++ = '\\', \ - *(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \ - *(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \ - *(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \ - (o)++) - -protected const char * -file_getbuffer(struct magic_set *ms) -{ - char *pbuf, *op, *np; - size_t psize, len; - - if (ms->haderr) - return NULL; - - if (ms->flags & MAGIC_RAW) - return ms->o.buf; - - /* * 4 is for octal representation, + 1 is for NUL */ - len = strlen(ms->o.buf); - if (len > (SIZE_MAX - 1) / 4) { - file_oomem(ms, len); - return NULL; - } - psize = len * 4 + 1; - if ((pbuf = realloc(ms->o.pbuf, psize)) == NULL) { - file_oomem(ms, psize); - return NULL; - } - ms->o.pbuf = pbuf; - -#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) - { - mbstate_t state; - wchar_t nextchar; - int mb_conv = 1; - size_t bytesconsumed; - char *eop; - (void)memset(&state, 0, sizeof(mbstate_t)); - - np = ms->o.pbuf; - op = ms->o.buf; - eop = op + len; - - while (op < eop) { - bytesconsumed = mbrtowc(&nextchar, op, - (size_t)(eop - op), &state); - if (bytesconsumed == (size_t)(-1) || - bytesconsumed == (size_t)(-2)) { - mb_conv = 0; - break; - } - 
- if (iswprint(nextchar)) { - (void)memcpy(np, op, bytesconsumed); - op += bytesconsumed; - np += bytesconsumed; - } else { - while (bytesconsumed-- > 0) - OCTALIFY(np, op); - } - } - *np = '\0'; - - /* Parsing succeeded as a multi-byte sequence */ - if (mb_conv != 0) - return ms->o.pbuf; - } -#endif - - for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) { - if (isprint((unsigned char)*op)) { - *np++ = *op; - } else { - OCTALIFY(np, op); - } - } - *np = '\0'; - return ms->o.pbuf; -} - -protected int -file_check_mem(struct magic_set *ms, unsigned int level) -{ - size_t len; - - if (level >= ms->c.len) { - len = (ms->c.len += 20) * sizeof(*ms->c.li); - ms->c.li = (ms->c.li == NULL) ? malloc(len) : - realloc(ms->c.li, len); - if (ms->c.li == NULL) { - file_oomem(ms, len); - return -1; - } - } - ms->c.li[level].got_match = 0; -#ifdef ENABLE_CONDITIONALS - ms->c.li[level].last_match = 0; - ms->c.li[level].last_cond = COND_NONE; -#endif /* ENABLE_CONDITIONALS */ - return 0; -} diff --git a/usr.bin/file/is_tar.c b/usr.bin/file/is_tar.c deleted file mode 100644 index 6d84bfccd5a..00000000000 --- a/usr.bin/file/is_tar.c +++ /dev/null @@ -1,153 +0,0 @@ -/* $OpenBSD: is_tar.c,v 1.10 2009/10/27 23:59:37 deraadt Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * is_tar() -- figure out whether file is a tar archive. - * - * Stolen (by the author!) from the public domain tar program: - * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). - * - * @(#)list.c 1.18 9/23/86 Public Domain - gnu - * - * Comments changed and some code/comments reformatted - * for file command by Ian Darwin. - */ - -#include "file.h" -#include "magic.h" -#include -#include -#include -#include "tar.h" - -#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) - -private int is_tar(const unsigned char *, size_t); -private int from_oct(int, const char *); /* Decode octal number */ - -static const char tartype[][32] = { - "tar archive", - "POSIX tar archive", - "POSIX tar archive (GNU)", -}; - -protected int -file_is_tar(struct magic_set *ms, const unsigned char *buf, size_t nbytes) -{ - /* - * Do the tar test first, because if the first file in the tar - * archive starts with a dot, we can confuse it with an nroff file. - */ - int tar = is_tar(buf, nbytes); - int mime = ms->flags & MAGIC_MIME; - - if (tar < 1 || tar > 3) - return 0; - - if (mime == MAGIC_MIME_ENCODING) - return 0; - - if (file_printf(ms, mime ? 
"application/x-tar" : - tartype[tar - 1]) == -1) - return -1; - return 1; -} - -/* - * Return - * 0 if the checksum is bad (i.e., probably not a tar archive), - * 1 for old UNIX tar file, - * 2 for Unix Std (POSIX) tar file, - * 3 for GNU tar file. - */ -private int -is_tar(const unsigned char *buf, size_t nbytes) -{ - const union record *header = (const union record *)(const void *)buf; - int i; - int sum, recsum; - const char *p; - - if (nbytes < sizeof(union record)) - return 0; - - recsum = from_oct(8, header->header.chksum); - - sum = 0; - p = header->charptr; - for (i = sizeof(union record); --i >= 0;) { - /* - * We cannot use unsigned char here because of old compilers, - * e.g. V7. - */ - sum += 0xFF & *p++; - } - - /* Adjust checksum to count the "chksum" field as blanks. */ - for (i = sizeof(header->header.chksum); --i >= 0;) - sum -= 0xFF & header->header.chksum[i]; - sum += ' '* sizeof header->header.chksum; - - if (sum != recsum) - return 0; /* Not a tar archive */ - - if (strcmp(header->header.magic, GNUTMAGIC) == 0) - return 3; /* GNU Unix Standard tar archive */ - if (strcmp(header->header.magic, TMAGIC) == 0) - return 2; /* Unix Standard tar archive */ - - return 1; /* Old fashioned tar archive */ -} - - -/* - * Quick and dirty octal conversion. - * - * Result is -1 if the field is invalid (all blank, or nonoctal). 
- */ -private int -from_oct(int digs, const char *where) -{ - int value; - - while (isspace((unsigned char)*where)) { /* Skip spaces */ - where++; - if (--digs <= 0) - return -1; /* All blank field */ - } - value = 0; - while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ - value = (value << 3) | (*where++ - '0'); - --digs; - } - - if (digs > 0 && *where && !isspace((unsigned char)*where)) - return -1; /* Ended on non-space/nul */ - - return value; -} diff --git a/usr.bin/file/magic-common.c b/usr.bin/file/magic-common.c new file mode 100644 index 00000000000..e84d113b962 --- /dev/null +++ b/usr.bin/file/magic-common.c @@ -0,0 +1,83 @@ +/* $OpenBSD: magic-common.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */ + +/* + * Copyright (c) 2015 Nicholas Marriott + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "magic.h"
+
+/*
+ * Parse an unsigned integer (base chosen by strtoull(3): 0x.. hex, 0.. octal,
+ * otherwise decimal) from s into *u.
+ *
+ * Returns a pointer to the first unparsed character, after skipping one
+ * optional trailing 'L' suffix. Returns NULL if s is empty, starts with '-',
+ * or the value overflows. If no digits are consumed the returned pointer
+ * equals s, so callers must check what it points at.
+ */
+char *
+magic_strtoull(const char *s, uint64_t *u)
+{
+	char	*endptr;
+
+	if (*s == '-')
+		return (NULL);
+	errno = 0;
+	*u = strtoull(s, &endptr, 0);
+	if (*s == '\0')
+		return (NULL);
+	if (errno == ERANGE && *u == ULLONG_MAX)
+		return (NULL);
+	if (*endptr == 'L')
+		endptr++;
+	return (endptr);
+}
+
+/*
+ * Signed counterpart of magic_strtoull(): parse s into *i.
+ *
+ * Returns a pointer to the first unparsed character (after an optional 'L'
+ * suffix), or NULL if s is empty or the value is out of range in either
+ * direction.
+ */
+char *
+magic_strtoll(const char *s, int64_t *i)
+{
+	char	*endptr;
+
+	errno = 0;
+	*i = strtoll(s, &endptr, 0);
+	if (*s == '\0')
+		return (NULL);
+	/* strtoll(3) clamps to LLONG_MIN on underflow as well as LLONG_MAX. */
+	if (errno == ERANGE && (*i == LLONG_MAX || *i == LLONG_MIN))
+		return (NULL);
+	if (*endptr == 'L')
+		endptr++;
+	return (endptr);
+}
+
+/*
+ * Print a printf-style warning about magic line ml to stderr, prefixed with
+ * the magic file path and line number. Does nothing unless warnings were
+ * enabled for the file ml belongs to; a formatting failure is silently
+ * dropped.
+ */
+void
+magic_warn(struct magic_line *ml, const char *fmt, ...)
+{
+	va_list	 ap;
+	char	*msg;
+	int	 ret;
+
+	if (!ml->root->warnings)
+		return;
+
+	va_start(ap, fmt);
+	ret = vasprintf(&msg, fmt, ap);
+	va_end(ap);
+	if (ret == -1)
+		return;
+
+	fprintf(stderr, "%s:%u: %s\n", ml->root->path, ml->line, msg);
+	free(msg);
+}
diff --git a/usr.bin/file/magic-dump.c b/usr.bin/file/magic-dump.c
new file mode 100644
index 00000000000..286680b5312
--- /dev/null
+++ b/usr.bin/file/magic-dump.c
@@ -0,0 +1,53 @@
+/* $OpenBSD: magic-dump.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Copyright (c) 2015 Nicholas Marriott
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <stdio.h>
+
+#include "magic.h"
+
+/*
+ * Print one magic line to stdout, then its children recursively.
+ *
+ * Output is the source line number, one '>' per nesting level, then
+ * "type/result (mimetype) [strength]" with " (text)" appended when the line
+ * is flagged as text. A missing result or MIME type is printed as nothing.
+ */
+static void
+magic_dump_line(struct magic_line *ml, u_int depth)
+{
+	struct magic_line	*child;
+	u_int			 i;
+
+	printf("%u", ml->line);
+	for (i = 0; i < depth; i++)
+		printf(">");
+	printf(" %s/%s%s%s%s [%u]%s\n", ml->type_string,
+	    ml->result == NULL ? "" : ml->result,
+	    ml->mimetype == NULL ? "" : " (",
+	    ml->mimetype == NULL ? "" : ml->mimetype,
+	    ml->mimetype == NULL ? "" : ")",
+	    ml->strength, ml->text ? " (text)" : "");
+
+	TAILQ_FOREACH(child, &ml->children, entry)
+		magic_dump_line(child, depth + 1);
+}
+
+/*
+ * Dump every top-level line of m, in tree order, together with its children.
+ */
+void
+magic_dump(struct magic *m)
+{
+	struct magic_line	*ml;
+
+	RB_FOREACH(ml, magic_tree, &m->tree)
+		magic_dump_line(ml, 0);
+}
diff --git a/usr.bin/file/magic-load.c b/usr.bin/file/magic-load.c
new file mode 100644
index 00000000000..29455141d57
--- /dev/null
+++ b/usr.bin/file/magic-load.c
@@ -0,0 +1,1020 @@
+/* $OpenBSD: magic-load.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Copyright (c) 2015 Nicholas Marriott
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <regex.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "magic.h"
+#include "xmalloc.h"
+
+/*
+ * Exact type keywords accepted by magic_parse_type() and the type each one
+ * selects. "string", "search" and "regex" are matched by prefix and are
+ * handled separately.
+ */
+static const struct {
+	const char	*name;
+	int		 type;
+} magic_type_names[] = {
+	{ "byte", MAGIC_TYPE_BYTE },
+	{ "short", MAGIC_TYPE_SHORT },
+	{ "long", MAGIC_TYPE_LONG },
+	{ "quad", MAGIC_TYPE_QUAD },
+	{ "ubyte", MAGIC_TYPE_UBYTE },
+	{ "ushort", MAGIC_TYPE_USHORT },
+	{ "ulong", MAGIC_TYPE_ULONG },
+	{ "uquad", MAGIC_TYPE_UQUAD },
+	{ "float", MAGIC_TYPE_FLOAT },
+	{ "double", MAGIC_TYPE_DOUBLE },
+	{ "pstring", MAGIC_TYPE_PSTRING },
+	{ "date", MAGIC_TYPE_DATE },
+	{ "qdate", MAGIC_TYPE_QDATE },
+	{ "ldate", MAGIC_TYPE_LDATE },
+	{ "qldate", MAGIC_TYPE_QLDATE },
+	{ "udate", MAGIC_TYPE_UDATE },
+	{ "uqdate", MAGIC_TYPE_UQDATE },
+	{ "uldate", MAGIC_TYPE_ULDATE },
+	{ "uqldate", MAGIC_TYPE_UQLDATE },
+	{ "beshort", MAGIC_TYPE_BESHORT },
+	{ "belong", MAGIC_TYPE_BELONG },
+	{ "bequad", MAGIC_TYPE_BEQUAD },
+	{ "ubeshort", MAGIC_TYPE_UBESHORT },
+	{ "ubelong", MAGIC_TYPE_UBELONG },
+	{ "ubequad", MAGIC_TYPE_UBEQUAD },
+	{ "befloat", MAGIC_TYPE_BEFLOAT },
+	{ "bedouble", MAGIC_TYPE_BEDOUBLE },
+	{ "bedate", MAGIC_TYPE_BEDATE },
+	{ "beqdate", MAGIC_TYPE_BEQDATE },
+	{ "beldate", MAGIC_TYPE_BELDATE },
+	{ "beqldate", MAGIC_TYPE_BEQLDATE },
+	{ "ubedate", MAGIC_TYPE_UBEDATE },
+	{ "ubeqdate", MAGIC_TYPE_UBEQDATE },
+	{ "ubeldate", MAGIC_TYPE_UBELDATE },
+	{ "ubeqldate", MAGIC_TYPE_UBEQLDATE },
+	{ "bestring16", MAGIC_TYPE_BESTRING16 },
+	{ "leshort", MAGIC_TYPE_LESHORT },
+	{ "lelong", MAGIC_TYPE_LELONG },
+	{ "lequad", MAGIC_TYPE_LEQUAD },
+	{ "uleshort", MAGIC_TYPE_ULESHORT },
+	{ "ulelong", MAGIC_TYPE_ULELONG },
+	{ "ulequad", MAGIC_TYPE_ULEQUAD },
+	{ "lefloat", MAGIC_TYPE_LEFLOAT },
+	{ "ledouble", MAGIC_TYPE_LEDOUBLE },
+	{ "ledate", MAGIC_TYPE_LEDATE },
+	{ "leqdate", MAGIC_TYPE_LEQDATE },
+	{ "leldate", MAGIC_TYPE_LELDATE },
+	{ "leqldate", MAGIC_TYPE_LEQLDATE },
+	{ "uledate", MAGIC_TYPE_ULEDATE },
+	{ "uleqdate", MAGIC_TYPE_ULEQDATE },
+	{ "uleldate", MAGIC_TYPE_ULELDATE },
+	{ "uleqldate", MAGIC_TYPE_ULEQLDATE },
+	{ "lestring16", MAGIC_TYPE_LESTRING16 },
+	{ "melong", MAGIC_TYPE_MELONG },
+	{ "medate", MAGIC_TYPE_MEDATE },
+	{ "meldate", MAGIC_TYPE_MELDATE },
+	{ "default", MAGIC_TYPE_DEFAULT }
+};
+
+/* Value of octal digit c, or -1 if c is not an octal digit. */
+static int
+magic_odigit(u_char c)
+{
+	if (c >= '0' && c <= '7')
+		return (c - '0');
+	return (-1);
+}
+
+/* Value of hexadecimal digit c (either case), or -1 if c is not one. */
+static int
+magic_xdigit(u_char c)
+{
+	if (c >= '0' && c <= '9')
+		return (c - '0');
+	if (c >= 'a' && c <= 'f')
+		return (10 + c - 'a');
+	if (c >= 'A' && c <= 'F')
+		return (10 + c - 'A');
+	return (-1);
+}
+
+/* Set the text flag on ml and on every ancestor of ml. */
+static void
+magic_mark_text(struct magic_line *ml, int text)
+{
+	do {
+		ml->text = text;
+		ml = ml->parent;
+	} while (ml != NULL);
+}
+
+/*
+ * Compile extended regular expression p into re. On failure a warning
+ * naming the pattern is issued against ml and -1 is returned.
+ */
+static int
+magic_make_pattern(struct magic_line *ml, const char *name, regex_t *re,
+    const char *p)
+{
+	int	error;
+	char	errbuf[256];
+
+	error = regcomp(re, p, REG_EXTENDED|REG_NOSUB);
+	if (error != 0) {
+		regerror(error, re, errbuf, sizeof errbuf);
+		magic_warn(ml, "bad %s pattern: %s", name, errbuf);
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Store the result (description) text s for ml and validate the single
+ * printf-style conversion it may contain against the line's type.
+ *
+ * Returns 0 on success and -1 (after a warning) if the text holds more than
+ * one conversion, an unterminated or overlong one, or one that does not
+ * suit the type. ml->result stays set on failure; magic_free_line()
+ * releases it.
+ */
+static int
+magic_set_result(struct magic_line *ml, const char *s)
+{
+	const char	*fmt;
+	const char	*endfmt;
+	const char	*cp;
+	regex_t		*re = NULL;
+	regmatch_t	 pmatch;
+	size_t		 fmtlen;
+
+	while (isspace((u_char)*s))
+		s++;
+	if (*s == '\0') {
+		ml->result = NULL;
+		return (0);
+	}
+	ml->result = xstrdup(s);
+
+	fmt = NULL;
+	for (cp = s; *cp != '\0'; cp++) {
+		if (cp[0] != '%')
+			continue;
+		if (cp[1] == '%') {
+			/*
+			 * Literal percent: skip both characters so the
+			 * second one is not taken as the start of a format.
+			 */
+			cp++;
+			continue;
+		}
+		if (fmt != NULL) {
+			magic_warn(ml, "multiple formats");
+			return (-1);
+		}
+		fmt = cp;
+	}
+	if (fmt == NULL)
+		return (0);
+	fmt++;
+
+	for (endfmt = fmt; *endfmt != '\0'; endfmt++) {
+		if (strchr("diouxXeEfFgGsc", *endfmt) != NULL)
+			break;
+	}
+	if (*endfmt == '\0') {
+		magic_warn(ml, "unterminated format");
+		return (-1);
+	}
+	fmtlen = endfmt + 1 - fmt;
+	if (fmtlen > 32) {
+		magic_warn(ml, "format too long");
+		return (-1);
+	}
+
+	/* %s on a type that is not a date or string: flag it as stringify. */
+	if (*endfmt == 's') {
+		switch (ml->type) {
+		case MAGIC_TYPE_DATE:
+		case MAGIC_TYPE_LDATE:
+		case MAGIC_TYPE_UDATE:
+		case MAGIC_TYPE_ULDATE:
+		case MAGIC_TYPE_BEDATE:
+		case MAGIC_TYPE_BELDATE:
+		case MAGIC_TYPE_UBEDATE:
+		case MAGIC_TYPE_UBELDATE:
+		case MAGIC_TYPE_QDATE:
+		case MAGIC_TYPE_QLDATE:
+		case MAGIC_TYPE_UQDATE:
+		case MAGIC_TYPE_UQLDATE:
+		case MAGIC_TYPE_BEQDATE:
+		case MAGIC_TYPE_BEQLDATE:
+		case MAGIC_TYPE_UBEQDATE:
+		case MAGIC_TYPE_UBEQLDATE:
+		case MAGIC_TYPE_LEQDATE:
+		case MAGIC_TYPE_LEQLDATE:
+		case MAGIC_TYPE_ULEQDATE:
+		case MAGIC_TYPE_ULEQLDATE:
+		case MAGIC_TYPE_LEDATE:
+		case MAGIC_TYPE_LELDATE:
+		case MAGIC_TYPE_ULEDATE:
+		case MAGIC_TYPE_ULELDATE:
+		case MAGIC_TYPE_MEDATE:
+		case MAGIC_TYPE_MELDATE:
+		case MAGIC_TYPE_STRING:
+		case MAGIC_TYPE_PSTRING:
+		case MAGIC_TYPE_BESTRING16:
+		case MAGIC_TYPE_LESTRING16:
+		case MAGIC_TYPE_REGEX:
+		case MAGIC_TYPE_SEARCH:
+			break;
+		default:
+			ml->stringify = 1;
+			break;
+		}
+	}
+
+	/* The validation patterns are compiled once per magic file. */
+	if (!ml->root->compiled) {
+		/*
+		 * XXX %ld (and %lu and so on) is invalid on 64-bit platforms
+		 * with byte, short, long. We get lucky because our first and
+		 * only argument ends up in a register. Accept it for now.
+		 */
+		if (magic_make_pattern(ml, "short", &ml->root->format_short,
+		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
+			return (-1);
+		if (magic_make_pattern(ml, "long", &ml->root->format_long,
+		    "^-?[0-9]*(\\.[0-9]*)?(c|(l|h|hh)?[iduxX])$") != 0)
+			return (-1);
+		if (magic_make_pattern(ml, "quad", &ml->root->format_quad,
+		    "^-?[0-9]*(\\.[0-9]*)?ll[iduxX]$") != 0)
+			return (-1);
+		if (magic_make_pattern(ml, "float", &ml->root->format_float,
+		    "^-?[0-9]*(\\.[0-9]*)?[eEfFgG]$") != 0)
+			return (-1);
+		if (magic_make_pattern(ml, "string", &ml->root->format_string,
+		    "^-?[0-9]*(\\.[0-9]*)?s$") != 0)
+			return (-1);
+		ml->root->compiled = 1;
+	}
+
+	if (ml->stringify)
+		re = &ml->root->format_string;
+	else {
+		switch (ml->type) {
+		case MAGIC_TYPE_NONE:
+		case MAGIC_TYPE_DEFAULT:
+			return (0); /* don't use result */
+		case MAGIC_TYPE_BYTE:
+		case MAGIC_TYPE_UBYTE:
+		case MAGIC_TYPE_SHORT:
+		case MAGIC_TYPE_USHORT:
+		case MAGIC_TYPE_BESHORT:
+		case MAGIC_TYPE_UBESHORT:
+		case MAGIC_TYPE_LESHORT:
+		case MAGIC_TYPE_ULESHORT:
+			re = &ml->root->format_short;
+			break;
+		case MAGIC_TYPE_LONG:
+		case MAGIC_TYPE_ULONG:
+		case MAGIC_TYPE_BELONG:
+		case MAGIC_TYPE_UBELONG:
+		case MAGIC_TYPE_LELONG:
+		case MAGIC_TYPE_ULELONG:
+		case MAGIC_TYPE_MELONG:
+			re = &ml->root->format_long;
+			break;
+		case MAGIC_TYPE_QUAD:
+		case MAGIC_TYPE_UQUAD:
+		case MAGIC_TYPE_BEQUAD:
+		case MAGIC_TYPE_UBEQUAD:
+		case MAGIC_TYPE_LEQUAD:
+		case MAGIC_TYPE_ULEQUAD:
+			re = &ml->root->format_quad;
+			break;
+		case MAGIC_TYPE_FLOAT:
+		case MAGIC_TYPE_BEFLOAT:
+		case MAGIC_TYPE_LEFLOAT:
+		case MAGIC_TYPE_DOUBLE:
+		case MAGIC_TYPE_BEDOUBLE:
+		case MAGIC_TYPE_LEDOUBLE:
+			re = &ml->root->format_float;
+			break;
+		case MAGIC_TYPE_DATE:
+		case MAGIC_TYPE_LDATE:
+		case MAGIC_TYPE_UDATE:
+		case MAGIC_TYPE_ULDATE:
+		case MAGIC_TYPE_BEDATE:
+		case MAGIC_TYPE_BELDATE:
+		case MAGIC_TYPE_UBEDATE:
+		case MAGIC_TYPE_UBELDATE:
+		case MAGIC_TYPE_QDATE:
+		case MAGIC_TYPE_QLDATE:
+		case MAGIC_TYPE_UQDATE:
+		case MAGIC_TYPE_UQLDATE:
+		case MAGIC_TYPE_BEQDATE:
+		case MAGIC_TYPE_BEQLDATE:
+		case MAGIC_TYPE_UBEQDATE:
+		case MAGIC_TYPE_UBEQLDATE:
+		case MAGIC_TYPE_LEQDATE:
+		case MAGIC_TYPE_LEQLDATE:
+		case MAGIC_TYPE_ULEQDATE:
+		case MAGIC_TYPE_ULEQLDATE:
+		case MAGIC_TYPE_LEDATE:
+		case MAGIC_TYPE_LELDATE:
+		case MAGIC_TYPE_ULEDATE:
+		case MAGIC_TYPE_ULELDATE:
+		case MAGIC_TYPE_MEDATE:
+		case MAGIC_TYPE_MELDATE:
+		case MAGIC_TYPE_STRING:
+		case MAGIC_TYPE_PSTRING:
+		case MAGIC_TYPE_REGEX:
+		case MAGIC_TYPE_SEARCH:
+			re = &ml->root->format_string;
+			break;
+		case MAGIC_TYPE_BESTRING16:
+		case MAGIC_TYPE_LESTRING16:
+			magic_warn(ml, "unsupported type %s", ml->type_string);
+			return (-1);
+		}
+	}
+
+	/* Match only the conversion itself, not the rest of the text. */
+	pmatch.rm_so = 0;
+	pmatch.rm_eo = fmtlen;
+	if (regexec(re, fmt, 1, &pmatch, REG_STARTEND) != 0) {
+		magic_warn(ml, "bad format for %s: %%%.*s", ml->type_string,
+		    (int)fmtlen, fmt);
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Compute the sort strength of ml from the size of the value it tests and
+ * the test operator. Negated and 'x' tests get 1, typeless and default
+ * lines get 0, and the result is otherwise at least 1.
+ */
+static u_int
+magic_get_strength(struct magic_line *ml)
+{
+	int	n;
+	size_t	size;
+
+	if (ml->test_not || ml->test_operator == 'x')
+		return (1);
+
+	n = 20;
+	switch (ml->type) {
+	case MAGIC_TYPE_NONE:
+	case MAGIC_TYPE_DEFAULT:
+		return (0);
+	case MAGIC_TYPE_BYTE:
+	case MAGIC_TYPE_UBYTE:
+		n += 1 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case MAGIC_TYPE_SHORT:
+	case MAGIC_TYPE_USHORT:
+	case MAGIC_TYPE_BESHORT:
+	case MAGIC_TYPE_UBESHORT:
+	case MAGIC_TYPE_LESHORT:
+	case MAGIC_TYPE_ULESHORT:
+		n += 2 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case MAGIC_TYPE_LONG:
+	case MAGIC_TYPE_ULONG:
+	case MAGIC_TYPE_FLOAT:
+	case MAGIC_TYPE_DATE:
+	case MAGIC_TYPE_LDATE:
+	case MAGIC_TYPE_UDATE:
+	case MAGIC_TYPE_ULDATE:
+	case MAGIC_TYPE_BELONG:
+	case MAGIC_TYPE_UBELONG:
+	case MAGIC_TYPE_BEFLOAT:
+	case MAGIC_TYPE_BEDATE:
+	case MAGIC_TYPE_BELDATE:
+	case MAGIC_TYPE_UBEDATE:
+	case MAGIC_TYPE_UBELDATE:
+		n += 4 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	/*
+	 * NOTE(review): the 32-bit little- and middle-endian types (lelong,
+	 * lefloat, ledate, melong, ...) are weighted as 8 bytes here while
+	 * their big-endian twins are weighted as 4. Left unchanged because
+	 * it affects match ordering; confirm whether this is intended.
+	 */
+	case MAGIC_TYPE_QUAD:
+	case MAGIC_TYPE_UQUAD:
+	case MAGIC_TYPE_DOUBLE:
+	case MAGIC_TYPE_QDATE:
+	case MAGIC_TYPE_QLDATE:
+	case MAGIC_TYPE_UQDATE:
+	case MAGIC_TYPE_UQLDATE:
+	case MAGIC_TYPE_BEQUAD:
+	case MAGIC_TYPE_UBEQUAD:
+	case MAGIC_TYPE_BEDOUBLE:
+	case MAGIC_TYPE_BEQDATE:
+	case MAGIC_TYPE_BEQLDATE:
+	case MAGIC_TYPE_UBEQDATE:
+	case MAGIC_TYPE_UBEQLDATE:
+	case MAGIC_TYPE_LEQUAD:
+	case MAGIC_TYPE_ULEQUAD:
+	case MAGIC_TYPE_LEDOUBLE:
+	case MAGIC_TYPE_LEQDATE:
+	case MAGIC_TYPE_LEQLDATE:
+	case MAGIC_TYPE_ULEQDATE:
+	case MAGIC_TYPE_ULEQLDATE:
+	case MAGIC_TYPE_LELONG:
+	case MAGIC_TYPE_ULELONG:
+	case MAGIC_TYPE_LEFLOAT:
+	case MAGIC_TYPE_LEDATE:
+	case MAGIC_TYPE_LELDATE:
+	case MAGIC_TYPE_ULEDATE:
+	case MAGIC_TYPE_ULELDATE:
+	case MAGIC_TYPE_MELONG:
+	case MAGIC_TYPE_MEDATE:
+	case MAGIC_TYPE_MELDATE:
+		n += 8 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case MAGIC_TYPE_STRING:
+	case MAGIC_TYPE_PSTRING:
+		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case MAGIC_TYPE_BESTRING16:
+	case MAGIC_TYPE_LESTRING16:
+		n += ml->test_string_size * MAGIC_STRENGTH_MULTIPLIER / 2;
+		break;
+	case MAGIC_TYPE_REGEX:
+	case MAGIC_TYPE_SEARCH:
+		/* An empty pattern adds nothing; avoid dividing by zero. */
+		if (ml->test_string_size == 0)
+			break;
+		size = MAGIC_STRENGTH_MULTIPLIER / ml->test_string_size;
+		if (size < 1)
+			size = 1;
+		n += ml->test_string_size * size;
+		break;
+	}
+	switch (ml->test_operator) {
+	case '=':
+		n += MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case '<':
+	case '>':
+	case '[':
+	case ']':
+		n -= 2 * MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	case '^':
+	case '&':
+		n -= MAGIC_STRENGTH_MULTIPLIER;
+		break;
+	}
+	return (n <= 0 ? 1 : n);
+}
+
+/*
+ * Copy the whitespace-delimited token at *line into out, decoding backslash
+ * escapes (octal, \x hex, the usual C letters, and "\ " for a space).
+ *
+ * out must have room for strlen(*line) + 1 bytes. On success the decoded
+ * length, which may include NUL bytes, is stored in *outlen, *line is
+ * advanced past the token and 0 is returned. Returns -1 on a malformed
+ * escape, including a backslash at the very end of the line.
+ */
+static int
+magic_get_string(char **line, char *out, size_t *outlen)
+{
+	char	*start, *cp, c;
+	int	 d0, d1, d2;
+
+	start = out;
+	for (cp = *line; *cp != '\0' && !isspace((u_char)*cp); cp++) {
+		if (*cp != '\\') {
+			*out++ = *cp;
+			continue;
+		}
+
+		switch (c = *++cp) {
+		case '\0':
+			/*
+			 * Trailing backslash. Stop here, otherwise the loop
+			 * increment would step past the terminator.
+			 */
+			return (-1);
+		case ' ':
+			*out++ = ' ';
+			break;
+		case '0':
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+			/* Up to three octal digits. */
+			d0 = magic_odigit(cp[0]);
+			if (cp[0] != '\0')
+				d1 = magic_odigit(cp[1]);
+			else
+				d1 = -1;
+			if (cp[0] != '\0' && cp[1] != '\0')
+				d2 = magic_odigit(cp[2]);
+			else
+				d2 = -1;
+
+			if (d0 != -1 && d1 != -1 && d2 != -1) {
+				*out = d2 | (d1 << 3) | (d0 << 6);
+				cp += 2;
+			} else if (d0 != -1 && d1 != -1) {
+				*out = d1 | (d0 << 3);
+				cp++;
+			} else if (d0 != -1)
+				*out = d0;
+			else
+				return (-1);
+			out++;
+			break;
+		case 'x':
+			/* One or two hexadecimal digits. */
+			d0 = magic_xdigit(cp[1]);
+			if (cp[1] != '\0')
+				d1 = magic_xdigit(cp[2]);
+			else
+				d1 = -1;
+
+			if (d0 != -1 && d1 != -1) {
+				*out = d1 | (d0 << 4);
+				cp += 2;
+			} else if (d0 != -1) {
+				*out = d0;
+				cp++;
+			} else
+				return (-1);
+			out++;
+
+			break;
+		case 'a':
+			*out++ = '\a';
+			break;
+		case 'b':
+			*out++ = '\b';
+			break;
+		case 't':
+			*out++ = '\t';
+			break;
+		case 'f':
+			*out++ = '\f';
+			break;
+		case 'n':
+			*out++ = '\n';
+			break;
+		case 'r':
+			*out++ = '\r';
+			break;
+		case '\\':
+			*out++ = '\\';
+			break;
+		case '\'':
+			*out++ = '\'';
+			break;
+		case '\"':
+			*out++ = '\"';
+			break;
+		default:
+			*out++ = c;
+			break;
+		}
+	}
+	*out = '\0';
+	*outlen = out - start;
+
+	*line = cp;
+	return (0);
+}
+
+/*
+ * Parse the offset field at *line into ml and advance *line past it.
+ *
+ * Accepts a plain offset ("N"), a relative one ("&N"), or an indirect one
+ * of the form "(" ["&"] N ["." type] [op [operand | "(" operand ")"]] ")",
+ * where type is one of bslBSL and op one of + - *. Returns 0 on success or
+ * -1 after a warning.
+ */
+static int
+magic_parse_offset(struct magic_line *ml, char **line)
+{
+	char	*copy, *s, *cp, *endptr;
+
+	while (isspace((u_char)**line))
+		(*line)++;
+	copy = s = cp = xmalloc(strlen(*line) + 1);
+	while (**line != '\0' && !isspace((u_char)**line))
+		*cp++ = *(*line)++;
+	*cp = '\0';
+
+	ml->offset = 0;
+	ml->offset_relative = 0;
+
+	ml->indirect_type = ' ';
+	ml->indirect_relative = 0;
+	ml->indirect_offset = 0;
+	ml->indirect_operator = ' ';
+	ml->indirect_operand = 0;
+
+	if (*s == '&') {
+		ml->offset_relative = 1;
+		s++;
+	}
+
+	if (*s != '(') {
+		endptr = magic_strtoll(s, &ml->offset);
+		if (endptr == NULL || *endptr != '\0') {
+			magic_warn(ml, "can't parse offset");
+			goto fail;
+		}
+		if (ml->offset < 0 && !ml->offset_relative) {
+			magic_warn(ml, "negative absolute offset");
+			goto fail;
+		}
+		goto done;
+	}
+	s++;
+
+	if (*s == '&') {
+		ml->indirect_relative = 1;
+		s++;
+	}
+
+	endptr = magic_strtoll(s, &ml->indirect_offset);
+	if (endptr == NULL) {
+		magic_warn(ml, "can't parse offset");
+		goto fail;
+	}
+	s = endptr;
+	if (*s == ')')
+		goto done;
+
+	/*
+	 * strchr(3) also matches the terminating NUL, so the end of the
+	 * field is tested for explicitly before each character-set lookup.
+	 */
+	if (*s == '.') {
+		s++;
+		if (*s == '\0' || strchr("bslBSL", *s) == NULL) {
+			magic_warn(ml, "unknown offset type");
+			goto fail;
+		}
+		ml->indirect_type = *s;
+		s++;
+		if (*s == ')')
+			goto done;
+	}
+
+	if (*s == '\0' || strchr("+-*", *s) == NULL) {
+		magic_warn(ml, "unknown offset operator");
+		goto fail;
+	}
+	ml->indirect_operator = *s;
+	s++;
+	if (*s == ')')
+		goto done;
+
+	if (*s == '(') {
+		s++;
+		endptr = magic_strtoll(s, &ml->indirect_operand);
+		if (endptr == NULL || *endptr != ')') {
+			magic_warn(ml, "missing closing bracket");
+			goto fail;
+		}
+		if (*++endptr != ')') {
+			magic_warn(ml, "missing closing bracket");
+			goto fail;
+		}
+	} else {
+		endptr = magic_strtoll(s, &ml->indirect_operand);
+		if (endptr == NULL || *endptr != ')') {
+			magic_warn(ml, "missing closing bracket");
+			goto fail;
+		}
+	}
+
+done:
+	free(copy);
+	return (0);
+
+fail:
+	free(copy);
+	return (-1);
+}
+
+/*
+ * Parse the type field at *line into ml and advance *line past it.
+ *
+ * "string", "search" and "regex" are recognised by prefix, so anything that
+ * follows the keyword in the field is ignored here. Every other type may
+ * carry a "-N" or "&N" operand and must then match a keyword in
+ * magic_type_names exactly. ml->type_string keeps a copy of the whole
+ * field. Returns 0 on success or -1 after a warning.
+ */
+static int
+magic_parse_type(struct magic_line *ml, char **line)
+{
+	char	*copy, *s, *cp, *endptr;
+	size_t	 i, count;
+
+	while (isspace((u_char)**line))
+		(*line)++;
+	copy = s = cp = xmalloc(strlen(*line) + 1);
+	while (**line != '\0' && !isspace((u_char)**line))
+		*cp++ = *(*line)++;
+	*cp = '\0';
+
+	ml->type = MAGIC_TYPE_NONE;
+	ml->type_string = xstrdup(s);
+
+	ml->type_operator = ' ';
+	ml->type_operand = 0;
+
+	if (strncmp(s, "string", (sizeof "string") - 1) == 0) {
+		ml->type = MAGIC_TYPE_STRING;
+		magic_mark_text(ml, 0);
+		goto done;
+	}
+	if (strncmp(s, "search", (sizeof "search") - 1) == 0) {
+		ml->type = MAGIC_TYPE_SEARCH;
+		goto done;
+	}
+	if (strncmp(s, "regex", (sizeof "regex") - 1) == 0) {
+		ml->type = MAGIC_TYPE_REGEX;
+		goto done;
+	}
+
+	/* Split off an operand such as "&0xff" before matching the name. */
+	cp = &s[strcspn(s, "-&")];
+	if (*cp != '\0') {
+		ml->type_operator = *cp;
+		endptr = magic_strtoull(cp + 1, &ml->type_operand);
+		if (endptr == NULL || *endptr != '\0') {
+			magic_warn(ml, "can't parse operand");
+			goto fail;
+		}
+		*cp = '\0';
+	}
+
+	count = sizeof magic_type_names / sizeof magic_type_names[0];
+	for (i = 0; i < count; i++) {
+		if (strcmp(s, magic_type_names[i].name) == 0)
+			break;
+	}
+	if (i == count) {
+		magic_warn(ml, "unknown type");
+		goto fail;
+	}
+	ml->type = magic_type_names[i].type;
+	magic_mark_text(ml, 0);
+
+done:
+	free(copy);
+	return (0);
+
+fail:
+	free(copy);
+	return (-1);
+}
+
+/*
+ * Parse the test value at *line into ml and advance *line past it.
+ *
+ * A lone "x" means "always match". A leading '!' negates the test. String
+ * types keep the decoded token in ml->test_string, which the line then
+ * owns; numeric types accept an optional operator (=, <, >, <=, >=, &, ^)
+ * followed by a number, parsed as unsigned when the type name starts with
+ * 'u'. Returns 0 on success or -1 after a warning.
+ */
+static int
+magic_parse_value(struct magic_line *ml, char **line)
+{
+	char	*copy, *s, *cp, *endptr;
+	size_t	 slen;
+
+	while (isspace((u_char)**line))
+		(*line)++;
+
+	ml->test_operator = '=';
+	ml->test_not = 0;
+	ml->test_string = NULL;
+	ml->test_string_size = 0;
+	ml->test_unsigned = 0;
+	ml->test_signed = 0;
+
+	s = *line;
+	if (s[0] == 'x' && (s[1] == '\0' || isspace((u_char)s[1]))) {
+		(*line)++;
+		ml->test_operator = 'x';
+		return (0);
+	}
+
+	if (**line == '!') {
+		ml->test_not = 1;
+		(*line)++;
+	}
+
+	switch (ml->type) {
+	case MAGIC_TYPE_STRING:
+	case MAGIC_TYPE_PSTRING:
+	case MAGIC_TYPE_SEARCH:
+		if (**line == '>' || **line == '<' || **line == '=') {
+			ml->test_operator = **line;
+			(*line)++;
+		}
+		/* FALLTHROUGH */
+	case MAGIC_TYPE_REGEX:
+		copy = s = xmalloc(strlen(*line) + 1);
+		if (magic_get_string(line, s, &slen) != 0) {
+			magic_warn(ml, "can't parse string");
+			goto fail;
+		}
+		ml->test_string_size = slen;
+		ml->test_string = s;
+		return (0); /* do not free */
+	default:
+		break;
+	}
+
+	copy = s = cp = xmalloc(strlen(*line) + 1);
+	if ((*line)[0] == '=' && (*line)[1] == ' ') {
+		/*
+		 * Extra spaces such as "byte&7 = 0" are accepted, which is
+		 * annoying. But it seems to be only for =, so special case it.
+		 */
+		*cp++ = '=';
+		(*line) += 2;
+	}
+	while (**line != '\0' && !isspace((u_char)**line))
+		*cp++ = *(*line)++;
+	*cp = '\0';
+
+	if (*s == '\0')
+		goto done;
+
+	/* "<=" and ">=" are stored as '[' and ']'. */
+	if (s[0] == '<' && s[1] == '=') {
+		ml->test_operator = '[';
+		s += 2;
+	} else if (s[0] == '>' && s[1] == '=') {
+		ml->test_operator = ']';
+		s += 2;
+	} else if (strchr("=<>&^", *s) != NULL) {
+		ml->test_operator = *s;
+		s++;
+	}
+
+	if (*ml->type_string == 'u')
+		endptr = magic_strtoull(s, &ml->test_unsigned);
+	else
+		endptr = magic_strtoll(s, &ml->test_signed);
+	if (endptr == NULL || *endptr != '\0') {
+		magic_warn(ml, "can't parse number");
+		goto fail;
+	}
+
+done:
+	free(copy);
+	return (0);
+
+fail:
+	free(copy);
+	return (-1);
+}
+
+/*
+ * Free ml and every string it owns. Only used for lines that were never
+ * linked into the tree or a parent's child list.
+ */
+static void
+magic_free_line(struct magic_line *ml)
+{
+	free((void *)ml->type_string);
+	free((void *)ml->test_string);
+
+	free((void *)ml->mimetype);
+	free((void *)ml->result);
+
+	free(ml);
+}
+
+/*
+ * Tree ordering for top-level lines: higher strength sorts first, and ties
+ * are broken by ascending source line number.
+ */
+int
+magic_compare(struct magic_line *ml1, struct magic_line *ml2)
+{
+	if (ml1->strength < ml2->strength)
+		return (1);
+	if (ml1->strength > ml2->strength)
+		return (-1);
+
+	/*
+	 * The original file depends on the (undefined!) qsort(3) behaviour
+	 * when the strength is equal. This is impossible to reproduce with an
+	 * RB tree so just use the line number and hope for the best.
+	 */
+	if (ml1->line < ml2->line)
+		return (-1);
+	if (ml1->line > ml2->line)
+		return (1);
+
+	return (0);
+}
+RB_GENERATE(magic_tree, magic_line, node, magic_compare);
+
+/*
+ * Handle a "!:mime" line: strip any comment and trailing whitespace,
+ * validate the MIME type and attach it to ml, the most recently parsed
+ * line. Invalid or stray (ml == NULL) MIME types are reported on stderr and
+ * ignored. line is modified in place.
+ */
+static void
+magic_set_mimetype(struct magic *m, u_int at, struct magic_line *ml, char *line)
+{
+	char	*mimetype, *cp;
+
+	mimetype = line + (sizeof "!:mime") - 1;
+	while (isspace((u_char)*mimetype))
+		mimetype++;
+
+	cp = strchr(mimetype, '#');
+	if (cp != NULL)
+		*cp = '\0';
+
+	if (*mimetype != '\0') {
+		cp = mimetype + strlen(mimetype) - 1;
+		while (cp != mimetype && isspace((u_char)*cp))
+			*cp-- = '\0';
+	}
+
+	cp = mimetype;
+	while (*cp != '\0') {
+		if (!isalnum((u_char)*cp) && strchr("/-.+", *cp) == NULL)
+			break;
+		cp++;
+	}
+	if (*mimetype == '\0' || *cp != '\0') {
+		fprintf(stderr, "%s:%u: invalid MIME type: %s\n", m->path, at,
+		    mimetype);
+		return;
+	}
+	if (ml == NULL) {
+		fprintf(stderr, "%s:%u: stray MIME type: %s\n", m->path, at,
+		    mimetype);
+		return;
+	}
+	/* A later !:mime line replaces an earlier one; don't leak the old. */
+	free((void *)ml->mimetype);
+	ml->mimetype = xstrdup(mimetype);
+}
+
+/*
+ * Read a magic(5) file from f and build the in-memory tree.
+ *
+ * path is only used in messages; warnings enables magic_warn() output.
+ * Lines that fail to parse are dropped. Top-level lines go into the tree
+ * ordered by strength, continuation lines ('>' prefixed) are appended to
+ * their parent's child list. f is closed before returning. The returned
+ * structure is allocated with the x* allocators, so this never returns NULL
+ * (presumably they exit on failure; see xmalloc.c).
+ */
+struct magic *
+magic_load(FILE *f, const char *path, int warnings)
+{
+	struct magic		*m;
+	struct magic_line	*ml = NULL, *parent, *parent0;
+	char			*line, *tmp;
+	size_t			 size;
+	u_int			 at, level, n, i;
+
+	m = xcalloc(1, sizeof *m);
+	m->path = xstrdup(path);
+	m->warnings = warnings;
+	RB_INIT(&m->tree);
+
+	parent = NULL;
+	parent0 = NULL;
+	level = 0;
+
+	at = 0;
+	tmp = NULL;
+	while ((line = fgetln(f, &size))) {
+		/* fgetln(3) does not NUL-terminate; do so here. */
+		if (line[size - 1] == '\n')
+			line[size - 1] = '\0';
+		else {
+			tmp = xmalloc(size + 1);
+			memcpy(tmp, line, size);
+			tmp[size] = '\0';
+			line = tmp;
+		}
+		at++;
+
+		while (isspace((u_char)*line))
+			line++;
+		if (*line == '\0' || *line == '#')
+			continue;
+
+		if (strncmp (line, "!:mime", (sizeof "!:mime") - 1) == 0) {
+			magic_set_mimetype(m, at, ml, line);
+			continue;
+		}
+
+		/* The number of leading '>' gives the nesting level. */
+		n = 0;
+		for (; *line == '>'; line++)
+			n++;
+
+		ml = xcalloc(1, sizeof *ml);
+		ml->root = m;
+		ml->line = at;
+		ml->type = MAGIC_TYPE_NONE;
+		TAILQ_INIT(&ml->children);
+		ml->text = 1;
+
+		if (n == level + 1) {
+			parent = parent0;
+		} else if (n < level) {
+			for (i = n; i < level && parent != NULL; i++)
+				parent = parent->parent;
+		} else if (n != level) {
+			magic_warn(ml, "level skipped (%u->%u)", level, n);
+			free(ml);
+			/* Don't let a following !:mime line see freed memory. */
+			ml = NULL;
+			continue;
+		}
+		ml->parent = parent;
+		level = n;
+
+		if (magic_parse_offset(ml, &line) != 0 ||
+		    magic_parse_type(ml, &line) != 0 ||
+		    magic_parse_value(ml, &line) != 0 ||
+		    magic_set_result(ml, line) != 0) {
+			magic_free_line(ml);
+			ml = NULL;
+			continue;
+		}
+
+		ml->strength = magic_get_strength(ml);
+		if (ml->parent == NULL)
+			RB_INSERT(magic_tree, &m->tree, ml);
+		else
+			TAILQ_INSERT_TAIL(&ml->parent->children, ml, entry);
+		parent0 = ml;
+	}
+	free(tmp);
+
+	fclose(f);
+	return (m);
+}
diff --git a/usr.bin/file/magic-test.c b/usr.bin/file/magic-test.c
new file mode 100644
index 00000000000..2c33b7f5545
--- /dev/null
+++ b/usr.bin/file/magic-test.c
@@ -0,0 +1,1121 @@
+/* $OpenBSD: magic-test.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */
+
+/*
+ * Copyright (c) 2015 Nicholas Marriott
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
+ * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/*
 * Compare two single bytes; with cflag set the comparison ignores case.
 * Returns 1 when the bytes are considered equal and 0 otherwise.
 */
static int
magic_one_eq(char a, char b, int cflag)
{
	if (a == b)
		return (1);
	if (cflag && tolower((unsigned char)a) == tolower((unsigned char)b))
		return (1);
	return (0);
}

/*
 * Compare the data ap (asize bytes) against the pattern bp (bsize bytes).
 *
 * cflag makes the byte comparison case-insensitive. bflag lets whitespace in
 * the pattern be skipped when it does not match the data. Bflag compares
 * runs of whitespace by length rather than byte by byte.
 *
 * Returns 0 when the whole pattern was matched (the data may be longer),
 * -1 when the data sorts before the pattern and 1 when it sorts after.
 * Data that ends before the pattern has been consumed counts as "before";
 * reporting it as a match would produce false positives for callers that
 * scan close to the end of the buffer.
 */
static int
magic_test_eq(const char *ap, size_t asize, const char *bp, size_t bsize,
    int cflag, int bflag, int Bflag)
{
	size_t	aoff, boff, aspaces, bspaces;

	aoff = boff = 0;
	while (aoff != asize && boff != bsize) {
		if (Bflag && isspace((unsigned char)ap[aoff])) {
			aspaces = 0;
			while (aoff != asize &&
			    isspace((unsigned char)ap[aoff])) {
				aspaces++;
				aoff++;
			}
			bspaces = 0;
			while (boff != bsize &&
			    isspace((unsigned char)bp[boff])) {
				bspaces++;
				boff++;
			}
			if (bspaces >= aspaces)
				continue;
			return (1);
		}
		if (magic_one_eq(ap[aoff], bp[boff], cflag)) {
			aoff++;
			boff++;
			continue;
		}
		if (bflag && isspace((unsigned char)bp[boff])) {
			boff++;
			continue;
		}
		if (ap[aoff] < bp[boff])
			return (-1);
		return (1);
	}

	/* With bflag, trailing whitespace in the pattern stays optional. */
	while (bflag && boff != bsize && isspace((unsigned char)bp[boff]))
		boff++;

	/* Data exhausted with pattern left over: not a match. */
	if (boff != bsize)
		return (-1);
	return (0);
}
+{ + va_list ap; + int separate; + char *s, *tmp, *add; + + va_start(ap, fmt); + if (ml->stringify) { + if (vasprintf(&s, fmt, ap) == -1) { + va_end(ap); + return; + } + va_end(ap); + if (asprintf(&tmp, ml->result, s) == -1) { + free(s); + return; + } + free(s); + } else { + if (vasprintf(&tmp, ml->result, ap) == -1) { + va_end(ap); + return; + } + va_end(ap); + } + + separate = 1; + if (tmp[0] == '\\' && tmp[1] == 'b') { + separate = 0; + add = tmp + 2; + } else + add = tmp; + + if (separate && *ms->out != '\0') + strlcat(ms->out, " ", sizeof ms->out); + strlcat(ms->out, add, sizeof ms->out); + + free(tmp); +} + +static void +magic_add_string(struct magic_state *ms, struct magic_line *ml, + const char* s, size_t slen) +{ + char *out; + size_t outlen, offset; + + outlen = MAGIC_STRING_SIZE; + if (outlen > slen) + outlen = slen; + for (offset = 0; offset < outlen; offset++) { + if (s[offset] == '\0' || !isprint((u_char)s[offset])) { + outlen = offset; + break; + } + } + out = xreallocarray(NULL, 4, outlen + 1); + strvisx(out, s, outlen, VIS_TAB|VIS_NL|VIS_CSTYLE|VIS_OCTAL); + magic_add_result(ms, ml, "%s", out); + free(out); +} + +static int +magic_test_signed(struct magic_line *ml, int64_t value, int64_t wanted) +{ + switch (ml->test_operator) { + case 'x': + return (1); + case '<': + return (value < wanted); + case '[': + return (value <= wanted); + case '>': + return (value > wanted); + case ']': + return (value >= wanted); + case '=': + return (value == wanted); + case '&': + return ((value & wanted) == wanted); + case '^': + return ((~value & wanted) == wanted); + } + return (-1); +} + +static int +magic_test_unsigned(struct magic_line *ml, uint64_t value, uint64_t wanted) +{ + switch (ml->test_operator) { + case 'x': + return (1); + case '<': + return (value < wanted); + case '[': + return (value <= wanted); + case '>': + return (value > wanted); + case ']': + return (value >= wanted); + case '=': + return (value == wanted); + case '&': + return ((value & 
wanted) == wanted); + case '^': + return ((~value & wanted) == wanted); + } + return (-1); +} + +static int +magic_test_type_none(__unused struct magic_line *ml, + __unused struct magic_state *ms) +{ + return (0); +} + +static int +magic_test_type_byte(struct magic_line *ml, struct magic_state *ms) +{ + int8_t value; + int result; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + + if (ml->type_operator == '&') + value &= (int8_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_signed(ml, value, (int8_t)ml->test_signed); + if (result == !ml->test_not && ml->result != NULL) { + magic_add_result(ms, ml, "%c", (int)value); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_short(struct magic_line *ml, struct magic_state *ms) +{ + int16_t value; + int result; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_BESHORT) + value = betoh16(value); + if (ml->type == MAGIC_TYPE_LESHORT) + value = letoh16(value); + + if (ml->type_operator == '&') + value &= (int16_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_signed(ml, value, (int16_t)ml->test_signed); + if (result == !ml->test_not && ml->result != NULL) { + magic_add_result(ms, ml, "%hd", (int)value); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_long(struct magic_line *ml, struct magic_state *ms) +{ + int32_t value; + int result; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_BELONG) + value = betoh32(value); + if (ml->type == MAGIC_TYPE_LELONG) + value = letoh32(value); + + if (ml->type_operator == '&') + value &= (int32_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_signed(ml, value, (int32_t)ml->test_signed); + if (result == !ml->test_not && ml->result != NULL) { + 
magic_add_result(ms, ml, "%d", (int)value); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_quad(struct magic_line *ml, struct magic_state *ms) +{ + int64_t value; + int result; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_BEQUAD) + value = betoh64(value); + if (ml->type == MAGIC_TYPE_LEQUAD) + value = letoh64(value); + + if (ml->type_operator == '&') + value &= (int64_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_signed(ml, value, (int64_t)ml->test_signed); + if (result == !ml->test_not && ml->result != NULL) { + magic_add_result(ms, ml, "%lld", (long long)value); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_ubyte(struct magic_line *ml, struct magic_state *ms) +{ + uint8_t value; + int result; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + + if (ml->type_operator == '&') + value &= (uint8_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_unsigned(ml, value, (uint8_t)ml->test_unsigned); + if (result == !ml->test_not && ml->result != NULL) { + magic_add_result(ms, ml, "%c", (unsigned int)value); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_ushort(struct magic_line *ml, struct magic_state *ms) +{ + uint16_t value; + int result; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_UBESHORT) + value = betoh16(value); + if (ml->type == MAGIC_TYPE_ULESHORT) + value = letoh16(value); + + if (ml->type_operator == '&') + value &= (uint16_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_unsigned(ml, value, (uint16_t)ml->test_unsigned); + if (result == !ml->test_not && ml->result != NULL) { + magic_add_result(ms, ml, "%hu", (unsigned int)value); + ms->offset += sizeof value; + } + 
return (result); +} + +static int +magic_test_type_ulong(struct magic_line *ml, struct magic_state *ms) +{ + uint32_t value; + int result; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_UBELONG) + value = betoh32(value); + if (ml->type == MAGIC_TYPE_ULELONG) + value = letoh32(value); + + if (ml->type_operator == '&') + value &= (uint32_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_unsigned(ml, value, (uint32_t)ml->test_unsigned); + if (result == !ml->test_not && ml->result != NULL) { + magic_add_result(ms, ml, "%u", (unsigned int)value); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_uquad(struct magic_line *ml, struct magic_state *ms) +{ + uint64_t value; + int result; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_UBEQUAD) + value = betoh64(value); + if (ml->type == MAGIC_TYPE_ULEQUAD) + value = letoh64(value); + + if (ml->type_operator == '&') + value &= (uint64_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_unsigned(ml, value, (uint64_t)ml->test_unsigned); + if (result == !ml->test_not && ml->result != NULL) { + magic_add_result(ms, ml, "%llu", (unsigned long long)value); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_float(struct magic_line *ml, struct magic_state *ms) +{ + uint32_t value0; + double value; + + if (magic_copy_from(ms, -1, &value0, sizeof value0) != 0) + return (0); + if (ml->type == MAGIC_TYPE_BEFLOAT) + value0 = betoh32(value0); + if (ml->type == MAGIC_TYPE_LEFLOAT) + value0 = letoh32(value0); + memcpy(&value, &value0, sizeof value); + + if (ml->type_operator != ' ') + return (-1); + + if (ml->test_operator != 'x') + return (-1); + + magic_add_result(ms, ml, "%g", value); + ms->offset += sizeof value0; + return (1); +} + +static int 
+magic_test_type_double(struct magic_line *ml, struct magic_state *ms) +{ + uint64_t value0; + double value; + + if (magic_copy_from(ms, -1, &value0, sizeof value0) != 0) + return (0); + if (ml->type == MAGIC_TYPE_BEDOUBLE) + value0 = betoh64(value0); + if (ml->type == MAGIC_TYPE_LEDOUBLE) + value0 = letoh64(value0); + memcpy(&value, &value0, sizeof value); + + if (ml->type_operator != ' ') + return (-1); + + if (ml->test_operator != 'x') + return (-1); + + magic_add_result(ms, ml, "%g", value); + ms->offset += sizeof value0; + return (1); +} + +static int +magic_test_type_string(struct magic_line *ml, struct magic_state *ms) +{ + const char *s, *cp; + size_t slen; + int result, cflag = 0, bflag = 0, Bflag = 0; + + cp = &ml->type_string[(sizeof "string") - 1]; + if (*cp != '\0') { + if (*cp != '/') + return (-1); + cp++; + for (; *cp != '\0'; cp++) { + switch (*cp) { + case 'B': + Bflag = 1; + break; + case 'b': + bflag = 1; + break; + case 'c': + cflag = 1; + break; + default: + return (-1); + } + } + } + + s = ms->base + ms->offset; + slen = ms->size - ms->offset; + if (slen < ml->test_string_size) + return (0); + + result = magic_test_eq(s, slen, ml->test_string, ml->test_string_size, + cflag, bflag, Bflag); + switch (ml->test_operator) { + case 'x': + result = 1; + break; + case '<': + result = result < 0; + break; + case '>': + result = result > 0; + break; + case '=': + result = result == 0; + break; + default: + result = -1; + break; + } + if (result == !ml->test_not) { + if (ml->result != NULL) + magic_add_string(ms, ml, s, slen); + if (result && ml->test_operator == '=') + ms->offset = s - ms->base + ml->test_string_size; + } + return (result); +} + +static int +magic_test_type_pstring(struct magic_line *ml, struct magic_state *ms) +{ + const char *s; + size_t slen; + int result; + + s = ms->base + ms->offset; + if (ms->size - ms->offset < 1) + return (-1); + slen = *(u_char *)s; + if (slen > ms->size - ms->offset) + return (-1); + s++; + + if (slen < 
ml->test_string_size) + result = -1; + else if (slen > ml->test_string_size) + result = 1; + else + result = memcmp(s, ml->test_string, ml->test_string_size); + switch (ml->test_operator) { + case 'x': + result = 1; + break; + case '<': + result = result < 0; + break; + case '>': + result = result > 0; + break; + case '=': + result = result == 0; + break; + default: + result = -1; + break; + } + if (result == !ml->test_not) { + if (ml->result != NULL) + magic_add_string(ms, ml, s, slen); + if (result) + ms->offset += slen + 1; + } + return (result); +} + +static int +magic_test_type_date(struct magic_line *ml, struct magic_state *ms) +{ + int32_t value; + int result; + time_t t; + char s[64]; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_BEDATE || + ml->type == MAGIC_TYPE_BELDATE) + value = betoh32(value); + if (ml->type == MAGIC_TYPE_LEDATE || + ml->type == MAGIC_TYPE_LELDATE) + value = letoh32(value); + + if (ml->type_operator == '&') + value &= (int32_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_signed(ml, value, (int32_t)ml->test_signed); + if (result == !ml->test_not && ml->result != NULL) { + t = value; + switch (ml->type) { + case MAGIC_TYPE_LDATE: + case MAGIC_TYPE_LELDATE: + case MAGIC_TYPE_BELDATE: + ctime_r(&t, s); + break; + default: + asctime_r(localtime(&t), s); + break; + } + s[strcspn(s, "\n")] = '\0'; + magic_add_result(ms, ml, "%s", s); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_qdate(struct magic_line *ml, struct magic_state *ms) +{ + int64_t value; + int result; + time_t t; + char s[64]; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_BEQDATE || + ml->type == MAGIC_TYPE_BEQLDATE) + value = betoh64(value); + if (ml->type == MAGIC_TYPE_LEQDATE || + ml->type == MAGIC_TYPE_LEQLDATE) + value = letoh64(value); + + if (ml->type_operator == '&') + 
value &= (int64_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_signed(ml, value, (int64_t)ml->test_signed); + if (result == !ml->test_not && ml->result != NULL) { + t = value; + switch (ml->type) { + case MAGIC_TYPE_QLDATE: + case MAGIC_TYPE_LEQLDATE: + case MAGIC_TYPE_BEQLDATE: + ctime_r(&t, s); + break; + default: + asctime_r(localtime(&t), s); + break; + } + s[strcspn(s, "\n")] = '\0'; + magic_add_result(ms, ml, "%s", s); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_udate(struct magic_line *ml, struct magic_state *ms) +{ + uint32_t value; + int result; + time_t t; + char s[64]; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_BEDATE || + ml->type == MAGIC_TYPE_BELDATE) + value = betoh32(value); + if (ml->type == MAGIC_TYPE_LEDATE || + ml->type == MAGIC_TYPE_LELDATE) + value = letoh32(value); + + if (ml->type_operator == '&') + value &= (uint32_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_unsigned(ml, value, (uint32_t)ml->test_unsigned); + if (result == !ml->test_not && ml->result != NULL) { + t = value; + switch (ml->type) { + case MAGIC_TYPE_LDATE: + case MAGIC_TYPE_LELDATE: + case MAGIC_TYPE_BELDATE: + ctime_r(&t, s); + break; + default: + asctime_r(gmtime(&t), s); + break; + } + s[strcspn(s, "\n")] = '\0'; + magic_add_result(ms, ml, "%s", s); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_uqdate(struct magic_line *ml, struct magic_state *ms) +{ + uint64_t value; + int result; + time_t t; + char s[64]; + + if (magic_copy_from(ms, -1, &value, sizeof value) != 0) + return (0); + if (ml->type == MAGIC_TYPE_UBEQDATE || + ml->type == MAGIC_TYPE_UBEQLDATE) + value = betoh64(value); + if (ml->type == MAGIC_TYPE_ULEQDATE || + ml->type == MAGIC_TYPE_ULEQLDATE) + value = letoh64(value); + + if (ml->type_operator == '&') + value &= 
(uint64_t)ml->type_operand; + else if (ml->type_operator != ' ') + return (-1); + + result = magic_test_unsigned(ml, value, (uint64_t)ml->test_unsigned); + if (result == !ml->test_not && ml->result != NULL) { + t = value; + switch (ml->type) { + case MAGIC_TYPE_UQLDATE: + case MAGIC_TYPE_ULEQLDATE: + case MAGIC_TYPE_UBEQLDATE: + ctime_r(&t, s); + break; + default: + asctime_r(gmtime(&t), s); + break; + } + s[strcspn(s, "\n")] = '\0'; + magic_add_result(ms, ml, "%s", s); + ms->offset += sizeof value; + } + return (result); +} + +static int +magic_test_type_bestring16(__unused struct magic_line *ml, + __unused struct magic_state *ms) +{ + return (-2); +} + +static int +magic_test_type_lestring16(__unused struct magic_line *ml, + __unused struct magic_state *ms) +{ + return (-2); +} + +static int +magic_test_type_melong(__unused struct magic_line *ml, + __unused struct magic_state *ms) +{ + return (-2); +} + +static int +magic_test_type_medate(__unused struct magic_line *ml, + __unused struct magic_state *ms) +{ + return (-2); +} + +static int +magic_test_type_meldate(__unused struct magic_line *ml, + __unused struct magic_state *ms) +{ + return (-2); +} + +static int +magic_test_type_regex(struct magic_line *ml, struct magic_state *ms) +{ + const char *cp; + regex_t re; + regmatch_t m; + int result, flags = 0, sflag = 0; + + cp = &ml->type_string[(sizeof "regex") - 1]; + if (*cp != '\0') { + if (*cp != '/') + return (-1); + cp++; + for (; *cp != '\0'; cp++) { + switch (*cp) { + case 's': + sflag = 1; + break; + case 'c': + flags |= REG_ICASE; + break; + default: + return (-1); + } + } + } + + if (regcomp(&re, ml->test_string, REG_EXTENDED) != 0) + return (-1); + m.rm_so = ms->offset; + m.rm_eo = ms->size; + + result = (regexec(&re, ms->base, 1, &m, REG_STARTEND) == 0); + if (result == !ml->test_not && ml->result != NULL) { + magic_add_result(ms, ml, "%s", ""); + if (result) { + if (sflag) + ms->offset = m.rm_so; + else + ms->offset = m.rm_eo; + } + } + regfree(&re); 
+ return (result); +} + +static int +magic_test_type_search(struct magic_line *ml, struct magic_state *ms) +{ + const char *cp, *endptr, *start, *found; + size_t size, end, i; + uint64_t range; + int result, n, cflag = 0, bflag = 0, Bflag = 0; + + cp = &ml->type_string[(sizeof "search") - 1]; + if (*cp != '\0') { + if (*cp != '/') + return (-1); + cp++; + + endptr = magic_strtoull(cp, &range); + if (endptr == NULL || (*endptr != '/' && *endptr != '\0')) + return (-1); + + if (*endptr == '/') { + for (cp = endptr + 1; *cp != '\0'; cp++) { + switch (*cp) { + case 'B': + Bflag = 1; + break; + case 'b': + bflag = 1; + break; + case 'c': + cflag = 1; + break; + default: + return (-1); + } + } + } + } else + range = UINT64_MAX; + if (range > (uint64_t)ms->size - ms->offset) + range = ms->size - ms->offset; + size = ml->test_string_size; + + /* Want to search every starting position from up to range + size. */ + end = range + size; + if (end > ms->size - ms->offset) { + if (size > ms->size - ms->offset) + end = 0; + else + end = ms->size - ms->offset - size; + } + + /* + * < and > and the flags are only in /etc/magic with search/1 so don't + * support them with anything else. 
+ */ + start = ms->base + ms->offset; + if (end == 0) + found = NULL; + else if (ml->test_operator == 'x') + found = start; + else if (range == 1) { + n = magic_test_eq(start, ms->size - ms->offset, ml->test_string, + size, cflag, bflag, Bflag); + if (n == -1 && ml->test_operator == '<') + found = start; + else if (n == 1 && ml->test_operator == '>') + found = start; + else if (n == 0 && ml->test_operator == '=') + found = start; + else + found = NULL; + } else { + if (ml->test_operator != '=') + return (-2); + for (i = 0; i < end; i++) { + n = magic_test_eq(start + i, ms->size - ms->offset - i, + ml->test_string, size, cflag, bflag, Bflag); + if (n == 0) { + found = start + i; + break; + } + } + if (i == end) + found = NULL; + } + result = (found != NULL); + + if (result == !ml->test_not && ml->result != NULL && found != NULL) { + magic_add_string(ms, ml, found, ms->size - ms->offset); + ms->offset = found - start + size; + } + return (result); +} + +static int +magic_test_type_default(__unused struct magic_line *ml, + __unused struct magic_state *ms) +{ + return (1); +} + +static int (*magic_test_functions[])(struct magic_line *, + struct magic_state *) = { + magic_test_type_none, + magic_test_type_byte, + magic_test_type_short, + magic_test_type_long, + magic_test_type_quad, + magic_test_type_ubyte, + magic_test_type_ushort, + magic_test_type_ulong, + magic_test_type_uquad, + magic_test_type_float, + magic_test_type_double, + magic_test_type_string, + magic_test_type_pstring, + magic_test_type_date, + magic_test_type_qdate, + magic_test_type_date, + magic_test_type_qdate, + magic_test_type_udate, + magic_test_type_uqdate, + magic_test_type_udate, + magic_test_type_qdate, + magic_test_type_short, + magic_test_type_long, + magic_test_type_quad, + magic_test_type_ushort, + magic_test_type_ulong, + magic_test_type_uquad, + magic_test_type_float, + magic_test_type_double, + magic_test_type_date, + magic_test_type_qdate, + magic_test_type_date, + 
magic_test_type_qdate, + magic_test_type_udate, + magic_test_type_uqdate, + magic_test_type_udate, + magic_test_type_uqdate, + magic_test_type_bestring16, + magic_test_type_short, + magic_test_type_long, + magic_test_type_quad, + magic_test_type_ushort, + magic_test_type_ulong, + magic_test_type_uquad, + magic_test_type_float, + magic_test_type_double, + magic_test_type_date, + magic_test_type_qdate, + magic_test_type_date, + magic_test_type_qdate, + magic_test_type_udate, + magic_test_type_uqdate, + magic_test_type_udate, + magic_test_type_uqdate, + magic_test_type_lestring16, + magic_test_type_melong, + magic_test_type_medate, + magic_test_type_meldate, + magic_test_type_regex, + magic_test_type_search, + magic_test_type_default, +}; + +static int +magic_test_line(struct magic_line *ml, struct magic_state *ms) +{ + struct magic_line *child; + int64_t offset, wanted, next; + int result; + uint8_t b; + uint16_t s; + uint32_t l; + + if (ml->indirect_type == ' ') + wanted = ml->offset; + else { + wanted = ml->indirect_offset; + if (ml->indirect_relative) { + if (wanted < 0 && -wanted > ms->offset) + return (0); + if (wanted > 0 && ms->offset + wanted > ms->size) + return (0); + next = ms->offset + ml->indirect_offset; + } else + next = wanted; + + switch (ml->indirect_type) { + case 'b': + case 'B': + if (magic_copy_from(ms, next, &b, sizeof b) != 0) + return (0); + wanted = b; + break; + case 's': + if (magic_copy_from(ms, next, &s, sizeof s) != 0) + return (0); + wanted = letoh16(s); + break; + case 'S': + if (magic_copy_from(ms, next, &s, sizeof s) != 0) + return (0); + wanted = betoh16(s); + break; + case 'l': + if (magic_copy_from(ms, next, &l, sizeof l) != 0) + return (0); + wanted = letoh16(l); + break; + case 'L': + if (magic_copy_from(ms, next, &l, sizeof l) != 0) + return (0); + wanted = betoh16(l); + break; + } + + switch (ml->indirect_operator) { + case '+': + wanted += ml->indirect_operand; + break; + case '-': + wanted -= ml->indirect_operand; + break; 
+ case '*': + wanted *= ml->indirect_operand; + break; + } + } + + if (ml->offset_relative) { + if (wanted < 0 && -wanted > ms->offset) + return (0); + if (wanted > 0 && ms->offset + wanted > ms->size) + return (0); + offset = ms->offset + wanted; + } else + offset = wanted; + if (offset < 0 || offset > ms->size) + return (0); + ms->offset = offset; + + result = magic_test_functions[ml->type](ml, ms); + if (result == -1) { + magic_warn(ml, "test %s/%c failed", ml->type_string, + ml->test_operator); + return (0); + } + if (result == -2) { + magic_warn(ml, "test %s/%c not implemented", ml->type_string, + ml->test_operator); + return (0); + } + if (result == ml->test_not) + return (0); + if (ml->mimetype != NULL) + ms->mimetype = ml->mimetype; + + magic_warn(ml, "test %s/%c matched at offset %llu: '%s'", + ml->type_string, ml->test_operator, ms->offset, + ml->result == NULL ? "" : ml->result); + + offset = ms->offset; + TAILQ_FOREACH(child, &ml->children, entry) { + ms->offset = offset; + magic_test_line(child, ms); + } + return (1); +} + +const char * +magic_test(struct magic *m, const void *base, size_t size, int flags) +{ + struct magic_line *ml; + static struct magic_state ms; + + memset(&ms, 0, sizeof ms); + + ms.base = base; + ms.size = size; + + ms.text = !!(flags & MAGIC_TEST_TEXT); + + RB_FOREACH(ml, magic_tree, &m->tree) { + ms.offset = 0; + if (ml->text == ms.text && magic_test_line(ml, &ms)) + break; + } + + if (*ms.out != '\0') { + if (flags & MAGIC_TEST_MIME) { + if (ms.mimetype) + return (xstrdup(ms.mimetype)); + return (NULL); + } + return (xstrdup(ms.out)); + } + return (NULL); +} diff --git a/usr.bin/file/magic.c b/usr.bin/file/magic.c deleted file mode 100644 index 0e9fd1c66c6..00000000000 --- a/usr.bin/file/magic.c +++ /dev/null @@ -1,395 +0,0 @@ -/* $OpenBSD: magic.c,v 1.10 2015/01/16 08:24:04 doug Exp $ */ -/* - * Copyright (c) Christos Zoulas 2003. - * All Rights Reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -#include -#include - -#include "file.h" -#include "magic.h" - -#include -#include -#include -#include -#ifdef QUICK -#include -#endif -#include /* for PIPE_BUF */ - -#if defined(HAVE_UTIMES) -# include -#elif defined(HAVE_UTIME) -# if defined(HAVE_SYS_UTIME_H) -# include -# elif defined(HAVE_UTIME_H) -# include -# endif -#endif - -#ifdef HAVE_UNISTD_H -#include /* for read() */ -#endif - -#ifdef HAVE_LOCALE_H -#include -#endif - -#include /* for byte swapping */ - -#include "patchlevel.h" - -#ifndef PIPE_BUF -/* Get the PIPE_BUF from pathconf */ -#ifdef _PC_PIPE_BUF -#define PIPE_BUF pathconf(".", _PC_PIPE_BUF) -#else -#define PIPE_BUF 512 -#endif -#endif - -#ifdef __EMX__ -private char *apptypeName = NULL; -protected int file_os2_apptype(struct magic_set *ms, const char *fn, - const void *buf, size_t nb); -#endif /* __EMX__ */ - -private void free_mlist(struct mlist *); -private void close_and_restore(const struct magic_set *, const char *, int, - const struct stat *); -private int info_from_stat(struct magic_set *, mode_t); -#ifndef COMPILE_ONLY -private const char *file_or_fd(struct magic_set *, const char *, int); -#endif - -#ifndef STDIN_FILENO -#define STDIN_FILENO 0 -#endif - -public struct magic_set * -magic_open(int flags) -{ - struct magic_set *ms; - - if ((ms = calloc((size_t)1, sizeof(struct magic_set))) == NULL) - return NULL; - - if (magic_setflags(ms, flags) == -1) { - errno = EINVAL; - goto free; - } - - ms->o.buf = ms->o.pbuf = NULL; - - ms->c.len = 10; - ms->c.li = reallocarray(NULL, ms->c.len, sizeof(*ms->c.li)); - if (ms->c.li == NULL) - goto free; - - ms->haderr = 0; - ms->error = -1; - ms->mlist = NULL; - ms->file = "unknown"; - ms->line = 0; - return ms; -free: - free(ms); - return NULL; -} - -private void -free_mlist(struct mlist *mlist) -{ - struct mlist *ml; - - if (mlist == NULL) - return; - - for (ml = mlist->next; ml != mlist;) { - struct mlist *next = ml->next; - struct magic *mg = ml->magic; - file_delmagic(mg, ml->mapped, 
ml->nmagic); - free(ml); - ml = next; - } - free(ml); -} - -private int -info_from_stat(struct magic_set *ms, mode_t md) -{ - /* We cannot open it, but we were able to stat it. */ - if (md & 0222) - if (file_printf(ms, "writable, ") == -1) - return -1; - if (md & 0111) - if (file_printf(ms, "executable, ") == -1) - return -1; - if (S_ISREG(md)) - if (file_printf(ms, "regular file, ") == -1) - return -1; - if (file_printf(ms, "no read permission") == -1) - return -1; - return 0; -} - -public void -magic_close(struct magic_set *ms) -{ - free_mlist(ms->mlist); - free(ms->o.pbuf); - free(ms->o.buf); - free(ms->c.li); - free(ms); -} - -/* - * load a magic file - */ -public int -magic_load(struct magic_set *ms, const char *magicfile) -{ - struct mlist *ml = file_apprentice(ms, magicfile, FILE_LOAD); - if (ml) { - free_mlist(ms->mlist); - ms->mlist = ml; - return 0; - } - return -1; -} - -public int -magic_compile(struct magic_set *ms, const char *magicfile) -{ - struct mlist *ml = file_apprentice(ms, magicfile, FILE_COMPILE); - free_mlist(ml); - return ml ? 0 : -1; -} - -public int -magic_check(struct magic_set *ms, const char *magicfile) -{ - struct mlist *ml = file_apprentice(ms, magicfile, FILE_CHECK); - free_mlist(ml); - return ml ? 0 : -1; -} - -private void -close_and_restore(const struct magic_set *ms, const char *name, int fd, - const struct stat *sb) -{ - if (fd == STDIN_FILENO) - return; - (void) close(fd); - - if ((ms->flags & MAGIC_PRESERVE_ATIME) != 0) { - /* - * Try to restore access, modification times if read it. - * This is really *bad* because it will modify the status - * time of the file... 
And of course this will affect - * backup programs - */ -#ifdef HAVE_UTIMES - struct timeval utsbuf[2]; - (void)memset(utsbuf, 0, sizeof(utsbuf)); - utsbuf[0].tv_sec = sb->st_atime; - utsbuf[1].tv_sec = sb->st_mtime; - - (void) utimes(name, utsbuf); /* don't care if loses */ -#elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H) - struct utimbuf utbuf; - - (void)memset(utbuf, 0, sizeof(utbuf)); - utbuf.actime = sb->st_atime; - utbuf.modtime = sb->st_mtime; - (void) utime(name, &utbuf); /* don't care if loses */ -#endif - } -} - -#ifndef COMPILE_ONLY - -/* - * find type of descriptor - */ -public const char * -magic_descriptor(struct magic_set *ms, int fd) -{ - return file_or_fd(ms, NULL, fd); -} - -/* - * find type of named file - */ -public const char * -magic_file(struct magic_set *ms, const char *inname) -{ - return file_or_fd(ms, inname, STDIN_FILENO); -} - -private const char * -file_or_fd(struct magic_set *ms, const char *inname, int fd) -{ - int rv = -1; - unsigned char *buf; - struct stat sb; - ssize_t nbytes = 0; /* number of bytes read from a datafile */ - int ispipe = 0; - - /* - * one extra for terminating '\0', and - * some overlapping space for matches near EOF - */ -#define SLOP (1 + sizeof(union VALUETYPE)) - if ((buf = malloc(HOWMANY + SLOP)) == NULL) - return NULL; - - if (file_reset(ms) == -1) - goto done; - - switch (file_fsmagic(ms, inname, &sb)) { - case -1: /* error */ - goto done; - case 0: /* nothing found */ - break; - default: /* matched it and printed type */ - rv = 0; - goto done; - } - - if (inname == NULL) { - if (fstat(fd, &sb) == 0 && S_ISFIFO(sb.st_mode)) - ispipe = 1; - } else { - int flags = O_RDONLY|O_BINARY; - - if (stat(inname, &sb) == 0 && S_ISFIFO(sb.st_mode)) { - flags |= O_NONBLOCK; - ispipe = 1; - } - - errno = 0; - if ((fd = open(inname, flags)) < 0) { -#ifdef __CYGWIN__ - /* FIXME: Do this with EXEEXT from autotools */ - char *tmp = alloca(strlen(inname) + 5); - (void)strcat(strcpy(tmp, inname), ".exe"); - if ((fd = 
open(tmp, flags)) < 0) { -#endif - fprintf(stderr, "couldn't open file\n"); - if (info_from_stat(ms, sb.st_mode) == -1) - goto done; - rv = 0; - goto done; -#ifdef __CYGWIN__ - } -#endif - } -#ifdef O_NONBLOCK - if ((flags = fcntl(fd, F_GETFL)) != -1) { - flags &= ~O_NONBLOCK; - (void)fcntl(fd, F_SETFL, flags); - } -#endif - } - - /* - * try looking at the first HOWMANY bytes - */ - if (ispipe) { - ssize_t r = 0; - - while ((r = sread(fd, (void *)&buf[nbytes], - (size_t)(HOWMANY - nbytes), 1)) > 0) { - nbytes += r; - if (r < PIPE_BUF) break; - } - - if (nbytes == 0) { - /* We can not read it, but we were able to stat it. */ - if (info_from_stat(ms, sb.st_mode) == -1) - goto done; - rv = 0; - goto done; - } - - } else { - if ((nbytes = read(fd, (char *)buf, HOWMANY)) == -1) { - file_error(ms, errno, "cannot read `%s'", inname); - goto done; - } - } - - (void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */ - if (file_buffer(ms, fd, inname, buf, (size_t)nbytes) == -1) - goto done; - rv = 0; -done: - free(buf); - close_and_restore(ms, inname, fd, &sb); - return rv == 0 ? file_getbuffer(ms) : NULL; -} - - -public const char * -magic_buffer(struct magic_set *ms, const void *buf, size_t nb) -{ - if (file_reset(ms) == -1) - return NULL; - /* - * The main work is done here! - * We have the file name and/or the data buffer to be identified. - */ - if (file_buffer(ms, -1, NULL, buf, nb) == -1) { - return NULL; - } - return file_getbuffer(ms); -} -#endif - -public const char * -magic_error(struct magic_set *ms) -{ - return ms->haderr ? ms->o.buf : NULL; -} - -public int -magic_errno(struct magic_set *ms) -{ - return ms->haderr ? 
ms->error : 0; -} - -public int -magic_setflags(struct magic_set *ms, int flags) -{ -#if !defined(HAVE_UTIME) && !defined(HAVE_UTIMES) - if (flags & MAGIC_PRESERVE_ATIME) - return -1; -#endif - ms->flags = flags; - return 0; -} diff --git a/usr.bin/file/magic.h b/usr.bin/file/magic.h index 03d0b1b0d6c..3ee8331e0f5 100644 --- a/usr.bin/file/magic.h +++ b/usr.bin/file/magic.h @@ -1,83 +1,180 @@ -/* $OpenBSD: magic.h,v 1.4 2009/04/24 18:54:34 chl Exp $ */ +/* $OpenBSD: magic.h,v 1.5 2015/04/24 16:24:11 nicm Exp $ */ + /* - * Copyright (c) Christos Zoulas 2003. - * All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
+ * Copyright (c) 2015 Nicholas Marriott + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#ifndef _MAGIC_H -#define _MAGIC_H - -#include - -#define MAGIC_NONE 0x000000 /* No flags */ -#define MAGIC_DEBUG 0x000001 /* Turn on debugging */ -#define MAGIC_SYMLINK 0x000002 /* Follow symlinks */ -#define MAGIC_COMPRESS 0x000004 /* Check inside compressed files */ -#define MAGIC_DEVICES 0x000008 /* Look at the contents of devices */ -#define MAGIC_MIME_TYPE 0x000010 /* Return only the MIME type */ -#define MAGIC_CONTINUE 0x000020 /* Return all matches */ -#define MAGIC_CHECK 0x000040 /* Print warnings to stderr */ -#define MAGIC_PRESERVE_ATIME 0x000080 /* Restore access time on exit */ -#define MAGIC_RAW 0x000100 /* Don't translate unprint chars */ -#define MAGIC_ERROR 0x000200 /* Handle ENOENT etc as real errors */ -#define MAGIC_MIME_ENCODING 0x000400 /* Return only the MIME encoding */ -#define MAGIC_MIME (MAGIC_MIME_TYPE|MAGIC_MIME_ENCODING) -#define MAGIC_NO_CHECK_COMPRESS 0x001000 /* Don't check for compressed files */ -#define MAGIC_NO_CHECK_TAR 0x002000 /* Don't check for tar files */ -#define MAGIC_NO_CHECK_SOFT 0x004000 /* Don't check magic entries */ -#define MAGIC_NO_CHECK_APPTYPE 0x008000 /* Don't check application type */ -#define MAGIC_NO_CHECK_ELF 0x010000 /* Don't 
check for elf details */ -#define MAGIC_NO_CHECK_ASCII 0x020000 /* Don't check for ascii files */ -#define MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check ascii/tokens */ - -/* Defined for backwards compatibility; do nothing */ -#define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */ -#define MAGIC_NO_CHECK_TROFF 0x000000 /* Don't check ascii/troff */ - - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct magic_set *magic_t; -magic_t magic_open(int); -void magic_close(magic_t); - -const char *magic_file(magic_t, const char *); -const char *magic_descriptor(magic_t, int); -const char *magic_buffer(magic_t, const void *, size_t); - -const char *magic_error(magic_t); -int magic_setflags(magic_t, int); - -int magic_load(magic_t, const char *); -int magic_compile(magic_t, const char *); -int magic_check(magic_t, const char *); -int magic_errno(magic_t); - -#ifdef __cplusplus + +#ifndef MAGIC_H +#define MAGIC_H + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define MAGIC_STRING_SIZE 31 +#define MAGIC_STRENGTH_MULTIPLIER 20 + +enum magic_type { + MAGIC_TYPE_NONE = 0, + MAGIC_TYPE_BYTE, + MAGIC_TYPE_SHORT, + MAGIC_TYPE_LONG, + MAGIC_TYPE_QUAD, + MAGIC_TYPE_UBYTE, + MAGIC_TYPE_USHORT, + MAGIC_TYPE_ULONG, + MAGIC_TYPE_UQUAD, + MAGIC_TYPE_FLOAT, + MAGIC_TYPE_DOUBLE, + MAGIC_TYPE_STRING, + MAGIC_TYPE_PSTRING, + MAGIC_TYPE_DATE, + MAGIC_TYPE_QDATE, + MAGIC_TYPE_LDATE, + MAGIC_TYPE_QLDATE, + MAGIC_TYPE_UDATE, + MAGIC_TYPE_UQDATE, + MAGIC_TYPE_ULDATE, + MAGIC_TYPE_UQLDATE, + MAGIC_TYPE_BESHORT, + MAGIC_TYPE_BELONG, + MAGIC_TYPE_BEQUAD, + MAGIC_TYPE_UBESHORT, + MAGIC_TYPE_UBELONG, + MAGIC_TYPE_UBEQUAD, + MAGIC_TYPE_BEFLOAT, + MAGIC_TYPE_BEDOUBLE, + MAGIC_TYPE_BEDATE, + MAGIC_TYPE_BEQDATE, + MAGIC_TYPE_BELDATE, + MAGIC_TYPE_BEQLDATE, + MAGIC_TYPE_UBEDATE, + MAGIC_TYPE_UBEQDATE, + MAGIC_TYPE_UBELDATE, + MAGIC_TYPE_UBEQLDATE, + MAGIC_TYPE_BESTRING16, + MAGIC_TYPE_LESHORT, + MAGIC_TYPE_LELONG, + 
MAGIC_TYPE_LEQUAD, + MAGIC_TYPE_ULESHORT, + MAGIC_TYPE_ULELONG, + MAGIC_TYPE_ULEQUAD, + MAGIC_TYPE_LEFLOAT, + MAGIC_TYPE_LEDOUBLE, + MAGIC_TYPE_LEDATE, + MAGIC_TYPE_LEQDATE, + MAGIC_TYPE_LELDATE, + MAGIC_TYPE_LEQLDATE, + MAGIC_TYPE_ULEDATE, + MAGIC_TYPE_ULEQDATE, + MAGIC_TYPE_ULELDATE, + MAGIC_TYPE_ULEQLDATE, + MAGIC_TYPE_LESTRING16, + MAGIC_TYPE_MELONG, + MAGIC_TYPE_MEDATE, + MAGIC_TYPE_MELDATE, + MAGIC_TYPE_REGEX, + MAGIC_TYPE_SEARCH, + MAGIC_TYPE_DEFAULT, }; -#endif -#endif /* _MAGIC_H */ +TAILQ_HEAD(magic_lines, magic_line); +RB_HEAD(magic_tree, magic_line); + +struct magic_line { + struct magic *root; + u_int line; + u_int strength; + struct magic_line *parent; + + int text; + + int64_t offset; + int offset_relative; + + char indirect_type; + int indirect_relative; + int64_t indirect_offset; + char indirect_operator; + int64_t indirect_operand; + + enum magic_type type; + const char *type_string; + char type_operator; + int64_t type_operand; + + char test_operator; + int test_not; + const char *test_string; + size_t test_string_size; + uint64_t test_unsigned; + int64_t test_signed; + + int stringify; + const char *result; + const char *mimetype; + + struct magic_lines children; + TAILQ_ENTRY(magic_line) entry; + RB_ENTRY(magic_line) node; +}; + +struct magic { + const char *path; + int warnings; + + struct magic_tree tree; + + int compiled; + regex_t format_short; + regex_t format_long; + regex_t format_quad; + regex_t format_float; + regex_t format_string; +}; + +struct magic_state { + char out[4096]; + const char *mimetype; + int text; + + const char *base; + size_t size; + int64_t offset; +}; + +#define MAGIC_TEST_TEXT 0x1 +#define MAGIC_TEST_MIME 0x2 + +int magic_compare(struct magic_line *, struct magic_line *); +RB_PROTOTYPE(magic_tree, magic_line, node, magic_compare); + +char *magic_strtoull(const char *, uint64_t *); +char *magic_strtoll(const char *, int64_t *); +void magic_warn(struct magic_line *, const char *, ...) 
+ __attribute__ ((format (printf, 2, 3))); + +void magic_dump(struct magic *); +struct magic *magic_load(FILE *, const char *, int); +const char *magic_test(struct magic *, const void *, size_t, int); + +#endif /* MAGIC_H */ diff --git a/usr.bin/file/names.h b/usr.bin/file/names.h deleted file mode 100644 index ef5fdf86d98..00000000000 --- a/usr.bin/file/names.h +++ /dev/null @@ -1,174 +0,0 @@ -/* $OpenBSD: names.h,v 1.8 2009/04/24 18:54:34 chl Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* - * Names.h - names and types used by ascmagic in file(1). - * These tokens are here because they can appear anywhere in - * the first HOWMANY bytes, while tokens in MAGIC must - * appear at fixed offsets into the file. Don't make HOWMANY - * too high unless you have a very fast CPU. - * - * $Id: names.h,v 1.8 2009/04/24 18:54:34 chl Exp $ - */ - -/* - modified by Chris Lowth - 9 April 2000 - to add mime type strings to the types table. -*/ - -/* these types are used to index the table 'types': keep em in sync! */ -#define L_C 0 /* first and foremost on UNIX */ -#define L_CC 1 /* Bjarne's postincrement */ -#define L_MAKE 2 /* Makefiles */ -#define L_PLI 3 /* PL/1 */ -#define L_MACH 4 /* some kinda assembler */ -#define L_ENG 5 /* English */ -#define L_PAS 6 /* Pascal */ -#define L_MAIL 7 /* Electronic mail */ -#define L_NEWS 8 /* Usenet Netnews */ -#define L_JAVA 9 /* Java code */ -#define L_HTML 10 /* HTML */ -#define L_BCPL 11 /* BCPL */ -#define L_M4 12 /* M4 */ -#define L_PO 13 /* PO */ - -static const struct { - char human[48]; - char mime[16]; -} types[] = { - { "C program", "text/x-c", }, - { "C++ program", "text/x-c++" }, - { "make commands", "text/x-makefile" }, - { "PL/1 program", "text/x-pl1" }, - { "assembler program", "text/x-asm" }, - { "English", "text/plain" }, - { "Pascal program", "text/x-pascal" }, - { "mail", "text/x-mail" }, - { "news", "text/x-news" }, - { "Java program", "text/x-java" }, - { "HTML document", "text/html", }, - { "BCPL program", "text/x-bcpl" }, - { "M4 macro language pre-processor", "text/x-m4" }, - { "PO (gettext message catalogue)", "text/x-po" }, - { "cannot happen error on names.h/types", "error/x-error" } -}; - -/* - * XXX - how should we distinguish Java from C++? - * The trick used in a Debian snapshot, of having "extends" or "implements" - * as tags for Java, doesn't work very well, given that those keywords - * are often preceded by "class", which flags it as C++. 
- * - * Perhaps we need to be able to say - * - * If "class" then - * - * if "extends" or "implements" then - * Java - * else - * C++ - * endif - * - * Or should we use other keywords, such as "package" or "import"? - * Unfortunately, Ada95 uses "package", and Modula-3 uses "import", - * although I infer from the language spec at - * - * http://www.research.digital.com/SRC/m3defn/html/m3.html - * - * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be - * in all caps. - * - * So, for now, we go with "import". We must put it before the C++ - * stuff, so that we don't misidentify Java as C++. Not using "package" - * means we won't identify stuff that defines a package but imports - * nothing; hopefully, very little Java code imports nothing (one of the - * reasons for doing OO programming is to import as much as possible - * and write only what you need to, right?). - * - * Unfortunately, "import" may cause us to misidentify English text - * as Java, as it comes after "the" and "The". Perhaps we need a fancier - * heuristic to identify Java? - */ -static const struct names { - char name[14]; - short type; -} names[] = { - /* These must be sorted by eye for optimal hit rate */ - /* Add to this list only after substantial meditation */ - {"msgid", L_PO}, - {"dnl", L_M4}, - {"import", L_JAVA}, - {"\"libhdr\"", L_BCPL}, - {"\"LIBHDR\"", L_BCPL}, - {"//", L_CC}, - {"template", L_CC}, - {"virtual", L_CC}, - {"class", L_CC}, - {"public:", L_CC}, - {"private:", L_CC}, - {"/*", L_C}, /* must precede "The", "the", etc. 
*/ - {"#include", L_C}, - {"char", L_C}, - {"The", L_ENG}, - {"the", L_ENG}, - {"double", L_C}, - {"extern", L_C}, - {"float", L_C}, - {"struct", L_C}, - {"union", L_C}, - {"CFLAGS", L_MAKE}, - {"LDFLAGS", L_MAKE}, - {"all:", L_MAKE}, - {".PRECIOUS", L_MAKE}, - {".ascii", L_MACH}, - {".asciiz", L_MACH}, - {".byte", L_MACH}, - {".even", L_MACH}, - {".globl", L_MACH}, - {".text", L_MACH}, - {"clr", L_MACH}, - {"(input,", L_PAS}, - {"program", L_PAS}, - {"record", L_PAS}, - {"dcl", L_PLI}, - {"Received:", L_MAIL}, - {">From", L_MAIL}, - {"Return-Path:",L_MAIL}, - {"Cc:", L_MAIL}, - {"Newsgroups:", L_NEWS}, - {"Path:", L_NEWS}, - {"Organization:",L_NEWS}, - {"href=", L_HTML}, - {"HREF=", L_HTML}, - {" magic_buffer - * - keep only the first error - * - manual page: new sentence, new line - * - fix typo in api function (magic_buf -> magic_buffer) - * - * Revision 1.44 2003/03/23 22:23:31 christos - * finish librarification. - * - * Revision 1.43 2003/03/23 21:16:26 christos - * update copyrights. - * - * Revision 1.42 2003/03/23 04:06:05 christos - * Library re-organization - * - * Revision 1.41 2003/02/27 20:53:45 christos - * - fix memory allocation problem (Jeff Johnson) - * - fix stack overflow corruption (David Endler) - * - fixes from NetBSD source (Antti Kantee) - * - magic fixes - * - * Revision 1.40 2003/02/08 18:33:53 christos - * - detect inttypes.h too (Dave Love ) - * - eliminate unsigned char warnings (Petter Reinholdtsen ) - * - better elf PT_NOTE handling (Nalin Dahyabhai ) - * - add options to format the output differently - * - much more magic. - * - * Revision 1.39 2002/07/03 18:57:52 christos - * - ansify/c99ize - * - more magic - * - better COMPILE_ONLY support. - * - new magic files. - * - fix solaris compilation problems. - * - * Revision 1.38 2002/05/16 18:45:56 christos - * - pt_note elf additions from NetBSD - * - EMX os specific changes (Alexander Mai) - * - stdint.h detection, acconfig.h fixes (Maciej W. 
Rozycki, Franz Korntner) - * - regex file additions (Kim Cromie) - * - getopt_long support and misc cleanups (Michael Piefel) - * - many magic fixes and additions - * - * Revision 1.37 2001/09/03 14:44:22 christos - * daylight/tm_isdst detection - * magic fixes - * don't eat the whole file if it has only nulls - * - * Revision 1.36 2001/07/22 21:04:15 christos - * - magic fixes - * - add new operators, pascal strings, UTC date printing, $HOME/.magic - * [from "Tom N Harris" ] - * - * Revision 1.35 2001/04/24 14:40:25 christos - * - rename magic file sgi to mips and fix it - * - add support for building magic.mgc - * - portability fixes for mmap() - * - try gzip before uncompress, because uncompress sometimes hangs - * - be more conservative about pipe reads and writes - * - many magic fixes - * - * Revision 1.34 2001/03/12 05:05:57 christos - * - new compiled magic format - * - lots of magic additions - * - * Revision 1.33 2000/11/13 00:30:50 christos - * - wordperfect magic fix: freebsd pr 9388 - * - more msdos fixes from freebsd pr's 20131 and 20812 - * - sas and spss magic [Bruce Foster] - * - mkinstalldirs [John Fremlin] - * - sgi opengl fixes [Michael Pruett] - * - netbsd magic fixes [Ignatios Souvatzis] - * - audio additions [Michael Pruett] - * - fix problem with non ansi RCSID [Andreas Ley] - * - oggs magic [Felix von Leitner] - * - gmon magic [Eugen Dedu] - * - TNEF magic [Joomy] - * - netpbm magic and misc other image stuff [Bryan Henderson] - * - * Revision 1.32 2000/08/05 18:24:18 christos - * Correct indianness detection in elf (Charles Hannum) - * FreeBSD elf core support (Guy Harris) - * Use gzip in systems that don't have uncompress (Anthon van der Neut) - * Internationalization/EBCDIC support (Eric Fisher) - * Many many magic changes - * - * Revision 1.31 2000/05/14 17:58:36 christos - * - new magic for claris files - * - new magic for mathematica and maple files - * - new magic for msvc files - * - new -k flag to keep going matching all possible 
entries - * - add the word executable on #! magic files, and fix the usage of - * the word script - * - lots of other magic fixes - * - fix typo test -> text - * - * Revision 1.30 2000/04/11 02:41:17 christos - * - add support for mime output (-i) - * - make sure we free memory in case realloc fails - * - magic fixes - * - * Revision 1.29 1999/11/28 20:02:29 christos - * new string/[Bcb] magic from anthon, and adjustments to the magic files to - * use it. - * - * Revision 1.28 1999/10/31 22:11:48 christos - * - add "char" type for compatibility with HP/UX - * - recognize HP/UX syntax &=n etc. - * - include errno.h for CYGWIN - * - conditionalize the S_IS* macros - * - revert the SHT_DYNSYM test that broke the linux stripped binaries test - * - lots of Magdir changes - * - * Revision 1.27 1999/02/14 17:21:41 christos - * Automake support and misc cleanups from Rainer Orth - * Enable reading character and block special files from Dale R. Worley - * - * Revision 1.26 1998/09/12 13:19:39 christos - * - add support for bi-endian indirect offsets (Richard Verhoeven) - * - add recognition for bcpl (Joseph Myers) - * - remove non magic files from Magdir to avoid difficulties building - * on os2 where files are case independent - * - magic fixes. - * - * Revision 1.25 1998/06/27 14:04:04 christos - * OLF patch Guy Harris - * Recognize java/html (debian linux) - * Const poisoning (debian linux) - * More magic! 
- * - * Revision 1.24 1998/02/15 23:20:38 christos - * Autoconf patch: Felix von Leitner - * More magic fixes - * Elf64 fixes - * - * Revision 1.23 1997/11/05 16:03:37 christos - * - correct elf prps offset for SunOS-2.5.1 [guy@netapp.com] - * - handle 64 bit time_t's correctly [ewt@redhat.com] - * - new mime style magic [clarosse@netvista.net] - * - new TI calculator magic [rmcguire@freenet.columbus.oh.us] - * - new figlet fonts [obrien@freebsd.org] - * - new cisco magic, and elf fixes [jhawk@bbnplanet.com] - * - -b flag addition, and x86 filesystem magic [vax@linkhead.paranoia.com] - * - s/Mpeg/MPEG, header and elf typo fixes [guy@netapp.com] - * - Windows/NT registry files, audio code [guy@netapp.com] - * - libGrx graphics lib fonts [guy@netapp.com] - * - PNG fixes [guy@netapp.com] - * - more m$ document magic [guy@netapp.com] - * - PPD files [guy@netapp.com] - * - archive magic cleanup [guy@netapp.com] - * - linux kernel magic cleanup [guy@netapp.com] - * - lecter magic [guy@netapp.com] - * - vgetty magic [guy@netapp.com] - * - sniffer additions [guy@netapp.com] - * - * Revision 1.22 1997/01/15 17:23:24 christos - * - add support for elf core files: find the program name under SVR4 [Ken Pizzini] - * - print strings only up to the first carriage return [various] - * - freebsd international ascii support [J Wunsch] - * - magic fixes and additions [Guy Harris] - * - 64 bit fixes [Larry Schwimmer] - * - support for both utime and utimes, but don't restore file access times - * by default [various] - * - \xXX only takes 2 hex digits, not 3. 
- * - re-implement support for core files [Guy Harris] - * - * Revision 1.21 1996/10/05 18:15:29 christos - * Segregate elf stuff and conditionally enable it with -DBUILTIN_ELF - * More magic fixes - * - * Revision 1.20 1996/06/22 22:15:52 christos - * - support relative offsets of the form >& - * - fix bug with truncating magic strings that contain \n - * - file -f - did not read from stdin as documented - * - support elf file parsing using our own elf support. - * - as always magdir fixes and additions. - * - * Revision 1.19 1995/10/27 23:14:46 christos - * Ability to parse colon separated list of magic files - * New LEGAL.NOTICE - * Various magic file changes - * - * Revision 1.18 1995/05/20 22:09:21 christos - * Passed incorrect argument to eatsize(). - * Use %ld and %lx where appropriate. - * Remove unused variables - * ELF support for both big and little endian - * Fixes for small files again. - * - * Revision 1.17 1995/04/28 17:29:13 christos - * - Incorrect nroff detection fix from der Mouse - * - Lost and incorrect magic entries. - * - Added ELF stripped binary detection [in C; ugh] - * - Look for $MAGIC to find the magic file. - * - Eat trailing size specifications from numbers i.e. ignore 10L - * - More fixes for very short files - * - * Revision 1.16 1995/03/25 22:06:45 christos - * - use strtoul() where it exists. - * - fix sign-extend bug - * - try to detect tar archives before nroff files, otherwise - * tar files where the first file starts with a . will not work - * - * Revision 1.15 1995/01/21 21:03:35 christos - * Added CSECTION for the file man page - * Added version flag -v - * Fixed bug with -f input flag (from iorio@violet.berkeley.edu) - * Lots of magic fixes and reorganization... 
- * - * Revision 1.14 1994/05/03 17:58:23 christos - * changes from mycroft@gnu.ai.mit.edu (Charles Hannum) for unsigned - * - * Revision 1.13 1994/01/21 01:27:01 christos - * Fixed null termination bug from Don Seeley at BSDI in ascmagic.c - * - * Revision 1.12 1993/10/27 20:59:05 christos - * Changed -z flag to understand gzip format too. - * Moved builtin compression detection to a table, and move - * the compress magic entry out of the source. - * Made printing of numbers unsigned, and added the mask to it. - * Changed the buffer size to 8k, because gzip will refuse to - * unzip just a few bytes. - * - * Revision 1.11 1993/09/24 18:49:06 christos - * Fixed small bug in softmagic.c introduced by - * copying the data to be examined out of the input - * buffer. Changed the Makefile to use sed to create - * the correct man pages. - * - * Revision 1.10 1993/09/23 21:56:23 christos - * Passed purify. Fixed indirections. Fixed byte order printing. - * Fixed segmentation faults caused by referencing past the end - * of the magic buffer. Fixed bus errors caused by referencing - * unaligned shorts or longs. - * - * Revision 1.9 1993/03/24 14:23:40 ian - * Batch of minor changes from several contributors. - * - * Revision 1.8 93/02/19 15:01:26 ian - * Numerous changes from Guy Harris too numerous to mention but including - * byte-order independance, fixing "old-style masking", etc. etc. A bugfix - * for broken symlinks from martin@@d255s004.zfe.siemens.de. - * - * Revision 1.7 93/01/05 14:57:27 ian - * Couple of nits picked by Christos (again, thanks). - * - * Revision 1.6 93/01/05 13:51:09 ian - * Lotsa work on the Magic directory. - * - * Revision 1.5 92/09/14 14:54:51 ian - * Fix a tiny null-pointer bug in previous fix for tar archive + uncompress. 
- * - */ diff --git a/usr.bin/file/print.c b/usr.bin/file/print.c deleted file mode 100644 index 8b5b5d63b90..00000000000 --- a/usr.bin/file/print.c +++ /dev/null @@ -1,233 +0,0 @@ -/* $OpenBSD: print.c,v 1.17 2013/04/17 15:01:26 deraadt Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* - * print.c - debugging printout routines - */ - -#include "file.h" -#include -#include -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#define SZOF(a) (sizeof(a) / sizeof(a[0])) - -#ifndef COMPILE_ONLY -protected void -file_mdump(struct magic *m) -{ - private const char optyp[] = { FILE_OPS }; - - (void) fprintf(stderr, "[%u", m->lineno); - (void) fprintf(stderr, ">>>>>>>> %u" + 8 - (m->cont_level & 7), - m->offset); - - if (m->flag & INDIR) { - (void) fprintf(stderr, "(%s,", - /* Note: type is unsigned */ - (m->in_type < file_nnames) ? - file_names[m->in_type] : "*bad*"); - if (m->in_op & FILE_OPINVERSE) - (void) fputc('~', stderr); - (void) fprintf(stderr, "%c%u),", - ((m->in_op & FILE_OPS_MASK) < SZOF(optyp)) ? - optyp[m->in_op & FILE_OPS_MASK] : '?', - m->in_offset); - } - (void) fprintf(stderr, " %s%s", (m->flag & UNSIGNED) ? "u" : "", - /* Note: type is unsigned */ - (m->type < file_nnames) ? file_names[m->type] : "*bad*"); - if (m->mask_op & FILE_OPINVERSE) - (void) fputc('~', stderr); - - if (IS_STRING(m->type)) { - if (m->str_flags) { - (void) fputc('/', stderr); - if (m->str_flags & STRING_COMPACT_BLANK) - (void) fputc(CHAR_COMPACT_BLANK, stderr); - if (m->str_flags & STRING_COMPACT_OPTIONAL_BLANK) - (void) fputc(CHAR_COMPACT_OPTIONAL_BLANK, - stderr); - if (m->str_flags & STRING_IGNORE_LOWERCASE) - (void) fputc(CHAR_IGNORE_LOWERCASE, stderr); - if (m->str_flags & STRING_IGNORE_UPPERCASE) - (void) fputc(CHAR_IGNORE_UPPERCASE, stderr); - if (m->str_flags & REGEX_OFFSET_START) - (void) fputc(CHAR_REGEX_OFFSET_START, stderr); - } - if (m->str_range) - (void) fprintf(stderr, "/%u", m->str_range); - } - else { - if ((m->mask_op & FILE_OPS_MASK) < SZOF(optyp)) - (void) fputc(optyp[m->mask_op & FILE_OPS_MASK], stderr); - else - (void) fputc('?', stderr); - - if (m->num_mask) { - (void) fprintf(stderr, "%.8llx", - (unsigned long long)m->num_mask); - } - } - (void) fprintf(stderr, ",%c", m->reln); - - if (m->reln != 
'x') { - switch (m->type) { - case FILE_BYTE: - case FILE_SHORT: - case FILE_LONG: - case FILE_LESHORT: - case FILE_LELONG: - case FILE_MELONG: - case FILE_BESHORT: - case FILE_BELONG: - (void) fprintf(stderr, "%d", m->value.l); - break; - case FILE_BEQUAD: - case FILE_LEQUAD: - case FILE_QUAD: - (void) fprintf(stderr, "%lld", - (unsigned long long)m->value.q); - break; - case FILE_PSTRING: - case FILE_STRING: - case FILE_REGEX: - case FILE_BESTRING16: - case FILE_LESTRING16: - case FILE_SEARCH: - file_showstr(stderr, m->value.s, (size_t)m->vallen); - break; - case FILE_DATE: - case FILE_LEDATE: - case FILE_BEDATE: - case FILE_MEDATE: - (void)fprintf(stderr, "%s,", - file_fmttime(m->value.l, 1)); - break; - case FILE_LDATE: - case FILE_LELDATE: - case FILE_BELDATE: - case FILE_MELDATE: - (void)fprintf(stderr, "%s,", - file_fmttime(m->value.l, 0)); - break; - case FILE_QDATE: - case FILE_LEQDATE: - case FILE_BEQDATE: - (void)fprintf(stderr, "%s,", - file_fmttime(m->value.q, 1)); - break; - case FILE_QLDATE: - case FILE_LEQLDATE: - case FILE_BEQLDATE: - (void)fprintf(stderr, "%s,", - file_fmttime(m->value.q, 0)); - break; - case FILE_FLOAT: - case FILE_BEFLOAT: - case FILE_LEFLOAT: - (void) fprintf(stderr, "%G", m->value.f); - break; - case FILE_DOUBLE: - case FILE_BEDOUBLE: - case FILE_LEDOUBLE: - (void) fprintf(stderr, "%G", m->value.d); - break; - case FILE_DEFAULT: - /* XXX - do anything here? */ - break; - default: - (void) fputs("*bad*", stderr); - break; - } - } - (void) fprintf(stderr, ",\"%s\"]\n", m->desc); -} -#endif - -/*VARARGS*/ -protected void -file_magwarn(struct magic_set *ms, const char *f, ...) 
-{ - va_list va; - - /* cuz we use stdout for most, stderr here */ - (void) fflush(stdout); - - if (ms->file) - (void) fprintf(stderr, "%s, %lu: ", ms->file, - (unsigned long)ms->line); - (void) fprintf(stderr, "Warning: "); - va_start(va, f); - (void) vfprintf(stderr, f, va); - va_end(va); - (void) fputc('\n', stderr); -} - -protected const char * -file_fmttime(uint64_t v, int local) -{ - char *pp; - time_t t = (time_t)v; - struct tm *tm; - - if (local) { - pp = ctime(&t); - } else { -#ifndef HAVE_DAYLIGHT - private int daylight = 0; -#ifdef HAVE_TM_ISDST - private time_t now = (time_t)0; - - if (now == (time_t)0) { - struct tm *tm1; - (void)time(&now); - tm1 = localtime(&now); - if (tm1 == NULL) - return "*Invalid time*"; - daylight = tm1->tm_isdst; - } -#endif /* HAVE_TM_ISDST */ -#endif /* HAVE_DAYLIGHT */ - if (daylight) - t += 3600; - tm = gmtime(&t); - if (tm == NULL) - return "*Invalid time*"; - pp = asctime(tm); - } - - pp[strcspn(pp, "\n")] = '\0'; - return pp; -} diff --git a/usr.bin/file/readelf.c b/usr.bin/file/readelf.c deleted file mode 100644 index f328873f73e..00000000000 --- a/usr.bin/file/readelf.c +++ /dev/null @@ -1,1020 +0,0 @@ -/* $OpenBSD: readelf.c,v 1.12 2014/11/04 16:18:54 deraadt Exp $ */ -/* - * Copyright (c) Christos Zoulas 2003. - * All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -#include "file.h" - -#ifdef BUILTIN_ELF -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "readelf.h" -#include "magic.h" - -#ifdef ELFCORE -private int dophn_core(struct magic_set *, int, int, int, off_t, int, size_t, - off_t, int *); -#endif -private int dophn_exec(struct magic_set *, int, int, int, off_t, int, size_t, - off_t, int *); -private int doshn(struct magic_set *, int, int, int, off_t, int, size_t, int *); -private size_t donote(struct magic_set *, unsigned char *, size_t, size_t, int, - int, size_t, int *); - -#define ELF_ALIGN(a) ((((a) + align - 1) / align) * align) - -#define isquote(c) (strchr("'\"`", (c)) != NULL) - -private uint16_t getu16(int, uint16_t); -private uint32_t getu32(int, uint32_t); -private uint64_t getu64(int, uint64_t); - -private uint16_t -getu16(int swap, uint16_t value) -{ - union { - uint16_t ui; - char c[2]; - } retval, tmpval; - - if (swap) { - tmpval.ui = value; - - retval.c[0] = tmpval.c[1]; - retval.c[1] = tmpval.c[0]; - - return retval.ui; - } else - return value; -} - -private uint32_t -getu32(int swap, uint32_t value) -{ - union { - uint32_t ui; - char c[4]; - } retval, tmpval; - - if (swap) { - tmpval.ui = 
value; - - retval.c[0] = tmpval.c[3]; - retval.c[1] = tmpval.c[2]; - retval.c[2] = tmpval.c[1]; - retval.c[3] = tmpval.c[0]; - - return retval.ui; - } else - return value; -} - -private uint64_t -getu64(int swap, uint64_t value) -{ - union { - uint64_t ui; - char c[8]; - } retval, tmpval; - - if (swap) { - tmpval.ui = value; - - retval.c[0] = tmpval.c[7]; - retval.c[1] = tmpval.c[6]; - retval.c[2] = tmpval.c[5]; - retval.c[3] = tmpval.c[4]; - retval.c[4] = tmpval.c[3]; - retval.c[5] = tmpval.c[2]; - retval.c[6] = tmpval.c[1]; - retval.c[7] = tmpval.c[0]; - - return retval.ui; - } else - return value; -} - -#define elf_getu16(swap, value) getu16(swap, value) -#define elf_getu32(swap, value) getu32(swap, value) -#ifdef USE_ARRAY_FOR_64BIT_TYPES -# define elf_getu64(swap, array) \ - ((swap ? ((uint64_t)elf_getu32(swap, array[0])) << 32 : elf_getu32(swap, array[0])) + \ - (swap ? elf_getu32(swap, array[1]) : ((uint64_t)elf_getu32(swap, array[1]) << 32))) -#else -# define elf_getu64(swap, value) getu64(swap, value) -#endif - -#define xsh_addr (class == ELFCLASS32 \ - ? (void *) &sh32 \ - : (void *) &sh64) -#define xsh_sizeof (class == ELFCLASS32 \ - ? sizeof sh32 \ - : sizeof sh64) -#define xsh_size (class == ELFCLASS32 \ - ? elf_getu32(swap, sh32.sh_size) \ - : elf_getu64(swap, sh64.sh_size)) -#define xsh_offset (class == ELFCLASS32 \ - ? elf_getu32(swap, sh32.sh_offset) \ - : elf_getu64(swap, sh64.sh_offset)) -#define xsh_type (class == ELFCLASS32 \ - ? elf_getu32(swap, sh32.sh_type) \ - : elf_getu32(swap, sh64.sh_type)) -#define xph_addr (class == ELFCLASS32 \ - ? (void *) &ph32 \ - : (void *) &ph64) -#define xph_sizeof (class == ELFCLASS32 \ - ? sizeof ph32 \ - : sizeof ph64) -#define xph_type (class == ELFCLASS32 \ - ? elf_getu32(swap, ph32.p_type) \ - : elf_getu32(swap, ph64.p_type)) -#define xph_offset (off_t)(class == ELFCLASS32 \ - ? elf_getu32(swap, ph32.p_offset) \ - : elf_getu64(swap, ph64.p_offset)) -#define xph_align (size_t)((class == ELFCLASS32 \ - ? 
(off_t) (ph32.p_align ? \ - elf_getu32(swap, ph32.p_align) : 4) \ - : (off_t) (ph64.p_align ? \ - elf_getu64(swap, ph64.p_align) : 4))) -#define xph_filesz (size_t)((class == ELFCLASS32 \ - ? elf_getu32(swap, ph32.p_filesz) \ - : elf_getu64(swap, ph64.p_filesz))) -#define xnh_addr (class == ELFCLASS32 \ - ? (void *) &nh32 \ - : (void *) &nh64) -#define xph_memsz (size_t)((class == ELFCLASS32 \ - ? elf_getu32(swap, ph32.p_memsz) \ - : elf_getu64(swap, ph64.p_memsz))) -#define xnh_sizeof (class == ELFCLASS32 \ - ? sizeof nh32 \ - : sizeof nh64) -#define xnh_type (class == ELFCLASS32 \ - ? elf_getu32(swap, nh32.n_type) \ - : elf_getu32(swap, nh64.n_type)) -#define xnh_namesz (class == ELFCLASS32 \ - ? elf_getu32(swap, nh32.n_namesz) \ - : elf_getu32(swap, nh64.n_namesz)) -#define xnh_descsz (class == ELFCLASS32 \ - ? elf_getu32(swap, nh32.n_descsz) \ - : elf_getu32(swap, nh64.n_descsz)) -#define prpsoffsets(i) (class == ELFCLASS32 \ - ? prpsoffsets32[i] \ - : prpsoffsets64[i]) - -#ifdef ELFCORE -/* - * Try larger offsets first to avoid false matches - * from earlier data that happen to look like strings. 
- */ -static const size_t prpsoffsets32[] = { -#ifdef USE_NT_PSINFO - 104, /* SunOS 5.x (command line) */ - 88, /* SunOS 5.x (short name) */ -#endif /* USE_NT_PSINFO */ - - 100, /* SunOS 5.x (command line) */ - 84, /* SunOS 5.x (short name) */ - - 44, /* Linux (command line) */ - 28, /* Linux 2.0.36 (short name) */ - - 8, /* FreeBSD */ -}; - -static const size_t prpsoffsets64[] = { -#ifdef USE_NT_PSINFO - 152, /* SunOS 5.x (command line) */ - 136, /* SunOS 5.x (short name) */ -#endif /* USE_NT_PSINFO */ - - 136, /* SunOS 5.x, 64-bit (command line) */ - 120, /* SunOS 5.x, 64-bit (short name) */ - - 56, /* Linux (command line) */ - 40, /* Linux (tested on core from 2.4.x, short name) */ - - 16, /* FreeBSD, 64-bit */ -}; - -#define NOFFSETS32 (sizeof prpsoffsets32 / sizeof prpsoffsets32[0]) -#define NOFFSETS64 (sizeof prpsoffsets64 / sizeof prpsoffsets64[0]) - -#define NOFFSETS (class == ELFCLASS32 ? NOFFSETS32 : NOFFSETS64) - -/* - * Look through the program headers of an executable image, searching - * for a PT_NOTE section of type NT_PRPSINFO, with a name "CORE" or - * "FreeBSD"; if one is found, try looking in various places in its - * contents for a 16-character string containing only printable - * characters - if found, that string should be the name of the program - * that dropped core. Note: right after that 16-character string is, - * at least in SunOS 5.x (and possibly other SVR4-flavored systems) and - * Linux, a longer string (80 characters, in 5.x, probably other - * SVR4-flavored systems, and Linux) containing the start of the - * command line for that program. - * - * SunOS 5.x core files contain two PT_NOTE sections, with the types - * NT_PRPSINFO (old) and NT_PSINFO (new). These structs contain the - * same info about the command name and command line, so it probably - * isn't worthwhile to look for NT_PSINFO, but the offsets are provided - * above (see USE_NT_PSINFO), in case we ever decide to do so. 
The - * NT_PRPSINFO and NT_PSINFO sections are always in order and adjacent; - * the SunOS 5.x file command relies on this (and prefers the latter). - * - * The signal number probably appears in a section of type NT_PRSTATUS, - * but that's also rather OS-dependent, in ways that are harder to - * dissect with heuristics, so I'm not bothering with the signal number. - * (I suppose the signal number could be of interest in situations where - * you don't have the binary of the program that dropped core; if you - * *do* have that binary, the debugger will probably tell you what - * signal it was.) - */ - -#define OS_STYLE_SVR4 0 -#define OS_STYLE_FREEBSD 1 -#define OS_STYLE_NETBSD 2 - -private const char os_style_names[][8] = { - "SVR4", - "FreeBSD", - "NetBSD", -}; - -#define FLAGS_DID_CORE 1 -#define FLAGS_DID_NOTE 2 -#define FLAGS_DID_CORE_STYLE 4 - -private int -dophn_core(struct magic_set *ms, int class, int swap, int fd, off_t off, - int num, size_t size, off_t fsize, int *flags) -{ - Elf32_Phdr ph32; - Elf64_Phdr ph64; - size_t offset; - unsigned char nbuf[BUFSIZ]; - ssize_t bufsize; - off_t savedoffset; - struct stat st; - - if (fstat(fd, &st) < 0) { - file_badread(ms); - return -1; - } - - if (size != xph_sizeof) { - if (file_printf(ms, ", corrupted program header size") == -1) - return -1; - return 0; - } - - /* - * Loop through all the program headers. - */ - for ( ; num; num--) { - if ((savedoffset = lseek(fd, off, SEEK_SET)) == (off_t)-1) { - file_badseek(ms); - return -1; - } - if (read(fd, xph_addr, xph_sizeof) == -1) { - file_badread(ms); - return -1; - } - if (xph_offset > fsize) { - if (lseek(fd, savedoffset, SEEK_SET) == (off_t)-1) { - file_badseek(ms); - return -1; - } - continue; - } - - off += size; - if (xph_type != PT_NOTE) - continue; - - /* - * This is a PT_NOTE section; loop through all the notes - * in the section. 
- */ - if (lseek(fd, xph_offset, SEEK_SET) == (off_t)-1) { - file_badseek(ms); - return -1; - } - bufsize = read(fd, nbuf, - ((xph_filesz < sizeof(nbuf)) ? xph_filesz : sizeof(nbuf))); - if (bufsize == -1) { - file_badread(ms); - return -1; - } - offset = 0; - for (;;) { - if (offset >= (size_t)bufsize) - break; - offset = donote(ms, nbuf, offset, (size_t)bufsize, - class, swap, 4, flags); - if (offset == 0) - break; - - } - } - return 0; -} -#endif - -private size_t -donote(struct magic_set *ms, unsigned char *nbuf, size_t offset, size_t size, - int class, int swap, size_t align, int *flags) -{ - Elf32_Nhdr nh32; - Elf64_Nhdr nh64; - size_t noff, doff; -#ifdef ELFCORE - int os_style = -1; -#endif - uint32_t namesz, descsz; - - if (xnh_sizeof + offset > size) { - /* - * We're out of note headers. - */ - return xnh_sizeof + offset; - } - - (void)memcpy(xnh_addr, &nbuf[offset], xnh_sizeof); - offset += xnh_sizeof; - - namesz = xnh_namesz; - descsz = xnh_descsz; - if ((namesz == 0) && (descsz == 0)) { - /* - * We're out of note headers. - */ - return (offset >= size) ? offset : size; - } - - if (namesz & 0x80000000) { - (void)file_printf(ms, ", bad note name size 0x%lx", - (unsigned long)namesz); - return offset; - } - - if (descsz & 0x80000000) { - (void)file_printf(ms, ", bad note description size 0x%lx", - (unsigned long)descsz); - return offset; - } - - - noff = offset; - doff = ELF_ALIGN(offset + namesz); - - if (offset + namesz > size) { - /* - * We're past the end of the buffer. - */ - return doff; - } - - offset = ELF_ALIGN(doff + descsz); - if (doff + descsz > size) { - /* - * We're past the end of the buffer. - */ - return (offset >= size) ? 
offset : size; - } - - if (*flags & FLAGS_DID_NOTE) - goto core; - - if (namesz == 4 && strcmp((char *)&nbuf[noff], "GNU") == 0 && - xnh_type == NT_GNU_VERSION && descsz == 16) { - uint32_t desc[4]; - (void)memcpy(desc, &nbuf[doff], sizeof(desc)); - - if (file_printf(ms, ", for GNU/") == -1) - return size; - switch (elf_getu32(swap, desc[0])) { - case GNU_OS_LINUX: - if (file_printf(ms, "Linux") == -1) - return size; - break; - case GNU_OS_HURD: - if (file_printf(ms, "Hurd") == -1) - return size; - break; - case GNU_OS_SOLARIS: - if (file_printf(ms, "Solaris") == -1) - return size; - break; - case GNU_OS_KFREEBSD: - if (file_printf(ms, "kFreeBSD") == -1) - return size; - break; - case GNU_OS_KNETBSD: - if (file_printf(ms, "kNetBSD") == -1) - return size; - break; - default: - if (file_printf(ms, "") == -1) - return size; - } - if (file_printf(ms, " %d.%d.%d", elf_getu32(swap, desc[1]), - elf_getu32(swap, desc[2]), elf_getu32(swap, desc[3])) == -1) - return size; - *flags |= FLAGS_DID_NOTE; - return size; - } - - if (namesz == 7 && strcmp((char *)&nbuf[noff], "NetBSD") == 0 && - xnh_type == NT_NETBSD_VERSION && descsz == 4) { - uint32_t desc; - (void)memcpy(&desc, &nbuf[doff], sizeof(desc)); - desc = elf_getu32(swap, desc); - - if (file_printf(ms, ", for NetBSD") == -1) - return size; - /* - * The version number used to be stuck as 199905, and was thus - * basically content-free. 
Newer versions of NetBSD have fixed - * this and now use the encoding of __NetBSD_Version__: - * - * MMmmrrpp00 - * - * M = major version - * m = minor version - * r = release ["",A-Z,Z[A-Z] but numeric] - * p = patchlevel - */ - if (desc > 100000000U) { - uint32_t ver_patch = (desc / 100) % 100; - uint32_t ver_rel = (desc / 10000) % 100; - uint32_t ver_min = (desc / 1000000) % 100; - uint32_t ver_maj = desc / 100000000; - - if (file_printf(ms, " %u.%u", ver_maj, ver_min) == -1) - return size; - if (ver_rel == 0 && ver_patch != 0) { - if (file_printf(ms, ".%u", ver_patch) == -1) - return size; - } else if (ver_rel != 0) { - while (ver_rel > 26) { - if (file_printf(ms, "Z") == -1) - return size; - ver_rel -= 26; - } - if (file_printf(ms, "%c", 'A' + ver_rel - 1) - == -1) - return size; - } - } - *flags |= FLAGS_DID_NOTE; - return size; - } - - if (namesz == 8 && strcmp((char *)&nbuf[noff], "FreeBSD") == 0 && - xnh_type == NT_FREEBSD_VERSION && descsz == 4) { - uint32_t desc; - (void)memcpy(&desc, &nbuf[doff], sizeof(desc)); - desc = elf_getu32(swap, desc); - if (file_printf(ms, ", for FreeBSD") == -1) - return size; - - /* - * Contents is __FreeBSD_version, whose relation to OS - * versions is defined by a huge table in the Porter's - * Handbook. This is the general scheme: - * - * Releases: - * Mmp000 (before 4.10) - * Mmi0p0 (before 5.0) - * Mmm0p0 - * - * Development branches: - * Mmpxxx (before 4.6) - * Mmp1xx (before 4.10) - * Mmi1xx (before 5.0) - * M000xx (pre-M.0) - * Mmm1xx - * - * M = major version - * m = minor version - * i = minor version increment (491000 -> 4.10) - * p = patchlevel - * x = revision - * - * The first release of FreeBSD to use ELF by default - * was version 3.0. 
- */ - if (desc == 460002) { - if (file_printf(ms, " 4.6.2") == -1) - return size; - } else if (desc < 460100) { - if (file_printf(ms, " %d.%d", desc / 100000, - desc / 10000 % 10) == -1) - return size; - if (desc / 1000 % 10 > 0) - if (file_printf(ms, ".%d", desc / 1000 % 10) - == -1) - return size; - if ((desc % 1000 > 0) || (desc % 100000 == 0)) - if (file_printf(ms, " (%d)", desc) == -1) - return size; - } else if (desc < 500000) { - if (file_printf(ms, " %d.%d", desc / 100000, - desc / 10000 % 10 + desc / 1000 % 10) == -1) - return size; - if (desc / 100 % 10 > 0) { - if (file_printf(ms, " (%d)", desc) == -1) - return size; - } else if (desc / 10 % 10 > 0) { - if (file_printf(ms, ".%d", desc / 10 % 10) - == -1) - return size; - } - } else { - if (file_printf(ms, " %d.%d", desc / 100000, - desc / 1000 % 100) == -1) - return size; - if ((desc / 100 % 10 > 0) || - (desc % 100000 / 100 == 0)) { - if (file_printf(ms, " (%d)", desc) == -1) - return size; - } else if (desc / 10 % 10 > 0) { - if (file_printf(ms, ".%d", desc / 10 % 10) - == -1) - return size; - } - } - *flags |= FLAGS_DID_NOTE; - return size; - } - - if (namesz == 8 && strcmp((char *)&nbuf[noff], "OpenBSD") == 0 && - xnh_type == NT_OPENBSD_VERSION && descsz == 4) { - if (file_printf(ms, ", for OpenBSD") == -1) - return size; - /* Content of note is always 0 */ - *flags |= FLAGS_DID_NOTE; - return size; - } - - if (namesz == 10 && strcmp((char *)&nbuf[noff], "DragonFly") == 0 && - xnh_type == NT_DRAGONFLY_VERSION && descsz == 4) { - uint32_t desc; - if (file_printf(ms, ", for DragonFly") == -1) - return size; - (void)memcpy(&desc, &nbuf[doff], sizeof(desc)); - desc = elf_getu32(swap, desc); - if (file_printf(ms, " %d.%d.%d", desc / 100000, - desc / 10000 % 10, desc % 10000) == -1) - return size; - *flags |= FLAGS_DID_NOTE; - return size; - } - -core: - /* - * Sigh. 
The 2.0.36 kernel in Debian 2.1, at - * least, doesn't correctly implement name - * sections, in core dumps, as specified by - * the "Program Linking" section of "UNIX(R) System - * V Release 4 Programmer's Guide: ANSI C and - * Programming Support Tools", because my copy - * clearly says "The first 'namesz' bytes in 'name' - * contain a *null-terminated* [emphasis mine] - * character representation of the entry's owner - * or originator", but the 2.0.36 kernel code - * doesn't include the terminating null in the - * name.... - */ - if ((namesz == 4 && strncmp((char *)&nbuf[noff], "CORE", 4) == 0) || - (namesz == 5 && strcmp((char *)&nbuf[noff], "CORE") == 0)) { - os_style = OS_STYLE_SVR4; - } - - if ((namesz == 8 && strcmp((char *)&nbuf[noff], "FreeBSD") == 0)) { - os_style = OS_STYLE_FREEBSD; - } - - if ((namesz >= 11 && strncmp((char *)&nbuf[noff], "NetBSD-CORE", 11) - == 0)) { - os_style = OS_STYLE_NETBSD; - } - -#ifdef ELFCORE - if ((*flags & FLAGS_DID_CORE) != 0) - return size; - - if (os_style != -1 && (*flags & FLAGS_DID_CORE_STYLE) == 0) { - if (file_printf(ms, ", %s-style", os_style_names[os_style]) - == -1) - return size; - *flags |= FLAGS_DID_CORE_STYLE; - } - - switch (os_style) { - case OS_STYLE_NETBSD: - if (xnh_type == NT_NETBSD_CORE_PROCINFO) { - uint32_t signo; - /* - * Extract the program name. It is at - * offset 0x7c, and is up to 32-bytes, - * including the terminating NUL. - */ - if (file_printf(ms, ", from '%.31s'", - &nbuf[doff + 0x7c]) == -1) - return size; - - /* - * Extract the signal number. It is at - * offset 0x08. - */ - (void)memcpy(&signo, &nbuf[doff + 0x08], - sizeof(signo)); - if (file_printf(ms, " (signal %u)", - elf_getu32(swap, signo)) == -1) - return size; - *flags |= FLAGS_DID_CORE; - return size; - } - break; - - default: - if (xnh_type == NT_PRPSINFO) { - size_t i, j; - unsigned char c; - /* - * Extract the program name. We assume - * it to be 16 characters (that's what it - * is in SunOS 5.x and Linux). 
- * - * Unfortunately, it's at a different offset - * in various OSes, so try multiple offsets. - * If the characters aren't all printable, - * reject it. - */ - for (i = 0; i < NOFFSETS; i++) { - unsigned char *cname, *cp; - size_t reloffset = prpsoffsets(i); - size_t noffset = doff + reloffset; - for (j = 0; j < 16; j++, noffset++, - reloffset++) { - /* - * Make sure we're not past - * the end of the buffer; if - * we are, just give up. - */ - if (noffset >= size) - goto tryanother; - - /* - * Make sure we're not past - * the end of the contents; - * if we are, this obviously - * isn't the right offset. - */ - if (reloffset >= descsz) - goto tryanother; - - c = nbuf[noffset]; - if (c == '\0') { - /* - * A '\0' at the - * beginning is - * obviously wrong. - * Any other '\0' - * means we're done. - */ - if (j == 0) - goto tryanother; - else - break; - } else { - /* - * A nonprintable - * character is also - * wrong. - */ - if (!isprint(c) || isquote(c)) - goto tryanother; - } - } - /* - * Well, that worked. - */ - cname = (unsigned char *) - &nbuf[doff + prpsoffsets(i)]; - for (cp = cname; *cp && isprint(*cp); cp++) - continue; - /* - * Linux apparently appends a space at the end - * of the command line: remove it. 
- */ - while (cp > cname && isspace(cp[-1])) - cp--; - if (file_printf(ms, ", from '%.*s'", - (int)(cp - cname), cname) == -1) - return size; - *flags |= FLAGS_DID_CORE; - return size; - - tryanother: - ; - } - } - break; - } -#endif - return offset; -} - -private int -doshn(struct magic_set *ms, int class, int swap, int fd, off_t off, int num, - size_t size, int *flags) -{ - Elf32_Shdr sh32; - Elf64_Shdr sh64; - int stripped = 1; - void *nbuf; - off_t noff; - - if (size != xsh_sizeof) { - if (file_printf(ms, ", corrupted section header size") == -1) - return -1; - return 0; - } - - if (lseek(fd, off, SEEK_SET) == (off_t)-1) { - file_badseek(ms); - return -1; - } - - for ( ; num; num--) { - if (read(fd, xsh_addr, xsh_sizeof) == -1) { - file_badread(ms); - return -1; - } - switch (xsh_type) { - case SHT_SYMTAB: -#if 0 - case SHT_DYNSYM: -#endif - stripped = 0; - break; - case SHT_NOTE: - if ((off = lseek(fd, (off_t)0, SEEK_CUR)) == - (off_t)-1) { - file_badread(ms); - return -1; - } - if ((nbuf = malloc((size_t)xsh_size)) == NULL) { - file_error(ms, errno, "Cannot allocate memory" - " for note"); - return -1; - } - if ((noff = lseek(fd, (off_t)xsh_offset, SEEK_SET)) == - (off_t)-1) { - file_badread(ms); - free(nbuf); - return -1; - } - if (read(fd, nbuf, (size_t)xsh_size) != - (ssize_t)xsh_size) { - free(nbuf); - file_badread(ms); - return -1; - } - - noff = 0; - for (;;) { - if (noff >= (size_t)xsh_size) - break; - noff = donote(ms, nbuf, (size_t)noff, - (size_t)xsh_size, class, swap, 4, - flags); - if (noff == 0) - break; - } - if ((lseek(fd, off, SEEK_SET)) == (off_t)-1) { - free(nbuf); - file_badread(ms); - return -1; - } - free(nbuf); - break; - } - } - if (file_printf(ms, ", %sstripped", stripped ? "" : "not ") == -1) - return -1; - return 0; -} - -/* - * Look through the program headers of an executable image, searching - * for a PT_INTERP section; if one is found, it's dynamically linked, - * otherwise it's statically linked. 
- */ -private int -dophn_exec(struct magic_set *ms, int class, int swap, int fd, off_t off, - int num, size_t size, off_t fsize, int *flags) -{ - Elf32_Phdr ph32; - Elf64_Phdr ph64; - const char *linking_style = "statically"; - const char *shared_libraries = ""; - unsigned char nbuf[BUFSIZ]; - int bufsize; - size_t offset, align; - off_t savedoffset = (off_t)-1; - struct stat st; - - if (fstat(fd, &st) < 0) { - file_badread(ms); - return -1; - } - - if (size != xph_sizeof) { - if (file_printf(ms, ", corrupted program header size") == -1) - return -1; - return 0; - } - - if (lseek(fd, off, SEEK_SET) == (off_t)-1) { - file_badseek(ms); - return -1; - } - - for ( ; num; num--) { - if (read(fd, xph_addr, xph_sizeof) == -1) { - file_badread(ms); - return -1; - } - if (xph_offset > st.st_size && savedoffset != (off_t)-1) { - if (lseek(fd, savedoffset, SEEK_SET) == (off_t)-1) { - file_badseek(ms); - return -1; - } - continue; - } - - if ((savedoffset = lseek(fd, (off_t)0, SEEK_CUR)) == (off_t)-1) { - file_badseek(ms); - return -1; - } - - if (xph_offset > fsize) { - if (lseek(fd, savedoffset, SEEK_SET) == (off_t)-1) { - file_badseek(ms); - return -1; - } - continue; - } - - switch (xph_type) { - case PT_DYNAMIC: - linking_style = "dynamically"; - break; - case PT_INTERP: - shared_libraries = " (uses shared libs)"; - break; - case PT_NOTE: - if ((align = xph_align) & 0x80000000) { - if (file_printf(ms, - ", invalid note alignment 0x%lx", - (unsigned long)align) == -1) - return -1; - align = 4; - } - /* - * This is a PT_NOTE section; loop through all the notes - * in the section. - */ - if (lseek(fd, xph_offset, SEEK_SET) - == (off_t)-1) { - file_badseek(ms); - return -1; - } - bufsize = read(fd, nbuf, ((xph_filesz < sizeof(nbuf)) ? 
- xph_filesz : sizeof(nbuf))); - if (bufsize == -1) { - file_badread(ms); - return -1; - } - offset = 0; - for (;;) { - if (offset >= (size_t)bufsize) - break; - offset = donote(ms, nbuf, offset, - (size_t)bufsize, class, swap, align, - flags); - if (offset == 0) - break; - } - if (lseek(fd, savedoffset, SEEK_SET) == (off_t)-1) { - file_badseek(ms); - return -1; - } - break; - default: - break; - } - } - if (file_printf(ms, ", %s linked%s", linking_style, shared_libraries) - == -1) - return -1; - return 0; -} - - -protected int -file_tryelf(struct magic_set *ms, int fd, const unsigned char *buf, - size_t nbytes) -{ - union { - int32_t l; - char c[sizeof (int32_t)]; - } u; - int class; - int swap; - struct stat st; - off_t fsize; - int flags = 0; - Elf32_Ehdr elf32hdr; - Elf64_Ehdr elf64hdr; - uint16_t type; - - if (ms->flags & MAGIC_MIME) - return 0; - /* - * ELF executables have multiple section headers in arbitrary - * file locations and thus file(1) cannot determine it from easily. - * Instead we traverse thru all section headers until a symbol table - * one is found or else the binary is stripped. - * Return immediately if it's not ELF (so we avoid pipe2file unless needed). - */ - if (buf[EI_MAG0] != ELFMAG0 - || (buf[EI_MAG1] != ELFMAG1 && buf[EI_MAG1] != OLFMAG1) - || buf[EI_MAG2] != ELFMAG2 || buf[EI_MAG3] != ELFMAG3) - return 0; - - /* - * If we cannot seek, it must be a pipe, socket or fifo. 
- */ - if((lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) && (errno == ESPIPE)) - fd = file_pipe2file(ms, fd, buf, nbytes); - - if (fstat(fd, &st) == -1) { - file_badread(ms); - return -1; - } - fsize = st.st_size; - - class = buf[EI_CLASS]; - - switch (class) { - case ELFCLASS32: -#undef elf_getu -#define elf_getu(a, b) elf_getu32(a, b) -#undef elfhdr -#define elfhdr elf32hdr -#include "elfclass.h" - case ELFCLASS64: -#undef elf_getu -#define elf_getu(a, b) elf_getu64(a, b) -#undef elfhdr -#define elfhdr elf64hdr -#include "elfclass.h" - default: - if (file_printf(ms, ", unknown class %d", class) == -1) - return -1; - break; - } - return 0; -} -#endif diff --git a/usr.bin/file/readelf.h b/usr.bin/file/readelf.h deleted file mode 100644 index 6d5bc30158f..00000000000 --- a/usr.bin/file/readelf.h +++ /dev/null @@ -1,237 +0,0 @@ -/* $OpenBSD: readelf.h,v 1.7 2009/04/24 18:54:34 chl Exp $ */ -/* - * Copyright (c) Christos Zoulas 2003. - * All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * @(#)Id: readelf.h,v 1.9 2002/05/16 18:45:56 christos Exp - * - * Provide elf data structures for non-elf machines, allowing file - * non-elf hosts to determine if an elf binary is stripped. - * Note: cobbled from the linux header file, with modifications - */ -#ifndef __fake_elf_h__ -#define __fake_elf_h__ - -#if HAVE_STDINT_H -#include -#endif - -typedef uint32_t Elf32_Addr; -typedef uint32_t Elf32_Off; -typedef uint16_t Elf32_Half; -typedef uint32_t Elf32_Word; -typedef uint8_t Elf32_Char; - -#if SIZEOF_LONG_LONG != 8 -#define USE_ARRAY_FOR_64BIT_TYPES -typedef uint32_t Elf64_Addr[2]; -typedef uint32_t Elf64_Off[2]; -typedef uint32_t Elf64_Xword[2]; -#else -#undef USE_ARRAY_FOR_64BIT_TYPES -typedef uint64_t Elf64_Addr; -typedef uint64_t Elf64_Off; -typedef uint64_t Elf64_Xword; -#endif -typedef uint16_t Elf64_Half; -typedef uint32_t Elf64_Word; -typedef uint8_t Elf64_Char; - -#define EI_NIDENT 16 - -typedef struct { - Elf32_Char e_ident[EI_NIDENT]; - Elf32_Half e_type; - Elf32_Half e_machine; - Elf32_Word e_version; - Elf32_Addr e_entry; /* Entry point */ - Elf32_Off e_phoff; - Elf32_Off e_shoff; - Elf32_Word e_flags; - Elf32_Half e_ehsize; - Elf32_Half e_phentsize; - Elf32_Half e_phnum; - Elf32_Half e_shentsize; - Elf32_Half e_shnum; - Elf32_Half e_shstrndx; -} Elf32_Ehdr; - -typedef struct { - Elf64_Char e_ident[EI_NIDENT]; - Elf64_Half e_type; - Elf64_Half e_machine; - Elf64_Word e_version; - Elf64_Addr e_entry; /* Entry 
point */ - Elf64_Off e_phoff; - Elf64_Off e_shoff; - Elf64_Word e_flags; - Elf64_Half e_ehsize; - Elf64_Half e_phentsize; - Elf64_Half e_phnum; - Elf64_Half e_shentsize; - Elf64_Half e_shnum; - Elf64_Half e_shstrndx; -} Elf64_Ehdr; - -/* e_type */ -#define ET_REL 1 -#define ET_EXEC 2 -#define ET_DYN 3 -#define ET_CORE 4 - -/* sh_type */ -#define SHT_SYMTAB 2 -#define SHT_NOTE 7 -#define SHT_DYNSYM 11 - -/* elf type */ -#define ELFDATANONE 0 /* e_ident[EI_DATA] */ -#define ELFDATA2LSB 1 -#define ELFDATA2MSB 2 - -/* elf class */ -#define ELFCLASSNONE 0 -#define ELFCLASS32 1 -#define ELFCLASS64 2 - -/* magic number */ -#define EI_MAG0 0 /* e_ident[] indexes */ -#define EI_MAG1 1 -#define EI_MAG2 2 -#define EI_MAG3 3 -#define EI_CLASS 4 -#define EI_DATA 5 -#define EI_VERSION 6 -#define EI_PAD 7 - -#define ELFMAG0 0x7f /* EI_MAG */ -#define ELFMAG1 'E' -#define ELFMAG2 'L' -#define ELFMAG3 'F' -#define ELFMAG "\177ELF" - -#define OLFMAG1 'O' -#define OLFMAG "\177OLF" - -typedef struct { - Elf32_Word p_type; - Elf32_Off p_offset; - Elf32_Addr p_vaddr; - Elf32_Addr p_paddr; - Elf32_Word p_filesz; - Elf32_Word p_memsz; - Elf32_Word p_flags; - Elf32_Word p_align; -} Elf32_Phdr; - -typedef struct { - Elf64_Word p_type; - Elf64_Word p_flags; - Elf64_Off p_offset; - Elf64_Addr p_vaddr; - Elf64_Addr p_paddr; - Elf64_Xword p_filesz; - Elf64_Xword p_memsz; - Elf64_Xword p_align; -} Elf64_Phdr; - -#define PT_NULL 0 /* p_type */ -#define PT_LOAD 1 -#define PT_DYNAMIC 2 -#define PT_INTERP 3 -#define PT_NOTE 4 -#define PT_SHLIB 5 -#define PT_PHDR 6 -#define PT_NUM 7 - -typedef struct { - Elf32_Word sh_name; - Elf32_Word sh_type; - Elf32_Word sh_flags; - Elf32_Addr sh_addr; - Elf32_Off sh_offset; - Elf32_Word sh_size; - Elf32_Word sh_link; - Elf32_Word sh_info; - Elf32_Word sh_addralign; - Elf32_Word sh_entsize; -} Elf32_Shdr; - -typedef struct { - Elf64_Word sh_name; - Elf64_Word sh_type; - Elf64_Off sh_flags; - Elf64_Addr sh_addr; - Elf64_Off sh_offset; - Elf64_Off sh_size; - 
Elf64_Word sh_link; - Elf64_Word sh_info; - Elf64_Off sh_addralign; - Elf64_Off sh_entsize; -} Elf64_Shdr; - -#define NT_NETBSD_CORE_PROCINFO 1 - -/* Note header in a PT_NOTE section */ -typedef struct elf_note { - Elf32_Word n_namesz; /* Name size */ - Elf32_Word n_descsz; /* Content size */ - Elf32_Word n_type; /* Content type */ -} Elf32_Nhdr; - -typedef struct { - Elf64_Word n_namesz; - Elf64_Word n_descsz; - Elf64_Word n_type; -} Elf64_Nhdr; - -/* Notes used in ET_CORE */ -#define NT_PRSTATUS 1 -#define NT_PRFPREG 2 -#define NT_PRPSINFO 3 -#define NT_PRXREG 4 -#define NT_TASKSTRUCT 4 -#define NT_PLATFORM 5 -#define NT_AUXV 6 - -/* Note types used in executables */ -/* NetBSD executables (name = "NetBSD") */ -#define NT_NETBSD_VERSION 1 -#define NT_NETBSD_EMULATION 2 -#define NT_FREEBSD_VERSION 1 -#define NT_OPENBSD_VERSION 1 -#define NT_DRAGONFLY_VERSION 1 -/* GNU executables (name = "GNU") */ -#define NT_GNU_VERSION 1 - -/* GNU OS tags */ -#define GNU_OS_LINUX 0 -#define GNU_OS_HURD 1 -#define GNU_OS_SOLARIS 2 -#define GNU_OS_KFREEBSD 3 -#define GNU_OS_KNETBSD 4 - -#endif diff --git a/usr.bin/file/softmagic.c b/usr.bin/file/softmagic.c deleted file mode 100644 index 6aa16d2725f..00000000000 --- a/usr.bin/file/softmagic.c +++ /dev/null @@ -1,1821 +0,0 @@ -/* $OpenBSD: softmagic.c,v 1.17 2013/04/17 15:01:26 deraadt Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * softmagic - interpret variable magic from MAGIC - */ - -#include "file.h" -#include "magic.h" -#include -#include -#include -#include - - -private int match(struct magic_set *, struct magic *, uint32_t, - const unsigned char *, size_t, int); -private int mget(struct magic_set *, const unsigned char *, - struct magic *, size_t, unsigned int); -private int magiccheck(struct magic_set *, struct magic *); -private int32_t mprint(struct magic_set *, struct magic *); -private void mdebug(uint32_t, const char *, size_t); -private int mcopy(struct magic_set *, union VALUETYPE *, int, int, - const unsigned char *, uint32_t, size_t, size_t); -private int mconvert(struct magic_set *, struct magic *); -private int print_sep(struct magic_set *, int); -private void cvt_8(union VALUETYPE *, const struct magic *); -private void cvt_16(union VALUETYPE *, const struct magic *); -private void cvt_32(union VALUETYPE *, const struct magic *); -private void cvt_64(union VALUETYPE *, const struct 
magic *); - -/* - * Macro to give description string according to whether we want plain - * text or MIME type - */ -#define MAGIC_DESC ((ms->flags & MAGIC_MIME) ? m->mimetype : m->desc) - -/* - * softmagic - lookup one file in parsed, in-memory copy of database - * Passed the name and FILE * of one file to be typed. - */ -/*ARGSUSED1*/ /* nbytes passed for regularity, maybe need later */ -protected int -file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes, int mode) -{ - struct mlist *ml; - int rv; - for (ml = ms->mlist->next; ml != ms->mlist; ml = ml->next) - if ((rv = match(ms, ml->magic, ml->nmagic, buf, nbytes, mode)) != 0) - return rv; - - return 0; -} - -/* - * Go through the whole list, stopping if you find a match. Process all - * the continuations of that match before returning. - * - * We support multi-level continuations: - * - * At any time when processing a successful top-level match, there is a - * current continuation level; it represents the level of the last - * successfully matched continuation. - * - * Continuations above that level are skipped as, if we see one, it - * means that the continuation that controls them - i.e, the - * lower-level continuation preceding them - failed to match. - * - * Continuations below that level are processed as, if we see one, - * it means we've finished processing or skipping higher-level - * continuations under the control of a successful or unsuccessful - * lower-level continuation, and are now seeing the next lower-level - * continuation and should process it. The current continuation - * level reverts to the level of the one we're seeing. - * - * Continuations at the current level are processed as, if we see - * one, there's no lower-level continuation that may have failed. - * - * If a continuation matches, we bump the current continuation level - * so that higher-level continuations are processed. 
- */ -private int -match(struct magic_set *ms, struct magic *magic, uint32_t nmagic, - const unsigned char *s, size_t nbytes, int mode) -{ - uint32_t magindex = 0; - unsigned int cont_level = 0; - int need_separator = 0; - int returnval = 0; /* if a match is found it is set to 1*/ - int firstline = 1; /* a flag to print X\n X\n- X */ - int printed_something = 0; - - if (file_check_mem(ms, cont_level) == -1) - return -1; - - for (magindex = 0; magindex < nmagic; magindex++) { - int flush; - struct magic *m = &magic[magindex]; - - if ((m->flag & BINTEST) != mode) { - /* Skip sub-tests */ - while (magic[magindex + 1].cont_level != 0 && - ++magindex < nmagic) - continue; - continue; /* Skip to next top-level test*/ - } - - ms->offset = m->offset; - ms->line = m->lineno; - - /* if main entry matches, print it... */ - flush = !mget(ms, s, m, nbytes, cont_level); - if (flush) { - if (m->reln == '!') - flush = 0; - } else { - switch (magiccheck(ms, m)) { - case -1: - return -1; - case 0: - flush++; - break; - default: - break; - } - } - if (flush) { - /* - * main entry didn't match, - * flush its continuations - */ - while (magindex < nmagic - 1 && - magic[magindex + 1].cont_level != 0) - magindex++; - continue; - } - - /* - * If we are going to print something, we'll need to print - * a blank before we print something else. - */ - if (*MAGIC_DESC) { - need_separator = 1; - printed_something = 1; - if (print_sep(ms, firstline) == -1) - return -1; - } - - if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1) - return -1; - - /* and any continuations that match */ - if (file_check_mem(ms, ++cont_level) == -1) - return -1; - - while (magic[magindex+1].cont_level != 0 && - ++magindex < nmagic) { - m = &magic[magindex]; - ms->line = m->lineno; /* for messages */ - - if (cont_level < m->cont_level) - continue; - if (cont_level > m->cont_level) { - /* - * We're at the end of the level - * "cont_level" continuations. 
- */ - cont_level = m->cont_level; - } - ms->offset = m->offset; - if (m->flag & OFFADD) { - ms->offset += - ms->c.li[cont_level - 1].off; - } - -#ifdef ENABLE_CONDITIONALS - if (m->cond == COND_ELSE || - m->cond == COND_ELIF) { - if (ms->c.li[cont_level].last_match == 1) - continue; - } -#endif - flush = !mget(ms, s, m, nbytes, cont_level); - if (flush && m->reln != '!') - continue; - - switch (flush ? 1 : magiccheck(ms, m)) { - case -1: - return -1; - case 0: -#ifdef ENABLE_CONDITIONALS - ms->c.li[cont_level].last_match = 0; -#endif - break; - default: -#ifdef ENABLE_CONDITIONALS - ms->c.li[cont_level].last_match = 1; -#endif - if (m->type != FILE_DEFAULT) - ms->c.li[cont_level].got_match = 1; - else if (ms->c.li[cont_level].got_match) { - ms->c.li[cont_level].got_match = 0; - break; - } - /* - * If we are going to print something, - * make sure that we have a separator first. - */ - if (*MAGIC_DESC) { - printed_something = 1; - if (print_sep(ms, firstline) == -1) - return -1; - } - /* - * This continuation matched. Print - * its message, with a blank before it - * if the previous item printed and - * this item isn't empty. - */ - /* space if previous printed */ - if (need_separator - && ((m->flag & NOSPACE) == 0) - && *MAGIC_DESC) { - if (file_printf(ms, " ") == -1) - return -1; - need_separator = 0; - } - if ((ms->c.li[cont_level].off = mprint(ms, m)) == -1) - return -1; - if (*MAGIC_DESC) - need_separator = 1; - - /* - * If we see any continuations - * at a higher level, - * process them. 
- */ - if (file_check_mem(ms, ++cont_level) == -1) - return -1; - break; - } - } - if (printed_something) { - firstline = 0; - returnval = 1; - } - if ((ms->flags & MAGIC_CONTINUE) == 0 && printed_something) { - return 1; /* don't keep searching */ - } - } - return returnval; /* This is hit if -k is set or there is no match */ -} - -private int -check_fmt(struct magic_set *ms, struct magic *m) -{ - regex_t rx; - int rc; - - if (strchr(MAGIC_DESC, '%') == NULL) - return 0; - - rc = regcomp(&rx, "%[-0-9\\.]*s", REG_EXTENDED|REG_NOSUB); - if (rc) { - char errmsg[512]; - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regex error %d, (%s)", rc, errmsg); - return -1; - } else { - rc = regexec(&rx, MAGIC_DESC, 0, 0, 0); - regfree(&rx); - return !rc; - } -} - -#ifndef HAVE_STRNDUP -char * strndup(const char *, size_t); - -char * -strndup(const char *str, size_t n) -{ - size_t len; - char *copy; - - for (len = 0; len < n && str[len]; len++) - continue; - if ((copy = malloc(len + 1)) == NULL) - return NULL; - (void)memcpy(copy, str, len); - copy[len] = '\0'; - return copy; -} -#endif /* HAVE_STRNDUP */ - -private int32_t -mprint(struct magic_set *ms, struct magic *m) -{ - uint64_t v; - float vf; - double vd; - int64_t t = 0; - char *buf; - union VALUETYPE *p = &ms->ms_value; - - switch (m->type) { - case FILE_BYTE: - v = file_signextend(ms, m, (uint64_t)p->b); - switch (check_fmt(ms, m)) { - case -1: - return -1; - case 1: - if (asprintf(&buf, "%c", (unsigned char)v) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) - return -1; - break; - default: - if (file_printf(ms, MAGIC_DESC, (unsigned char) v) == -1) - return -1; - break; - } - t = ms->offset + sizeof(char); - break; - - case FILE_SHORT: - case FILE_BESHORT: - case FILE_LESHORT: - v = file_signextend(ms, m, (uint64_t)p->h); - switch (check_fmt(ms, m)) { - case -1: - return -1; - case 1: - if (asprintf(&buf, "%hu", (unsigned short)v) < 0) - return -1; - if (file_printf(ms, 
MAGIC_DESC, buf) == -1) - return -1; - break; - default: - if (file_printf(ms, MAGIC_DESC, (unsigned short) v) == -1) - return -1; - break; - } - t = ms->offset + sizeof(short); - break; - - case FILE_LONG: - case FILE_BELONG: - case FILE_LELONG: - case FILE_MELONG: - v = file_signextend(ms, m, (uint64_t)p->l); - switch (check_fmt(ms, m)) { - case -1: - return -1; - case 1: - if (asprintf(&buf, "%u", (uint32_t)v) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) - return -1; - break; - default: - if (file_printf(ms, MAGIC_DESC, (uint32_t) v) == -1) - return -1; - break; - } - t = ms->offset + sizeof(int32_t); - break; - - case FILE_QUAD: - case FILE_BEQUAD: - case FILE_LEQUAD: - v = file_signextend(ms, m, p->q); - if (file_printf(ms, MAGIC_DESC, (uint64_t) v) == -1) - return -1; - t = ms->offset + sizeof(int64_t); - break; - - case FILE_STRING: - case FILE_PSTRING: - case FILE_BESTRING16: - case FILE_LESTRING16: - if (m->reln == '=' || m->reln == '!') { - if (file_printf(ms, MAGIC_DESC, m->value.s) == -1) - return -1; - t = ms->offset + m->vallen; - } - else { - if (*m->value.s == '\0') - p->s[strcspn(p->s, "\n")] = '\0'; - if (file_printf(ms, MAGIC_DESC, p->s) == -1) - return -1; - t = ms->offset + strlen(p->s); - if (m->type == FILE_PSTRING) - t++; - } - break; - - case FILE_DATE: - case FILE_BEDATE: - case FILE_LEDATE: - case FILE_MEDATE: - if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 1)) == -1) - return -1; - t = ms->offset + sizeof(int32_t); - break; - - case FILE_LDATE: - case FILE_BELDATE: - case FILE_LELDATE: - case FILE_MELDATE: - if (file_printf(ms, MAGIC_DESC, file_fmttime(p->l, 0)) == -1) - return -1; - t = ms->offset + sizeof(int32_t); - break; - - case FILE_QDATE: - case FILE_BEQDATE: - case FILE_LEQDATE: - if (file_printf(ms, MAGIC_DESC, file_fmttime(p->q, 1)) - == -1) - return -1; - t = ms->offset + sizeof(uint64_t); - break; - - case FILE_QLDATE: - case FILE_BEQLDATE: - case FILE_LEQLDATE: - if (file_printf(ms, MAGIC_DESC, 
file_fmttime(p->q, 0)) - == -1) - return -1; - t = ms->offset + sizeof(uint64_t); - break; - - case FILE_FLOAT: - case FILE_BEFLOAT: - case FILE_LEFLOAT: - vf = p->f; - switch (check_fmt(ms, m)) { - case -1: - return -1; - case 1: - if (asprintf(&buf, "%g", vf) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) - return -1; - break; - default: - if (file_printf(ms, MAGIC_DESC, vf) == -1) - return -1; - break; - } - t = ms->offset + sizeof(float); - break; - - case FILE_DOUBLE: - case FILE_BEDOUBLE: - case FILE_LEDOUBLE: - vd = p->d; - switch (check_fmt(ms, m)) { - case -1: - return -1; - case 1: - if (asprintf(&buf, "%g", vd) < 0) - return -1; - if (file_printf(ms, MAGIC_DESC, buf) == -1) - return -1; - break; - default: - if (file_printf(ms, MAGIC_DESC, vd) == -1) - return -1; - break; - } - t = ms->offset + sizeof(double); - break; - - case FILE_REGEX: { - char *cp; - int rval; - - cp = strndup((const char *)ms->search.s, ms->search.rm_len); - if (cp == NULL) { - file_oomem(ms, ms->search.rm_len); - return -1; - } - rval = file_printf(ms, MAGIC_DESC, cp); - free(cp); - - if (rval == -1) - return -1; - - if ((m->str_flags & REGEX_OFFSET_START)) - t = ms->search.offset; - else - t = ms->search.offset + ms->search.rm_len; - break; - } - - case FILE_SEARCH: - if (file_printf(ms, MAGIC_DESC, m->value.s) == -1) - return -1; - if ((m->str_flags & REGEX_OFFSET_START)) - t = ms->search.offset; - else - t = ms->search.offset + m->vallen; - break; - - case FILE_DEFAULT: - if (file_printf(ms, MAGIC_DESC, m->value.s) == -1) - return -1; - t = ms->offset; - break; - - default: - file_magerror(ms, "invalid m->type (%d) in mprint()", m->type); - return -1; - } - return(t); -} - - -#define DO_CVT(fld, cast) \ - if (m->num_mask) \ - switch (m->mask_op & FILE_OPS_MASK) { \ - case FILE_OPAND: \ - p->fld &= cast m->num_mask; \ - break; \ - case FILE_OPOR: \ - p->fld |= cast m->num_mask; \ - break; \ - case FILE_OPXOR: \ - p->fld ^= cast m->num_mask; \ - break; \ - case 
FILE_OPADD: \ - p->fld += cast m->num_mask; \ - break; \ - case FILE_OPMINUS: \ - p->fld -= cast m->num_mask; \ - break; \ - case FILE_OPMULTIPLY: \ - p->fld *= cast m->num_mask; \ - break; \ - case FILE_OPDIVIDE: \ - p->fld /= cast m->num_mask; \ - break; \ - case FILE_OPMODULO: \ - p->fld %= cast m->num_mask; \ - break; \ - } \ - if (m->mask_op & FILE_OPINVERSE) \ - p->fld = ~p->fld \ - -private void -cvt_8(union VALUETYPE *p, const struct magic *m) -{ - DO_CVT(b, (uint8_t)); -} - -private void -cvt_16(union VALUETYPE *p, const struct magic *m) -{ - DO_CVT(h, (uint16_t)); -} - -private void -cvt_32(union VALUETYPE *p, const struct magic *m) -{ - DO_CVT(l, (uint32_t)); -} - -private void -cvt_64(union VALUETYPE *p, const struct magic *m) -{ - DO_CVT(q, (uint64_t)); -} - -#define DO_CVT2(fld, cast) \ - if (m->num_mask) \ - switch (m->mask_op & FILE_OPS_MASK) { \ - case FILE_OPADD: \ - p->fld += cast m->num_mask; \ - break; \ - case FILE_OPMINUS: \ - p->fld -= cast m->num_mask; \ - break; \ - case FILE_OPMULTIPLY: \ - p->fld *= cast m->num_mask; \ - break; \ - case FILE_OPDIVIDE: \ - p->fld /= cast m->num_mask; \ - break; \ - } \ - -private void -cvt_float(union VALUETYPE *p, const struct magic *m) -{ - DO_CVT2(f, (float)); -} - -private void -cvt_double(union VALUETYPE *p, const struct magic *m) -{ - DO_CVT2(d, (double)); -} - -/* - * Convert the byte order of the data we are looking at - * While we're here, let's apply the mask operation - * (unless you have a better idea) - */ -private int -mconvert(struct magic_set *ms, struct magic *m) -{ - union VALUETYPE *p = &ms->ms_value; - - switch (m->type) { - case FILE_BYTE: - cvt_8(p, m); - return 1; - case FILE_SHORT: - cvt_16(p, m); - return 1; - case FILE_LONG: - case FILE_DATE: - case FILE_LDATE: - cvt_32(p, m); - return 1; - case FILE_QUAD: - case FILE_QDATE: - case FILE_QLDATE: - cvt_64(p, m); - return 1; - case FILE_STRING: - case FILE_BESTRING16: - case FILE_LESTRING16: { - size_t len; - - /* Null terminate and 
eat *trailing* return */ - p->s[sizeof(p->s) - 1] = '\0'; - len = strlen(p->s); - if (len-- && p->s[len] == '\n') - p->s[len] = '\0'; - return 1; - } - case FILE_PSTRING: { - char *ptr1 = p->s, *ptr2 = ptr1 + 1; - size_t len = *p->s; - if (len >= sizeof(p->s)) - len = sizeof(p->s) - 1; - while (len--) - *ptr1++ = *ptr2++; - *ptr1 = '\0'; - len = strlen(p->s); - if (len-- && p->s[len] == '\n') - p->s[len] = '\0'; - return 1; - } - case FILE_BESHORT: - p->h = (short)((p->hs[0]<<8)|(p->hs[1])); - cvt_16(p, m); - return 1; - case FILE_BELONG: - case FILE_BEDATE: - case FILE_BELDATE: - p->l = (int32_t) - ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3])); - cvt_32(p, m); - return 1; - case FILE_BEQUAD: - case FILE_BEQDATE: - case FILE_BEQLDATE: - p->q = (uint64_t) - (((uint64_t)p->hq[0]<<56)|((uint64_t)p->hq[1]<<48)| - ((uint64_t)p->hq[2]<<40)|((uint64_t)p->hq[3]<<32)| - ((uint64_t)p->hq[4]<<24)|((uint64_t)p->hq[5]<<16)| - ((uint64_t)p->hq[6]<<8)|((uint64_t)p->hq[7])); - cvt_64(p, m); - return 1; - case FILE_LESHORT: - p->h = (short)((p->hs[1]<<8)|(p->hs[0])); - cvt_16(p, m); - return 1; - case FILE_LELONG: - case FILE_LEDATE: - case FILE_LELDATE: - p->l = (int32_t) - ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0])); - cvt_32(p, m); - return 1; - case FILE_LEQUAD: - case FILE_LEQDATE: - case FILE_LEQLDATE: - p->q = (uint64_t) - (((uint64_t)p->hq[7]<<56)|((uint64_t)p->hq[6]<<48)| - ((uint64_t)p->hq[5]<<40)|((uint64_t)p->hq[4]<<32)| - ((uint64_t)p->hq[3]<<24)|((uint64_t)p->hq[2]<<16)| - ((uint64_t)p->hq[1]<<8)|((uint64_t)p->hq[0])); - cvt_64(p, m); - return 1; - case FILE_MELONG: - case FILE_MEDATE: - case FILE_MELDATE: - p->l = (int32_t) - ((p->hl[1]<<24)|(p->hl[0]<<16)|(p->hl[3]<<8)|(p->hl[2])); - cvt_32(p, m); - return 1; - case FILE_FLOAT: - cvt_float(p, m); - return 1; - case FILE_BEFLOAT: - p->l = ((uint32_t)p->hl[0]<<24)|((uint32_t)p->hl[1]<<16)| - ((uint32_t)p->hl[2]<<8) |((uint32_t)p->hl[3]); - cvt_float(p, m); - return 1; - case FILE_LEFLOAT: - 
p->l = ((uint32_t)p->hl[3]<<24)|((uint32_t)p->hl[2]<<16)| - ((uint32_t)p->hl[1]<<8) |((uint32_t)p->hl[0]); - cvt_float(p, m); - return 1; - case FILE_DOUBLE: - cvt_double(p, m); - return 1; - case FILE_BEDOUBLE: - p->q = ((uint64_t)p->hq[0]<<56)|((uint64_t)p->hq[1]<<48)| - ((uint64_t)p->hq[2]<<40)|((uint64_t)p->hq[3]<<32)| - ((uint64_t)p->hq[4]<<24)|((uint64_t)p->hq[5]<<16)| - ((uint64_t)p->hq[6]<<8) |((uint64_t)p->hq[7]); - cvt_double(p, m); - return 1; - case FILE_LEDOUBLE: - p->q = ((uint64_t)p->hq[7]<<56)|((uint64_t)p->hq[6]<<48)| - ((uint64_t)p->hq[5]<<40)|((uint64_t)p->hq[4]<<32)| - ((uint64_t)p->hq[3]<<24)|((uint64_t)p->hq[2]<<16)| - ((uint64_t)p->hq[1]<<8) |((uint64_t)p->hq[0]); - cvt_double(p, m); - return 1; - case FILE_REGEX: - case FILE_SEARCH: - case FILE_DEFAULT: - return 1; - default: - file_magerror(ms, "invalid type %d in mconvert()", m->type); - return 0; - } -} - - -private void -mdebug(uint32_t offset, const char *str, size_t len) -{ - (void) fprintf(stderr, "mget @%d: ", offset); - file_showstr(stderr, str, len); - (void) fputc('\n', stderr); - (void) fputc('\n', stderr); -} - -private int -mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir, - const unsigned char *s, uint32_t offset, size_t nbytes, size_t linecnt) -{ - /* - * Note: FILE_SEARCH and FILE_REGEX do not actually copy - * anything, but setup pointers into the source - */ - if (indir == 0) { - switch (type) { - case FILE_SEARCH: - ms->search.s = (const char *)s + offset; - ms->search.s_len = nbytes - offset; - ms->search.offset = offset; - return 0; - - case FILE_REGEX: { - const char *b; - const char *c; - const char *last; /* end of search region */ - const char *buf; /* start of search region */ - size_t lines; - - if (s == NULL) { - ms->search.s_len = 0; - ms->search.s = NULL; - return 0; - } - buf = (const char *)s + offset; - last = (const char *)s + nbytes; - /* mget() guarantees buf <= last */ - for (lines = linecnt, b = buf; - lines && ((b = strchr(c = b, 
'\n')) || (b = strchr(c, '\r'))); - lines--, b++) { - last = b; - if (b[0] == '\r' && b[1] == '\n') - b++; - } - if (lines) - last = (const char *)s + nbytes; - - ms->search.s = buf; - ms->search.s_len = last - buf; - ms->search.offset = offset; - ms->search.rm_len = 0; - return 0; - } - case FILE_BESTRING16: - case FILE_LESTRING16: { - const unsigned char *src = s + offset; - const unsigned char *esrc = s + nbytes; - char *dst = p->s; - char *edst = &p->s[sizeof(p->s) - 1]; - - if (type == FILE_BESTRING16) - src++; - - /* check for pointer overflow */ - if (src < s) { - file_magerror(ms, "invalid offset %zu in mcopy()", - offset); - return -1; - } - for (/*EMPTY*/; src < esrc; src += 2, dst++) { - if (dst < edst) - *dst = *src; - else - break; - if (*dst == '\0') { - if (type == FILE_BESTRING16 ? - *(src - 1) != '\0' : - *(src + 1) != '\0') - *dst = ' '; - } - } - *edst = '\0'; - return 0; - } - case FILE_STRING: /* XXX - these two should not need */ - case FILE_PSTRING: /* to copy anything, but do anyway. 
*/ - default: - break; - } - } - - if (offset >= nbytes) { - (void)memset(p, '\0', sizeof(*p)); - return 0; - } - if (nbytes - offset < sizeof(*p)) - nbytes = nbytes - offset; - else - nbytes = sizeof(*p); - - (void)memcpy(p, s + offset, nbytes); - - /* - * the usefulness of padding with zeroes eludes me, it - * might even cause problems - */ - if (nbytes < sizeof(*p)) - (void)memset(((char *)(void *)p) + nbytes, '\0', - sizeof(*p) - nbytes); - return 0; -} - -private int -mget(struct magic_set *ms, const unsigned char *s, - struct magic *m, size_t nbytes, unsigned int cont_level) -{ - uint32_t offset = ms->offset; - uint32_t count = m->str_range; - union VALUETYPE *p = &ms->ms_value; - - if (mcopy(ms, p, m->type, m->flag & INDIR, s, offset, nbytes, count) == -1) - return -1; - - if ((ms->flags & MAGIC_DEBUG) != 0) { - mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE)); - file_mdump(m); - } - - if (m->flag & INDIR) { - int off = m->in_offset; - if (m->in_op & FILE_OPINDIRECT) { - const union VALUETYPE *q = - ((const void *)(s + offset + off)); - switch (m->in_type) { - case FILE_BYTE: - off = q->b; - break; - case FILE_SHORT: - off = q->h; - break; - case FILE_BESHORT: - off = (short)((q->hs[0]<<8)|(q->hs[1])); - break; - case FILE_LESHORT: - off = (short)((q->hs[1]<<8)|(q->hs[0])); - break; - case FILE_LONG: - off = q->l; - break; - case FILE_BELONG: - off = (int32_t)((q->hl[0]<<24)|(q->hl[1]<<16)| - (q->hl[2]<<8)|(q->hl[3])); - break; - case FILE_LELONG: - off = (int32_t)((q->hl[3]<<24)|(q->hl[2]<<16)| - (q->hl[1]<<8)|(q->hl[0])); - break; - case FILE_MELONG: - off = (int32_t)((q->hl[1]<<24)|(q->hl[0]<<16)| - (q->hl[3]<<8)|(q->hl[2])); - break; - } - } - switch (m->in_type) { - case FILE_BYTE: - if (nbytes < (offset + 1)) - return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = p->b & off; - break; - case FILE_OPOR: - offset = p->b | off; - break; - case FILE_OPXOR: - offset = p->b ^ off; - break; - case FILE_OPADD: - 
offset = p->b + off; - break; - case FILE_OPMINUS: - offset = p->b - off; - break; - case FILE_OPMULTIPLY: - offset = p->b * off; - break; - case FILE_OPDIVIDE: - offset = p->b / off; - break; - case FILE_OPMODULO: - offset = p->b % off; - break; - } - } else - offset = p->b; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; - break; - case FILE_BESHORT: - if (nbytes < (offset + 2)) - return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = (short)((p->hs[0]<<8)| - (p->hs[1])) & - off; - break; - case FILE_OPOR: - offset = (short)((p->hs[0]<<8)| - (p->hs[1])) | - off; - break; - case FILE_OPXOR: - offset = (short)((p->hs[0]<<8)| - (p->hs[1])) ^ - off; - break; - case FILE_OPADD: - offset = (short)((p->hs[0]<<8)| - (p->hs[1])) + - off; - break; - case FILE_OPMINUS: - offset = (short)((p->hs[0]<<8)| - (p->hs[1])) - - off; - break; - case FILE_OPMULTIPLY: - offset = (short)((p->hs[0]<<8)| - (p->hs[1])) * - off; - break; - case FILE_OPDIVIDE: - offset = (short)((p->hs[0]<<8)| - (p->hs[1])) / - off; - break; - case FILE_OPMODULO: - offset = (short)((p->hs[0]<<8)| - (p->hs[1])) % - off; - break; - } - } else - offset = (short)((p->hs[0]<<8)| - (p->hs[1])); - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; - break; - case FILE_LESHORT: - if (nbytes < (offset + 2)) - return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = (short)((p->hs[1]<<8)| - (p->hs[0])) & - off; - break; - case FILE_OPOR: - offset = (short)((p->hs[1]<<8)| - (p->hs[0])) | - off; - break; - case FILE_OPXOR: - offset = (short)((p->hs[1]<<8)| - (p->hs[0])) ^ - off; - break; - case FILE_OPADD: - offset = (short)((p->hs[1]<<8)| - (p->hs[0])) + - off; - break; - case FILE_OPMINUS: - offset = (short)((p->hs[1]<<8)| - (p->hs[0])) - - off; - break; - case FILE_OPMULTIPLY: - offset = (short)((p->hs[1]<<8)| - (p->hs[0])) * - off; - break; - case FILE_OPDIVIDE: - offset = (short)((p->hs[1]<<8)| - (p->hs[0])) / - off; - break; - case 
FILE_OPMODULO: - offset = (short)((p->hs[1]<<8)| - (p->hs[0])) % - off; - break; - } - } else - offset = (short)((p->hs[1]<<8)| - (p->hs[0])); - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; - break; - case FILE_SHORT: - if (nbytes < (offset + 2)) - return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = p->h & off; - break; - case FILE_OPOR: - offset = p->h | off; - break; - case FILE_OPXOR: - offset = p->h ^ off; - break; - case FILE_OPADD: - offset = p->h + off; - break; - case FILE_OPMINUS: - offset = p->h - off; - break; - case FILE_OPMULTIPLY: - offset = p->h * off; - break; - case FILE_OPDIVIDE: - offset = p->h / off; - break; - case FILE_OPMODULO: - offset = p->h % off; - break; - } - } - else - offset = p->h; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; - break; - case FILE_BELONG: - if (nbytes < (offset + 4)) - return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = (int32_t)((p->hl[0]<<24)| - (p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])) & - off; - break; - case FILE_OPOR: - offset = (int32_t)((p->hl[0]<<24)| - (p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])) | - off; - break; - case FILE_OPXOR: - offset = (int32_t)((p->hl[0]<<24)| - (p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])) ^ - off; - break; - case FILE_OPADD: - offset = (int32_t)((p->hl[0]<<24)| - (p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])) + - off; - break; - case FILE_OPMINUS: - offset = (int32_t)((p->hl[0]<<24)| - (p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])) - - off; - break; - case FILE_OPMULTIPLY: - offset = (int32_t)((p->hl[0]<<24)| - (p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])) * - off; - break; - case FILE_OPDIVIDE: - offset = (int32_t)((p->hl[0]<<24)| - (p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])) / - off; - break; - case FILE_OPMODULO: - offset = (int32_t)((p->hl[0]<<24)| - (p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])) % - off; - break; - } - } else - offset = (int32_t)((p->hl[0]<<24)| - 
(p->hl[1]<<16)| - (p->hl[2]<<8)| - (p->hl[3])); - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; - break; - case FILE_LELONG: - if (nbytes < (offset + 4)) - return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])) & - off; - break; - case FILE_OPOR: - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])) | - off; - break; - case FILE_OPXOR: - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])) ^ - off; - break; - case FILE_OPADD: - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])) + - off; - break; - case FILE_OPMINUS: - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])) - - off; - break; - case FILE_OPMULTIPLY: - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])) * - off; - break; - case FILE_OPDIVIDE: - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])) / - off; - break; - case FILE_OPMODULO: - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])) % - off; - break; - } - } else - offset = (int32_t)((p->hl[3]<<24)| - (p->hl[2]<<16)| - (p->hl[1]<<8)| - (p->hl[0])); - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; - break; - case FILE_MELONG: - if (nbytes < (offset + 4)) - return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])) & - off; - break; - case FILE_OPOR: - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])) | - off; - break; - case FILE_OPXOR: - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])) ^ - off; - break; - case FILE_OPADD: - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])) + - off; - break; - case 
FILE_OPMINUS: - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])) - - off; - break; - case FILE_OPMULTIPLY: - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])) * - off; - break; - case FILE_OPDIVIDE: - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])) / - off; - break; - case FILE_OPMODULO: - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])) % - off; - break; - } - } else - offset = (int32_t)((p->hl[1]<<24)| - (p->hl[0]<<16)| - (p->hl[3]<<8)| - (p->hl[2])); - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; - break; - case FILE_LONG: - if (nbytes < (offset + 4)) - return 0; - if (off) { - switch (m->in_op & FILE_OPS_MASK) { - case FILE_OPAND: - offset = p->l & off; - break; - case FILE_OPOR: - offset = p->l | off; - break; - case FILE_OPXOR: - offset = p->l ^ off; - break; - case FILE_OPADD: - offset = p->l + off; - break; - case FILE_OPMINUS: - offset = p->l - off; - break; - case FILE_OPMULTIPLY: - offset = p->l * off; - break; - case FILE_OPDIVIDE: - offset = p->l / off; - break; - case FILE_OPMODULO: - offset = p->l % off; - break; - } - } else - offset = p->l; - if (m->in_op & FILE_OPINVERSE) - offset = ~offset; - break; - } - - if (m->flag & INDIROFFADD) - offset += ms->c.li[cont_level-1].off; - if (mcopy(ms, p, m->type, 0, s, offset, nbytes, count) == -1) - return -1; - ms->offset = offset; - - if ((ms->flags & MAGIC_DEBUG) != 0) { - mdebug(offset, (char *)(void *)p, - sizeof(union VALUETYPE)); - file_mdump(m); - } - } - - /* Verify we have enough data to match magic type */ - switch (m->type) { - case FILE_BYTE: - if (nbytes < (offset + 1)) /* should alway be true */ - return 0; - break; - - case FILE_SHORT: - case FILE_BESHORT: - case FILE_LESHORT: - if (nbytes < (offset + 2)) - return 0; - break; - - case FILE_LONG: - case FILE_BELONG: - case FILE_LELONG: - case FILE_MELONG: - case FILE_DATE: - case FILE_BEDATE: - case 
FILE_LEDATE: - case FILE_MEDATE: - case FILE_LDATE: - case FILE_BELDATE: - case FILE_LELDATE: - case FILE_MELDATE: - case FILE_FLOAT: - case FILE_BEFLOAT: - case FILE_LEFLOAT: - if (nbytes < (offset + 4)) - return 0; - break; - - case FILE_DOUBLE: - case FILE_BEDOUBLE: - case FILE_LEDOUBLE: - if (nbytes < (offset + 8)) - return 0; - break; - - case FILE_STRING: - case FILE_PSTRING: - case FILE_SEARCH: - if (nbytes < (offset + m->vallen)) - return 0; - break; - - case FILE_REGEX: - if (nbytes < offset) - return 0; - break; - - case FILE_DEFAULT: /* nothing to check */ - default: - break; - } - if (!mconvert(ms, m)) - return 0; - return 1; -} - -private uint64_t -file_strncmp(const char *s1, const char *s2, size_t len, uint32_t flags) -{ - /* - * Convert the source args to unsigned here so that (1) the - * compare will be unsigned as it is in strncmp() and (2) so - * the ctype functions will work correctly without extra - * casting. - */ - const unsigned char *a = (const unsigned char *)s1; - const unsigned char *b = (const unsigned char *)s2; - uint64_t v; - - /* - * What we want here is v = strncmp(s1, s2, len), - * but ignoring any nulls. 
- */ - v = 0; - if (0L == flags) { /* normal string: do it fast */ - while (len-- > 0) - if ((v = *b++ - *a++) != '\0') - break; - } - else { /* combine the others */ - while (len-- > 0) { - if ((flags & STRING_IGNORE_LOWERCASE) && - islower(*a)) { - if ((v = tolower(*b++) - *a++) != '\0') - break; - } - else if ((flags & STRING_IGNORE_UPPERCASE) && - isupper(*a)) { - if ((v = toupper(*b++) - *a++) != '\0') - break; - } - else if ((flags & STRING_COMPACT_BLANK) && - isspace(*a)) { - a++; - if (isspace(*b++)) { - while (isspace(*b)) - b++; - } - else { - v = 1; - break; - } - } - else if ((flags & STRING_COMPACT_OPTIONAL_BLANK) && - isspace(*a)) { - a++; - while (isspace(*b)) - b++; - } - else { - if ((v = *b++ - *a++) != '\0') - break; - } - } - } - return v; -} - -private uint64_t -file_strncmp16(const char *a, const char *b, size_t len, uint32_t flags) -{ - /* - * XXX - The 16-bit string compare probably needs to be done - * differently, especially if the flags are to be supported. - * At the moment, I am unsure. 
- */ - flags = 0; - return file_strncmp(a, b, len, flags); -} - -private int -magiccheck(struct magic_set *ms, struct magic *m) -{ - uint64_t l = m->value.q; - uint64_t v; - float fl, fv; - double dl, dv; - int matched; - union VALUETYPE *p = &ms->ms_value; - - switch (m->type) { - case FILE_BYTE: - v = p->b; - break; - - case FILE_SHORT: - case FILE_BESHORT: - case FILE_LESHORT: - v = p->h; - break; - - case FILE_LONG: - case FILE_BELONG: - case FILE_LELONG: - case FILE_MELONG: - case FILE_DATE: - case FILE_BEDATE: - case FILE_LEDATE: - case FILE_MEDATE: - case FILE_LDATE: - case FILE_BELDATE: - case FILE_LELDATE: - case FILE_MELDATE: - v = p->l; - break; - - case FILE_QUAD: - case FILE_LEQUAD: - case FILE_BEQUAD: - case FILE_QDATE: - case FILE_BEQDATE: - case FILE_LEQDATE: - case FILE_QLDATE: - case FILE_BEQLDATE: - case FILE_LEQLDATE: - v = p->q; - break; - - case FILE_FLOAT: - case FILE_BEFLOAT: - case FILE_LEFLOAT: - fl = m->value.f; - fv = p->f; - switch (m->reln) { - case 'x': - matched = 1; - break; - - case '!': - matched = fv != fl; - break; - - case '=': - matched = fv == fl; - break; - - case '>': - matched = fv > fl; - break; - - case '<': - matched = fv < fl; - break; - - default: - matched = 0; - file_magerror(ms, "cannot happen with float: invalid relation `%c'", m->reln); - return -1; - } - return matched; - - case FILE_DOUBLE: - case FILE_BEDOUBLE: - case FILE_LEDOUBLE: - dl = m->value.d; - dv = p->d; - switch (m->reln) { - case 'x': - matched = 1; - break; - - case '!': - matched = dv != dl; - break; - - case '=': - matched = dv == dl; - break; - - case '>': - matched = dv > dl; - break; - - case '<': - matched = dv < dl; - break; - - default: - matched = 0; - file_magerror(ms, "cannot happen with double: invalid relation `%c'", m->reln); - return -1; - } - return matched; - - case FILE_DEFAULT: - l = 0; - v = 0; - break; - - case FILE_STRING: - case FILE_PSTRING: - l = 0; - v = file_strncmp(m->value.s, p->s, (size_t)m->vallen, m->str_flags); - 
break; - - case FILE_BESTRING16: - case FILE_LESTRING16: - l = 0; - v = file_strncmp16(m->value.s, p->s, (size_t)m->vallen, m->str_flags); - break; - - case FILE_SEARCH: { /* search ms->search.s for the string m->value.s */ - size_t slen; - size_t idx; - - if (ms->search.s == NULL) - return 0; - - slen = MIN(m->vallen, sizeof(m->value.s)); - l = 0; - v = 0; - - for (idx = 0; m->str_range == 0 || idx < m->str_range; idx++) { - if (slen + idx > ms->search.s_len) - break; - - v = file_strncmp(m->value.s, ms->search.s + idx, slen, m->str_flags); - if (v == 0) { /* found match */ - ms->search.offset += idx; - break; - } - } - break; - } - case FILE_REGEX: { - int rc; - regex_t rx; - char errmsg[512]; - - if (ms->search.s == NULL) - return 0; - - l = 0; - rc = regcomp(&rx, m->value.s, - REG_EXTENDED|REG_NEWLINE| - ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0)); - if (rc) { - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regex error %d, (%s)", - rc, errmsg); - v = (uint64_t)-1; - } - else { - regmatch_t pmatch[1]; -#ifndef REG_STARTEND -#define REG_STARTEND 0 - size_t l = ms->search.s_len - 1; - char c = ms->search.s[l]; - ((char *)(intptr_t)ms->search.s)[l] = '\0'; -#else - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = ms->search.s_len; -#endif - rc = regexec(&rx, (const char *)ms->search.s, - 1, pmatch, REG_STARTEND); -#if REG_STARTEND == 0 - ((char *)(intptr_t)ms->search.s)[l] = c; -#endif - switch (rc) { - case 0: - ms->search.s += (int)pmatch[0].rm_so; - ms->search.offset += (size_t)pmatch[0].rm_so; - ms->search.rm_len = - (size_t)(pmatch[0].rm_eo - pmatch[0].rm_so); - v = 0; - break; - - case REG_NOMATCH: - v = 1; - break; - - default: - (void)regerror(rc, &rx, errmsg, sizeof(errmsg)); - file_magerror(ms, "regexec error %d, (%s)", - rc, errmsg); - v = (uint64_t)-1; - break; - } - regfree(&rx); - } - if (v == (uint64_t)-1) - return -1; - break; - } - default: - file_magerror(ms, "invalid type %d in magiccheck()", m->type); - return -1; - } 
- - v = file_signextend(ms, m, v); - - switch (m->reln) { - case 'x': - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "%llu == *any* = 1\n", - (unsigned long long)v); - matched = 1; - break; - - case '!': - matched = v != l; - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "%llu != %llu = %d\n", - (unsigned long long)v, (unsigned long long)l, - matched); - break; - - case '=': - matched = v == l; - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "%llu == %llu = %d\n", - (unsigned long long)v, (unsigned long long)l, - matched); - break; - - case '>': - if (m->flag & UNSIGNED) { - matched = v > l; - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "%llu > %llu = %d\n", - (unsigned long long)v, - (unsigned long long)l, matched); - } - else { - matched = (int64_t) v > (int64_t) l; - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "%lld > %lld = %d\n", - (long long)v, (long long)l, matched); - } - break; - - case '<': - if (m->flag & UNSIGNED) { - matched = v < l; - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "%llu < %llu = %d\n", - (unsigned long long)v, - (unsigned long long)l, matched); - } - else { - matched = (int64_t) v < (int64_t) l; - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "%lld < %lld = %d\n", - (long long)v, (long long)l, matched); - } - break; - - case '&': - matched = (v & l) == l; - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "((%llx & %llx) == %llx) = %d\n", - (unsigned long long)v, (unsigned long long)l, - (unsigned long long)l, matched); - break; - - case '^': - matched = (v & l) != l; - if ((ms->flags & MAGIC_DEBUG) != 0) - (void) fprintf(stderr, "((%llx & %llx) != %llx) = %d\n", - (unsigned long long)v, (unsigned long long)l, - (unsigned long long)l, matched); - break; - - default: - matched = 0; - file_magerror(ms, "cannot happen: invalid relation `%c'", - m->reln); - return -1; - } - - return matched; -} - -private int 
-print_sep(struct magic_set *ms, int firstline) -{ - if (firstline) - return 0; - /* - * we found another match - * put a newline and '-' to do some simple formatting - */ - return file_printf(ms, "\n- "); -} diff --git a/usr.bin/file/tar.h b/usr.bin/file/tar.h deleted file mode 100644 index 9e6f3a84f3b..00000000000 --- a/usr.bin/file/tar.h +++ /dev/null @@ -1,74 +0,0 @@ -/* $OpenBSD: tar.h,v 1.7 2009/04/24 18:54:34 chl Exp $ */ -/* - * Copyright (c) Ian F. Darwin 1986-1995. - * Software written by Ian F. Darwin and others; - * maintained 1995-present by Christos Zoulas and others. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice immediately at the beginning of the file, without modification, - * this list of conditions, and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ -/* - * Header file for public domain tar (tape archive) program. - * - * @(#)tar.h 1.20 86/10/29 Public Domain. - * - * Created 25 August 1985 by John Gilmore, ihnp4!hoptoad!gnu. - * - * $Id: tar.h,v 1.7 2009/04/24 18:54:34 chl Exp $ # checkin only - */ - -/* - * Header block on tape. - * - * I'm going to use traditional DP naming conventions here. - * A "block" is a big chunk of stuff that we do I/O on. - * A "record" is a piece of info that we care about. - * Typically many "record"s fit into a "block". - */ -#define RECORDSIZE 512 -#define NAMSIZ 100 -#define TUNMLEN 32 -#define TGNMLEN 32 - -union record { - char charptr[RECORDSIZE]; - struct header { - char name[NAMSIZ]; - char mode[8]; - char uid[8]; - char gid[8]; - char size[12]; - char mtime[12]; - char chksum[8]; - char linkflag; - char linkname[NAMSIZ]; - char magic[8]; - char uname[TUNMLEN]; - char gname[TGNMLEN]; - char devmajor[8]; - char devminor[8]; - } header; -}; - -/* The magic field is filled with this if uname and gname are valid. */ -#define TMAGIC "ustar" /* 5 chars and a null */ -#define GNUTMAGIC "ustar " /* 7 chars and a null */ diff --git a/usr.bin/file/text.c b/usr.bin/file/text.c new file mode 100644 index 00000000000..f835c50cee0 --- /dev/null +++ b/usr.bin/file/text.c @@ -0,0 +1,168 @@ +/* $OpenBSD: text.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */ + +/* + * Copyright (c) 2015 Nicholas Marriott + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include + +#include +#include + +#include "file.h" +#include "magic.h" +#include "xmalloc.h" + +static const char *text_words[][3] = { + { "msgid", "PO (gettext message catalogue)", "text/x-po" }, + { "dnl", "M4 macro language pre-processor", "text/x-m4" }, + { "import", "Java program", "text/x-java" }, + { "\"libhdr\"", "BCPL program", "text/x-bcpl" }, + { "\"LIBHDR\"", "BCPL program", "text/x-bcpl" }, + { "//", "C++ program", "text/x-c++" }, + { "virtual", "C++ program", "text/x-c++" }, + { "class", "C++ program", "text/x-c++" }, + { "public:", "C++ program", "text/x-c++" }, + { "private:", "C++ program", "text/x-c++" }, + { "/*", "C program", "text/x-c" }, + { "#include", "C program", "text/x-c" }, + { "char", "C program", "text/x-c" }, + { "The", "English", "text/plain" }, + { "the", "English", "text/plain" }, + { "double", "C program", "text/x-c" }, + { "extern", "C program", "text/x-c" }, + { "float", "C program", "text/x-c" }, + { "struct", "C program", "text/x-c" }, + { "union", "C program", "text/x-c" }, + { "CFLAGS", "make commands", "text/x-makefile" }, + { "LDFLAGS", "make commands", "text/x-makefile" }, + { "all:", "make commands", "text/x-makefile" }, + { ".PRECIOUS", "make commands", "text/x-makefile" }, + { ".ascii", "assembler program", "text/x-asm" }, + { ".asciiz", "assembler program", "text/x-asm" }, + { ".byte", "assembler program", "text/x-asm" }, + { ".even", "assembler program", "text/x-asm" }, + { ".globl", "assembler program", "text/x-asm" }, + { ".text", "assembler program", "text/x-asm" }, + { "clr", "assembler program", "text/x-asm" }, + { "(input", "Pascal program", "text/x-pascal" }, + { 
"program", "Pascal program", "text/x-pascal" }, + { "record", "Pascal program", "text/x-pascal" }, + { "dcl", "PL/1 program", "text/x-pl1" }, + { "Received:", "mail", "text/x-mail" }, + { ">From", "mail", "text/x-mail" }, + { "Return-Path:", "mail", "text/x-mail" }, + { "Cc:", "mail", "text/x-mail" }, + { "Newsgroups:", "news", "text/x-news" }, + { "Path:", "news", "text/x-news" }, + { "Organization:", "news", "text/x-news" }, + { "href=", "HTML document", "text/html" }, + { "HREF=", "HTML document", "text/html" }, + { " 31 && c < 127); +} + +static int +text_is_latin1(u_char c) +{ + if (c >= 160) + return (1); + return (text_is_ascii(c)); +} + +static int +text_is_extended(u_char c) +{ + if (c >= 128) + return (1); + return (text_is_ascii(c)); +} + +static int +text_try_test(const void *base, size_t size, int (*f)(u_char)) +{ + const u_char *data = base; + size_t offset; + + for (offset = 0; offset < size; offset++) { + if (!f(data[offset])) + return (0); + } + return (1); +} + +const char * +text_get_type(const void *base, size_t size) +{ + if (text_try_test(base, size, text_is_ascii)) + return ("ASCII"); + if (text_try_test(base, size, text_is_latin1)) + return ("ISO-8859"); + if (text_try_test(base, size, text_is_extended)) + return ("Non-ISO extended-ASCII"); + return (NULL); +} + +const char * +text_try_words(const void *base, size_t size, int flags) +{ + const char *cp, *end, *next, *word; + size_t wordlen; + u_int i; + + end = (char*)base + size; + for (cp = base; cp != end; /* nothing */) { + while (cp != end && isspace((u_char)*cp)) + cp++; + + next = cp; + while (next != end && !isspace((u_char)*next)) + next++; + + for (i = 0; /* nothing */; i++) { + word = text_words[i][0]; + if (word == NULL) + break; + wordlen = strlen(word); + + if ((size_t)(next - cp) != wordlen) + continue; + if (memcmp(cp, word, wordlen) != 0) + continue; + if (flags & MAGIC_TEST_MIME) + return (text_words[i][2]); + return (text_words[i][1]); + } + + cp = next; + } + return 
(NULL); +} diff --git a/usr.bin/file/xmalloc.c b/usr.bin/file/xmalloc.c new file mode 100644 index 00000000000..857bcd91569 --- /dev/null +++ b/usr.bin/file/xmalloc.c @@ -0,0 +1,103 @@ +/* $OpenBSD: xmalloc.c,v 1.1 2015/04/24 16:24:11 nicm Exp $ */ +/* + * Author: Tatu Ylonen + * Copyright (c) 1995 Tatu Ylonen , Espoo, Finland + * All rights reserved + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). + * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#include +#include +#include +#include +#include +#include + +#include "xmalloc.h" + +void * +xmalloc(size_t size) +{ + void *ptr; + + if (size == 0) + errx(1, "xmalloc: zero size"); + ptr = malloc(size); + if (ptr == NULL) + errx(1, + "xmalloc: out of memory (allocating %zu bytes)", + size); + return ptr; +} + +void * +xcalloc(size_t nmemb, size_t size) +{ + void *ptr; + + if (size == 0 || nmemb == 0) + errx(1, "xcalloc: zero size"); + if (SIZE_MAX / nmemb < size) + errx(1, "xcalloc: nmemb * size > SIZE_MAX"); + ptr = calloc(nmemb, size); + if (ptr == NULL) + errx(1, "xcalloc: out of memory (allocating %zu bytes)", + (size * nmemb)); + return ptr; +} + +void * +xreallocarray(void *ptr, size_t nmemb, size_t size) +{ + void *new_ptr; + + new_ptr = reallocarray(ptr, nmemb, size); + if (new_ptr == NULL) + errx(1, "xreallocarray: out of memory (new_size %zu bytes)", + nmemb * size); + return new_ptr; +} + +void +xfree(void *ptr) +{ + if (ptr == NULL) + errx(1, "xfree: NULL pointer given as argument"); + free(ptr); +} + +char * +xstrdup(const char *str) +{ + size_t len; + char *cp; + + len = strlen(str) + 1; + cp = xmalloc(len); + if 
(strlcpy(cp, str, len) >= len) + errx(1, "xstrdup: string truncated"); + return cp; +} + +int +xasprintf(char **ret, const char *fmt, ...) +{ + va_list ap; + int i; + + va_start(ap, fmt); + i = vasprintf(ret, fmt, ap); + va_end(ap); + + if (i < 0 || *ret == NULL) + errx(1, "xasprintf: could not allocate memory"); + + return (i); +} diff --git a/usr.bin/file/xmalloc.h b/usr.bin/file/xmalloc.h new file mode 100644 index 00000000000..8adb3e6f104 --- /dev/null +++ b/usr.bin/file/xmalloc.h @@ -0,0 +1,31 @@ +/* $OpenBSD: xmalloc.h,v 1.1 2015/04/24 16:24:11 nicm Exp $ */ + +/* + * Author: Tatu Ylonen + * Copyright (c) 1995 Tatu Ylonen , Espoo, Finland + * All rights reserved + * Created: Mon Mar 20 22:09:17 1995 ylo + * + * Versions of malloc and friends that check their results, and never return + * failure (they call fatal if they encounter an error). + * + * As far as I am concerned, the code I have written for this software + * can be used freely for any purpose. Any derived versions of this + * software must be clearly marked as such, and if the derived work is + * incompatible with the protocol description in the RFC file, it must be + * called by a name other than "ssh" or "Secure Shell". + */ + +#ifndef XMALLOC_H +#define XMALLOC_H + +void *xmalloc(size_t); +void *xcalloc(size_t, size_t); +void *xreallocarray(void *, size_t, size_t); +void xfree(void *); +char *xstrdup(const char *); +int xasprintf(char **, const char *, ...) + __attribute__((__format__ (printf, 2, 3))) + __attribute__((__nonnull__ (2))); + +#endif /* XMALLOC_H */ -- 2.20.1