From: Jerome Jutteau Date: Wed, 4 Dec 2019 20:03:35 +0000 (+0100) Subject: [FEATURE] Support different types of file hashing X-Git-Tag: 4.2.0~81 X-Git-Url: https://git.p6c8.net/jirafeau_mojo42.git/commitdiff_plain/ecfc8a70b8f33915e53ef2fc9ab14e77daa96e72?hp=5d7c0d3ce08074fff55e7731490fe786cd6101e6 [FEATURE] Support different types of file hashing Useful to avoid hash efforts on large files. ref #206 Signed-off-by: Jerome Jutteau --- diff --git a/README.md b/README.md index 8e6fe31..615c3ea 100644 --- a/README.md +++ b/README.md @@ -334,9 +334,11 @@ If someone use his/her delete link or an admin cleans expired links, this will d When the counter falls to zero, the file is destroyed. +In order to know if a newly uploaded file already exists, Jirafeau will hash the file using md5 by default, but other methods are available (see `file_hash` documentation in `lib/config.original.php`). + ### What is the difference between "delete link" and "delete file and links" in admin interface? -As explained in the previous question, files with the same md5 hash are not duplicated and a reference counter stores the number of links pointing to a single file. +As explained in the previous question, files with the same hash are not duplicated and a reference counter stores the number of links pointing to a single file. So: - The button "delete link" will delete the reference to the file but might not destroy the file. - The button "delete file and links" will delete all references pointing to the file and will destroy the file. diff --git a/lib/config.original.php b/lib/config.original.php index 7b1d4ec..2fe91fd 100644 --- a/lib/config.original.php +++ b/lib/config.original.php @@ -147,6 +147,16 @@ $cfg['maximal_upload_size'] = 0; */ $cfg['proxy_ip'] = array(); +/* File hash + * In order to make file deduplication work, files can be hashed through different methods. + * By default, files are hashed through md5 but other methods are available. + * Possible values are 'md5' and 'md5_outside'. 
+ * With 'md5' option, the whole file is hashed through md5. This is the default. + * With 'md5_outside', md5 is used to hash the first part of the file, the last part of the file + * and the file's size. This method is fast for large files but cannot be perfect. + */ +$cfg['file_hash'] = 'md5'; + /* Required flag to test if the installation is already installed * or needs to start the installation script */ diff --git a/lib/functions.php b/lib/functions.php index 3492745..4e22796 100644 --- a/lib/functions.php +++ b/lib/functions.php @@ -319,6 +319,48 @@ function jirafeau_delete_file($hash) return $count; } + +/** hash file's content + * @param $method hash method, see 'file_hash' option. 'md5' or 'md5_outside'. + * @param $file_path file to hash + * @returns hash string + */ +function jirafeau_hash_file($method, $file_path) +{ + switch ($method) { + case 'md5_outside': + return jirafeau_md5_outside($file_path); + case 'md5': + return md5_file($file_path); + } + return md5_file($file_path); +} + +/** hash part of file: start, end and size. + * This is a partial file hash, faster but weaker. + * @param $file_path file to hash + * @returns hash string + */ +function jirafeau_md5_outside($file_path) +{ + $size = filesize($file_path); + if ($size === false) { + $size = 0; + } + $handle = fopen($file_path, "r"); + if ($handle === false) { + return false; + } + $first = fread($handle, 64); + if ($first === false) { + return false; + } + fseek($handle, $size < 64 ? 0 : $size - 64); + $last = fread($handle, 64); + fclose($handle); + return md5($first . $last . 
$size); +} + /** * handles an uploaded file * @param $file the file struct given by $_FILE[] @@ -333,7 +375,7 @@ function jirafeau_delete_file($hash) * 'link' => the link name of the uploaded file * 'delete_link' => the link code to delete file */ -function jirafeau_upload($file, $one_time_download, $key, $time, $ip, $crypt, $link_name_length) +function jirafeau_upload($file, $one_time_download, $key, $time, $ip, $crypt, $link_name_length, $file_hash_method) { if (empty($file['tmp_name']) || !is_uploaded_file($file['tmp_name'])) { return (array( @@ -361,7 +403,7 @@ function jirafeau_upload($file, $one_time_download, $key, $time, $ip, $crypt, $l } /* file informations */ - $hash = md5_file($file['tmp_name']); + $hash = jirafeau_hash_file($file_hash_method, $file['tmp_name']); $name = str_replace(NL, '', trim($file['name'])); $mime_type = $file['type']; $size = $file['size']; @@ -893,7 +935,7 @@ function jirafeau_async_push($ref, $data, $code, $max_file_size) * @param $link_name_length link name length * @return a string containing the download reference followed by a delete code or the string 'Error' */ -function jirafeau_async_end($ref, $code, $crypt, $link_name_length) +function jirafeau_async_end($ref, $code, $crypt, $link_name_length, $file_hash_method) { /* Get async infos. 
*/ $a = jirafeau_get_async_ref($ref); @@ -917,7 +959,7 @@ function jirafeau_async_end($ref, $code, $crypt, $link_name_length) } } - $hash = md5_file($p); + $hash = jirafeau_hash_file($file_hash_method, $p); $size = filesize($p); $np = s2p($hash); $delete_link_code = jirafeau_gen_random(5); diff --git a/script.php b/script.php index 13a88b8..5fe60e2 100644 --- a/script.php +++ b/script.php @@ -127,9 +127,10 @@ if (isset($_FILES['file']) && is_writable(VAR_FILES) } $res = jirafeau_upload($_FILES['file'], - isset($_POST['one_time_download']), - $key, $time, get_ip_address($cfg), - $cfg['enable_crypt'], $cfg['link_name_length']); + isset($_POST['one_time_download']), + $key, $time, get_ip_address($cfg), + $cfg['enable_crypt'], $cfg['link_name_length'], + $cfg['file_hash']); if (empty($res) || $res['error']['has_error']) { echo 'Error 6 ' . $res['error']['why']; @@ -466,7 +467,7 @@ elseif (isset($_GET['end_async'])) { || !isset($_POST['code'])) { echo 'Error 24'; } else { - echo jirafeau_async_end($_POST['ref'], $_POST['code'], $cfg['enable_crypt'], $cfg['link_name_length']); + echo jirafeau_async_end($_POST['ref'], $_POST['code'], $cfg['enable_crypt'], $cfg['link_name_length'], $cfg['file_hash']); } } else { echo 'Error 25';