From f831452037401b3bf85b910542a06544111cea60 Mon Sep 17 00:00:00 2001 From: Matt Jankowski Date: Wed, 12 Jul 2023 04:44:58 -0400 Subject: [PATCH] Refactor `Snowflake` to avoid brakeman sql injection warnings (#25879) --- config/brakeman.ignore | 23 ---------- lib/mastodon/snowflake.rb | 92 ++++++++++++++++++++++----------------- 2 files changed, 52 insertions(+), 63 deletions(-) diff --git a/config/brakeman.ignore b/config/brakeman.ignore index d67d6ff0c5..4f21944b66 100644 --- a/config/brakeman.ignore +++ b/config/brakeman.ignore @@ -79,29 +79,6 @@ ], "note": "" }, - { - "warning_type": "SQL Injection", - "warning_code": 0, - "fingerprint": "75fcd147b7611763ab6915faf8c5b0709e612b460f27c05c72d8b9bd0a6a77f8", - "check_name": "SQL", - "message": "Possible SQL injection", - "file": "lib/mastodon/snowflake.rb", - "line": 87, - "link": "https://brakemanscanner.org/docs/warning_types/sql_injection/", - "code": "connection.execute(\"CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\nRETURNS bigint AS\\n$$\\n DECLARE\\n time_part bigint;\\n sequence_base bigint;\\n tail bigint;\\n BEGIN\\n time_part := (\\n -- Get the time in milliseconds\\n ((date_part('epoch', now()) * 1000))::bigint\\n -- And shift it over two bytes\\n << 16);\\n\\n sequence_base := (\\n 'x' ||\\n -- Take the first two bytes (four hex characters)\\n substr(\\n -- Of the MD5 hash of the data we documented\\n md5(table_name || '#{SecureRandom.hex(16)}' || time_part::text),\\n 1, 4\\n )\\n -- And turn it into a bigint\\n )::bit(16)::bigint;\\n\\n -- Finally, add our sequence number to our base, and chop\\n -- it to the last two bytes\\n tail := (\\n (sequence_base + nextval(table_name || '_id_seq'))\\n & 65535);\\n\\n -- Return the time part and the sequence part. OR appears\\n -- faster here than addition, but they're equivalent:\\n -- time_part has no trailing two bytes, and tail is only\\n -- the last two bytes.\\n RETURN time_part | tail;\\n END\\n$$ LANGUAGE plpgsql VOLATILE;\\n\")", - "render_path": null, - "location": { - "type": "method", - "class": "Mastodon::Snowflake", - "method": "define_timestamp_id" - }, - "user_input": "SecureRandom.hex(16)", - "confidence": "Medium", - "cwe_id": [ - 89 - ], - "note": "" - }, { "warning_type": "Denial of Service", "warning_code": 76, diff --git a/lib/mastodon/snowflake.rb b/lib/mastodon/snowflake.rb index 32ed140c30..8b79541da2 100644 --- a/lib/mastodon/snowflake.rb +++ b/lib/mastodon/snowflake.rb @@ -64,46 +64,7 @@ module Mastodon::Snowflake def define_timestamp_id return if already_defined? - connection.execute(<<~SQL) - CREATE OR REPLACE FUNCTION timestamp_id(table_name text) - RETURNS bigint AS - $$ - DECLARE - time_part bigint; - sequence_base bigint; - tail bigint; - BEGIN - time_part := ( - -- Get the time in milliseconds - ((date_part('epoch', now()) * 1000))::bigint - -- And shift it over two bytes - << 16); - - sequence_base := ( - 'x' || - -- Take the first two bytes (four hex characters) - substr( - -- Of the MD5 hash of the data we documented - md5(table_name || '#{SecureRandom.hex(16)}' || time_part::text), - 1, 4 - ) - -- And turn it into a bigint - )::bit(16)::bigint; - - -- Finally, add our sequence number to our base, and chop - -- it to the last two bytes - tail := ( - (sequence_base + nextval(table_name || '_id_seq')) - & 65535); - - -- Return the time part and the sequence part. OR appears - -- faster here than addition, but they're equivalent: - -- time_part has no trailing two bytes, and tail is only - -- the last two bytes. - RETURN time_part | tail; - END - $$ LANGUAGE plpgsql VOLATILE; - SQL + connection.execute(sanitized_timestamp_id_sql) end def ensure_id_sequences_exist @@ -153,6 +114,57 @@ module Mastodon::Snowflake SQL end + def sanitized_timestamp_id_sql + ActiveRecord::Base.sanitize_sql_array(timestamp_id_sql_array) + end + + def timestamp_id_sql_array + [timestamp_id_sql_string, { random_string: SecureRandom.hex(16) }] + end + + def timestamp_id_sql_string + <<~SQL + CREATE OR REPLACE FUNCTION timestamp_id(table_name text) + RETURNS bigint AS + $$ + DECLARE + time_part bigint; + sequence_base bigint; + tail bigint; + BEGIN + time_part := ( + -- Get the time in milliseconds + ((date_part('epoch', now()) * 1000))::bigint + -- And shift it over two bytes + << 16); + + sequence_base := ( + 'x' || + -- Take the first two bytes (four hex characters) + substr( + -- Of the MD5 hash of the data we documented + md5(table_name || :random_string || time_part::text), + 1, 4 + ) + -- And turn it into a bigint + )::bit(16)::bigint; + + -- Finally, add our sequence number to our base, and chop + -- it to the last two bytes + tail := ( + (sequence_base + nextval(table_name || '_id_seq')) + & 65535); + + -- Return the time part and the sequence part. OR appears + -- faster here than addition, but they're equivalent: + -- time_part has no trailing two bytes, and tail is only + -- the last two bytes. + RETURN time_part | tail; + END + $$ LANGUAGE plpgsql VOLATILE; + SQL + end + def connection ActiveRecord::Base.connection end