From f831452037401b3bf85b910542a06544111cea60 Mon Sep 17 00:00:00 2001
From: Matt Jankowski <matt@jankowski.online>
Date: Wed, 12 Jul 2023 04:44:58 -0400
Subject: [PATCH] Refactor `Snowflake` to avoid brakeman sql injection warnings
 (#25879)

---
 config/brakeman.ignore    | 23 ----------
 lib/mastodon/snowflake.rb | 92 ++++++++++++++++++++++-----------------
 2 files changed, 52 insertions(+), 63 deletions(-)

diff --git a/config/brakeman.ignore b/config/brakeman.ignore
index d67d6ff0c5..4f21944b66 100644
--- a/config/brakeman.ignore
+++ b/config/brakeman.ignore
@@ -79,29 +79,6 @@
       ],
       "note": ""
     },
-    {
-      "warning_type": "SQL Injection",
-      "warning_code": 0,
-      "fingerprint": "75fcd147b7611763ab6915faf8c5b0709e612b460f27c05c72d8b9bd0a6a77f8",
-      "check_name": "SQL",
-      "message": "Possible SQL injection",
-      "file": "lib/mastodon/snowflake.rb",
-      "line": 87,
-      "link": "https://brakemanscanner.org/docs/warning_types/sql_injection/",
-      "code": "connection.execute(\"CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\nRETURNS bigint AS\\n$$\\n  DECLARE\\n    time_part bigint;\\n    sequence_base bigint;\\n    tail bigint;\\n  BEGIN\\n    time_part := (\\n      -- Get the time in milliseconds\\n      ((date_part('epoch', now()) * 1000))::bigint\\n      -- And shift it over two bytes\\n      << 16);\\n\\n    sequence_base := (\\n      'x' ||\\n      -- Take the first two bytes (four hex characters)\\n      substr(\\n        -- Of the MD5 hash of the data we documented\\n        md5(table_name || '#{SecureRandom.hex(16)}' || time_part::text),\\n        1, 4\\n      )\\n    -- And turn it into a bigint\\n    )::bit(16)::bigint;\\n\\n    -- Finally, add our sequence number to our base, and chop\\n    -- it to the last two bytes\\n    tail := (\\n      (sequence_base + nextval(table_name || '_id_seq'))\\n      & 65535);\\n\\n    -- Return the time part and the sequence part. OR appears\\n    -- faster here than addition, but they're equivalent:\\n    -- time_part has no trailing two bytes, and tail is only\\n    -- the last two bytes.\\n    RETURN time_part | tail;\\n  END\\n$$ LANGUAGE plpgsql VOLATILE;\\n\")",
-      "render_path": null,
-      "location": {
-        "type": "method",
-        "class": "Mastodon::Snowflake",
-        "method": "define_timestamp_id"
-      },
-      "user_input": "SecureRandom.hex(16)",
-      "confidence": "Medium",
-      "cwe_id": [
-        89
-      ],
-      "note": ""
-    },
     {
       "warning_type": "Denial of Service",
       "warning_code": 76,
diff --git a/lib/mastodon/snowflake.rb b/lib/mastodon/snowflake.rb
index 32ed140c30..8b79541da2 100644
--- a/lib/mastodon/snowflake.rb
+++ b/lib/mastodon/snowflake.rb
@@ -64,46 +64,7 @@ module Mastodon::Snowflake
     def define_timestamp_id
       return if already_defined?
 
-      connection.execute(<<~SQL)
-        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
-        RETURNS bigint AS
-        $$
-          DECLARE
-            time_part bigint;
-            sequence_base bigint;
-            tail bigint;
-          BEGIN
-            time_part := (
-              -- Get the time in milliseconds
-              ((date_part('epoch', now()) * 1000))::bigint
-              -- And shift it over two bytes
-              << 16);
-
-            sequence_base := (
-              'x' ||
-              -- Take the first two bytes (four hex characters)
-              substr(
-                -- Of the MD5 hash of the data we documented
-                md5(table_name || '#{SecureRandom.hex(16)}' || time_part::text),
-                1, 4
-              )
-            -- And turn it into a bigint
-            )::bit(16)::bigint;
-
-            -- Finally, add our sequence number to our base, and chop
-            -- it to the last two bytes
-            tail := (
-              (sequence_base + nextval(table_name || '_id_seq'))
-              & 65535);
-
-            -- Return the time part and the sequence part. OR appears
-            -- faster here than addition, but they're equivalent:
-            -- time_part has no trailing two bytes, and tail is only
-            -- the last two bytes.
-            RETURN time_part | tail;
-          END
-        $$ LANGUAGE plpgsql VOLATILE;
-      SQL
+      connection.execute(sanitized_timestamp_id_sql)
     end
 
     def ensure_id_sequences_exist
@@ -153,6 +114,57 @@ module Mastodon::Snowflake
       SQL
     end
 
+    def sanitized_timestamp_id_sql
+      ActiveRecord::Base.sanitize_sql_array(timestamp_id_sql_array)
+    end
+
+    def timestamp_id_sql_array
+      [timestamp_id_sql_string, { random_string: SecureRandom.hex(16) }]
+    end
+
+    def timestamp_id_sql_string
+      <<~SQL
+        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
+        RETURNS bigint AS
+        $$
+          DECLARE
+            time_part bigint;
+            sequence_base bigint;
+            tail bigint;
+          BEGIN
+            time_part := (
+              -- Get the time in milliseconds
+              ((date_part('epoch', now()) * 1000))::bigint
+              -- And shift it over two bytes
+              << 16);
+
+            sequence_base := (
+              'x' ||
+              -- Take the first two bytes (four hex characters)
+              substr(
+                -- Of the MD5 hash of the data we documented
+                md5(table_name || :random_string || time_part::text),
+                1, 4
+              )
+            -- And turn it into a bigint
+            )::bit(16)::bigint;
+
+            -- Finally, add our sequence number to our base, and chop
+            -- it to the last two bytes
+            tail := (
+              (sequence_base + nextval(table_name || '_id_seq'))
+              & 65535);
+
+            -- Return the time part and the sequence part. OR appears
+            -- faster here than addition, but they're equivalent:
+            -- time_part has no trailing two bytes, and tail is only
+            -- the last two bytes.
+            RETURN time_part | tail;
+          END
+        $$ LANGUAGE plpgsql VOLATILE;
+      SQL
+    end
+
     def connection
       ActiveRecord::Base.connection
     end