MATCH_RECOGNIZE is one of the most powerful feature of SQL. There are several use cases, such as finding series of consecutive rows, pattern matching, etc. Once you understand MATCH_RECOGNIZE, you will understand how you can easily perform very difficult SQL operations. It can be used as of Oracle 12c.
So, let’s start with a basic example. We have a table that holds the door entry and exit logs. You can find the DDL and DML script of sample data below.
CREATE TABLE T_TEST
(
ID NUMBER,
USERID VARCHAR2(50),
TERMINALID NUMBER,
EVENTTIME DATE
);
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(1, '00000420', 10, TO_DATE('09/02/2022 09:00:55', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(2, '00000420', 20, TO_DATE('09/02/2022 09:14:28', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(3, '00000420', 10, TO_DATE('09/02/2022 09:17:15', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(4, '00000420', 10, TO_DATE('09/02/2022 10:28:32', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(5, '00000420', 20, TO_DATE('09/02/2022 10:29:53', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(6, '00000420', 20, TO_DATE('09/02/2022 12:27:58', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(7, '00000420', 20, TO_DATE('09/02/2022 12:34:40', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(8, '00000420', 10, TO_DATE('09/02/2022 12:35:09', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(9, '00000420', 20, TO_DATE('09/02/2022 13:05:38', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(10, '00000420', 10, TO_DATE('09/02/2022 14:07:30', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(11, '00000420', 20, TO_DATE('09/02/2022 14:14:56', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(12, '00000420', 10, TO_DATE('09/02/2022 14:38:17', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(15, '00000420', 20, TO_DATE('09/02/2022 16:02:12', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(16, '00000420', 10, TO_DATE('09/02/2022 16:13:44', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(17, '00000420', 20, TO_DATE('09/02/2022 17:15:33', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(18, '00000420', 10, TO_DATE('09/02/2022 17:18:33', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(19, '00000420', 20, TO_DATE('09/02/2022 18:10:10', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(20, '00000420', 20, TO_DATE('09/02/2022 18:15:50', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(21, '00000384', 20, TO_DATE('09/02/2022 09:10:52', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(21, '00000384', 10, TO_DATE('09/02/2022 09:12:57', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(22, '00000384', 20, TO_DATE('09/02/2022 09:18:42', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(23, '00000384', 10, TO_DATE('09/02/2022 09:37:07', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(24, '00000384', 20, TO_DATE('09/02/2022 09:53:02', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(25, '00000384', 10, TO_DATE('09/02/2022 09:57:34', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(26, '00000384', 20, TO_DATE('09/02/2022 10:56:25', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(27, '00000384', 10, TO_DATE('09/02/2022 11:15:36', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(28, '00000384', 20, TO_DATE('09/02/2022 11:38:26', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(29, '00000384', 10, TO_DATE('09/02/2022 12:00:37', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(30, '00000384', 10, TO_DATE('09/02/2022 12:18:30', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(31, '00000384', 10, TO_DATE('09/02/2022 13:40:52', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(32, '00000384', 20, TO_DATE('09/02/2022 13:42:32', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(33, '00000384', 10, TO_DATE('09/02/2022 13:44:52', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(34, '00000384', 20, TO_DATE('09/02/2022 14:19:57', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(35, '00000384', 10, TO_DATE('09/02/2022 14:40:05', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(36, '00000384', 20, TO_DATE('09/02/2022 16:00:40', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(37, '00000384', 20, TO_DATE('09/02/2022 16:05:12', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(38, '00000384', 20, TO_DATE('09/02/2022 16:15:52', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(39, '00000384', 10, TO_DATE('09/02/2022 16:28:50', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(40, '00000384', 20, TO_DATE('09/02/2022 18:07:22', 'DD/MM/YYYY HH24:MI:SS'));
INSERT INTO T_TEST(ID, USERID, TERMINALID, EVENTTIME) VALUES(39, '00000384', 10, TO_DATE('09/02/2022 18:28:50', 'DD/MM/YYYY HH24:MI:SS'));
COMMIT;
For each userid I would like to report the data as follows.
USERID ENTRY_TIME OUT_TIME
--------------- ------------------- -------------------
00000384 09/02/2022 09:12:57 09/02/2022 09:18:42
00000384 09/02/2022 09:37:07 09/02/2022 09:53:02
00000384 09/02/2022 09:57:34 09/02/2022 10:56:25
00000384 09/02/2022 11:15:36 09/02/2022 11:38:26
00000384 09/02/2022 12:00:37 09/02/2022 13:42:32
00000384 09/02/2022 13:44:52 09/02/2022 14:19:57
00000384 09/02/2022 14:40:05 09/02/2022 16:15:52
00000384 09/02/2022 16:28:50 09/02/2022 18:07:22
00000420 09/02/2022 09:00:55 09/02/2022 09:14:28
00000420 09/02/2022 09:17:15 09/02/2022 12:34:40
00000420 09/02/2022 12:35:09 09/02/2022 13:05:38
00000420 09/02/2022 14:07:30 09/02/2022 14:14:56
00000420 09/02/2022 14:38:17 09/02/2022 16:02:12
00000420 09/02/2022 16:13:44 09/02/2022 17:15:33
00000420 09/02/2022 17:18:33 09/02/2022 18:15:50
Therefore, there are some business rules.
- Terminal id 10 represents entering the room, whereas 20 represents leaving the room.
- A day must be starts with 10 and ends with 20. Otherwise, for each day sorted by eventtime, rows start with 20 and end with 10 are not included in the report.
- If repetitive terminal ids come consecutively, the first row of the terminal id with 10 and the last row of the terminal id with 20 are included only for the calculation.
SQL> ALTER SESSION SET NLS_DATE_FORMAT='DD/MM/YYYY HH24:MI:SS';
Oturum de?istirildi.
SQL> SET LINESIZE 200
SQL> SET PAGESIZE 100
SQL> SET SPACE 5
SQL> COLUMN ID FORMAT 99999 JUSTIFY CENTER
SQL> COLUMN USERID FORMAT A15 JUSTIFY CENTER
SQL> COLUMN TERMINALID FORMAT 99 JUSTIFY CENTER
SQL> COLUMN EVENTTIME FORMAT A20 JUSTIFY CENTER
SQL> COLUMN MN FORMAT 99 JUSTIFY CENTER
SQL> COLUMN CS FORMAT A15 JUSTIFY CENTER
SQL>
SQL> SELECT * FROM T_TEST;
ID USERID TERMINALID EVENTTIME
------ --------------- ---------- ------------------------------
1 00000420 10 09/02/2022 09:00:55
2 00000420 20 09/02/2022 09:14:28
3 00000420 10 09/02/2022 09:17:15
4 00000420 10 09/02/2022 10:28:32
5 00000420 20 09/02/2022 10:29:53
6 00000420 20 09/02/2022 12:27:58
7 00000420 20 09/02/2022 12:34:40
8 00000420 10 09/02/2022 12:35:09
9 00000420 20 09/02/2022 13:05:38
10 00000420 10 09/02/2022 14:07:30
11 00000420 20 09/02/2022 14:14:56
12 00000420 10 09/02/2022 14:38:17
15 00000420 20 09/02/2022 16:02:12
16 00000420 10 09/02/2022 16:13:44
17 00000420 20 09/02/2022 17:15:33
18 00000420 10 09/02/2022 17:18:33
19 00000420 20 09/02/2022 18:10:10
20 00000420 20 09/02/2022 18:15:50
21 00000384 20 09/02/2022 09:10:52
21 00000384 10 09/02/2022 09:12:57
22 00000384 20 09/02/2022 09:18:42
23 00000384 10 09/02/2022 09:37:07
24 00000384 20 09/02/2022 09:53:02
25 00000384 10 09/02/2022 09:57:34
26 00000384 20 09/02/2022 10:56:25
27 00000384 10 09/02/2022 11:15:36
28 00000384 20 09/02/2022 11:38:26
29 00000384 10 09/02/2022 12:00:37
30 00000384 10 09/02/2022 12:18:30
31 00000384 10 09/02/2022 13:40:52
32 00000384 20 09/02/2022 13:42:32
33 00000384 10 09/02/2022 13:44:52
34 00000384 20 09/02/2022 14:19:57
35 00000384 10 09/02/2022 14:40:05
36 00000384 20 09/02/2022 16:00:40
37 00000384 20 09/02/2022 16:05:12
38 00000384 20 09/02/2022 16:15:52
39 00000384 10 09/02/2022 16:28:50
40 00000384 20 09/02/2022 18:07:22
39 00000384 10 09/02/2022 18:28:50
40 sat?r secildi.
In order to simplify the example, let’s start with a single userid. MEASURES columns must have aliases. MATCH_NUMBER() and CLASSIFIER() are predefined functions.
match_number() – represents the match number for all matching rows.
classifier() – represents that which pattern variable map to which row.
WITH
Q1 AS (
SELECT * FROM T_TEST WHERE USERID = '00000420' AND
TRUNC(EVENTTIME) = TO_DATE('09/02/2022', 'DD/MM/YYYY')
)
SELECT * FROM Q1
MATCH_RECOGNIZE(
PARTITION BY USERID ORDER BY EVENTTIME
MEASURES
MATCH_NUMBER() MN,
CLASSIFIER() CS
ALL ROWS PER MATCH
PATTERN (INP)
DEFINE INP AS EVENTTIME = EVENTTIME
);
USERID EVENTTIME MN CS ID TERMINALID
--------------- -------------------- --- --------------- ------ ----------
00000420 09/02/2022 09:00:55 1 ANYTHING 1 10
00000420 09/02/2022 09:14:28 2 ANYTHING 2 20
00000420 09/02/2022 09:17:15 3 ANYTHING 3 10
00000420 09/02/2022 10:28:32 4 ANYTHING 4 10
00000420 09/02/2022 10:29:53 5 ANYTHING 5 20
00000420 09/02/2022 12:27:58 6 ANYTHING 6 20
00000420 09/02/2022 12:34:40 7 ANYTHING 7 20
00000420 09/02/2022 12:35:09 8 ANYTHING 8 10
00000420 09/02/2022 13:05:38 9 ANYTHING 9 20
00000420 09/02/2022 14:07:30 10 ANYTHING 10 10
00000420 09/02/2022 14:14:56 11 ANYTHING 11 20
00000420 09/02/2022 14:38:17 12 ANYTHING 12 10
00000420 09/02/2022 16:02:12 13 ANYTHING 15 20
00000420 09/02/2022 16:13:44 14 ANYTHING 16 10
00000420 09/02/2022 17:15:33 15 ANYTHING 17 20
00000420 09/02/2022 17:18:33 16 ANYTHING 18 10
00000420 09/02/2022 18:10:10 17 ANYTHING 19 20
00000420 09/02/2022 18:15:50 18 ANYTHING 20 20
18 sat?r secildi.
As you can see, I use ALL ROWS PER MATCH clause to get the all matched rows. Using this with predefined functions are handy for debugging. In addition, you can also use ALL ROWS PER MATCH WITH UNMATCHED ROWS syntax to get all matched and unmatched rows. Now, time to create regex pattern for our business model. INP and OUTP are pattern variables.
WITH
Q1 AS (
SELECT * FROM T_TEST WHERE USERID = '00000420' AND
TRUNC(EVENTTIME) = TO_DATE('09/02/2022', 'DD/MM/YYYY')
)
SELECT
USERID, EVENTTIME, TERMINALID, MN, CS
FROM Q1
MATCH_RECOGNIZE(
PARTITION BY USERID ORDER BY EVENTTIME
MEASURES
MATCH_NUMBER() MN,
CLASSIFIER() CS
ALL ROWS PER MATCH
PATTERN (INP+ OUTP+)
DEFINE
INP AS TERMINALID = 10,
OUTP AS TERMINALID = 20
);
USERID EVENTTIME TERMINALID MN CS
--------------- -------------------- ---------- --- ---------------
00000420 09/02/2022 09:00:55 10 1 INP
00000420 09/02/2022 09:14:28 20 1 OUTP
00000420 09/02/2022 09:17:15 10 2 INP
00000420 09/02/2022 10:28:32 10 2 INP
00000420 09/02/2022 10:29:53 20 2 OUTP
00000420 09/02/2022 12:27:58 20 2 OUTP
00000420 09/02/2022 12:34:40 20 2 OUTP
00000420 09/02/2022 12:35:09 10 3 INP
00000420 09/02/2022 13:05:38 20 3 OUTP
00000420 09/02/2022 14:07:30 10 4 INP
00000420 09/02/2022 14:14:56 20 4 OUTP
00000420 09/02/2022 14:38:17 10 5 INP
00000420 09/02/2022 16:02:12 20 5 OUTP
00000420 09/02/2022 16:13:44 10 6 INP
00000420 09/02/2022 17:15:33 20 6 OUTP
00000420 09/02/2022 17:18:33 10 7 INP
00000420 09/02/2022 18:10:10 20 7 OUTP
00000420 09/02/2022 18:15:50 20 7 OUTP
18 sat²rlar² seildi.
So, you can see every matching row group under MN column. In addition, in order to get first and last values, as you might expect, we can use first() and last() functions. FINAL and RUNNING keywords are also used for windowing clause. These are defining, if it relates from the start of the pattern to the current row or to the whole pattern. Check out the LAST_EVENTTIME and FINAL_EVENTTIME columns below to see the difference.
WITH
Q1 AS (
SELECT * FROM T_TEST WHERE USERID = '00000420' AND
TRUNC(EVENTTIME) = TO_DATE('09/02/2022', 'DD/MM/YYYY')
)
SELECT
USERID, EVENTTIME, TERMINALID, MN, CS, FIRST_EVENTTIME, LAST_EVENTTIME, FINAL_LAST_EVENTTIME
FROM Q1
MATCH_RECOGNIZE(
PARTITION BY USERID ORDER BY EVENTTIME
MEASURES
MATCH_NUMBER() MN,
CLASSIFIER() CS,
FIRST(INP.EVENTTIME) FIRST_EVENTTIME,
LAST(OUTP.EVENTTIME) LAST_EVENTTIME,
FINAL LAST(OUTP.EVENTTIME) FINAL_LAST_EVENTTIME
ALL ROWS PER MATCH
PATTERN (INP+ OUTP+)
DEFINE
INP AS TERMINALID = 10,
OUTP AS TERMINALID = 20
);
USERID EVENTTIME TERMINALID MN CS FIRST_EVENTTIME LAST_EVENTTIME FINAL_EVENTTIME
--------------- -------------------- ---------- --- --------------- ------------------- ------------------- -------------------
00000420 09/02/2022 09:00:55 10 1 INP 09/02/2022 09:00:55 09/02/2022 09:14:28
00000420 09/02/2022 09:14:28 20 1 OUTP 09/02/2022 09:00:55 09/02/2022 09:14:28 09/02/2022 09:14:28
00000420 09/02/2022 09:17:15 10 2 INP 09/02/2022 09:17:15 09/02/2022 12:34:40
00000420 09/02/2022 10:28:32 10 2 INP 09/02/2022 09:17:15 09/02/2022 12:34:40
00000420 09/02/2022 10:29:53 20 2 OUTP 09/02/2022 09:17:15 09/02/2022 10:29:53 09/02/2022 12:34:40
00000420 09/02/2022 12:27:58 20 2 OUTP 09/02/2022 09:17:15 09/02/2022 12:27:58 09/02/2022 12:34:40
00000420 09/02/2022 12:34:40 20 2 OUTP 09/02/2022 09:17:15 09/02/2022 12:34:40 09/02/2022 12:34:40
00000420 09/02/2022 12:35:09 10 3 INP 09/02/2022 12:35:09 09/02/2022 13:05:38
00000420 09/02/2022 13:05:38 20 3 OUTP 09/02/2022 12:35:09 09/02/2022 13:05:38 09/02/2022 13:05:38
00000420 09/02/2022 14:07:30 10 4 INP 09/02/2022 14:07:30 09/02/2022 14:14:56
00000420 09/02/2022 14:14:56 20 4 OUTP 09/02/2022 14:07:30 09/02/2022 14:14:56 09/02/2022 14:14:56
00000420 09/02/2022 14:38:17 10 5 INP 09/02/2022 14:38:17 09/02/2022 16:02:12
00000420 09/02/2022 16:02:12 20 5 OUTP 09/02/2022 14:38:17 09/02/2022 16:02:12 09/02/2022 16:02:12
00000420 09/02/2022 16:13:44 10 6 INP 09/02/2022 16:13:44 09/02/2022 17:15:33
00000420 09/02/2022 17:15:33 20 6 OUTP 09/02/2022 16:13:44 09/02/2022 17:15:33 09/02/2022 17:15:33
00000420 09/02/2022 17:18:33 10 7 INP 09/02/2022 17:18:33 09/02/2022 18:15:50
00000420 09/02/2022 18:10:10 20 7 OUTP 09/02/2022 17:18:33 09/02/2022 18:10:10 09/02/2022 18:15:50
00000420 09/02/2022 18:15:50 20 7 OUTP 09/02/2022 17:18:33 09/02/2022 18:15:50 09/02/2022 18:15:50
18 sat²rlar² seildi.
If the pattern clause meets the business logic. We can use now ONE ROW PER MATCH syntax for the final result.
WITH
Q1 AS (
SELECT * FROM T_TEST WHERE TRUNC(EVENTTIME) = TO_DATE('09/02/2022', 'DD/MM/YYYY')
)
SELECT
*
FROM Q1
MATCH_RECOGNIZE(
PARTITION BY USERID ORDER BY EVENTTIME
MEASURES
FIRST(INP.EVENTTIME) FIRST_EVENTTIME,
LAST(OUTP.EVENTTIME) LAST_EVENTTIME
ONE ROW PER MATCH
PATTERN (INP+ OUTP+)
DEFINE
INP AS TERMINALID = 10,
OUTP AS TERMINALID = 20
) ORDER BY USERID, FIRST_EVENTTIME;
USERID FIRST_EVENTTIME LAST_EVENTTIME
--------------- ------------------- -------------------
00000384 09/02/2022 09:12:57 09/02/2022 09:18:42
00000384 09/02/2022 09:37:07 09/02/2022 09:53:02
00000384 09/02/2022 09:57:34 09/02/2022 10:56:25
00000384 09/02/2022 11:15:36 09/02/2022 11:38:26
00000384 09/02/2022 12:00:37 09/02/2022 13:42:32
00000384 09/02/2022 13:44:52 09/02/2022 14:19:57
00000384 09/02/2022 14:40:05 09/02/2022 16:15:52
00000384 09/02/2022 16:28:50 09/02/2022 18:07:22
00000420 09/02/2022 09:00:55 09/02/2022 09:14:28
00000420 09/02/2022 09:17:15 09/02/2022 12:34:40
00000420 09/02/2022 12:35:09 09/02/2022 13:05:38
00000420 09/02/2022 14:07:30 09/02/2022 14:14:56
00000420 09/02/2022 14:38:17 09/02/2022 16:02:12
00000420 09/02/2022 16:13:44 09/02/2022 17:15:33
00000420 09/02/2022 17:18:33 09/02/2022 18:15:50
15 sat²rlar² seildi.
I hope you find this post helpful 🙂