-- LongestCommonSubstring needs a dbo.Numbers table: a single INT column holding
-- a contiguous run of integers starting at 1. If you haven't got one, this
-- batch creates and fills it automatically.
IF NOT EXISTS (
    SELECT 1
    FROM INFORMATION_SCHEMA.TABLES
    WHERE TABLE_SCHEMA = 'dbo'      -- the table is created in dbo below, so look there
      AND TABLE_NAME = 'Numbers'
)
BEGIN
    CREATE TABLE [dbo].[Numbers]
    (
        [number] [int] NOT NULL,
        CONSTRAINT [Index_Numbers] PRIMARY KEY CLUSTERED ([number] ASC)
            ON [PRIMARY]
    )
    ON [PRIMARY]
END

-- Refill only when the table does not reach 99999. An existing table that
-- already contains that value is left untouched, whatever else it holds.
IF NOT EXISTS (SELECT 1 FROM dbo.Numbers WHERE number = 99999)
BEGIN
    -- WARNING: this discards whatever is currently in dbo.Numbers.
    TRUNCATE TABLE dbo.Numbers

    -- Seven decimal digit positions cross-joined give 0 .. 9,999,999;
    -- adding 1 makes the stored range 1 .. 10,000,000.
    ;WITH Digits(i) AS
    (
        SELECT i
        FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9), (0)) AS X(i)
    )
    INSERT INTO dbo.Numbers (number)
    SELECT (D6.i * 1000000 + D5.i * 100000 + D4.i * 10000 + D3.i * 1000
            + D2.i * 100 + D1.i * 10 + D0.i + 1) AS seq
    FROM Digits AS D0
    CROSS JOIN Digits AS D1
    CROSS JOIN Digits AS D2
    CROSS JOIN Digits AS D3
    CROSS JOIN Digits AS D4
    CROSS JOIN Digits AS D5
    CROSS JOIN Digits AS D6
END

-- Remove any existing version of the function so that the CREATE FUNCTION
-- batch that follows can run. The name is passed to OBJECT_ID exactly as the
-- function is created: no padding inside the literal, N prefix attached.
IF OBJECT_ID(N'LongestCommonSubstring') IS NOT NULL
    DROP FUNCTION LongestCommonSubstring
GO

CREATE FUNCTION LongestCommonSubstring
/**
summary: >
  The longest common substring (LCS) is the longest run of consecutive
  characters that appears in both of two strings. If you, for example, were to
  compare 'And the Dish ran away with the Spoon' with 'away', you'd get 'away'
  as being the string in common. Likewise, comparing '465932859472109683472'
  with '697834859472135348' would give you '8594721'.
  This returns a one-row table that gives you the length and location of the
  match as well as the matched text itself. No row is returned when the two
  strings have no character in common, or when either of them is NULL or has
  a LEN() of zero. It can easily be modified to give you all the common
  substrings (whatever your criterion for the smallest substring is, e.g. two
  characters).
notes: >
  Requires the dbo.Numbers table (integers from 1 upwards); only positions that
  exist in that table are examined. Characters are compared with the =
  operator, so case/accent sensitivity presumably follows the collation in
  effect (verify for your database). String lengths are taken with LEN().
  CommonString is declared VARCHAR(8000), so a longer match will not fit in
  the result column.
Author: Phil Factor
Revision: 1.0
date: 05 Dec 2014
example:
  code: |
    Select * from dbo.LongestCommonSubstring ('1234', '1224533324')
    Select * from dbo.LongestCommonSubstring ('thisisatest', 'testing123testing')
    Select * from dbo.LongestCommonSubstring ( 'findthishere', 'where is this?')
    Select * from dbo.LongestCommonSubstring ( null, 'xab')
    Select * from dbo.LongestCommonSubstring ( 'not beginning-middle-ending',
                                               'beginning-diddle-dum-ending')
returns: >
  a table of at most one row describing the longest common substring. When
  several matches share the greatest length, the one starting earliest in the
  first string wins, then the one starting earliest in the second string.
**/
(
    @firstString VARCHAR(MAX),
    @SecondString VARCHAR(MAX)
)
RETURNS @hit TABLE -- returns a single-row table
-- (it is easy to change to return a string but I wanted the location of the match)
(
    MatchLength INT,            -- the length of the match. Not necessarily the length of input
    FirstCharInMatch INT,       -- position of the first matched character in the first string
    FirstCharInString INT,      -- position of the first matched character in the second string
    CommonString VARCHAR(8000)  -- the matched text, cut from @SecondString
)
AS
BEGIN
    -- Step 1: split both strings into (position, character) rows.
    -- Step 2: pair up every position of the first string with every position of
    --         the second that holds the same character. FirstOrder - SecondOrder
    --         (TheGroup) is constant along any run of consecutive matches, so it
    --         identifies the "diagonal" a pair lies on.
    -- Step 3: inside one diagonal, SecondOrder minus its row number stays
    --         constant for as long as SecondOrder increases by exactly one, and
    --         changes whenever the run is broken. That difference (RunId) tags
    --         each unbroken run without relying on the order in which rows are
    --         physically updated.
    -- Step 4: aggregate per run and keep the longest.
    ;WITH FirstChars AS
    (
        SELECT number AS Position,
               SUBSTRING(@firstString, number, 1) AS ch
        FROM dbo.Numbers
        WHERE number <= LEN(@firstString)
    ),
    SecondChars AS
    (
        SELECT number AS Position,
               SUBSTRING(@SecondString, number, 1) AS ch
        FROM dbo.Numbers
        WHERE number <= LEN(@SecondString)
    ),
    Matches AS
    (
        SELECT TheFirst.Position - TheSecond.Position AS TheGroup,
               TheFirst.Position AS FirstOrder,
               TheSecond.Position AS SecondOrder
        FROM FirstChars AS TheFirst
        INNER JOIN SecondChars AS TheSecond
            ON TheFirst.ch = TheSecond.ch -- do all valid matches
    ),
    Runs AS
    (
        SELECT TheGroup,
               FirstOrder,
               SecondOrder,
               SecondOrder
                 - ROW_NUMBER() OVER (PARTITION BY TheGroup
                                      ORDER BY SecondOrder) AS RunId
        FROM Matches
    )
    INSERT INTO @hit (MatchLength, FirstCharInMatch, FirstCharInString, CommonString)
    SELECT TOP (1) -- just the first. You may want more so feel free to change
        COUNT(*) AS MatchLength,
        MIN(FirstOrder) AS FirstCharInMatch,
        MIN(SecondOrder) AS FirstCharInString,
        SUBSTRING(@SecondString, MIN(SecondOrder), COUNT(*)) AS CommonString
    FROM Runs
    GROUP BY TheGroup, RunId
    ORDER BY COUNT(*) DESC, MIN(FirstOrder) ASC, MIN(SecondOrder) ASC

    RETURN
END

GO

-- ...and we do a test run.
-- Each fixture row is (first string, second string, expected common substring).
-- OUTER APPLY keeps the fixture row even when the function returns no row, so a
-- NULL expectation can be checked too; both sides are COALESCEd to the same
-- sentinel so that NULL compares equal to NULL. An error is raised if any row
-- disagrees with its expectation.
IF EXISTS (
    SELECT 1
    FROM (VALUES
            ('Call me Ishmael. Some years ago...', 'Something', 'Some'),
            ('unrestfulness', 'having little or no money in my purse, and nothing particular to interest me on shore', 'rest'),
            ('1234563457', '3456', '3456'),
            (' ', ' ', NULL),   -- LEN() does not count trailing spaces, so nothing is compared
            (NULL, ' ', NULL),
            ('I find myself involuntarily pausing before coffin warehouses', 'Jailhouse rock', 'house'),
            (',.-=dfgd%', '-=', '-='),
            ('protest is useless', 'I need to test this routine. Tests are valuable', 'test')
         ) AS X (FirstString, SecondString, Correct)
    OUTER APPLY dbo.LongestCommonSubstring(X.FirstString, X.SecondString) AS LCS
    WHERE COALESCE(X.Correct, ' null') <> COALESCE(LCS.CommonString, ' null')
)
    RAISERROR (' the LongestCommonSubstring routine has broken', 16, 1)
-- 0 commit comments