view string_search/kmp.c @ 11:b8cfd6af18b3

Added Knuth-MOrris-Pratt string search and sample text file for testing.
author Eris Caffee <discordia@eldalin.com>
date Tue, 18 Aug 2015 12:22:22 -0500
parents
children 5b67bf33fccc
line source
1 /* Demo client to compare brute force search against Knuth-Morris-Pratt string search.
2 *
3 * gcc --std=c99 kmp.c
4 *
5 * a.out "string to find" file_to_search
6 *
7 */
9 #define _GNU_SOURCE 1
10 #include <stdio.h>
11 #include <string.h>
12 #include <stddef.h>
13 #include <stdlib.h>
14 #include <sys/time.h>
16 /******************************************************************************/
17 /* Knuth-Morris-Pratt algorithm
18 *
19 * As described on Wikipedia
20 * https://duckduckgo.com/l/?kh=-1&uddg=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FKnuth-Morris-Pratt_algorithm
21 */
23 size_t kmp_search( char *haystack, char *needle ) {
25 size_t haystack_length = strlen(haystack);
26 size_t needle_length = strlen(needle);
28 /* Build the backup table */
30 int pos = 2;
31 int cnd = 0;
33 needle_length = strlen( needle );
35 int *table = malloc( needle_length * sizeof(int) );
36 table[0] = -1;
37 table[1] = 0;
39 while ( pos < needle_length ) {
40 if ( needle[pos-1] == needle[cnd] ) {
41 cnd++;
42 table[pos] = cnd;
43 pos++;
44 }
45 else if ( cnd > 0 ) {
46 cnd = table[cnd];
47 }
48 else {
49 table[pos] = 0;
50 pos++;
51 }
52 }
54 /* Do the actual search */
56 size_t m = 0;
57 size_t i = 0;
59 while ( m + i < haystack_length ) {
60 if ( needle[i] == haystack[m+i] ) {
61 if ( i == needle_length - 1 ) {
62 return m;
63 }
64 i++;
65 }
66 else {
67 if ( table[i] > -1 ) {
68 m = m + i - table[i];
69 i = table[i];
70 } else {
71 i = 0;
72 m++;
73 }
74 }
75 }
77 return -1;
78 }
80 /******************************************************************************/
81 /* brute force search
82 */
84 size_t find_string ( char *haystack, char * needle ) {
86 int needle_length = strlen( needle );
87 int haystack_length = strlen( haystack );
88 for ( size_t i = 0; i < haystack_length - needle_length; i++) {
89 if ( strncmp( needle, &haystack[i], needle_length ) == 0 ) {
90 return i;
91 }
92 }
93 return -1;
94 }
96 /******************************************************************************/
98 void timevaldiff( struct timeval *elapsed, struct timeval *start, struct timeval *end ) {
99 time_t sec;
100 suseconds_t usec;
102 sec = end->tv_sec - start->tv_sec;
103 usec = end->tv_usec - start->tv_usec;
105 while ( usec > 1000000 ) {
106 sec += 1;
107 usec -= 1000000;
108 }
110 elapsed->tv_sec = sec;
111 elapsed->tv_usec = usec;
112 }
114 /******************************************************************************/
116 int main( int argc, char **argv ) {
117 char *def_needle = "ABCDABD";
118 char *def_haystack = "ABC ABCDAB ABCDABCDABDE";
120 char * needle;
121 char * haystack;
122 size_t haystack_max;
123 size_t haystack_top = 0;
124 haystack = malloc( sizeof( char ) );
125 haystack_max = 1;
127 if ( argc >= 2 ) {
128 needle = argv[1];
129 }
130 else {
131 needle = def_needle;
132 }
134 if ( argc >= 3 ) {
135 FILE * file;
136 char buffer[65536];
137 size_t bytes_read;
139 file = fopen( argv[2], "r" );
140 while ( bytes_read = fread( buffer, sizeof( char ), sizeof( buffer ), file ) ) {
141 while ( haystack_max - haystack_top < bytes_read ) {
142 size_t new_haystack_max = 2 * haystack_max;
143 char * new_haystack = malloc( new_haystack_max );
144 for ( size_t i = 0; i < haystack_max; i++ ) {
145 new_haystack[i] = haystack[i];
146 }
147 free( haystack );
148 haystack = new_haystack;
149 haystack_max = new_haystack_max;
150 }
151 memcpy( &haystack[ haystack_top ], buffer, bytes_read );
152 }
154 fclose( file );
155 }
156 else {
157 haystack = def_haystack;
158 }
161 struct timeval start;
162 struct timeval end;
163 struct timeval elapsed;
164 size_t pos;
165 char * found;
167 gettimeofday( &start, NULL );
168 pos = kmp_search( haystack, needle );
169 gettimeofday( &end, NULL );
170 timevaldiff( &elapsed, &start, &end );
172 printf( "pos %d\n", pos );
173 found = strndup( &haystack[pos], strlen(needle)+80 );
174 printf( "found >%s<\n", found );
175 printf( "kmp found in %d.%06d seconds\n", elapsed.tv_sec, elapsed.tv_usec );
178 gettimeofday( &start, NULL );
179 pos = find_string( haystack, needle );
180 gettimeofday( &end, NULL );
181 timevaldiff( &elapsed, &start, &end );
183 printf( "pos %d\n", pos );
184 found = strndup( &haystack[pos], strlen(needle)+80 );
185 printf( "found >%s<\n", found );
186 printf( "brute force found in %d.%06d seconds\n", elapsed.tv_sec, elapsed.tv_usec );
188 exit( EXIT_SUCCESS );
189 }