MediaWiki  1.29.1
captcha-old.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 #
3 # Script to generate distorted text images for a captcha system.
4 #
5 # Copyright (C) 2005 Neil Harris
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 # http://www.gnu.org/copyleft/gpl.html
21 #
22 # Further tweaks by Brion Vibber <brion@pobox.com>:
23 # 2006-01-26: Add command-line options for the various parameters
24 # 2007-02-19: Add --dirs param for hash subdirectory splits
25 # Tweaks by Greg Sabino Mullane <greg@turnstep.com>:
26 # 2008-01-06: Add regex check to skip words containing other than a-z
27 
28 import random
29 import math
30 import hashlib
31 from optparse import OptionParser
32 import os
33 import sys
34 import re
35 
# The Python Imaging Library is a hard requirement: exit with a readable
# message instead of a traceback when it cannot be imported.
try:
    from PIL import Image
    from PIL import ImageFont
    from PIL import ImageDraw
    from PIL import ImageEnhance
    from PIL import ImageOps
except ImportError:
    # Catch only the failed import; a bare "except" would also swallow
    # KeyboardInterrupt, SystemExit and unrelated errors.
    sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
44 
# Matches any character outside lowercase a-z; a hit marks a word as unsuitable.
nonalpha = re.compile(r'[^a-z]')
46 
def wobbly_copy(src, wob, col, scale, ang):
    """Return an X-axis "wobbly" copy of src, sandwiched between two rotates.

    The image is rotated by roughly ``ang`` degrees, every scan line inside
    the rotated content's bounding box is pasted onto a black image at a
    horizontal offset (a sine term plus random jitter), and the result is
    rotated back and sharpened.

    src   -- PIL image to distort.
    wob   -- amplitude of the sine displacement; the random jitter is up to
             half of this value in either direction.
    col   -- unused; kept so that existing callers keep working.
    scale -- multiplier for the randomly chosen wave frequency.
    ang   -- nominal rotation in degrees; a random -30..30 is added to it.

    Returns the distorted image, or ``src`` itself when the rotated image
    has no bounding box (getbbox() returned None).
    """
    width, height = src.size
    freq = random.uniform(4*scale, 5*scale)
    phase = random.uniform(0, math.pi*2)
    angle = ang + random.uniform(-30, 30)  # vary, but not too much
    rotated = src.rotate(angle, Image.BILINEAR)
    # Do a cheap bounding-box op here to try to limit work below
    bbox = rotated.getbbox()
    if bbox is None:
        return src
    _, top, _, bottom = bbox
    # Allocate the black target only once we know there is something to copy.
    canvas = Image.new('RGB', src.size, 0)
    # Only do lines with content on
    for row in range(top, bottom + 1):
        # Drop a scan line in, shifted by the wave plus some jitter
        shift = int(math.sin(phase + (row*freq/height))*wob)
        shift += int(random.uniform(-wob*0.5, wob*0.5))
        canvas.paste(rotated.crop((0, row, width, row + 1)), (shift, row))
    canvas = canvas.rotate(-angle, Image.BILINEAR)
    # Sharpen to try to stop blurring from building up over repeated passes
    return ImageEnhance.Sharpness(canvas).enhance(2)
71 
72 
def gen_captcha(text, fontname, fontsize, file_name):
    """Generate a captcha image of ``text`` and save it as ``file_name``.

    text      -- the string to render.
    fontname  -- path of the TrueType font handed to ImageFont.truetype().
    fontsize  -- font size handed to ImageFont.truetype().
    file_name -- output path; the image format is determined from it.

    Raises ValueError when the distorted image contains no visible pixels,
    since there would be nothing to crop to.
    """
    # white text on a black background
    bgcolor = 0x0
    fgcolor = 0xffffff
    # create a font object
    font = ImageFont.truetype(fontname, fontsize)
    # determine dimensions of the text
    if hasattr(font, 'getsize'):
        dim = font.getsize(text)
    else:
        # Newer Pillow releases no longer provide getsize().
        # NOTE(review): (right - left, bottom) is intended to match what
        # getsize() used to return -- confirm against the Pillow in use.
        left, _top, right, bottom = font.getbbox(text)
        dim = (right - left, bottom)
    # create a new image significantly larger than the text
    edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
    im = Image.new('RGB', (edge, edge), bgcolor)
    d = ImageDraw.Draw(im)
    x, y = im.size
    # add the text to the image, centred; floor division keeps the
    # coordinates integral under Python 3 exactly as under Python 2
    d.text((x//2 - dim[0]//2, y//2 - dim[1]//2), text, font=font, fill=fgcolor)
    k = 3
    wob = 0.20*dim[1]/k
    rot = 45
    # Apply lots of small stirring operations, rather than a few large ones
    # in order to get some uniformity of treatment, whilst
    # maintaining randomness
    for i in range(k):
        im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
        im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
        im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
        rot += 30

    # now get the bounding box of the nonzero parts of the image
    bbox = im.getbbox()
    if bbox is None:
        # Nothing was drawn; indexing None below would be an opaque TypeError.
        raise ValueError("captcha text %r produced an empty image" % (text,))
    bord = min(dim[0], dim[1])//4  # a bit of a border
    im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))
    # and turn into black on white
    im = ImageOps.invert(im)

    # save the image, in format determined from filename
    im.save(file_name)
110 
def gen_subdir(basedir, md5hash, levels):
    """Generate a subdirectory path out of the first _levels_
    characters of _md5hash_, and ensure the directories exist
    under _basedir_.

    Each character becomes one nesting level, e.g. "abcdef" with 2 levels
    gives "a/b". Returns the path relative to _basedir_, or None when
    _levels_ is 0. Raises OSError when a level cannot be created and is not
    already a directory.
    """
    subdir = None
    for i in range(levels):
        char = md5hash[i]
        subdir = os.path.join(subdir, char) if subdir else char
        fulldir = os.path.join(basedir, subdir)
        try:
            os.mkdir(fulldir)
        except OSError:
            # Attempting the mkdir and then checking avoids the race of
            # "exists?" followed by "mkdir" when several runs share the
            # output tree. An existing directory is fine; anything else
            # (permissions, a plain file in the way) is a real error.
            if not os.path.isdir(fulldir):
                raise
    return subdir
126 
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Make a single attempt at producing a captcha word.

    words      -- list of dictionary words to draw from, or None to build a
                  string of random lowercase letters instead.
    blacklist  -- iterable of substrings that must not occur in the result;
                  empty entries are ignored.
    verbose    -- print the candidate and the reason for any rejection.
    nwords     -- number of dictionary words to concatenate (at least one
                  word is always used); not used in random mode.
    min_length -- minimum acceptable length of the result.
    max_length -- maximum acceptable length. A value <= 0 means no limit for
                  dictionary words; in random mode it means a cap of 10,
                  raised to min_length when that is larger.

    Returns the word, or None when this attempt has to be discarded.
    """
    if words is not None:
        if not words:
            # Nothing to draw from; picking from an empty list would raise.
            if verbose:
                print("skipping because the word list is empty")
            return None
        word = random.choice(words)
        for _ in range(nwords - 1):
            word += random.choice(words)
    else:
        if max_length <= 0:
            max_length = max(10, min_length)
        if min_length > max_length:
            # Unsatisfiable limits: reject, as the dictionary path would,
            # instead of letting randint() raise on an empty range.
            if verbose:
                print("skipping because minimum length %d exceeds maximum length %d" % (min_length, max_length))
            return None
        length = random.randint(min_length, max_length)
        word = ''.join(chr(97 + random.randint(0, 25)) for _ in range(length))

    if verbose:
        print("word is %s" % word)

    if len(word) < min_length:
        if verbose:
            print("skipping word pair '%s' because it has fewer than %d characters" % (word, min_length))
        return None

    if max_length > 0 and len(word) > max_length:
        if verbose:
            print("skipping word pair '%s' because it has more than %d characters" % (word, max_length))
        return None

    if nonalpha.search(word):
        if verbose:
            print("skipping word pair '%s' because it contains non-alphabetic characters" % word)
        return None

    for naughty in blacklist:
        # An empty entry is a substring of every word and would reject
        # everything, so it is skipped.
        if naughty and naughty in word:
            if verbose:
                print("skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty))
            return None
    return word
164 
def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
    """Return the first acceptable word from try_pick_word().

    All arguments are passed straight through. After 1000 failed attempts
    the script gives up and exits with an error message.
    """
    attempts_left = 1000
    while attempts_left > 0:
        candidate = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
        if candidate:
            return candidate
        attempts_left -= 1
    sys.exit("Unable to find valid word combinations")
171 
def read_wordlist(filename):
    """Read a word list file with one entry per line.

    Each line is stripped of surrounding whitespace and lower-cased. Blank
    lines are dropped: an empty entry is never a usable word, and as a
    blacklist entry it would match every candidate.

    Returns the entries as a list, in file order.
    """
    # "with" closes the file even if reading fails part-way through.
    with open(filename) as f:
        cleaned = (line.strip().lower() for line in f)
        return [entry for entry in cleaned if entry]
177 
if __name__ == '__main__':
    # This grabs random words from the dictionary 'words' (one
    # word per line) and generates a captcha image for each one,
    # with a keyed salted hash of the correct answer in the filename.
    #
    # To check a reply, hash it in the same way with the same salt and
    # secret key, then compare with the hash value given.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    parser = OptionParser()
    parser.add_option("--wordlist", help="A list of words (required unless --random is given)", metavar="WORDS.txt")
    parser.add_option("--random", help="Use random characters instead of a wordlist", action="store_true")
    parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
    parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
    parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
    parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
    parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
    parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
    parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
    parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
    parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
    parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
    parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
    parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)

    opts, args = parser.parse_args()

    # Validate the required options up front so that mistakes give a short
    # message rather than a traceback half-way through the run.
    if opts.wordlist:
        wordlist = opts.wordlist
    elif opts.random:
        wordlist = None
    else:
        sys.exit("Need to specify a wordlist")
    if opts.key:
        key = opts.key
    else:
        sys.exit("Need to specify a key")
    if opts.output:
        output = opts.output
    else:
        sys.exit("Need to specify an output directory")
    if not os.path.isdir(output):
        # Nothing below creates the top-level directory, so listing it or
        # saving into it would fail later anyway.
        sys.exit("Output directory '%s' does not exist" % output)
    if opts.font and os.path.exists(opts.font):
        font = opts.font
    else:
        sys.exit("Need to specify the location of a font")

    blacklist = read_wordlist(opts.blacklist)
    count = opts.count
    fill = opts.fill
    dirs = opts.dirs
    verbose = opts.verbose
    fontsize = opts.font_size

    if fill:
        # Only top the directory up to "fill" entries
        count = max(0, fill - len(os.listdir(output)))

    words = None
    if wordlist:
        words = read_wordlist(wordlist)
        # Keep only 4-5 letter words that do not start with "f" and that do
        # not begin or end with a doubled letter
        words = [x for x in words
                 if len(x) in (4, 5) and x[0] != "f"
                 and x[0] != x[1] and x[-1] != x[-2]]
        if not words:
            sys.exit("No usable words found in wordlist '%s'" % wordlist)

    for i in range(count):
        word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
        salt = "%08x" % random.randrange(2**32)
        # 64 bits of hash is plenty for this purpose
        md5hash = hashlib.md5((key+salt+word+key+salt).encode('utf-8')).hexdigest()[:16]
        filename = "image_%s_%s.png" % (salt, md5hash)
        if dirs:
            subdir = gen_subdir(output, md5hash, dirs)
            filename = os.path.join(subdir, filename)
        if verbose:
            print(filename)
        gen_captcha(word, font, fontsize, os.path.join(output, filename))
253 
captcha-old.wobbly_copy
def wobbly_copy(src, wob, col, scale, ang)
Definition: captcha-old.py:48
captcha-old.try_pick_word
def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
Definition: captcha-old.py:127
Makefile.open
open
Definition: Makefile.py:18
captcha-old.read_wordlist
def read_wordlist(filename)
Definition: captcha-old.py:172
captcha-old.pick_word
def pick_word(words, blacklist, verbose, nwords, min_length, max_length)
Definition: captcha-old.py:165
captcha-old.gen_captcha
def gen_captcha(text, fontname, fontsize, file_name)
Definition: captcha-old.py:73
captcha-old.gen_subdir
def gen_subdir(basedir, md5hash, levels)
Definition: captcha-old.py:111