diff --git a/PDFKitten.xcodeproj/project.pbxproj b/PDFKitten.xcodeproj/project.pbxproj index e7a2419..82e307c 100644 --- a/PDFKitten.xcodeproj/project.pbxproj +++ b/PDFKitten.xcodeproj/project.pbxproj @@ -52,6 +52,9 @@ 2CE7BCE713897245009784B9 /* Scanner.m in Sources */ = {isa = PBXBuildFile; fileRef = 2CE7BCE613897245009784B9 /* Scanner.m */; }; 2CE7BCEB1389725D009784B9 /* RenderingState.m in Sources */ = {isa = PBXBuildFile; fileRef = 2CE7BCEA1389725D009784B9 /* RenderingState.m */; }; 2CE7BCEF13897294009784B9 /* Selection.m in Sources */ = {isa = PBXBuildFile; fileRef = 2CE7BCEE13897294009784B9 /* Selection.m */; }; + 6F54DFB61B2CD66900AB5626 /* EncodingDifferences.m in Sources */ = {isa = PBXBuildFile; fileRef = 6F54DFB51B2CD66900AB5626 /* EncodingDifferences.m */; }; + 6F58E2761B218DEB003BEF8A /* PDFConverter.m in Sources */ = {isa = PBXBuildFile; fileRef = 6F58E2751B218DEB003BEF8A /* PDFConverter.m */; }; + 6F58E2791B21A5B9003BEF8A /* amazon-dynamo-sosp2007.pdf in Resources */ = {isa = PBXBuildFile; fileRef = 6F58E2771B21A5B9003BEF8A /* amazon-dynamo-sosp2007.pdf */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -144,6 +147,11 @@ 2CE7BCEA1389725D009784B9 /* RenderingState.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = RenderingState.m; path = PDFKitten/RenderingState.m; sourceTree = SOURCE_ROOT; }; 2CE7BCED13897293009784B9 /* Selection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Selection.h; path = PDFKitten/Selection.h; sourceTree = SOURCE_ROOT; }; 2CE7BCEE13897294009784B9 /* Selection.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = Selection.m; path = PDFKitten/Selection.m; sourceTree = SOURCE_ROOT; }; + 6F54DFB41B2CD66900AB5626 /* EncodingDifferences.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = EncodingDifferences.h; path = PDFKitten/EncodingDifferences.h; sourceTree = SOURCE_ROOT; }; + 6F54DFB51B2CD66900AB5626 /* EncodingDifferences.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; name = EncodingDifferences.m; path = PDFKitten/EncodingDifferences.m; sourceTree = SOURCE_ROOT; }; + 6F58E2741B218DEB003BEF8A /* PDFConverter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PDFConverter.h; sourceTree = ""; }; + 6F58E2751B218DEB003BEF8A /* PDFConverter.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = PDFConverter.m; sourceTree = ""; }; + 6F58E2771B21A5B9003BEF8A /* amazon-dynamo-sosp2007.pdf */ = {isa = PBXFileReference; lastKnownFileType = image.pdf; name = "amazon-dynamo-sosp2007.pdf"; path = "PDFKitten/Samples/amazon-dynamo-sosp2007.pdf"; sourceTree = SOURCE_ROOT; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -272,6 +280,7 @@ 2CA0FC9A13EEB1430028DCCD /* Resources */ = { isa = PBXGroup; children = ( + 6F58E2771B21A5B9003BEF8A /* amazon-dynamo-sosp2007.pdf */, 2C77F7B31442381300767E6A /* Kurt the Cat.pdf */, 2CDC3F7A138977C0006CC6FB /* InfoPlist.strings */, 2CE7BCAA13896EFB009784B9 /* PDFKitten-Info.plist */, @@ -292,6 +301,7 @@ 2CA0FCA713EEB32D0028DCCD /* PDF Core */ = { isa = PBXGroup; children = ( + 6F58E2731B218DEB003BEF8A /* Helper */, 2CE7BCE413897225009784B9 /* Scanner */, 2CE7BCBB13897129009784B9 /* Font */, 2CE7BCE81389724B009784B9 /* RenderingState */, @@ -348,6 +358,8 @@ 2CE7BCBB13897129009784B9 /* Font */ = { isa = PBXGroup; children = ( + 6F54DFB41B2CD66900AB5626 /* EncodingDifferences.h */, + 6F54DFB51B2CD66900AB5626 /* EncodingDifferences.m */, 2CE7BCC013897135009784B9 /* CMap.h */, 2CE7BCC113897135009784B9 /* CMap.m */, 2CE7BCC81389713D009784B9 /* Font.h */, @@ -402,6 +414,16 @@ name = Selection; sourceTree = ""; }; + 6F58E2731B218DEB003BEF8A /* Helper */ = { + isa = PBXGroup; + children = ( + 6F58E2741B218DEB003BEF8A /* PDFConverter.h */, + 6F58E2751B218DEB003BEF8A /* PDFConverter.m */, + ); + name = Helper; + path = PDFKitten/Helper; + sourceTree = SOURCE_ROOT; + }; /* End PBXGroup section */ /* Begin PBXNativeTarget section */ @@ -439,7 +461,7 @@ name = PDFKittenTests; productName = PDFKittenTests; productReference = 2C35AD6E167E02E000BA5F79 /* PDFKittenTests.octest */; - productType = "com.apple.product-type.bundle"; + productType = "com.apple.product-type.bundle.ocunit-test"; }; /* End PBXNativeTarget section */ @@ -447,6 +469,7 @@ 2C0E159E13589B0B004096C7 /* Project object */ = { isa = PBXProject; attributes = { + LastTestingUpgradeCheck = 0630; LastUpgradeCheck = 0450; ORGANIZATIONNAME = "Chalmers Göteborg"; }; @@ -477,6 +500,7 @@ 2CDC3F81138977C1006CC6FB /* MainWindow.xib in Resources */, 2CA0FCA613EEB1C00028DCCD /* PageViewController.xib in Resources */, 2C77F7B41442381300767E6A /* Kurt the Cat.pdf in Resources */, + 6F58E2791B21A5B9003BEF8A /* amazon-dynamo-sosp2007.pdf in Resources */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -512,6 +536,7 @@ isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; files = ( + 6F58E2761B218DEB003BEF8A /* PDFConverter.m in Sources */, 2CE7BC9D13896EF0009784B9 /* PageView.m in Sources */, 2CE7BCAF13896EFB009784B9 /* main.m in Sources */, 2CE7BCB113896EFB009784B9 /* PDFKittenAppDelegate.m in Sources */, @@ -532,6 +557,7 @@ 2CE7BCEB1389725D009784B9 /* RenderingState.m in Sources */, 2CE7BCEF13897294009784B9 /* Selection.m in Sources */, 2CDC3F65138972E5006CC6FB /* StringDetector.m in Sources */, + 6F54DFB61B2CD66900AB5626 /* EncodingDifferences.m in Sources */, 2CA0FC9413EEB11B0028DCCD /* RootViewController.m in Sources */, 2CA0FC9713EEB1210028DCCD /* PDFPage.m in Sources */, 2CA0FCA113EEB1B80028DCCD /* Page.m in Sources */, @@ -627,6 +653,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + CODE_SIGN_IDENTITY = "iPhone Developer"; COPY_PHASE_STRIP = NO; FRAMEWORK_SEARCH_PATHS = ( "$(inherited)", @@ -647,6 +674,7 @@ isa = XCBuildConfiguration; buildSettings = { ALWAYS_SEARCH_USER_PATHS = NO; + CODE_SIGN_IDENTITY = "iPhone Developer"; COPY_PHASE_STRIP = YES; FRAMEWORK_SEARCH_PATHS = ( "$(inherited)", @@ -672,6 +700,7 @@ CLANG_CXX_LIBRARY = "libc++"; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "iPhone Developer"; COPY_PHASE_STRIP = NO; FRAMEWORK_SEARCH_PATHS = ( "\"$(SDKROOT)/Developer/Library/Frameworks\"", @@ -703,6 +732,7 @@ CLANG_CXX_LIBRARY = "libc++"; CLANG_WARN_EMPTY_BODY = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "iPhone Developer"; COPY_PHASE_STRIP = YES; FRAMEWORK_SEARCH_PATHS = ( "\"$(SDKROOT)/Developer/Library/Frameworks\"", diff --git a/PDFKitten.xcodeproj/project.xcworkspace/xcshareddata/PDFKitten.xccheckout b/PDFKitten.xcodeproj/project.xcworkspace/xcshareddata/PDFKitten.xccheckout new file mode 100644 index 0000000..3315b24 --- /dev/null +++ b/PDFKitten.xcodeproj/project.xcworkspace/xcshareddata/PDFKitten.xccheckout @@ -0,0 +1,41 @@ + + + + + IDESourceControlProjectFavoriteDictionaryKey + + IDESourceControlProjectIdentifier + B0D2DE6B-BC47-463C-8432-CD1C128CFF29 + IDESourceControlProjectName + PDFKitten + IDESourceControlProjectOriginsDictionary + + DD37BB420EBB61E23BECC947958601C145484998 + https://github.com/kenneth488/PDFKitten.git + + IDESourceControlProjectPath + PDFKitten.xcodeproj + IDESourceControlProjectRelativeInstallPathDictionary + + DD37BB420EBB61E23BECC947958601C145484998 + ../.. + + IDESourceControlProjectURL + https://github.com/kenneth488/PDFKitten.git + IDESourceControlProjectVersion + 111 + IDESourceControlProjectWCCIdentifier + DD37BB420EBB61E23BECC947958601C145484998 + IDESourceControlProjectWCConfigurations + + + IDESourceControlRepositoryExtensionIdentifierKey + public.vcs.git + IDESourceControlWCCIdentifierKey + DD37BB420EBB61E23BECC947958601C145484998 + IDESourceControlWCCName + Kenneth%20Kitten + + + + diff --git a/PDFKitten/CIDFont.m b/PDFKitten/CIDFont.m index 512cecd..cb622ff 100644 --- a/PDFKitten/CIDFont.m +++ b/PDFKitten/CIDFont.m @@ -5,7 +5,7 @@ @implementation CIDFont - (NSString *)stringWithPDFString:(CGPDFStringRef)pdfString { unichar *characterIDs = (unichar *) CGPDFStringGetBytePtr(pdfString); - int length = CGPDFStringGetLength(pdfString) / sizeof(unichar); + int length = (int)(CGPDFStringGetLength(pdfString) / sizeof(unichar)); int magicalOffset = ([self isIdentity] ? 0 : 30); NSMutableString *unicodeString = [NSMutableString string]; for (int i = 0; i < length; i++) diff --git a/PDFKitten/CIDType2Font.m b/PDFKitten/CIDType2Font.m index c2c1437..0810aa7 100644 --- a/PDFKitten/CIDType2Font.m +++ b/PDFKitten/CIDType2Font.m @@ -9,16 +9,18 @@ - (id)initWithFontDictionary:(CGPDFDictionaryRef)dict { // Type 2 CID font only: set CID/GID mapping CGPDFObjectRef streamOrName = nil; + if (CGPDFDictionaryGetObject(dict, "CIDToGIDMap", &streamOrName)) { CGPDFObjectType type = CGPDFObjectGetType(streamOrName); identity = (type == kCGPDFObjectTypeName); + if (type == kCGPDFObjectTypeStream) { CGPDFStreamRef stream = nil; if (CGPDFObjectGetValue(streamOrName, kCGPDFObjectTypeStream, &stream)) { - cidGidMap = (NSData *) CGPDFStreamCopyData(stream, nil); + cidGidMap = (__bridge NSData *) CGPDFStreamCopyData(stream, nil); } } } @@ -34,12 +36,6 @@ - (unichar)gidWithCid:(unsigned char)cid return (unichar) gid; } -- (void)dealloc -{ - [cidGidMap release]; - [super dealloc]; -} - - (NSString *)stringWithPDFString:(CGPDFStringRef)pdfString { size_t length = CGPDFStringGetLength(pdfString); diff --git a/PDFKitten/CMap.h b/PDFKitten/CMap.h index 8a5c51e..0d61d67 100644 --- a/PDFKitten/CMap.h +++ b/PDFKitten/CMap.h @@ -1,25 +1,15 @@ #import -extern NSValue *rangeValue(unsigned int from, unsigned int to); - -@interface Operator : NSObject -+ (Operator *)operatorWithStart:(NSString *)start end:(NSString *)end handler:(SEL)handler; -@property (retain) NSString *start; -@property (retain) NSString *end; -@property SEL handler; -@end - @interface CMap : NSObject { - NSMutableDictionary *context; - - /* CMap ranges */ - NSMutableArray *codeSpaceRanges; - - /* Character mappings */ - NSMutableDictionary *characterMappings; - - /* Character range mappings */ - NSMutableDictionary *characterRangeMappings; + + /* CMap ranges */ + NSMutableArray *codeSpaceRanges; + + /* Character mappings */ + NSMutableDictionary *characterMappings; + + /* Character range mappings */ + NSMutableDictionary *characterRangeMappings; } /* Initialize with PDF stream containing a CMap */ @@ -29,12 +19,14 @@ extern NSValue *rangeValue(unsigned int from, unsigned int to); - (id)initWithString:(NSString *)string; /* Unicode mapping for character ID */ -- (unichar)unicodeCharacter:(unichar)cid; +- (NSUInteger)unicodeCharacter:(unichar)cid; -- (unichar)cidCharacter:(unichar)unicode; +- (NSUInteger)cidCharacter:(unichar)unicode; @property (nonatomic, retain) NSMutableArray *codeSpaceRanges; @property (nonatomic, retain) NSMutableDictionary *characterMappings; @property (nonatomic, retain) NSMutableDictionary *characterRangeMappings; +- (void)enumeratePDFStringCharacters:(CGPDFStringRef)pdfString usingBlock:(void (^)(NSUInteger, NSString *))block; + @end diff --git a/PDFKitten/CMap.m b/PDFKitten/CMap.m index 715e602..93f0664 100644 --- a/PDFKitten/CMap.m +++ b/PDFKitten/CMap.m @@ -1,80 +1,46 @@ #import "CMap.h" -static NSSet *sharedOperators = nil; -static NSCharacterSet *sharedTagSet = nil; -static NSCharacterSet *sharedTokenDelimimerSet = nil; -static NSString *kOperatorKey = @"CurrentOperator"; - -NSValue *rangeValue(unsigned int from, unsigned int to) +static NSValue *rangeValue(NSUInteger from, NSUInteger to) { - return [NSValue valueWithRange:NSMakeRange(from, to-from)]; + return [NSValue valueWithRange:NSMakeRange(from, to - from + 1)]; } -@implementation Operator - -+ (Operator *)operatorWithStart:(NSString *)start end:(NSString *)end handler:(SEL)handler -{ - Operator *op = [[[Operator alloc] init] autorelease]; - op.start = start; - op.end = end; - op.handler = handler; - return op; +@implementation CMap { + + // NSString *_debugString; } -- (void)dealloc -{ - [start release]; - [end release]; - - [super dealloc]; -} - -@synthesize start, end, handler; -@end - -@interface CMap () -- (void)handleCodeSpaceRange:(NSString *)string; -- (void)handleCharacter:(NSString *)string; -- (void)handleCharacterRange:(NSString *)string; -- (void)parse:(NSString *)cMapString; -@property(nonatomic, retain) NSMutableDictionary *context; -@property(readonly) NSCharacterSet *tokenDelimiterSet; -@property(readonly) NSCharacterSet *tagSet; -@property(readonly) NSSet *operators; -@end - -@implementation CMap - - (id)initWithString:(NSString *)string { - if ((self = [super init])) - { - [self parse:string]; - } - return self; + + if ((self = [super init])) + { + [self parse:string]; + // _debugString = [string copy]; + + } + return self; } - (id)initWithPDFStream:(CGPDFStreamRef)stream { - NSData *data = (NSData *) CGPDFStreamCopyData(stream, nil); - NSString *text = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding]; + NSData *data = (__bridge NSData *) CGPDFStreamCopyData(stream, nil); + NSString *text = [[NSString alloc] initWithData:data encoding:NSUTF8StringEncoding]; id obj = [self initWithString:text]; - [text release]; - [data release]; return obj; } - (BOOL)isInCodeSpaceRange:(unichar)cid { - for (NSValue *rangeValue in self.codeSpaceRanges) - { - NSRange range = [rangeValue rangeValue]; - if (cid >= range.location && cid <= NSMaxRange(range)) - { - return YES; - } - } - return NO; + for (NSValue *rangeValue in self.codeSpaceRanges) + { + NSRange range = [rangeValue rangeValue]; + if (cid >= range.location && cid <= NSMaxRange(range)) + { + return YES; + } + } + return NO; } #pragma mark - Public methods @@ -82,43 +48,77 @@ - (BOOL)isInCodeSpaceRange:(unichar)cid /**! * Returns the unicode value mapped by the given character ID */ -- (unichar)unicodeCharacter:(unichar)cid +- (NSUInteger)unicodeCharacter:(unichar)cid { - if (![self isInCodeSpaceRange:cid]) return (unichar) NSNotFound; - - NSArray *mappedRanges = [self.characterRangeMappings allKeys]; - for (NSValue *rangeValue in mappedRanges) - { - NSRange range = [rangeValue rangeValue]; - if (cid >= range.location && cid <= NSMaxRange(range)) - { - NSNumber *offsetValue = [self.characterRangeMappings objectForKey:rangeValue]; - return cid + [offsetValue intValue]; - } - } - - NSNumber *result = [self.characterMappings objectForKey:[NSNumber numberWithInt:cid]]; + if (![self isInCodeSpaceRange:cid]) + return NSNotFound; + + NSArray *mappedRanges = [self.characterRangeMappings allKeys]; + for (NSValue *rangeValue in mappedRanges) + { + NSRange range = [rangeValue rangeValue]; + if (cid >= range.location && cid <= NSMaxRange(range)) + { + NSNumber *offsetValue = [self.characterRangeMappings objectForKey:rangeValue]; + return [offsetValue unsignedIntegerValue] + cid - range.location; + } + } + + NSNumber *result = self.characterMappings[@((NSUInteger)cid)]; if (result) { - return [result intValue]; + return [result unsignedIntegerValue]; } - - return (unichar) NSNotFound; + + return NSNotFound; } +/* +- (void)enumeratePDFStringCharacters:(CGPDFStringRef)pdfString usingBlock:(void (^)(NSUInteger, NSString *))block { + + size_t stringLength = CGPDFStringGetLength(pdfString); + const unsigned char *bytes = CGPDFStringGetBytePtr(pdfString); + + int i = 0; + NSUInteger characterCode = 0; + NSUInteger charSize = 0; + + while (i < stringLength) { + + characterCode = characterCode << 8; + characterCode |= bytes[i++]; + + charSize++; + + if (charSize < _codeSpaceRangeMinSize || (![self isInCodeSpaceRange:characterCode] && charSize <= _codeSpaceRangeMaxSize)) + continue; + + NSString *unicode = [self unicodeMappingString:characterCode]; + + block(characterCode, unicode); + + charSize = 0; + characterCode = 0; + } + +} +*/ -- (unichar)cidCharacter:(unichar)unicode { - __block unichar result = NSNotFound; - +- (NSUInteger)cidCharacter:(unichar)unicode { + __block NSUInteger result = NSNotFound; + [self.characterRangeMappings enumerateKeysAndObjectsUsingBlock:^(NSValue *rangeValue, NSNumber *offset, BOOL *stop) { - NSRange range = [rangeValue rangeValue]; - range.location += [offset intValue]; - if (unicode >= range.location && unicode <= NSMaxRange(range)) { - result = unicode - [offset intValue]; + const NSRange range = [rangeValue rangeValue]; + //range.location += [offset intValue]; + const NSUInteger firstUniChar = [offset unsignedIntegerValue]; + //if (unicode >= range.location && unicode <= NSMaxRange(range)) { + if (unicode >= firstUniChar && unicode <= (firstUniChar + range.length)) { + //result = unicode - [offset intValue]; + result = range.location + unicode - firstUniChar; *stop = YES; } }]; if (result != NSNotFound) return result; - + NSArray *keys = [self.characterMappings allKeysForObject:[NSNumber numberWithInt:unicode]]; if (keys.count) { if (keys.count > 1) { @@ -128,246 +128,135 @@ - (unichar)cidCharacter:(unichar)unicode { } else { return NSNotFound; } -/* - // Look up the offsets dictionary for this unicode - for (NSDictionary *dict in offsets) - { - int firstChar = [[dict objectForKey:@"First"] intValue]; - int lastChar = [[dict objectForKey:@"Last"] intValue]; - int offset = [[dict objectForKey:@"Offset"] intValue]; - - for (int i = 0 ; i <= lastChar-firstChar ; i++) { - unichar dictUnicode = offset+i; - if (dictUnicode == unicode) { - return i; - } - } - } */ } -#pragma mark - -#pragma mark Scanner +enum { + + ParseExtModeNone, + ParseExtModeCodeSpaceRange, + ParseExtModeBFRange, + ParseExtModeBFChar, +}; -- (Operator *)operatorWithStartingToken:(NSString *)token { - if (token) { - for (Operator *op in self.operators) { - if ([op.start isEqualToString:token]) { - return op; +- (NSArray *) exractNumbersFromLine:(NSString *) line +{ + NSMutableArray *ma = [NSMutableArray array]; + + NSScanner *scanner = [NSScanner scannerWithString:line]; + while (!scanner.isAtEnd) { + + if ([scanner scanString:@"<" intoString:nil]) { + + NSString *s; + if (![scanner scanUpToString:@">" intoString:&s]) + break; + if (![scanner scanString:@">" intoString:nil]) + break; + + if (s.length) { + + s = [s stringByReplacingOccurrencesOfString:@" " withString:@""]; + NSScanner *hexScaner = [NSScanner scannerWithString:s]; + + unsigned long long value; + if (![hexScaner scanHexLongLong:&value]) + break; + [ma addObject:@(value)]; } } + + [scanner scanUpToString:@"<" intoString:nil]; } - return nil; -} - -/**! - * Returns the next token that is not a comment. Only remainder-of-line comments are supported. - * The scanner is advanced to past the returned token. - * - * @param scanner a scanner - * @return next non-comment token - */ -- (NSString *)tokenByTrimmingComments:(NSScanner *)scanner -{ - NSString *token = nil; - [scanner scanUpToCharactersFromSet:self.tokenDelimiterSet intoString:&token]; - - static NSString *commentMarker = @"%%"; - NSRange commentMarkerRange = [token rangeOfString:commentMarker]; - if (commentMarkerRange.location != NSNotFound) - { - [scanner scanUpToCharactersFromSet:[NSCharacterSet newlineCharacterSet] intoString:nil]; - token = [token substringToIndex:commentMarkerRange.location]; - if (token.length == 0) - { - return [self tokenByTrimmingComments:scanner]; - } - } - - return token; -} - -/**! - * Parse a CMap. - * - * @param cMapString string representation of a CMap - */ -- (void)parse:(NSString *)cMapString -{ - NSScanner *scanner = [NSScanner scannerWithString:cMapString]; - NSString *token = nil; - while (![scanner isAtEnd]) - { - token = [self tokenByTrimmingComments:scanner]; - - Operator *operator = [self operatorWithStartingToken:token]; - if (operator) - { - // Start a new context - self.context = [NSMutableDictionary dictionaryWithObject:operator forKey:kOperatorKey]; - } - else if (self.context) - { - operator = [self.context valueForKey:kOperatorKey]; - if ([token isEqualToString:operator.end]) - { - // End the current context - self.context = nil; - } - else - { - // Send input to the current context - [self performSelector:operator.handler withObject:token]; - } - } - } -} - - -#pragma mark - -#pragma mark Parsing handlers - -/**! - * Trims tag characters from the argument string, and returns the parsed integer value of the string. - * - * @param tagString string representing a hexadecimal number, possibly within tags - */ -- (unsigned int)valueOfTag:(NSString *)tagString -{ - unsigned int numericValue = 0; - tagString = [tagString stringByTrimmingCharactersInSet:self.tagSet]; - [[NSScanner scannerWithString:tagString] scanHexInt:&numericValue]; - return numericValue; -} - -/**! - * Code space ranges are pairs of hex numbers: - * - */ -- (void)handleCodeSpaceRange:(NSString *)string -{ - static NSString *rangeLowerBound = @"MIN"; - NSNumber *value = [NSNumber numberWithInt:[self valueOfTag:string]]; - NSNumber *low = [self.context valueForKey:rangeLowerBound]; - - if (!low) - { - [self.context setValue:value forKey:rangeLowerBound]; - return; - } - - [self.codeSpaceRanges addObject:rangeValue([low intValue], [value intValue])]; - [self.context removeObjectForKey:rangeLowerBound]; + + return ma; } -/**! - * Character mappings appear in pairs: - * - */ -- (void)handleCharacter:(NSString *)character +- (void) parse:(NSString *)string { - NSNumber *value = [NSNumber numberWithInt:[self valueOfTag:character]]; - static NSString *origin = @"Origin"; - NSNumber *from = [self.context valueForKey:origin]; - if (!from) - { - [self.context setValue:value forKey:origin]; - return; - } - [self.characterMappings setObject:value forKey:from]; - [self.context removeObjectForKey:origin]; -} - -/**! - * Ranges appear on the triplet form: - * - */ -- (void)handleCharacterRange:(NSString *)token -{ - NSNumber *value = [NSNumber numberWithInt:[self valueOfTag:token]]; - static NSString *from = @"From"; - static NSString *to = @"To"; - NSNumber *fromValue = [self.context valueForKey:from]; - NSNumber *toValue = [self.context valueForKey:to]; - if (!fromValue) - { - [self.context setValue:value forKey:from]; - return; - } - else if (!toValue) - { - [self.context setValue:value forKey:to]; - return; - } - NSValue *range = rangeValue([fromValue intValue], [toValue intValue]); - [self.characterRangeMappings setObject:value forKey:range]; - [self.context removeObjectForKey:from]; - [self.context removeObjectForKey:to]; -} - -#pragma mark - -#pragma mark Accessor methods - -- (NSSet *)operators { - if (!sharedOperators) { - sharedOperators = [[NSMutableSet alloc] initWithObjects: - [Operator operatorWithStart:@"begincodespacerange" - end:@"endcodespacerange" - handler:@selector(handleCodeSpaceRange:)], - [Operator operatorWithStart:@"beginbfchar" - end:@"endbfchar" - handler:@selector(handleCharacter:)], - [Operator operatorWithStart:@"beginbfrange" - end:@"endbfrange" - handler:@selector(handleCharacterRange:)], - nil]; + NSUInteger mode = ParseExtModeNone; + + NSArray *lines = [string componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]]; + for (NSString *line in lines) { + + if (mode == ParseExtModeNone) { + + if ([line rangeOfString:@"begincodespacerange"].location != NSNotFound) { + + mode = ParseExtModeCodeSpaceRange; + + } else if ([line rangeOfString:@"beginbfrange"].location != NSNotFound) { + + mode = ParseExtModeBFRange; + + } else if ([line rangeOfString:@"beginbfchar"].location != NSNotFound) { + + mode = ParseExtModeBFChar; + } + } + + if (mode == ParseExtModeCodeSpaceRange) { + + NSArray *numbers = [self exractNumbersFromLine:line]; + if (numbers.count == 2) { + + NSValue *range = rangeValue([numbers[0] integerValue], [numbers[1] integerValue]); + [self.codeSpaceRanges addObject:range]; + } + + if ([line rangeOfString:@"endcodespacerange"].location != NSNotFound) { + mode = ParseExtModeNone; + } + + } else if (mode == ParseExtModeBFRange) { + + // TODO: arrays like <005F> <0061> [<00660066> <00660069> <00660066006C>] + // TODO: unicode32 like + + NSArray *numbers = [self exractNumbersFromLine:line]; + if (numbers.count == 3) { + + NSValue *range = rangeValue([numbers[0] integerValue], [numbers[1] integerValue]); + self.characterRangeMappings[range] = numbers[2]; + } + + if ([line rangeOfString:@"endbfrange"].location != NSNotFound) { + mode = ParseExtModeNone; + } + + } else if (mode == ParseExtModeBFChar) { + + NSArray *numbers = [self exractNumbersFromLine:line]; + if (numbers.count == 2) { + self.characterMappings[numbers[0]] = numbers[1]; + } + + if ([line rangeOfString:@"endbfchar"].location != NSNotFound) { + mode = ParseExtModeNone; + } + } } - return sharedOperators; -} - -- (NSCharacterSet *)tagSet { - if (!sharedTagSet) { - sharedTagSet = [[NSCharacterSet characterSetWithCharactersInString:@"<>"] retain]; - } - return sharedTagSet; -} - -- (NSCharacterSet *)tokenDelimiterSet { - if (!sharedTokenDelimimerSet) { - sharedTokenDelimimerSet = [[NSCharacterSet whitespaceAndNewlineCharacterSet] retain]; - } - return sharedTokenDelimimerSet; } - (NSMutableArray *)codeSpaceRanges { - if (!codeSpaceRanges) { - codeSpaceRanges = [[NSMutableArray alloc] init]; - } - return codeSpaceRanges; + if (!codeSpaceRanges) { + codeSpaceRanges = [[NSMutableArray alloc] init]; + } + return codeSpaceRanges; } - (NSMutableDictionary *)characterMappings { - if (!characterMappings) { - characterMappings = [[NSMutableDictionary alloc] init]; - } - return characterMappings; + if (!characterMappings) { + characterMappings = [[NSMutableDictionary alloc] init]; + } + return characterMappings; } - (NSMutableDictionary *)characterRangeMappings { - if (!characterRangeMappings) { - self.characterRangeMappings = [NSMutableDictionary dictionary]; - } - return characterRangeMappings; -} - -- (void)dealloc -{ - [context release]; - [characterMappings release]; - [characterRangeMappings release]; - [codeSpaceRanges release]; - [super dealloc]; + if (!characterRangeMappings) { + self.characterRangeMappings = [NSMutableDictionary dictionary]; + } + return characterRangeMappings; } -@synthesize context; @synthesize codeSpaceRanges, characterMappings, characterRangeMappings; -@end +@end \ No newline at end of file diff --git a/PDFKitten/CompositeFont.m b/PDFKitten/CompositeFont.m index be833a3..96bf19e 100644 --- a/PDFKitten/CompositeFont.m +++ b/PDFKitten/CompositeFont.m @@ -6,13 +6,15 @@ @implementation CompositeFont - (void)setWidthsWithFontDictionary:(CGPDFDictionaryRef)dict { CGPDFArrayRef widthsArray; - if (CGPDFDictionaryGetArray(dict, "W", &widthsArray)) + + if (CGPDFDictionaryGetArray(dict, "W", &widthsArray)) { [self setWidthsWithArray:widthsArray]; } CGPDFInteger defaultWidthValue; - if (CGPDFDictionaryGetInteger(dict, "DW", &defaultWidthValue)) + + if (CGPDFDictionaryGetInteger(dict, "DW", &defaultWidthValue)) { self.defaultWidth = defaultWidthValue; } @@ -23,6 +25,7 @@ - (void)setWidthsWithArray:(CGPDFArrayRef)widthsArray NSUInteger length = CGPDFArrayGetCount(widthsArray); int idx = 0; CGPDFObjectRef nextObject = nil; + while (idx < length) { CGPDFInteger baseCid = 0; @@ -31,7 +34,8 @@ - (void)setWidthsWithArray:(CGPDFArrayRef)widthsArray CGPDFObjectRef integerOrArray = nil; CGPDFInteger firstCharacter = 0; CGPDFArrayGetObject(widthsArray, idx++, &integerOrArray); - if (CGPDFObjectGetType(integerOrArray) == kCGPDFObjectTypeInteger) + + if (CGPDFObjectGetType(integerOrArray) == kCGPDFObjectTypeInteger) { // [ first last width ] CGPDFInteger maxCid; @@ -43,15 +47,22 @@ - (void)setWidthsWithArray:(CGPDFArrayRef)widthsArray // If the second item is an array, the sequence // defines widths on the form [ first list-of-widths ] CGPDFArrayRef characterWidths; - if (!CGPDFObjectGetValue(nextObject, kCGPDFObjectTypeArray, &characterWidths)) break; - NSUInteger widthsCount = CGPDFArrayGetCount(characterWidths); - for (int index = 0; index < widthsCount ; index++) + + if (!CGPDFObjectGetValue(nextObject, kCGPDFObjectTypeArray, &characterWidths)) + { + break; + } + + NSUInteger widthsCount = CGPDFArrayGetCount(characterWidths); + + for (int index = 0; index < widthsCount ; index++) { CGPDFInteger width; - if (CGPDFArrayGetInteger(characterWidths, index, &width)) + + if (CGPDFArrayGetInteger(characterWidths, index, &width)) { - NSNumber *key = [NSNumber numberWithInt:firstCharacter+index]; - NSNumber *val = [NSNumber numberWithInt:width]; + NSNumber *key = [NSNumber numberWithInt: (int)firstCharacter + index]; + NSNumber *val = [NSNumber numberWithInt: (int)width]; [widths setObject:val forKey:key]; } } @@ -70,7 +81,7 @@ - (void)setWidthsFrom:(CGPDFInteger)cid to:(CGPDFInteger)maxCid width:(CGPDFInte { while (cid <= maxCid) { - [self.widths setObject:[NSNumber numberWithInt:width] forKey:[NSNumber numberWithInt:cid++]]; + [self.widths setObject:[NSNumber numberWithInt:(int)width] forKey:[NSNumber numberWithInt:(int)cid++]]; } } @@ -78,24 +89,28 @@ - (void)setWidthsWithBase:(CGPDFInteger)base array:(CGPDFArrayRef)array { NSInteger count = CGPDFArrayGetCount(array); CGPDFInteger width; + for (int index = 0; index < count ; index++) { if (CGPDFArrayGetInteger(array, index, &width)) { - [self.widths setObject:[NSNumber numberWithInt:width] forKey:[NSNumber numberWithInt:base+index]]; + [self.widths setObject:[NSNumber numberWithInt:(int)width] forKey:[NSNumber numberWithInt:(int)base + index]]; } } } - (CGFloat)widthOfCharacter:(unichar)characher withFontSize:(CGFloat)fontSize { - NSNumber *width = [self.widths objectForKey:[NSNumber numberWithInt:characher - 30]]; - if (!width) + NSNumber *width = [self.widths objectForKey:[NSNumber numberWithInt:characher]]; + + if (!width) { return self.defaultWidth * fontSize; } - return [width floatValue] * fontSize; + + return [width floatValue] * fontSize; } @synthesize defaultWidth; -@end + +@end \ No newline at end of file diff --git a/PDFKitten/DocumentsView.m b/PDFKitten/DocumentsView.m index 22f1e08..8ef4c3b 100644 --- a/PDFKitten/DocumentsView.m +++ b/PDFKitten/DocumentsView.m @@ -85,15 +85,5 @@ - (void)tableView:(UITableView *)tableView didSelectRowAtIndexPath:(NSIndexPath } } - -#pragma mark Memory Management - -- (void)dealloc -{ - [tableViewController release]; - [documents release]; - [super dealloc]; -} - @synthesize delegate; @end diff --git a/PDFKitten/EncodingDifferences.h b/PDFKitten/EncodingDifferences.h new file mode 100644 index 0000000..2f86e84 --- /dev/null +++ b/PDFKitten/EncodingDifferences.h @@ -0,0 +1,21 @@ +// +// PDFKEncodingDifferences.h +// PDFKitten +// +// Created by Kolyvan on 29.06.13. +// Copyright (c) 2013 Konstantin Bukreev. All rights reserved. +// + +#import "Font.h" + +@interface EncodingDifferences : NSObject + +- (id)initWithArray:(CGPDFArrayRef)array; + +- (NSUInteger)mapCid:(unichar)cid withEncoding:(CharacterEncoding)encoding; + +- (NSUInteger)cidForName:(NSString *)name; + +- (NSUInteger)cidCharacter:(unichar)unicode withEncoding:(CharacterEncoding)encoding; + +@end diff --git a/PDFKitten/EncodingDifferences.m b/PDFKitten/EncodingDifferences.m new file mode 100644 index 0000000..9c4418b --- /dev/null +++ b/PDFKitten/EncodingDifferences.m @@ -0,0 +1,281 @@ +// +// PDFKEncodingDifferences.m +// PDFKitten +// +// Created by Kolyvan on 29.06.13. +// Copyright (c) 2013 Konstantin Bukreev. All rights reserved. +// + +#import "EncodingDifferences.h" + +@interface PDFKAdobeCharsetEntry : NSObject +@property (readwrite, nonatomic) NSUInteger stdCode; +@property (readwrite, nonatomic) NSUInteger macCode; +@property (readwrite, nonatomic) NSUInteger winCode; +@property (readwrite, nonatomic) NSUInteger pdfCode; +@end + +@implementation PDFKAdobeCharsetEntry +@end + +@implementation EncodingDifferences { + + NSMutableDictionary *_map; +} + +- (id)initWithArray:(CGPDFArrayRef)array +{ + self = [super init]; + if (self) { + + _map = [[NSMutableDictionary alloc] init]; + + NSUInteger cid = 0; + const NSUInteger count = CGPDFArrayGetCount(array); + for (NSUInteger i = 0; i < count; ++i) { + + CGPDFObjectRef pdfObject; + if (CGPDFArrayGetObject(array, i, &pdfObject)) { + + const CGPDFObjectType objType = CGPDFObjectGetType(pdfObject); + + if (objType == kCGPDFObjectTypeInteger) { + + CGPDFInteger tmp; + if (CGPDFObjectGetValue(pdfObject, kCGPDFObjectTypeInteger, &tmp)) { + + cid = tmp; + } + + } else if (objType == kCGPDFObjectTypeName) { + + const char *name; + if (CGPDFObjectGetValue(pdfObject, kCGPDFObjectTypeName, &name) && + (0 != strcmp(name, ".notdef"))) { + + //Returns f_i, f_l some times? Should this be converted to fi and fl? Else the glyph or character lookup will fail if we do not support f_i;**** + + _map[@(cid)] = @(name); + } + + cid++; + } + } + } + } + return self; +} + + ++ (NSDictionary *) loadAdobeCharsetDict +{ + NSMutableDictionary *md = [NSMutableDictionary dictionary]; + NSString* path = [[[NSBundle mainBundle] resourcePath] stringByAppendingPathComponent:@"NappPDFResources.bundle/adobe_charset"]; + + NSError *error; + NSString *charsets = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error]; + if (!charsets) { + NSLog(@"unable load adobe_charsets from resource '%@', %@", path, error); + return nil; + } + + NSCharacterSet *separator = [NSCharacterSet whitespaceCharacterSet]; + NSArray *lines = [charsets componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]]; + for (NSString *line in lines) { + + NSArray *fields = [line componentsSeparatedByCharactersInSet:separator]; + if (fields.count == 5) { + + NSString *name = fields[0]; + NSString *stdCode = fields[1]; + NSString *macCode = fields[2]; + NSString *winCode = fields[3]; + NSString *pdfCode = fields[4]; + + PDFKAdobeCharsetEntry *entry = [[PDFKAdobeCharsetEntry alloc] init]; + entry.stdCode = [stdCode isEqualToString:@"-"] ? NSNotFound : [stdCode integerValue]; + entry.macCode = [macCode isEqualToString:@"-"] ? NSNotFound : [macCode integerValue]; + entry.winCode = [winCode isEqualToString:@"-"] ? NSNotFound : [winCode integerValue]; + entry.pdfCode = [pdfCode isEqualToString:@"-"] ? NSNotFound : [pdfCode integerValue]; + md[name] = entry; + + } else { + + NSLog(@"invalid line '%@' in adobe_charset", line); + } + } + + return [md copy]; +} + ++ (NSDictionary *) loadAdobeGlyphsDict +{ + NSMutableDictionary *md = [NSMutableDictionary dictionary]; + + NSString* path = [[[NSBundle mainBundle] resourcePath] stringByAppendingPathComponent:@"NappPDFResources.bundle/adobe_glyphs"]; + + NSError *error; + NSString *glyphs = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error]; + if (!glyphs) { + NSLog(@"unable load adobe_glyphs from resource '%@', %@", path, error); + return nil; + } + + NSCharacterSet *separator = [NSCharacterSet characterSetWithCharactersInString:@";"]; + NSArray *lines = [glyphs componentsSeparatedByCharactersInSet:[NSCharacterSet newlineCharacterSet]]; + for (NSString *line in lines) { + + NSArray *fields = [line componentsSeparatedByCharactersInSet:separator]; + if (fields.count == 2) { + + NSString *name = fields[0]; + NSString *uniCode = fields[1]; + + unsigned value; + NSScanner* scanner = [NSScanner scannerWithString:uniCode]; + if ([scanner scanHexInt:&value]) { + md[name] = @(value); + } + + } else { + + NSLog(@"invalid line '%@' in abobe_glyphs", line); + } + } + + return [md copy]; +} + ++ (NSDictionary *) adobeCharset +{ + static NSDictionary *dict; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + dict = [self loadAdobeCharsetDict]; + }); + return dict; +} + ++ (NSDictionary *)adobeGlyphs +{ + static NSDictionary *dict; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + dict = [self loadAdobeGlyphsDict]; + }); + return dict; +} +/* +- (NSUInteger) mapCid:(unichar)cid withEncoding:(CharacterEncoding)encoding { + + NSString *name = _map[@(cid)]; + if (name) { + if (encoding == StandardEncoding || + encoding == MacRomanEncoding || + encoding == WinAnsiEncoding || + encoding == PDFDocEncoding) { + + NSDictionary *dict = [EncodingDifferences adobeCharset]; + PDFKAdobeCharsetEntry *entry = dict[name]; + if (entry) { + + if (encoding == StandardEncoding) + return entry.stdCode; + + if (encoding == MacRomanEncoding) + return entry.macCode; + + if (encoding == WinAnsiEncoding) + return entry.winCode; + + if (encoding == PDFDocEncoding) + return entry.pdfCode; + } + + } else { + + NSDictionary *dict = [EncodingDifferences adobeGlyphs]; + NSNumber *uniCode = dict[name]; + if (uniCode) { + return [uniCode unsignedIntegerValue]; + } + } + } + + return NSNotFound; +} +*/ + + +- (NSUInteger)mapCid:(unichar)cid withEncoding:(CharacterEncoding)encoding { + + NSString *name = _map[@(cid)]; + + if (name) { + NSDictionary *dict = [EncodingDifferences adobeGlyphs]; + NSNumber *uniCode = dict[name]; + if (uniCode) { + return [uniCode unsignedIntegerValue]; + } + } + return NSNotFound; +} + +- (NSUInteger) cidForName:(NSString *)name +{ + __block NSUInteger cid = NSNotFound; + [_map enumerateKeysAndObjectsUsingBlock:^(NSNumber *key, NSString *val, BOOL *stop) { + + if ([val isEqualToString:name]) { + cid = key.unsignedIntegerValue; + *stop = YES; + } + + }]; + return cid; +} + +- (NSUInteger)cidCharacter:(unichar)unicode + withEncoding:(CharacterEncoding)encoding +{ + __block NSString *name = nil; + + if (encoding == StandardEncoding || + encoding == MacRomanEncoding || + encoding == WinAnsiEncoding || + encoding == PDFDocEncoding) { + + NSDictionary *dict = [EncodingDifferences adobeCharset]; + + [dict enumerateKeysAndObjectsUsingBlock:^(NSString *key, PDFKAdobeCharsetEntry *val, BOOL *stop) { + + if ((encoding == StandardEncoding && val.stdCode == unicode) || + (encoding == MacRomanEncoding && val.macCode == unicode) || + (encoding == WinAnsiEncoding && val.winCode == unicode) || + (encoding == PDFDocEncoding && val.pdfCode == unicode)) { + + name = [key copy]; + *stop = YES; + } + }]; + + } else { + + NSDictionary *dict = [EncodingDifferences adobeGlyphs]; + [dict enumerateKeysAndObjectsUsingBlock:^(NSString *key, NSNumber *val, BOOL *stop) { + + if (val.unsignedIntegerValue == unicode) { + + name = [key copy]; + *stop = YES; + } + }]; + } + + if (name) { + return [self cidForName:name]; + } + return NSNotFound; +} + +@end diff --git a/PDFKitten/Font.h b/PDFKitten/Font.h index acc4dd9..01517b3 100644 --- a/PDFKitten/Font.h +++ b/PDFKitten/Font.h @@ -59,6 +59,9 @@ static inline BOOL knownEncoding(CharacterEncoding encoding) /* Populate the widths array given font dictionary */ - (void)setWidthsWithFontDictionary:(CGPDFDictionaryRef)dict; +/* Given a PDF string, returns a Unicode string */ +- (void)enumeratePDFStringCharacters:(CGPDFStringRef)pdfString usingBlock:(void(^)(NSUInteger cid, NSString *unicode))block; + /* Construct a font descriptor given font dictionary */ - (void)setFontDescriptorWithFontDictionary:(CGPDFDictionaryRef)dict; diff --git a/PDFKitten/Font.m b/PDFKitten/Font.m index 0f3e2df..b89af33 100644 --- a/PDFKitten/Font.m +++ b/PDFKitten/Font.m @@ -41,10 +41,11 @@ @implementation Font + (Font *)fontWithDictionary:(CGPDFDictionaryRef)dictionary { const char *type = nil; + const char *subtype = nil; CGPDFDictionaryGetName(dictionary, kTypeKey, &type); - if (!type || strcmp(type, kFontKey) != 0) return nil; - const char *subtype = nil; CGPDFDictionaryGetName(dictionary, kFontSubtypeKey, &subtype); + + if (!type || strcmp(type, kFontKey) != 0) return nil; Font *font = nil; if (!strcmp(subtype, kType0Key)) { @@ -69,8 +70,7 @@ + (Font *)fontWithDictionary:(CGPDFDictionaryRef)dictionary font = [CIDType2Font alloc]; } - [[font initWithFontDictionary:dictionary] autorelease]; - return font; + return [font initWithFontDictionary:dictionary]; } /* Initialize with font dictionary */ @@ -139,7 +139,6 @@ - (void)setFontDescriptorWithFontDictionary:(CGPDFDictionaryRef)dict if (!CGPDFDictionaryGetDictionary(dict, kFontDescriptorKey, &descriptor)) return; FontDescriptor *desc = [[FontDescriptor alloc] initWithPDFDictionary:descriptor]; self.fontDescriptor = desc; - [desc release]; } /* Populate the widths array given font dictionary */ @@ -155,7 +154,6 @@ - (void)setToUnicodeWithFontDictionary:(CGPDFDictionaryRef)dict if (!CGPDFDictionaryGetStream(dict, kToUnicodeKey, &stream)) return; CMap *map = [[CMap alloc] initWithPDFStream:stream]; self.toUnicode = map; - [map release]; } #pragma mark Font Property Accessors @@ -212,31 +210,72 @@ - (NSString *)stringWithPDFString:(CGPDFStringRef)pdfString return [NSString stringWithString:string]; } -- (NSString *)cidWithPDFString:(CGPDFStringRef)pdfString { +- (NSString *)cidWithPDFString:(CGPDFStringRef)pdfString +{ // Copy PDFString to NSString - NSString *string = (NSString *) CGPDFStringCopyTextString(pdfString); - return [string autorelease]; + NSString *string = (__bridge NSString *) CGPDFStringCopyTextString(pdfString); + return string; } - (NSString *)unicodeWithPDFString:(CGPDFStringRef)pdfString { const unsigned char *bytes = CGPDFStringGetBytePtr(pdfString); NSInteger length = CGPDFStringGetLength(pdfString); - if (self.toUnicode) + + if (self.toUnicode) { NSMutableString *unicodeString = [NSMutableString string]; - for (int i = 0; i < length; i++) + + for (int i = 0; i < length; i++) { const unsigned char cid = bytes[i]; - [unicodeString appendFormat:@"%C", [self.toUnicode unicodeCharacter:cid]]; + [unicodeString appendFormat:@"%C", (unichar)[self.toUnicode unicodeCharacter:cid]]; } return unicodeString; } - else { + else + { return [self stringWithPDFString:pdfString]; } } +//Method for enumerating the characters in a pdf string +- (void)enumeratePDFStringCharacters:(CGPDFStringRef)pdfString usingBlock:(void (^)(NSUInteger, NSString *))block { + + /* + if (self.toUnicode) { + [self.toUnicode enumeratePDFStringCharacters:pdfString usingBlock:block]; + return; + } + */ + + const unsigned char *bytes = CGPDFStringGetBytePtr(pdfString); + NSUInteger length = CGPDFStringGetLength(pdfString); + + if (self.fontDescriptor.fontFile) { + + FontFile *fontFile = self.fontDescriptor.fontFile; + + for (int i = 0; i < length; i++) { + unichar cid = bytes[i]; + NSLog(@"unichar = %c",cid); + block(cid, [fontFile stringWithCode:cid]); + } + + return; + } + + for (int i = 0; i < length; i++) { + unichar cid = bytes[i]; + block(cid, [NSString stringWithFormat:@"%C",cid]); + } + +} + + + + + /* Lowest point of any character */ - (CGFloat)minY { @@ -285,11 +324,14 @@ - (NSString *)description NSMutableString *string = [NSMutableString string]; [string appendFormat:@"%@ {\n", self.baseFont]; [string appendFormat:@"\ttype = %@\n", [self classForKeyedArchiver]]; - [string appendFormat:@"\tcharacter widths = %d\n", [self.widths count]]; + [string appendFormat:@"\tcharacter widths = %lu\n", (unsigned long)[self.widths count]]; [string appendFormat:@"\ttoUnicode = %d\n", (self.toUnicode != nil)]; - if (self.descendantFonts) { - [string appendFormat:@"\tdescendant fonts = %d\n", [self.descendantFonts count]]; + + if (self.descendantFonts) + { + [string appendFormat:@"\tdescendant fonts = %lu\n", (unsigned long)[self.descendantFonts count]]; } + [string appendFormat:@"}\n"]; return string; } @@ -298,25 +340,17 @@ - (NSString *)description - (NSString *)stringByExpandingLigatures:(NSString *)string { NSString *replacement = nil; - for (NSString *ligature in self.ligatures) + + for (NSString *ligature in self.ligatures) { replacement = [self.ligatures objectForKey:ligature]; if (!replacement) continue; string = [string stringByReplacingOccurrencesOfString:ligature withString:replacement]; } - return string; -} - -#pragma mark Memory Management - -- (void)dealloc -{ - [toUnicode release]; - [widths release]; - [fontDescriptor release]; - [baseFont release]; - [super dealloc]; + + return string; } @synthesize fontDescriptor, widths, toUnicode, widthsRange, baseFont, baseFontName, encoding, descendantFonts; -@end + +@end \ No newline at end of file diff --git a/PDFKitten/FontCollection.m b/PDFKitten/FontCollection.m index 3a8c2b3..e6ad415 100644 --- a/PDFKitten/FontCollection.m +++ b/PDFKitten/FontCollection.m @@ -12,7 +12,7 @@ void didScanFont(const char *key, CGPDFObjectRef object, void *collection) Font *font = [Font fontWithDictionary:dict]; if (!font) return; NSString *name = [NSString stringWithUTF8String:key]; - [(NSMutableDictionary *)collection setObject:font forKey:name]; + ((__bridge NSMutableDictionary *)collection)[name] = font; } /* Initialize with a font collection dictionary */ @@ -22,16 +22,18 @@ - (id)initWithFontDictionary:(CGPDFDictionaryRef)dict { fonts = [[NSMutableDictionary alloc] init]; // Enumerate the Font resource dictionary - CGPDFDictionaryApplyFunction(dict, didScanFont, fonts); + CGPDFDictionaryApplyFunction(dict, didScanFont, (__bridge void *)(fonts)); NSMutableArray *namesArray = [NSMutableArray array]; - for (NSString *name in [fonts allKeys]) + + for (NSString *name in [fonts allKeys]) { [namesArray addObject:name]; } - names = [[namesArray sortedArrayUsingSelector:@selector(compare:)] retain]; + names = [namesArray sortedArrayUsingSelector:@selector(compare:)]; } + return self; } @@ -47,14 +49,6 @@ - (Font *)fontNamed:(NSString *)fontName return [fonts objectForKey:fontName]; } -#pragma mark - Memory Management - -- (void)dealloc -{ - [names release]; - [fonts release]; - [super dealloc]; -} - @synthesize names; -@end + +@end \ No newline at end of file diff --git a/PDFKitten/FontDescriptor.m b/PDFKitten/FontDescriptor.m index 6f53d88..babe9df 100644 --- a/PDFKitten/FontDescriptor.m +++ b/PDFKitten/FontDescriptor.m @@ -25,9 +25,10 @@ - (id)initWithPDFDictionary:(CGPDFDictionaryRef)dict { const char *type = nil; CGPDFDictionaryGetName(dict, kTypeKey, &type); - if (!type || strcmp(type, kFontDescriptorKey) != 0) + + if (!type || strcmp(type, kFontDescriptorKey) != 0) { - [self release]; return nil; + return nil; } if ((self = [super init])) @@ -87,67 +88,25 @@ - (id)initWithPDFDictionary:(CGPDFDictionaryRef)dict } CGPDFStreamRef fontFileStream; + if (CGPDFDictionaryGetStream(dict, kFontFileKey, &fontFileStream)) { CGPDFDataFormat format; - NSData *data = (NSData *) CGPDFStreamCopyData(fontFileStream, &format); - /* - NSString *path = [NSSearchPathForDirectoriesInDomains(NSDocumentDirectory, NSUserDomainMask, YES) lastObject]; - path = [path stringByAppendingPathComponent:@"fontfile"]; - [data writeToFile:path atomically:YES]; - */ + NSData *data = (__bridge NSData *) CGPDFStreamCopyData(fontFileStream, &format); fontFile = [[FontFile alloc] initWithData:data]; - [data release]; } - } + return self; } -+ (void)parseFontFile:(NSData *)data -{ -// CGPDFDictionaryRef dict = CGPDFStreamGetDictionary(text); -// -// CGPDFInteger cleartextLength, decryptedLength, fixedLength; -// CGPDFInteger totalLength; -// CGPDFDictionaryGetInteger(dict, "Length1", &cleartextLength); -// CGPDFDictionaryGetInteger(dict, "Length2", &decryptedLength); -// CGPDFDictionaryGetInteger(dict, "Length3", &fixedLength); -// CGPDFDictionaryGetInteger(dict, "Length", &totalLength); -// -// NSLog(@"Lengths: %ld, %ld, %ld", cleartextLength, decryptedLength, fixedLength); -// NSLog(@"Total: %ld", totalLength); -// -// CGPDFDataFormat format; -// CFDataRef data = CGPDFStreamCopyData(text, &format); -// const uint8_t *ptr = CFDataGetBytePtr(data); -// size_t length = CFDataGetLength(data); -// NSData *fontData = [NSData dataWithBytes:ptr length:length]; -// -// size_t digestStringLength = CC_MD5_DIGEST_LENGTH * sizeof(unsigned char); -// unsigned char *digest = malloc(digestStringLength); -// bzero(digest, digestStringLength); -// CC_MD5(data, length, digest); - - // Get first header - -} - /* True if a font is symbolic */ - (BOOL)isSymbolic { return ((self.flags & FontSymbolic) > 0) && ((self.flags & FontNonSymbolic) == 0); } -#pragma mark Memory Management - -- (void)dealloc -{ - [fontFile release]; - [fontName release]; - [super dealloc]; -} - @synthesize ascent, descent, bounds, leading, capHeight, averageWidth, maxWidth, missingWidth, xHeight, flags, verticalStemWidth, horizontalStemWidth, italicAngle, fontName; @synthesize fontFile; -@end + +@end \ No newline at end of file diff --git a/PDFKitten/FontFile.m b/PDFKitten/FontFile.m index f226288..39ddab7 100644 --- a/PDFKitten/FontFile.m +++ b/PDFKitten/FontFile.m @@ -17,19 +17,23 @@ - (id)initWithData:(NSData *)someData { if (!someData) { - [self release]; return nil; } - data = [someData retain]; + + data = someData; NSScanner *scanner = [NSScanner scannerWithString:self.text]; NSCharacterSet *delimiterSet = [NSCharacterSet whitespaceAndNewlineCharacterSet]; NSCharacterSet *newlineCharacterSet = [NSCharacterSet newlineCharacterSet]; names = [NSMutableDictionary dictionary]; NSString *buffer; - while (![scanner isAtEnd]) + + while (![scanner isAtEnd]) { - if (![scanner scanUpToCharactersFromSet:delimiterSet intoString:&buffer]) break; + if (![scanner scanUpToCharactersFromSet:delimiterSet intoString:&buffer]) + { + break; + } if ([buffer hasPrefix:@"%"]) { @@ -43,7 +47,11 @@ - (id)initWithData:(NSData *)someData NSString *name; [scanner scanInt:&code]; [scanner scanUpToCharactersFromSet:delimiterSet intoString:&name]; - if (name) [names setObject:name forKey:[NSNumber numberWithInt:code]]; + + if (name) + { + [names setObject:name forKey:[NSNumber numberWithInt:code]]; + } } } } @@ -106,7 +114,6 @@ - (NSString *)text asciiTextLength = bytes[2] | bytes[3] << 8 | bytes[4] << 16 | bytes[5] << 24; NSData *textData = [[NSData alloc] initWithBytes:bytes+kHeaderLength length:asciiTextLength]; text = [[NSString alloc] initWithData:textData encoding:NSASCIIStringEncoding]; - [textData release]; } else { @@ -116,12 +123,6 @@ - (NSString *)text return text; } -- (void)dealloc -{ - [text release]; - [data release]; - [super dealloc]; -} - @synthesize data, text, names; -@end + +@end \ No newline at end of file diff --git a/PDFKitten/Helper/PDFConverter.h b/PDFKitten/Helper/PDFConverter.h new file mode 100644 index 0000000..6d5679e --- /dev/null +++ b/PDFKitten/Helper/PDFConverter.h @@ -0,0 +1,7 @@ +#import + +@interface PDFConverter : NSObject + ++(CGRect)convertPDFRectToViewRect:(CGRect)PDFAnnotCoordinates pageRect:(CGRect)pageRect pdfRect:(CGRect)pdfRect; + +@end diff --git a/PDFKitten/Helper/PDFConverter.m b/PDFKitten/Helper/PDFConverter.m new file mode 100644 index 0000000..98aa79e --- /dev/null +++ b/PDFKitten/Helper/PDFConverter.m @@ -0,0 +1,104 @@ +#import "PDFConverter.h" + +#define PORTRAIT 1 +#define UPSIDEDOWN 2 + +@implementation PDFConverter + ++(CGRect)convertAnnotRectArrayToRect:(CGPDFArrayRef)rectArray{ + + //If the rect array is empty/nil + if(!rectArray) return CGRectNull; + + size_t count = CGPDFArrayGetCount(rectArray); + + //If there are more or less than 4 elements we have some unintended data + if (count != 4) { + if (count) { + NSLog(@"Warning: Passed rect array empty!!"); + } + return CGRectNull; + } + CGFloat lowerLeftX, lowerLeftY, upperRightX, upperRightY; + CGPDFArrayGetNumber(rectArray, 0, &lowerLeftX); + CGPDFArrayGetNumber(rectArray, 1, &lowerLeftY); + CGPDFArrayGetNumber(rectArray, 2, &upperRightX); + CGPDFArrayGetNumber(rectArray, 3, &upperRightY); + CGRect rectNew = CGRectMake(lowerLeftX, lowerLeftY, upperRightX - lowerLeftX, lowerLeftY - upperRightY); + + + return rectNew; + + +} + ++(CGRect)convertPDFRectToViewRect:(CGRect)PDFAnnotCoordinates pageRect:(CGRect)pageRect pdfRect:(CGRect)pdfRect{ + + if(CGRectIsEmpty(PDFAnnotCoordinates)) { + return CGRectZero; + } + + CGPoint topLeft = [self convertViewPointToPDFPoint:PDFAnnotCoordinates.origin page:pdfRect rect:pageRect]; + CGPoint bottomRight = [self convertViewPointToPDFPoint:CGPointMake(CGRectGetMaxX(PDFAnnotCoordinates), CGRectGetMaxY(PDFAnnotCoordinates)) page:pdfRect rect:pageRect]; + CGRect rect = CGRectMake(topLeft.x, topLeft.y, bottomRight.x - topLeft.x, bottomRight.y - topLeft.y); + return normalizeRect(rect); +} + +CGRect normalizeRect(CGRect rect) { + if (rect.size.height < 0) { + rect.size.height *= -1; + rect.origin.y -= rect.size.height; + } + if (rect.size.width < 0) { + rect.size.width *= -1; + rect.origin.x -= rect.size.width; + } + return rect; +} + +#define IPAD UI_USER_INTERFACE_IDIOM() == UIUserInterfaceIdiomPad +#define kZoomSize (IPAD ? 350 : 100) + + ++(CGRect)pointToRect:(CGPoint)point { + + NSUInteger zoomSizeHalf = kZoomSize / 2; + NSUInteger touchPointX = point.x < zoomSizeHalf ? 0 : (NSUInteger)point.x - zoomSizeHalf; + NSUInteger touchPointY = point.y < zoomSizeHalf ? 0 : (NSUInteger)point.y - zoomSizeHalf; + return CGRectMake(touchPointX, touchPointY, kZoomSize, kZoomSize); +} + ++(CGPoint)convertViewPointToPDFPoint:(CGPoint)point page:(CGRect)pdfPage rect:(CGRect)pageViewRect{ + CGPoint pdfPoint = CGPointMake(0, 0); + + int rotation = 0; + + switch (rotation) { + case 90: + case -270: + pdfPoint.x = pageViewRect.size.width * (point.y - pdfPage.origin.y) / pdfPage.size.height; + pdfPoint.y = pageViewRect.size.height * (point.x - pdfPage.origin.x) / pdfPage.size.width; + break; + case 180: + case -180: + pdfPoint.x = pageViewRect.size.width * (pdfPage.size.width - (point.x - pdfPage.origin.x)) / pdfPage.size.width; + pdfPoint.y = pageViewRect.size.height * (point.y - pdfPage.origin.y) / pdfPage.size.height; + break; + case -90: + case 270: + pdfPoint.x = pageViewRect.size.width * (pdfPage.size.height - (point.y - pdfPage.origin.y)) / pdfPage.size.height; + pdfPoint.y = pageViewRect.size.height * (pdfPage.size.width - (point.x - pdfPage.origin.x)) / pdfPage.size.width; + break; + case 0: + default: + pdfPoint.x = pageViewRect.size.width * (point.x - pdfPage.origin.x) / pdfPage.size.width; + pdfPoint.y = pageViewRect.size.height * (pdfPage.size.height - (point.y - pdfPage.origin.y)) / pdfPage.size.height; + break; + } + + pdfPoint.x = pdfPoint.x + pdfPage.origin.x; + pdfPoint.y = pdfPoint.y + pdfPage.origin.y; + + return pdfPoint; +} +@end diff --git a/PDFKitten/PDFKittenAppDelegate.m b/PDFKitten/PDFKittenAppDelegate.m index 02d20ef..e16e983 100644 --- a/PDFKitten/PDFKittenAppDelegate.m +++ b/PDFKitten/PDFKittenAppDelegate.m @@ -51,10 +51,4 @@ - (void)applicationWillTerminate:(UIApplication *)application */ } -- (void)dealloc -{ - [_window release]; - [super dealloc]; -} - @end diff --git a/PDFKitten/PDFPage.h b/PDFKitten/PDFPage.h index 2b3f408..0ec920d 100644 --- a/PDFKitten/PDFPage.h +++ b/PDFKitten/PDFPage.h @@ -28,6 +28,8 @@ - (void)setPage:(CGPDFPageRef)page; +-(void)displaySearchMarks; + @property (nonatomic, copy) NSString *keyword; @end diff --git a/PDFKitten/PDFPage.m b/PDFKitten/PDFPage.m index d683081..54f051c 100644 --- a/PDFKitten/PDFPage.m +++ b/PDFKitten/PDFPage.m @@ -1,5 +1,6 @@ #import "PDFPage.h" #import +#import "PDFConverter.h" @implementation PDFContentView @@ -27,21 +28,42 @@ + (Class)layerClass - (void)setKeyword:(NSString *)str { - [keyword release]; - keyword = [str retain]; + keyword = str; self.selections = nil; + [self displaySearchMarks]; } -- (NSArray *)selections -{ - @synchronized (self) - { - if (!selections) - { - self.selections = [self.scanner select:self.keyword]; - } - return selections; - } +//This method is doing all the work with the selections. We just call it when the keyword is set. +-(void)displaySearchMarks { + + //Clear old marks + for(UIView *mark in self.subviews) { + if(mark.backgroundColor == [UIColor yellowColor]) { + [mark removeFromSuperview]; + } + } + + //Search for a given string and return the selections + self.selections = [self.scanner searchForString:keyword]; + + //Get the pdfPage frame + CGRect pdfPageFrame = CGPDFPageGetBoxRect(pdfPage, kCGPDFMediaBox); + + for(Selection *selection in selections) { + + //The mark view + + //Get the userSpace frame from the selection PDFframe + CGRect selectionUserSpaceFrame = [PDFConverter convertPDFRectToViewRect:selection.frame pageRect:self.frame pdfRect:pdfPageFrame]; + + //Create the view + UIView *mark = [[UIView alloc] initWithFrame:selectionUserSpaceFrame]; + mark.backgroundColor = [UIColor yellowColor]; + mark.alpha = 0.4; + + //Place the view + [self addSubview:mark]; + } } - (void)drawLayer:(CALayer *)layer inContext:(CGContextRef)ctx @@ -59,19 +81,6 @@ - (void)drawLayer:(CALayer *)layer inContext:(CGContextRef)ctx CGContextConcatCTM(ctx, transform); CGContextDrawPDFPage(ctx, pdfPage); - - if (self.keyword) - { - CGContextSetFillColorWithColor(ctx, [[UIColor yellowColor] CGColor]); - CGContextSetBlendMode(ctx, kCGBlendModeMultiply); - for (Selection *s in self.selections) - { - CGContextSaveGState(ctx); - CGContextConcatCTM(ctx, s.transform); - CGContextFillRect(ctx, s.frame); - CGContextRestoreGState(ctx); - } - } } #pragma mark PDF drawing @@ -93,13 +102,6 @@ - (void)setPage:(CGPDFPageRef)page self.scanner = [Scanner scannerWithPage:pdfPage]; } -- (void)dealloc -{ - [scanner release]; - CGPDFPageRelease(pdfPage); - [super dealloc]; -} - @synthesize keyword, selections, scanner; @end @@ -112,10 +114,9 @@ @implementation PDFPage - (void)setNeedsDisplay { [super setNeedsDisplay]; - [contentView setNeedsDisplay]; } -/* Override implementation to return a PDFContentView */ +/* Override implementation to return a PDFContentView */ - (UIView *)contentView { if (!contentView) diff --git a/PDFKitten/PDFPageDetailsView.m b/PDFKitten/PDFPageDetailsView.m index efe4996..736abf6 100644 --- a/PDFKitten/PDFPageDetailsView.m +++ b/PDFKitten/PDFPageDetailsView.m @@ -68,12 +68,4 @@ - (UITableViewCell *)tableView:(UITableView *)tableView cellForRowAtIndexPath:(N return cell; } - - -- (void)dealloc -{ - [fontCollection release]; - [super dealloc]; -} - @end diff --git a/PDFKitten/Page.m b/PDFKitten/Page.m index a044ee2..0189b29 100644 --- a/PDFKitten/Page.m +++ b/PDFKitten/Page.m @@ -88,11 +88,5 @@ - (void)layoutSubviews [self setMinimumZoomScale:MIN(hScale, vScale)]; } -- (void)dealloc -{ - [super dealloc]; - [detailedView release]; -} - @synthesize pageNumber, contentView, detailedView; @end diff --git a/PDFKitten/PageView.m b/PDFKitten/PageView.m index 926ba8f..3a7922a 100644 --- a/PDFKitten/PageView.m +++ b/PDFKitten/PageView.m @@ -173,12 +173,10 @@ - (NSInteger)page - (void)setKeyword:(NSString *)str { - [keyword release]; - keyword = [str retain]; + keyword = str; for (PDFPage *p in visiblePages) { p.keyword = str; - [p setNeedsDisplay]; } } @@ -218,16 +216,5 @@ - (void)detailedInfoButtonPressed:(UIButton *)sender [UIView transitionFromView:currentPage toView:detailedView duration:1.0 options:UIViewAnimationOptionTransitionFlipFromLeft completion:nil]; } -#pragma mark - Memory Management - -- (void)dealloc -{ - [detailedViewController release]; - [keyword release]; - [recycledPages release]; - [visiblePages release]; - [super dealloc]; -} - @synthesize page, dataSource, keyword, detailViewController; @end diff --git a/PDFKitten/RenderingState.h b/PDFKitten/RenderingState.h index 6dbad30..7790c4c 100644 --- a/PDFKitten/RenderingState.h +++ b/PDFKitten/RenderingState.h @@ -48,4 +48,6 @@ @property (nonatomic, retain) Font *font; @property (nonatomic, assign) CGFloat fontSize; +-(CGRect)frame; + @end diff --git a/PDFKitten/RenderingState.m b/PDFKitten/RenderingState.m index a1bdfb4..f0c4e7a 100644 --- a/PDFKitten/RenderingState.m +++ b/PDFKitten/RenderingState.m @@ -37,7 +37,7 @@ - (id)copyWithZone:(NSZone *)zone - (void)setTextMatrix:(CGAffineTransform)matrix replaceLineMatrix:(BOOL)replace { self.textMatrix = matrix; - if (replace) + if (replace) { self.lineMatrix = matrix; } @@ -86,14 +86,29 @@ - (CGSize)convertSizeToUserSpace:(CGSize)aSize return aSize; } +- (CGRect)frame { + + FontDescriptor *fontDescriptor = font.fontDescriptor; + + CGRect result = fontDescriptor.bounds; + + result.origin.x = 0; + result.origin.y = MAX(result.origin.y, CGRectGetMaxY(result) - fontDescriptor.ascent); + result.size.height = MAX(result.size.height, fontDescriptor.ascent - fontDescriptor.descent); + + CGFloat k = fontSize / kGlyphSpaceScale; + + result.origin.y *= k; + result.size.height *= k; + result.size.width *= k; -#pragma mark - Memory Management - -- (void)dealloc -{ - [font release]; - [super dealloc]; + result.origin = CGPointApplyAffineTransform(result.origin, textMatrix); + + result.size = CGSizeApplyAffineTransform(result.size, textMatrix); + + return result; } @synthesize characterSpacing, wordSpacing, leadning, textRise, horizontalScaling, font, fontSize, lineMatrix, textMatrix, ctm; -@end + +@end \ No newline at end of file diff --git a/PDFKitten/RenderingStateStack.m b/PDFKitten/RenderingStateStack.m index d136f7b..471ef34 100644 --- a/PDFKitten/RenderingStateStack.m +++ b/PDFKitten/RenderingStateStack.m @@ -4,7 +4,7 @@ @implementation RenderingStateStack + (RenderingStateStack *)stack { - return [[[RenderingStateStack alloc] init] autorelease]; + return [[RenderingStateStack alloc] init]; } - (id)init @@ -14,7 +14,6 @@ - (id)init stack = [[NSMutableArray alloc] init]; RenderingState *rootRenderingState = [[RenderingState alloc] init]; [self pushRenderingState:rootRenderingState]; - [rootRenderingState release]; } return self; } @@ -35,18 +34,9 @@ - (void)pushRenderingState:(RenderingState *)state - (RenderingState *)popRenderingState { RenderingState *state = [stack lastObject]; - [[stack retain] autorelease]; [stack removeLastObject]; + return state; } - -#pragma mark - Memory Management - -- (void)dealloc -{ - [stack release]; - [super dealloc]; -} - -@end +@end \ No newline at end of file diff --git a/PDFKitten/RootViewController.m b/PDFKitten/RootViewController.m index b662b1a..7109787 100644 --- a/PDFKitten/RootViewController.m +++ b/PDFKitten/RootViewController.m @@ -112,26 +112,17 @@ - (NSString *)keywordForPageView:(PageView *)pageView - (NSString *)documentPath { - return [[NSBundle mainBundle] pathForResource:@"Kurt the Cat" ofType:@"pdf"]; + return [[NSBundle mainBundle] pathForResource:@"amazon-dynamo-sosp2007" ofType:@"pdf"]; } #pragma mark Search - (void)searchBarSearchButtonClicked:(UISearchBar *)aSearchBar { - [keyword release]; - keyword = [[aSearchBar text] retain]; + keyword = [aSearchBar text]; [pageView setKeyword:keyword]; [aSearchBar resignFirstResponder]; } -#pragma mark Memory Management - -- (void)dealloc -{ - CGPDFDocumentRelease(document); - [super dealloc]; -} - @end diff --git a/PDFKitten/Samples/Greek.pdf b/PDFKitten/Samples/Greek.pdf deleted file mode 100644 index 24a5a2f..0000000 Binary files a/PDFKitten/Samples/Greek.pdf and /dev/null differ diff --git a/PDFKitten/Samples/amazon-dynamo-sosp2007.pdf b/PDFKitten/Samples/amazon-dynamo-sosp2007.pdf new file mode 100755 index 0000000..5c877b0 Binary files /dev/null and b/PDFKitten/Samples/amazon-dynamo-sosp2007.pdf differ diff --git a/PDFKitten/Scanner.h b/PDFKitten/Scanner.h index 9174837..a73cd53 100644 --- a/PDFKitten/Scanner.h +++ b/PDFKitten/Scanner.h @@ -18,7 +18,9 @@ + (Scanner *)scannerWithPage:(CGPDFPageRef)page; -- (NSArray *)select:(NSString *)keyword; +- (void)scan; +- (NSArray *)searchForString:(NSString *)toSearchFor; +- (NSString *)getPageText; @property (nonatomic, readonly) RenderingState *renderingState; diff --git a/PDFKitten/Scanner.m b/PDFKitten/Scanner.m index 4cb8ced..9a5634e 100644 --- a/PDFKitten/Scanner.m +++ b/PDFKitten/Scanner.m @@ -4,7 +4,7 @@ @implementation Scanner + (Scanner *)scannerWithPage:(CGPDFPageRef)page { - return [[[Scanner alloc] initWithPage:page] autorelease]; + return [[Scanner alloc] initWithPage:page]; } - (id)initWithPage:(CGPDFPageRef)page { @@ -17,25 +17,40 @@ - (id)initWithPage:(CGPDFPageRef)page { return self; } -- (NSArray *)select:(NSString *)keyword { +-(NSString *)getPageText { + [self scan]; + return self.content; +} + +-(NSArray *)searchForString:(NSString *)toSearchFor { + + self.selections = [NSMutableArray new]; + + self.stringDetector = [StringDetector detectorWithKeyword:toSearchFor delegate:self]; + + [self scan]; + + self.stringDetector.delegate = nil; + self.stringDetector = nil; + + return [NSArray arrayWithArray:selections]; + +} + +- (void)scan { + self.content = [NSMutableString string]; - self.stringDetector = [StringDetector detectorWithKeyword:keyword delegate:self]; - [self.selections removeAllObjects]; + self.renderingStateStack = [RenderingStateStack stack]; CGPDFOperatorTableRef operatorTable = [self newOperatorTable]; CGPDFContentStreamRef contentStream = CGPDFContentStreamCreateWithPage(pdfPage); - CGPDFScannerRef scanner = CGPDFScannerCreate(contentStream, operatorTable, self); + CGPDFScannerRef scanner = CGPDFScannerCreate(contentStream, operatorTable, (__bridge void *)(self)); CGPDFScannerScan(scanner); CGPDFScannerRelease(scanner); CGPDFContentStreamRelease(contentStream); CGPDFOperatorTableRelease(operatorTable); - - self.stringDetector.delegate = nil; - self.stringDetector = nil; - - return self.selections; } - (CGPDFOperatorTableRef)newOperatorTable { @@ -91,52 +106,83 @@ - (FontCollection *)fontCollectionWithPage:(CGPDFPageRef)page { } FontCollection *collection = [[FontCollection alloc] initWithFontDictionary:fonts]; - return [collection autorelease]; + return collection; } - (void)detector:(StringDetector *)detector didScanCharacter:(unichar)character { + Font *font = self.renderingState.font; + unichar cid = character; - if (font.toUnicode) { + + if (font.toUnicode) + { cid = [font.toUnicode cidCharacter:character]; } CGFloat width = [font widthOfCharacter:cid withFontSize:self.renderingState.fontSize]; width /= 1000; width += self.renderingState.characterSpacing; - if (character == 32) { + + if (character == 32) + { width += self.renderingState.wordSpacing; } - - [self.renderingState translateTextPosition:CGSizeMake(width, 0)]; + + if(detector.rightCharacter && possibleSelection) { + CGRect frameOne = possibleSelection.initialState.frame; + CGRect frameTwo = self.renderingState.frame; + + //Here we check to see if the parsed character is on another line + if(frameOne.origin.y != frameTwo.origin.y) { + + //if so, and it is at the start of the new line, we create a new state + if(!possibleSelection.initialNewLineState) { + + possibleSelection.spansNewLine = YES; + possibleSelection.initialNewLineState = self.renderingState; + [self.renderingState translateTextPosition:CGSizeMake(width, 0)]; + possibleSelection.finalStateNewLineState = self.renderingState; + + //If we are already on a new line, we just update the newLineFinal state, like normal. + }else { + + [self.renderingState translateTextPosition:CGSizeMake(width, 0)]; + possibleSelection.finalStateNewLineState = self.renderingState; + + } + } else { + + //Update the normal state + [self.renderingState translateTextPosition:CGSizeMake(width, 0)]; + possibleSelection.finalState = self.renderingState; + } + + return; + } + + [self.renderingState translateTextPosition:CGSizeMake(width, 0)]; } -- (void)detectorDidStartMatching:(StringDetector *)detector { - possibleSelection = [[Selection selectionWithState:self.renderingState] retain]; +- (void)detectorDidStartMatching:(StringDetector *)detector +{ + possibleSelection = [Selection selectionWithState:self.renderingState]; } -- (void)detectorFoundString:(StringDetector *)detector { - if (possibleSelection) { - possibleSelection.finalState = self.renderingState; +- (void)detectorFoundString:(StringDetector *)detector +{ + if (possibleSelection) + { [self.selections addObject:possibleSelection]; - [possibleSelection release]; possibleSelection = nil; } } -- (RenderingState *)renderingState { +- (RenderingState *)renderingState +{ return [self.renderingStateStack topRenderingState]; } -- (void)dealloc { - [possibleSelection release]; - [fontCollection release]; - [selections release]; - [renderingStateStack release]; - [stringDetector release]; - [content release]; - [super dealloc]; -} - @synthesize stringDetector, fontCollection, renderingStateStack, content, selections, renderingState; -@end + +@end \ No newline at end of file diff --git a/PDFKitten/Selection.h b/PDFKitten/Selection.h index 78bb132..1371d59 100644 --- a/PDFKitten/Selection.h +++ b/PDFKitten/Selection.h @@ -9,11 +9,15 @@ @property (nonatomic, readonly) CGRect frame; @property (nonatomic, readonly) CGAffineTransform transform; +@property (nonatomic, copy) RenderingState *initialNewLineState; +@property (nonatomic, copy) RenderingState *finalStateNewLineState; +@property (nonatomic, assign) BOOL spansNewLine; + @property (nonatomic, copy) RenderingState *initialState; @property (nonatomic, copy) RenderingState *finalState; -@property (nonatomic, readonly) CGFloat height; -@property (nonatomic, readonly) CGFloat width; -@property (nonatomic, readonly) CGFloat descent; -@property (nonatomic, readonly) CGFloat ascent; +-(BOOL)isResultInTwoLines; + +-(CGRect)frameOfSelectionOnNewline; + @end diff --git a/PDFKitten/Selection.m b/PDFKitten/Selection.m index b9aa69c..9c3f4ff 100644 --- a/PDFKitten/Selection.m +++ b/PDFKitten/Selection.m @@ -7,43 +7,31 @@ CGFloat horizontal(CGAffineTransform transform) { @implementation Selection -+ (Selection *)selectionWithState:(RenderingState *)state { ++ (Selection *)selectionWithState:(RenderingState *)state +{ Selection *selection = [[Selection alloc] init]; selection.initialState = state; - return [selection autorelease]; + return selection; } -- (CGAffineTransform)transform { - return CGAffineTransformConcat([self.initialState textMatrix], [self.initialState ctm]); +- (CGRect)frame +{ + CGRect startState = _initialState.frame; + CGRect endState = _finalState.frame; + return CGRectMake(startState.origin.x, startState.origin.y, endState.origin.x - startState.origin.x, endState.size.height); } -- (CGRect)frame { - return CGRectMake(0, self.descent, self.width, self.height); +-(BOOL)isResultInTwoLines { + return _spansNewLine; } -- (CGFloat)height { - return self.ascent - self.descent; +//This method calculates the new marks, presenting the word searched for on the new line. +-(CGRect)frameOfSelectionOnNewline { + CGRect startState = _initialNewLineState.frame; + CGRect endState = _finalStateNewLineState.frame; + return CGRectMake(startState.origin.x, startState.origin.y, endState.origin.x - startState.origin.x, endState.size.height); } -- (CGFloat)width { - return horizontal(self.finalState.textMatrix) - horizontal(self.initialState.textMatrix); -} - -- (CGFloat)ascent { - return MAX([self ascentInUserSpace:self.initialState], [self ascentInUserSpace:self.finalState]); -} - -- (CGFloat)descent { - return MIN([self descentInUserSpace:self.initialState], [self descentInUserSpace:self.finalState]); -} - -- (CGFloat)ascentInUserSpace:(RenderingState *)state { - return state.font.fontDescriptor.ascent * state.fontSize / 1000; -} - -- (CGFloat)descentInUserSpace:(RenderingState *)state { - return state.font.fontDescriptor.descent * state.fontSize / 1000; -} +@synthesize frame; -@synthesize frame, transform; -@end +@end \ No newline at end of file diff --git a/PDFKitten/SimpleFont.h b/PDFKitten/SimpleFont.h index aef94e0..13301d9 100644 --- a/PDFKitten/SimpleFont.h +++ b/PDFKitten/SimpleFont.h @@ -13,6 +13,7 @@ #import #import "Font.h" +#import "EncodingDifferences.h" @interface SimpleFont : Font { } @@ -26,4 +27,6 @@ /* Set encoding, given a font dictionary */ - (void)setEncodingWithFontDictionary:(CGPDFDictionaryRef)dict; +@property (nonatomic, readonly, strong) EncodingDifferences *encodingDifferences; + @end diff --git a/PDFKitten/SimpleFont.m b/PDFKitten/SimpleFont.m index 1379607..71c2fa4 100644 --- a/PDFKitten/SimpleFont.m +++ b/PDFKitten/SimpleFont.m @@ -6,128 +6,195 @@ @implementation SimpleFont /* Initialize with a font dictionary */ - (id)initWithFontDictionary:(CGPDFDictionaryRef)dict { - if ((self = [super initWithFontDictionary:dict])) - { - // Set encoding for any font - [self setEncodingWithFontDictionary:dict]; - } - return self; + if ((self = [super initWithFontDictionary:dict])) + { + // Set encoding for any font + [self setEncodingWithFontDictionary:dict]; + } + return self; } /* Custom implementation for all simple fonts */ - (void)setWidthsWithFontDictionary:(CGPDFDictionaryRef)dict { - CGPDFArrayRef array; - if (!CGPDFDictionaryGetArray(dict, "Widths", &array)) return; - size_t count = CGPDFArrayGetCount(array); - CGPDFInteger firstChar, lastChar; - if (!CGPDFDictionaryGetInteger(dict, "FirstChar", &firstChar)) return; - if (!CGPDFDictionaryGetInteger(dict, "LastChar", &lastChar)) return; - widthsRange = NSMakeRange(firstChar, lastChar-firstChar); - NSMutableDictionary *widthsDict = [NSMutableDictionary dictionary]; - for (int i = 0; i < count; i++) - { - CGPDFReal width; - if (!CGPDFArrayGetNumber(array, i, &width)) continue; - NSNumber *key = [NSNumber numberWithInt:firstChar+i]; - NSNumber *value = [NSNumber numberWithFloat:width]; - [widthsDict setObject:value forKey:key]; - } - self.widths = widthsDict; + CGPDFArrayRef array; + if (!CGPDFDictionaryGetArray(dict, "Widths", &array)) return; + size_t count = CGPDFArrayGetCount(array); + CGPDFInteger firstChar, lastChar; + if (!CGPDFDictionaryGetInteger(dict, "FirstChar", &firstChar)) return; + if (!CGPDFDictionaryGetInteger(dict, "LastChar", &lastChar)) return; + widthsRange = NSMakeRange(firstChar, lastChar-firstChar); + NSMutableDictionary *widthsDict = [NSMutableDictionary dictionary]; + for (int i = 0; i < count; i++) + { + CGPDFReal width; + if (!CGPDFArrayGetNumber(array, i, &width)) continue; + NSNumber *key = [NSNumber numberWithLong:firstChar+i]; + NSNumber *value = [NSNumber numberWithFloat:width]; + [widthsDict setObject:value forKey:key]; + } + self.widths = widthsDict; } /* Set encoding, given a font dictionary */ - (void)setEncodingWithFontDictionary:(CGPDFDictionaryRef)dict { - CGPDFObjectRef encodingObject; - if (!CGPDFDictionaryGetObject(dict, "Encoding", &encodingObject)) return; - [self setEncodingWithEncodingObject:encodingObject]; + CGPDFObjectRef encodingObject; + if (!CGPDFDictionaryGetObject(dict, "Encoding", &encodingObject)) return; + + [self setEncodingWithEncodingObject:encodingObject]; } /* Custom implementation for all simple fonts */ - (NSString *)stringWithPDFString:(CGPDFStringRef)pdfString { - const unsigned char *bytes = CGPDFStringGetBytePtr(pdfString); - NSUInteger length = CGPDFStringGetLength(pdfString); - if (!self.encoding && self.toUnicode) - { - // Use ToUnicode map - NSMutableString *unicodeString = [NSMutableString string]; - - // Translate to Unicode - for (int i = 0; i < length; i++) - { - unichar cid = bytes[i]; + const unsigned char *bytes = CGPDFStringGetBytePtr(pdfString); + NSUInteger length = CGPDFStringGetLength(pdfString); + if (!self.encoding && self.toUnicode) + { + // Use ToUnicode map + NSMutableString *unicodeString = [NSMutableString string]; + + // Translate to Unicode + for (int i = 0; i < length; i++) + { + unichar cid = bytes[i]; unichar uni = [self.toUnicode unicodeCharacter:cid]; //NSLog(@"(%hu) %C -> (%hu) %C", cid, cid, uni, uni); [unicodeString appendFormat:@"%C", uni]; - } - - return unicodeString; - } - else if (!self.encoding) - { - return [super stringWithPDFString:pdfString]; - } + } + + return unicodeString; + } + else if (!self.encoding) + { + return [super stringWithPDFString:pdfString]; + } NSData *rawBytes = [NSData dataWithBytes:bytes length:length]; - NSString *string = [[NSString alloc] initWithData:rawBytes encoding:nativeEncoding(self.encoding)]; - - return [string autorelease]; + NSString *string = [[NSString alloc] initWithData:rawBytes encoding:nativeEncoding(self.encoding)]; + + return string; +} + +//Create the encoding differences +- (void) setupEncodingDifferencesWithEncodingDict:(CGPDFDictionaryRef)encodingDict +{ + CGPDFArrayRef diffArray = nil; + if (CGPDFDictionaryGetArray(encodingDict, "Differences", &diffArray)) { + _encodingDifferences = [[EncodingDifferences alloc] initWithArray:diffArray]; + } +} + +- (void)enumeratePDFStringCharacters:(CGPDFStringRef)pdfString usingBlock:(void (^)(NSUInteger, NSString *))block { + if (_encodingDifferences) { + const unsigned char *bytes = CGPDFStringGetBytePtr(pdfString); + NSUInteger length = CGPDFStringGetLength(pdfString); + + NSMutableString *normalizedString = [NSMutableString new]; + + for (int i = 0; i < length; i++) { + unichar cid = bytes[i]; + NSUInteger chr = [_encodingDifferences mapCid:cid withEncoding:self.encoding]; + NSString *character = [NSString stringWithFormat:@"%C", (unichar)(chr == NSNotFound ? cid : chr)]; + + //Normalize the string + NSString *normString = NormalizeString(character); + [normalizedString appendString:normString]; + } + + block(0,normalizedString); + + return; + } + + [super enumeratePDFStringCharacters:pdfString usingBlock:block]; + +} + + +#pragma mark - String normalization helper +// Simple IMP cache to faster call up a method. +static SEL lengthOfBytesUsingEncodingSEL; +static NSUInteger (*lengthOfBytesUsingEncodingIMP)(id, SEL, NSUInteger); + +// We can't use initialize here, since this might be used from a C function. +__attribute__((constructor)) static void PSPDFPrepareLengthOfBytesUsingEncoding(void) { + lengthOfBytesUsingEncodingSEL = @selector(lengthOfBytesUsingEncoding:); + lengthOfBytesUsingEncodingIMP = (NSUInteger ( *) (id, SEL, NSUInteger))[@"" methodForSelector:lengthOfBytesUsingEncodingSEL]; +} + +NSString *NormalizeString(__unsafe_unretained NSString *string) { + CFStringRef stringRef = (__bridge CFStringRef)(string); + NSUInteger UTF8Length = lengthOfBytesUsingEncodingIMP(string, lengthOfBytesUsingEncodingSEL, NSUTF8StringEncoding); + CFIndex stringRefLength = CFStringGetLength(stringRef); + if (stringRefLength != UTF8Length) { + CFMutableStringRef transformedRef = CFStringCreateMutableCopy(NULL, stringRefLength, stringRef); + CFStringNormalize(transformedRef, kCFStringNormalizationFormKC); + if (stringRefLength != CFStringGetLength(transformedRef)) { + return CFBridgingRelease(transformedRef); + }else { + CFRelease(transformedRef); + } + }else if (CFStringCompare((__bridge CFStringRef)(string), CFSTR("\0"), 0) == kCFCompareEqualTo) { + string = @""; + } + return string; } /* Set encoding with name or dictionary */ - (void)setEncodingWithEncodingObject:(CGPDFObjectRef)object { - CGPDFObjectType type = CGPDFObjectGetType(object); - - /* Encoding dictionary with base encoding and differences */ - if (type == kCGPDFObjectTypeDictionary) - { - /* NOTE: Also needs to capture differences */ - CGPDFDictionaryRef dict = nil; - if (!CGPDFObjectGetValue(object, kCGPDFObjectTypeDictionary, &dict)) return; - CGPDFObjectRef baseEncoding = nil; - if (!CGPDFDictionaryGetObject(dict, "BaseEncoding", &baseEncoding)) return; - [self setEncodingWithEncodingObject:baseEncoding]; - return; - } - - /* Only accept name objects */ - if (type != kCGPDFObjectTypeName) return; - - const char *name; - if (!CGPDFObjectGetValue(object, kCGPDFObjectTypeName, &name)) return; - - if (strcmp(name, "MacRomanEncoding") == 0) - { - self.encoding = MacRomanEncoding; - } - else if (strcmp(name, "MacExpertEncoding") == 0) - { - // What is MacExpertEncoding ?? - self.encoding = MacRomanEncoding; - } - else if (strcmp(name, "WinAnsiEncoding") == 0) - { - self.encoding = WinAnsiEncoding; - } + CGPDFObjectType type = CGPDFObjectGetType(object); + + /* Encoding dictionary with base encoding and differences */ + if (type == kCGPDFObjectTypeDictionary) + { + /* NOTE: Also needs to capture differences */ + CGPDFDictionaryRef dict = nil; + CGPDFObjectRef baseEncoding = nil; + + //Turns the object parsed into an dictionary + if (!CGPDFObjectGetValue(object, kCGPDFObjectTypeDictionary, &dict)) return; + if (!CGPDFDictionaryGetObject(dict, "BaseEncoding", &baseEncoding)) return; + [self setEncodingWithEncodingObject:baseEncoding]; + + [self setupEncodingDifferencesWithEncodingDict:dict]; + return; + } + + /* Only accept name objects */ + if (type != kCGPDFObjectTypeName) return; + + const char *name; + if (!CGPDFObjectGetValue(object, kCGPDFObjectTypeName, &name)) return; + + if (strcmp(name, "MacRomanEncoding") == 0) + { + self.encoding = MacRomanEncoding; + } + else if (strcmp(name, "MacExpertEncoding") == 0) + { + // What is MacExpertEncoding ?? + self.encoding = MacRomanEncoding; + } + else if (strcmp(name, "WinAnsiEncoding") == 0) + { + self.encoding = WinAnsiEncoding; + } } -/* Unicode character with CID */ -//- (NSString *)stringWithCharacters:(const char *)characters -//{ -// return [NSString stringWithCString:characters encoding:encoding]; -//} - - (CGFloat)widthOfSpace { - unichar c = 0x20; + unichar cid = 0x20; if (self.toUnicode) { - c = [self.toUnicode cidCharacter:c]; - if (c == NSNotFound) + cid = [self.toUnicode cidCharacter:cid]; + if (cid == NSNotFound) return 0; } - return [self widthOfCharacter:c withFontSize:1.0]; + + CGFloat width = [self widthOfCharacter:cid withFontSize:1.0]; + + return width; } -@end +@end \ No newline at end of file diff --git a/PDFKitten/StringDetector.h b/PDFKitten/StringDetector.h index 1824e21..10ad177 100644 --- a/PDFKitten/StringDetector.h +++ b/PDFKitten/StringDetector.h @@ -13,8 +13,7 @@ @interface StringDetector : NSObject { NSString *keyword; NSUInteger keywordPosition; - NSMutableString *unicodeContent; - id delegate; + __weak id delegate; } + (StringDetector *)detectorWithKeyword:(NSString *)keyword delegate:(id)delegate; @@ -24,6 +23,6 @@ - (NSString *)appendString:(NSString *)inputString; -@property (nonatomic, assign) id delegate; -@property (nonatomic, retain) NSMutableString *unicodeContent; -@end +@property (nonatomic, weak) id delegate; +@property (nonatomic, assign) BOOL rightCharacter; +@end \ No newline at end of file diff --git a/PDFKitten/StringDetector.m b/PDFKitten/StringDetector.m index 60c41e8..30341c4 100644 --- a/PDFKitten/StringDetector.m +++ b/PDFKitten/StringDetector.m @@ -2,63 +2,85 @@ @implementation StringDetector -+ (StringDetector *)detectorWithKeyword:(NSString *)keyword delegate:(id)delegate { ++ (StringDetector *)detectorWithKeyword:(NSString *)keyword delegate:(id)delegate +{ StringDetector *detector = [[StringDetector alloc] initWithKeyword:keyword]; detector.delegate = delegate; - return [detector autorelease]; + return detector; } -- (id)initWithKeyword:(NSString *)string { - if (self = [super init]) { - keyword = [[string lowercaseString] retain]; - self.unicodeContent = [NSMutableString string]; +- (id)initWithKeyword:(NSString *)string +{ + if (self = [super init]) + { + keyword = [string lowercaseString]; } return self; } -- (NSString *)appendString:(NSString *)inputString { +- (NSString *)appendString:(NSString *)inputString +{ NSString *lowercaseString = [inputString lowercaseString]; int position = 0; - if (lowercaseString) { - [unicodeContent appendString:lowercaseString]; - } - + while (position < inputString.length) { + unichar inputCharacter = [inputString characterAtIndex:position]; unichar actualCharacter = [lowercaseString characterAtIndex:position++]; unichar expectedCharacter = [keyword characterAtIndex:keywordPosition]; - if (actualCharacter != expectedCharacter) { - if (keywordPosition > 0) { - // Read character again - position--; + if (actualCharacter != expectedCharacter) + { + + if (keywordPosition > 0) + { + if(actualCharacter != '-') { + // Read character again + position--; + } else if(actualCharacter == '-'){ + position++; + } } - else if ([delegate respondsToSelector:@selector(detector:didScanCharacter:)]) { + else if ([delegate respondsToSelector:@selector(detector:didScanCharacter:)]) + { + _rightCharacter = NO; [delegate detector:self didScanCharacter:inputCharacter]; } // Reset keyword position - keywordPosition = 0; + if(actualCharacter != '-') { + keywordPosition = 0; + } else { + _rightCharacter = NO; + //[delegate detector:self didScanCharacter:inputCharacter]; + } + continue; } - if (keywordPosition == 0 && [delegate respondsToSelector:@selector(detectorDidStartMatching:)]) { + if (keywordPosition == 0 && [delegate respondsToSelector:@selector(detectorDidStartMatching:)]) + { [delegate detectorDidStartMatching:self]; } - if ([delegate respondsToSelector:@selector(detector:didScanCharacter:)]) { + if ([delegate respondsToSelector:@selector(detector:didScanCharacter:)]) + { + _rightCharacter = YES; [delegate detector:self didScanCharacter:inputCharacter]; } - if (++keywordPosition < keyword.length) { + if (++keywordPosition < keyword.length) + { // Keep matching keyword continue; } // Reset keyword position keywordPosition = 0; - if ([delegate respondsToSelector:@selector(detectorFoundString:)]) { + + if ([delegate respondsToSelector:@selector(detectorFoundString:)]) + { [delegate detectorFoundString:self]; } } @@ -66,22 +88,17 @@ - (NSString *)appendString:(NSString *)inputString { return inputString; } -- (void)setKeyword:(NSString *)kword { - [keyword release]; - keyword = [[kword lowercaseString] retain]; - +- (void)setKeyword:(NSString *)kword +{ + keyword = [kword lowercaseString]; keywordPosition = 0; } -- (void)reset { +- (void)reset +{ keywordPosition = 0; } -- (void)dealloc { - [unicodeContent release]; - [keyword release]; - [super dealloc]; -} +@synthesize delegate; -@synthesize delegate, unicodeContent; -@end +@end \ No newline at end of file diff --git a/PDFKitten/Type0Font.m b/PDFKitten/Type0Font.m index df9da34..60ebc8b 100644 --- a/PDFKitten/Type0Font.m +++ b/PDFKitten/Type0Font.m @@ -25,21 +25,18 @@ - (id)initWithFontDictionary:(CGPDFDictionaryRef)dict const char *subtype; if (!CGPDFDictionaryGetName(fontDict, "Subtype", &subtype)) continue; - NSLog(@"Descendant font type %s", subtype); if (strcmp(subtype, "CIDFontType0") == 0) { // Add descendant font of type 0 CIDType0Font *font = [[CIDType0Font alloc] initWithFontDictionary:fontDict]; if (font) [self.descendantFonts addObject:font]; - [font release]; } else if (strcmp(subtype, "CIDFontType2") == 0) { // Add descendant font of type 2 CIDType2Font *font = [[CIDType2Font alloc] initWithFontDictionary:fontDict]; if (font) [self.descendantFonts addObject:font]; - [font release]; } } } @@ -105,19 +102,27 @@ - (NSString *)stringWithPDFString:(CGPDFStringRef)pdfString return @""; } -- (NSString *)unicodeWithPDFString:(CGPDFStringRef)pdfString { +- (NSString *)unicodeWithPDFString:(CGPDFStringRef)pdfString +{ NSMutableString *result; Font *descendantFont = [self.descendantFonts lastObject]; NSString *descendantResult = [descendantFont stringWithPDFString: pdfString]; - if (self.toUnicode) { - result = [[[NSMutableString alloc] initWithCapacity: [descendantResult length]] autorelease]; - for (int i = 0; i < [descendantResult length]; i++) { + + if (self.toUnicode) + { + result = [[NSMutableString alloc] initWithCapacity: [descendantResult length]]; + + for (int i = 0; i < [descendantResult length]; i++) + { unichar character = [self.toUnicode unicodeCharacter:[descendantResult characterAtIndex:i]]; [result appendFormat:@"%C", character]; } - } else { + } + else + { result = [NSMutableString stringWithString: descendantResult]; } + return result; } @@ -126,7 +131,6 @@ - (NSString *)cidWithPDFString:(CGPDFStringRef)pdfString { return [descendantFont stringWithPDFString: pdfString]; } -#pragma mark - #pragma mark Memory Management - (NSMutableArray *)descendantFonts @@ -135,13 +139,8 @@ - (NSMutableArray *)descendantFonts { descendantFonts = [[NSMutableArray alloc] init]; } + return descendantFonts; } -- (void)dealloc -{ - [descendantFonts release]; - [super dealloc]; -} - -@end +@end \ No newline at end of file diff --git a/PDFKitten/pdfScannerCallbacks.mm b/PDFKitten/pdfScannerCallbacks.mm index 1d573ed..aa7d6c9 100644 --- a/PDFKitten/pdfScannerCallbacks.mm +++ b/PDFKitten/pdfScannerCallbacks.mm @@ -1,11 +1,11 @@ #import "Scanner.h" BOOL isSpace(float width, Scanner *scanner) { - return abs(width) >= scanner.renderingState.font.widthOfSpace; + return fabsf(width) >= scanner.renderingState.font.widthOfSpace; } void didScanSpace(float value, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; float width = [scanner.renderingState convertToUserSpace:value]; [scanner.renderingState translateTextPosition:CGSizeMake(-width, 0)]; if (isSpace(value, scanner)) { @@ -14,14 +14,16 @@ void didScanSpace(float value, void *info) { } void didScanString(CGPDFStringRef pdfString, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; StringDetector *stringDetector = scanner.stringDetector; Font *font = scanner.renderingState.font; - NSString *string = [font stringWithPDFString:pdfString]; - if (string) { - [stringDetector appendString:string]; - [scanner.content appendString:string]; - } + + [font enumeratePDFStringCharacters:pdfString usingBlock:^(NSUInteger cid, NSString *normalizedString) { + if (normalizedString) { + [stringDetector appendString:normalizedString]; + [scanner.content appendString:normalizedString]; + } + }]; } void didScanNewLine(CGPDFScannerRef pdfScanner, Scanner *scanner, BOOL persistLeading) { @@ -77,25 +79,22 @@ float getNumericalValue(CGPDFObjectRef pdfObject, CGPDFObjectType type) { } CGAffineTransform getTransform(CGPDFScannerRef pdfScanner) { - CGAffineTransform transform; - transform.ty = getNumber(pdfScanner); - transform.tx = getNumber(pdfScanner); - transform.d = getNumber(pdfScanner); - transform.c = getNumber(pdfScanner); - transform.b = getNumber(pdfScanner); - transform.a = getNumber(pdfScanner); - return transform; + CGFloat a, b, c, d, tx, ty; + if (CGPDFScannerPopNumber(pdfScanner, &ty) && CGPDFScannerPopNumber(pdfScanner, &tx) && CGPDFScannerPopNumber(pdfScanner, &d) && CGPDFScannerPopNumber(pdfScanner, &c) && CGPDFScannerPopNumber(pdfScanner, &b) && CGPDFScannerPopNumber(pdfScanner, &a)) { + return CGAffineTransformMake(a, b, c, d, tx, ty); + } + return CGAffineTransformIdentity; } #pragma mark Text parameters void setHorizontalScale(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingState setHorizontalScaling:getNumber(pdfScanner)]; } void setTextLeading(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingState setLeadning:getNumber(pdfScanner)]; } @@ -104,8 +103,7 @@ void setFont(CGPDFScannerRef pdfScanner, void *info) { const char *fontName; CGPDFScannerPopNumber(pdfScanner, &fontSize); CGPDFScannerPopName(pdfScanner, &fontName); - - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; RenderingState *state = scanner.renderingState; Font *font = [scanner.fontCollection fontNamed:[NSString stringWithUTF8String:fontName]]; [state setFont:font]; @@ -113,17 +111,17 @@ void setFont(CGPDFScannerRef pdfScanner, void *info) { } void setTextRise(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingState setTextRise:getNumber(pdfScanner)]; } void setCharacterSpacing(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingState setCharacterSpacing:getNumber(pdfScanner)]; } void setWordSpacing(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingState setWordSpacing:getNumber(pdfScanner)]; } @@ -131,25 +129,25 @@ void setWordSpacing(CGPDFScannerRef pdfScanner, void *info) { #pragma mark Set position void newLine(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingState newLine]; } void newLineWithLeading(CGPDFScannerRef pdfScanner, void *info) { - didScanNewLine(pdfScanner, (Scanner *) info, NO); + didScanNewLine(pdfScanner, (__bridge Scanner *) info, NO); } void newLineSetLeading(CGPDFScannerRef pdfScanner, void *info) { - didScanNewLine(pdfScanner, (Scanner *) info, YES); + didScanNewLine(pdfScanner, (__bridge Scanner *) info, YES); } void newParagraph(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingState setTextMatrix:CGAffineTransformIdentity replaceLineMatrix:YES]; } void setTextMatrix(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingState setTextMatrix:getTransform(pdfScanner) replaceLineMatrix:YES]; } @@ -157,7 +155,9 @@ void setTextMatrix(CGPDFScannerRef pdfScanner, void *info) { #pragma mark Print strings void printString(CGPDFScannerRef pdfScanner, void *info) { - didScanString(getString(pdfScanner), info); + CGPDFStringRef pdfString = NULL; + if (!CGPDFScannerPopString(pdfScanner, &pdfString)) return; + didScanString(pdfString, info); } void printStringNewLine(CGPDFScannerRef scanner, void *info) { @@ -172,10 +172,14 @@ void printStringNewLineSetSpacing(CGPDFScannerRef scanner, void *info) { } void printStringsAndSpaces(CGPDFScannerRef pdfScanner, void *info) { - CGPDFArrayRef array = getArray(pdfScanner); + + CGPDFArrayRef array; + if (!CGPDFScannerPopArray(pdfScanner, &array)) return; + for (int i = 0; i < CGPDFArrayGetCount(array); i++) { - CGPDFObjectRef pdfObject = getObject(array, i); - CGPDFObjectType valueType = CGPDFObjectGetType(pdfObject); + + CGPDFObjectRef pdfObject = getObject(array, i); + CGPDFObjectType valueType = CGPDFObjectGetType(pdfObject); if (valueType == kCGPDFObjectTypeString) { didScanString(getStringValue(pdfObject), info); @@ -189,21 +193,23 @@ void printStringsAndSpaces(CGPDFScannerRef pdfScanner, void *info) { #pragma mark Graphics state operators -void pushRenderingState(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; +void pushRenderingState(CGPDFScannerRef pdfScanner, void *info) +{ + Scanner *scanner = (__bridge Scanner *) info; RenderingState *state = [scanner.renderingState copy]; [scanner.renderingStateStack pushRenderingState:state]; - [state release]; } -void popRenderingState(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; +void popRenderingState(CGPDFScannerRef pdfScanner, void *info) +{ + Scanner *scanner = (__bridge Scanner *) info; [scanner.renderingStateStack popRenderingState]; } /* Update CTM */ -void applyTransformation(CGPDFScannerRef pdfScanner, void *info) { - Scanner *scanner = (Scanner *) info; +void applyTransformation(CGPDFScannerRef pdfScanner, void *info) +{ + Scanner *scanner = (__bridge Scanner *) info; RenderingState *state = scanner.renderingState; state.ctm = CGAffineTransformConcat(getTransform(pdfScanner), state.ctm); } diff --git a/PDFKittenTests/StringDetectorTest.m b/PDFKittenTests/StringDetectorTest.m index a427c00..4092c01 100644 --- a/PDFKittenTests/StringDetectorTest.m +++ b/PDFKittenTests/StringDetectorTest.m @@ -50,9 +50,4 @@ - (void)detectorFoundString:(StringDetector *)detector { matchCount++; } -- (void)dealloc { - [kurtStory release]; - [super dealloc]; -} - @end diff --git a/README.md b/README.md index c7db4e9..be6fdac 100644 --- a/README.md +++ b/README.md @@ -17,19 +17,55 @@ First, create a new instance of the scanner. Scanner *scanner = [Scanner scannerWithPage:page]; ``` -Set a keyword (case-insensitive) and scan a page. +Set a keyword with a searchBar or call searchForString manually ``` - NSArray *selections = [scanner select:@"happiness"]; + NSArray *selections = [self.scanner searchForString:keyword]; ``` -Finally, scan the page and draw the selections. +Finally, draw the selections, and remember to convert to userSpace coordinates ``` - for (Selection *selection in selections) - { - // draw selection - } + for(Selection *selection in selections) { + + //Get the userSpace frame from the selection PDFframe + CGRect selectionUserSpaceFrame = [PDFConverter convertPDFRectToViewRect:selection.frame pageRect:self.frame pdfRect:pdfPageFrame]; + + //Create the view + UIView *mark = [[UIView alloc] initWithFrame:selectionUserSpaceFrame]; + mark.backgroundColor = [UIColor yellowColor]; + mark.alpha = 0.4; + + //Place the view + [self addSubview:mark]; + + //This is for the multi-lined search marks. If the word spans more than one line, do the following + if(selection.isResultInTwoLines) { + CGRect frameTwo = [PDFConverter convertPDFRectToViewRect:selection.frameOfSelectionOnNewline pageRect:self.frame pdfRect:pdfPageFrame]; + UIView *markTwo = [[UIView alloc] initWithFrame:frameTwo]; + markTwo.backgroundColor = [UIColor yellowColor]; + markTwo.alpha = 0.4; + [page.view addSubview:markTwo]; + } + + } +``` + +Furthermore +``` + The whole point of my restructuring of the library is to give the + user the freedom to be able not to link the search marks directly + with the drawing of the pdf page. This is much more flexible. + Simply draw a pdf page and place the views on top. + + I have implemented methods for simply getting the text of a PDF page. + This method is called getPageText. + + The method searchForString is the method that returns the selections. + + The simple steps for making this work: + Init the Scanner. + Call searchForString. ``` ### Limitations