108 lines
1.2 KiB
Plaintext
108 lines
1.2 KiB
Plaintext
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# RUN THIS CELL to perform standard imports:
|
|
import spacy
|
|
# Load the 'en_core_web_sm' pipeline once; the later cells reuse this `nlp`
# object both to parse the text and to supply the Matcher's vocabulary.
nlp = spacy.load('en_core_web_sm')
|
|
|
|
|
|
|
|
|
|
|
|
# Enter your code here:
|
|
|
|
# Read the whole text file into memory, then run the spaCy pipeline over it.
with open('../TextFiles/owlcreek.txt') as story_file:
    raw_text = story_file.read()
doc = nlp(raw_text)
|
|
|
|
|
|
# Run this cell to verify it worked:
|
|
|
|
# Bare expression: as the last line of a notebook cell this displays the
# first 36 tokens of doc.
doc[:36]
|
|
|
|
|
|
|
|
|
|
|
|
# Bare expression: the number of tokens in doc.
len(doc)
|
|
|
|
|
|
|
|
|
|
|
|
# Materialise the sentence spans into a list so they can be counted here and
# indexed by position in the cells below.
sents = list(doc.sents)
len(sents)
|
|
|
|
|
|
|
|
|
|
|
|
# Print the raw text of the second sentence (index 1) in sents.
print(sents[1].text)
|
|
|
|
|
|
|
|
|
|
|
|
# NORMAL SOLUTION:
|
|
# One line per token of the second sentence: text, coarse part-of-speech tag,
# dependency label and lemma, separated by single spaces.
for tok in sents[1]:
    print(tok.text, tok.pos_, tok.dep_, tok.lemma_)
|
|
|
|
|
|
# CHALLENGE SOLUTION:
|
|
# Same four attributes as the plain version, but padded to fixed column
# widths (15, 5, 10, 15) so the output lines up as a table.
for token in sents[1]:
    row = f'{token.text:{15}} {token.pos_:{5}} {token.dep_:{10}} {token.lemma_:{15}}'
    print(row)
|
|
|
|
|
|
|
|
|
|
|
|
# Import the Matcher library:
|
|
|
|
from spacy.matcher import Matcher
|
|
# Build the Matcher on the vocabulary of the pipeline that produced doc.
matcher = Matcher(nlp.vocab)
|
|
|
|
|
|
# Create a pattern and add it to matcher:
|
|
|
|
# Match "swimming" followed by "vigorously", compared on lowercased token
# text, allowing zero or more whitespace tokens (e.g. a line break) between.
pattern = [
    {'LOWER': 'swimming'},
    {'IS_SPACE': True, 'OP': '*'},
    {'LOWER': 'vigorously'},
]

# NOTE(review): (key, on_match, *patterns) is the spaCy v2 call signature;
# spaCy v3 expects matcher.add('Swimming', [pattern]) -- confirm which
# version is installed before changing this call.
matcher.add('Swimming', None, pattern)
|
|
|
|
|
|
# Create a list of matches called "found_matches" and print the list:
|
|
|
|
# Run the matcher over the document. The cells below read element [1] of
# each match as the token offset where the match starts.
found_matches = matcher(doc)

# Show the raw match list so the offsets can be inspected.
print(found_matches)
|
|
|
|
|
|
|
|
|
|
|
|
# Print two hand-picked token windows from doc.
# NOTE(review): presumably these surround the two matches printed above --
# confirm the offsets against the found_matches output.
for window_start, window_end in ((1265, 1290), (3600, 3615)):
    print(doc[window_start:window_end])
|
|
|
|
|
|
|
|
|
|
|
|
# Print the first sentence whose end offset lies beyond the start of the
# first match; with sents in document order that is the sentence holding the
# match. Nothing is printed when no sentence qualifies.
first_hit = next(
    (span for span in sents if found_matches[0][1] < span.end),
    None,
)
if first_hit is not None:
    print(first_hit)
|
|
|
|
|
|
# Print the first sentence whose end offset lies beyond the start of the
# second match; with sents in document order that is the sentence holding
# the match. Nothing is printed when no sentence qualifies.
second_hit = next(
    (span for span in sents if found_matches[1][1] < span.end),
    None,
)
if second_hit is not None:
    print(second_hit)
|
|
|
|
|
|
|