# Import spaCy and load the language library
import spacy
nlp = spacy.load('en_core_web_sm')

# Create a string that includes opening and closing quotation marks
mystring = '"We\'re moving to L.A.!"'
print(mystring)
# Create a Doc object and explore tokens
doc = nlp(mystring)

for token in doc:
    print(token.text, end=' | ')
# Punctuation that does not form part of a word is split into its own token,
# but email addresses and URLs are kept intact as single tokens
doc2 = nlp(u"We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!")

for t in doc2:
    print(t)
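# A minimal sketch confirming this behaviour with the built-in Token flags
# like_email and like_url:
for t in doc2:
    if t.like_email or t.like_url:
        print(t.text, '-> looks like an email address or URL')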
# See how the tokenizer handles units, numbers and currency symbols
doc3 = nlp(u'A 5km NYC cab ride costs $10.30')

for t in doc3:
    print(t)
# Periods that belong to known abbreviations (here 'St.' and 'U.S.') are kept
# with their tokens rather than split off as punctuation
doc4 = nlp(u"Let's visit St. Louis in the U.S. next year.")

for t in doc4:
    print(t)
# Count the tokens in the original Doc
len(doc)

# Count the lexemes stored in the model's Vocab
# (this is a shared vocabulary, so it is far larger than any single Doc)
len(doc.vocab)
# Tokens can be retrieved by index position and slice
doc5 = nlp(u'It is better to give than to receive.')

# Retrieve the third token:
doc5[2]

# Retrieve three tokens from the middle:
doc5[2:5]

# Retrieve the last four tokens:
doc5[-4:]
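# A brief sketch of the difference between the two access patterns: indexing
# returns a Token object, while slicing returns a Span (a view into the Doc):
print(type(doc5[2]))      # <class 'spacy.tokens.token.Token'>
print(type(doc5[2:5]))    # <class 'spacy.tokens.span.Span'>
print(doc5[2:5].text)     # better to give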
doc6 = nlp(u'My dinner was horrible.')
doc7 = nlp(u'Your dinner was delicious.')

# Try to change "My dinner was horrible" to "My dinner was delicious"
# This raises a TypeError: Doc objects do not support item assignment,
# so individual tokens cannot be reassigned
doc6[3] = doc7[3]
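# A new string can still be assembled from existing tokens; a minimal sketch
# using text_with_ws (the token text plus its trailing whitespace):
new_text = ''.join(doc7[3].text_with_ws if i == 3 else t.text_with_ws
                   for i, t in enumerate(doc6))
print(new_text)   # My dinner was delicious.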
# Named entities: spaCy recognizes organizations, places, monetary values and more
doc8 = nlp(u'Apple to build a Hong Kong factory for $6 million')

for token in doc8:
    print(token.text, end=' | ')

print('\n----')

for ent in doc8.ents:
    print(ent.text + ' - ' + ent.label_ + ' - ' + str(spacy.explain(ent.label_)))

# Count the named entities found in the Doc
len(doc8.ents)
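# Each entity is a Span, so token and character offsets are also available;
# a short sketch printing them alongside the label:
for ent in doc8.ents:
    print(ent.text, ent.label_, ent.start, ent.end, ent.start_char, ent.end_char)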
# Noun chunks are "base noun phrases": a noun plus the words describing it
doc9 = nlp(u"Autonomous cars shift insurance liability toward manufacturers.")

for chunk in doc9.noun_chunks:
    print(chunk.text)

doc10 = nlp(u"Red cars do not carry higher insurance rates.")

for chunk in doc10.noun_chunks:
    print(chunk.text)

doc11 = nlp(u"He was a one-eyed, one-horned, flying, purple people-eater.")

for chunk in doc11.noun_chunks:
    print(chunk.text)
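# noun_chunks is a generator, so wrap it in list() to count or index the chunks;
# a minimal sketch:
print(len(list(doc9.noun_chunks)))   # number of noun chunks in doc9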
# Built-in visualizers: render the dependency parse with displaCy
from spacy import displacy

doc = nlp(u'Apple is going to build a U.K. factory for $6 million.')
displacy.render(doc, style='dep', jupyter=True, options={'distance': 110})
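# The dep visualizer accepts further display options (keys from displaCy's
# documented option set); for example, a compact layout with custom styling:
displacy.render(doc, style='dep', jupyter=True,
                options={'distance': 110, 'compact': True, 'color': 'yellow', 'bg': '#09a3d5', 'font': 'Times'})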
# Render named entities as highlighted spans of text
doc = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(doc, style='ent', jupyter=True)
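# The ent visualizer can be limited to particular entity types and given custom
# colours via the documented 'ents' and 'colors' options; a brief sketch:
colors = {'ORG': 'linear-gradient(90deg, #aa9cfc, #fc9ce7)'}
displacy.render(doc, style='ent', jupyter=True,
                options={'ents': ['ORG', 'MONEY'], 'colors': colors})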
# Outside of Jupyter, displacy.serve() starts a local web server;
# view the result in your browser (by default at http://127.0.0.1:5000)
doc = nlp(u'This is a sentence.')
displacy.serve(doc, style='dep')