character << strings << text
What is a String?¶
A String is an immutable sequence of characters. Even if a String contains numbers, it is still a string. A String is converted into a number using int() or float().
# Let's define a couple of string variables
# Three ways to spell a string literal: double quotes, single quotes, and
# single quotes with an escaped apostrophe inside.
name = "VIT Chennai"
program = 'M.Tech CSE (BA)'
message = 'let\'s do strings'

# Each f-string renders the label, one space, then the value.
print(f"Name is {name}")
print(f"Program is {program}")
print(f"Message --- {message}")
Name is VIT Chennai Program is M.Tech CSE (BA) Message --- let's do strings
# Numeric-looking text is still text until it is explicitly converted.
string_int = '7'
string_float = '8.236'

# int() and float() parse the text and hand back real numbers
num_int, num_float = int(string_int), float(string_float)

# Report the data type of each variable
print("Variable - Data Type")
print("############################") # We will do it in a better way later
print(f"string1_int - {type(string_int)}")
print(f"string1_float - {type(string_float)}")
print(f"num_int - {type(num_int)}")
print(f"num_float - {type(num_float)}")
Variable - Data Type ############################ string1_int - <class 'str'> string1_float - <class 'str'> num_int - <class 'int'> num_float - <class 'float'>
# An ordinary literal processes escape sequences: \' becomes ' and \n a newline
normal_string = 'It\'s a normal string\n'
# The r prefix keeps every backslash exactly as typed
raw_string = r'It\'s a raw string\n'
# Triple quotes let one literal run across several lines
multiline_string = """This
is a
Multiline
String"""

# One call, newline-separated: same output as printing the three in turn
print(normal_string, raw_string, multiline_string, sep='\n')
It's a normal string It\'s a raw string\n This is a Multiline String
Indexing and slicing of a String is the same as that of a list. The nth element of a String can be accessed as follows: string_name[n-1]
# A sample string used to demonstrate element and slice access
my_string = 'This is Programming for Data Science course'
print(my_string)
# Indices start at 0, so the 10th element sits at index 9
print(f"10th element in my_string is .. {my_string[9]}")
This is Programming for Data Science course 10th element in my_string is .. r
# A negative start counts from the end: this selects the last 6 characters
last_six = slice(-6, None)
print(my_string[last_six])
# A step of 2 picks every alternate character of my_string
every_other = slice(None, None, 2)
print(f"Alternate characters ... {my_string[every_other]}")
course Alternate characters ... Ti sPormigfrDt cec ore
# Let's try changing an element in my_string
# This example demonstrates that String is immutable:
# item assignment on a str raises TypeError, so the next line fails on purpose.
my_string[1] = 'i'
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) Input In [6], in <cell line: 3>() 1 # Let's try changing an element in my_string 2 # This example demonstrates that String is immutable ----> 3 my_string[1] = 'i' TypeError: 'str' object does not support item assignment
# We have two lists, *first_name* and *last_name*, collected from the
# forms in a survey.
# Now, we want to combine these two parts and get one name.
# Assume that the corresponding indices hold the same person's name.
first_name = ["B", "G V", "Xavier"]
last_name = ["Harish", "Asish", "Francis"]
# zip() pairs the entries position by position, and + concatenates the two
# strings of each pair (no separator is inserted between them).
name_list = [first + last for first, last in zip(first_name, last_name)]
print(name_list)
['BHarish', 'G VAsish', 'XavierFrancis']
# Same pairing as before, but with a single space placed between the first
# and last name so the combined name reads naturally.
name_list1 = [first + ' ' + last for first, last in zip(first_name, last_name)]
print(name_list1)
['B Harish', 'G V Asish', 'Xavier Francis']
# Goal: build the string Okay!Okay!Okay!Okay!
# The * operator repeats a string a given number of times.
# In the second code cell of the notebook, I promised that I'll show you
# a better way to do one thing: printing # multiple times (##############).
# Here it is..
my_new_string = 4 * "Okay!"
print(f"my_new_string .. {my_new_string}")
my_new_string .. Okay!Okay!Okay!Okay!
in & not operators¶
The in operator can be used to check the presence of a string/substring in another. in evaluates to True if the (sub)string is present, else False.
not can be used (together with in) to check the absence of a (sub)string in another. not evaluates to True if the (sub)string is not present, else False.
message1 = ' Error while compiling'
# Here is an example for in operator:
# the test is True when 'Error' occurs anywhere inside message1
if 'Error' in message1:
    print("There was a compilation Error")
else:
    print('There is no Error')

# Example for not operator:
# `not in` is True when 'Warning' does not occur inside message1
if 'Warning' not in message1:
    print("It's not a warning message")
else:
    print("It is a warning message")
There was a compilation Error It's not a warning message
Like list, String also has many useful methods defined on it. Use help() to check the available methods on the String class.
help(str)
Help on class str in module builtins:
class str(object)
| str(object='') -> str
| str(bytes_or_buffer[, encoding[, errors]]) -> str
|
| Create a new string object from the given object. If encoding or
| errors is specified, then the object must expose a data buffer
| that will be decoded using the given encoding and error handler.
| Otherwise, returns the result of object.__str__() (if defined)
| or repr(object).
| encoding defaults to sys.getdefaultencoding().
| errors defaults to 'strict'.
|
| Methods defined here:
|
| __add__(self, value, /)
| Return self+value.
|
| __contains__(self, key, /)
| Return key in self.
|
| __eq__(self, value, /)
| Return self==value.
|
| __format__(self, format_spec, /)
| Return a formatted version of the string as described by format_spec.
|
| __ge__(self, value, /)
| Return self>=value.
|
| __getattribute__(self, name, /)
| Return getattr(self, name).
|
| __getitem__(self, key, /)
| Return self[key].
|
| __getnewargs__(...)
|
| __gt__(self, value, /)
| Return self>value.
|
| __hash__(self, /)
| Return hash(self).
|
| __iter__(self, /)
| Implement iter(self).
|
| __le__(self, value, /)
| Return self<=value.
|
| __len__(self, /)
| Return len(self).
|
| __lt__(self, value, /)
| Return self<value.
|
| __mod__(self, value, /)
| Return self%value.
|
| __mul__(self, value, /)
| Return self*value.
|
| __ne__(self, value, /)
| Return self!=value.
|
| __repr__(self, /)
| Return repr(self).
|
| __rmod__(self, value, /)
| Return value%self.
|
| __rmul__(self, value, /)
| Return value*self.
|
| __sizeof__(self, /)
| Return the size of the string in memory, in bytes.
|
| __str__(self, /)
| Return str(self).
|
| capitalize(self, /)
| Return a capitalized version of the string.
|
| More specifically, make the first character have upper case and the rest lower
| case.
|
| casefold(self, /)
| Return a version of the string suitable for caseless comparisons.
|
| center(self, width, fillchar=' ', /)
| Return a centered string of length width.
|
| Padding is done using the specified fill character (default is a space).
|
| count(...)
| S.count(sub[, start[, end]]) -> int
|
| Return the number of non-overlapping occurrences of substring sub in
| string S[start:end]. Optional arguments start and end are
| interpreted as in slice notation.
|
| encode(self, /, encoding='utf-8', errors='strict')
| Encode the string using the codec registered for encoding.
|
| encoding
| The encoding in which to encode the string.
| errors
| The error handling scheme to use for encoding errors.
| The default is 'strict' meaning that encoding errors raise a
| UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
| 'xmlcharrefreplace' as well as any other name registered with
| codecs.register_error that can handle UnicodeEncodeErrors.
|
| endswith(...)
| S.endswith(suffix[, start[, end]]) -> bool
|
| Return True if S ends with the specified suffix, False otherwise.
| With optional start, test S beginning at that position.
| With optional end, stop comparing S at that position.
| suffix can also be a tuple of strings to try.
|
| expandtabs(self, /, tabsize=8)
| Return a copy where all tab characters are expanded using spaces.
|
| If tabsize is not given, a tab size of 8 characters is assumed.
|
| find(...)
| S.find(sub[, start[, end]]) -> int
|
| Return the lowest index in S where substring sub is found,
| such that sub is contained within S[start:end]. Optional
| arguments start and end are interpreted as in slice notation.
|
| Return -1 on failure.
|
| format(...)
| S.format(*args, **kwargs) -> str
|
| Return a formatted version of S, using substitutions from args and kwargs.
| The substitutions are identified by braces ('{' and '}').
|
| format_map(...)
| S.format_map(mapping) -> str
|
| Return a formatted version of S, using substitutions from mapping.
| The substitutions are identified by braces ('{' and '}').
|
| index(...)
| S.index(sub[, start[, end]]) -> int
|
| Return the lowest index in S where substring sub is found,
| such that sub is contained within S[start:end]. Optional
| arguments start and end are interpreted as in slice notation.
|
| Raises ValueError when the substring is not found.
|
| isalnum(self, /)
| Return True if the string is an alpha-numeric string, False otherwise.
|
| A string is alpha-numeric if all characters in the string are alpha-numeric and
| there is at least one character in the string.
|
| isalpha(self, /)
| Return True if the string is an alphabetic string, False otherwise.
|
| A string is alphabetic if all characters in the string are alphabetic and there
| is at least one character in the string.
|
| isascii(self, /)
| Return True if all characters in the string are ASCII, False otherwise.
|
| ASCII characters have code points in the range U+0000-U+007F.
| Empty string is ASCII too.
|
| isdecimal(self, /)
| Return True if the string is a decimal string, False otherwise.
|
| A string is a decimal string if all characters in the string are decimal and
| there is at least one character in the string.
|
| isdigit(self, /)
| Return True if the string is a digit string, False otherwise.
|
| A string is a digit string if all characters in the string are digits and there
| is at least one character in the string.
|
| isidentifier(self, /)
| Return True if the string is a valid Python identifier, False otherwise.
|
| Call keyword.iskeyword(s) to test whether string s is a reserved identifier,
| such as "def" or "class".
|
| islower(self, /)
| Return True if the string is a lowercase string, False otherwise.
|
| A string is lowercase if all cased characters in the string are lowercase and
| there is at least one cased character in the string.
|
| isnumeric(self, /)
| Return True if the string is a numeric string, False otherwise.
|
| A string is numeric if all characters in the string are numeric and there is at
| least one character in the string.
|
| isprintable(self, /)
| Return True if the string is printable, False otherwise.
|
| A string is printable if all of its characters are considered printable in
| repr() or if it is empty.
|
| isspace(self, /)
| Return True if the string is a whitespace string, False otherwise.
|
| A string is whitespace if all characters in the string are whitespace and there
| is at least one character in the string.
|
| istitle(self, /)
| Return True if the string is a title-cased string, False otherwise.
|
| In a title-cased string, upper- and title-case characters may only
| follow uncased characters and lowercase characters only cased ones.
|
| isupper(self, /)
| Return True if the string is an uppercase string, False otherwise.
|
| A string is uppercase if all cased characters in the string are uppercase and
| there is at least one cased character in the string.
|
| join(self, iterable, /)
| Concatenate any number of strings.
|
| The string whose method is called is inserted in between each given string.
| The result is returned as a new string.
|
| Example: '.'.join(['ab', 'pq', 'rs']) -> 'ab.pq.rs'
|
| ljust(self, width, fillchar=' ', /)
| Return a left-justified string of length width.
|
| Padding is done using the specified fill character (default is a space).
|
| lower(self, /)
| Return a copy of the string converted to lowercase.
|
| lstrip(self, chars=None, /)
| Return a copy of the string with leading whitespace removed.
|
| If chars is given and not None, remove characters in chars instead.
|
| partition(self, sep, /)
| Partition the string into three parts using the given separator.
|
| This will search for the separator in the string. If the separator is found,
| returns a 3-tuple containing the part before the separator, the separator
| itself, and the part after it.
|
| If the separator is not found, returns a 3-tuple containing the original string
| and two empty strings.
|
| removeprefix(self, prefix, /)
| Return a str with the given prefix string removed if present.
|
| If the string starts with the prefix string, return string[len(prefix):].
| Otherwise, return a copy of the original string.
|
| removesuffix(self, suffix, /)
| Return a str with the given suffix string removed if present.
|
| If the string ends with the suffix string and that suffix is not empty,
| return string[:-len(suffix)]. Otherwise, return a copy of the original
| string.
|
| replace(self, old, new, count=-1, /)
| Return a copy with all occurrences of substring old replaced by new.
|
| count
| Maximum number of occurrences to replace.
| -1 (the default value) means replace all occurrences.
|
| If the optional argument count is given, only the first count occurrences are
| replaced.
|
| rfind(...)
| S.rfind(sub[, start[, end]]) -> int
|
| Return the highest index in S where substring sub is found,
| such that sub is contained within S[start:end]. Optional
| arguments start and end are interpreted as in slice notation.
|
| Return -1 on failure.
|
| rindex(...)
| S.rindex(sub[, start[, end]]) -> int
|
| Return the highest index in S where substring sub is found,
| such that sub is contained within S[start:end]. Optional
| arguments start and end are interpreted as in slice notation.
|
| Raises ValueError when the substring is not found.
|
| rjust(self, width, fillchar=' ', /)
| Return a right-justified string of length width.
|
| Padding is done using the specified fill character (default is a space).
|
| rpartition(self, sep, /)
| Partition the string into three parts using the given separator.
|
| This will search for the separator in the string, starting at the end. If
| the separator is found, returns a 3-tuple containing the part before the
| separator, the separator itself, and the part after it.
|
| If the separator is not found, returns a 3-tuple containing two empty strings
| and the original string.
|
| rsplit(self, /, sep=None, maxsplit=-1)
| Return a list of the substrings in the string, using sep as the separator string.
|
| sep
| The separator used to split the string.
|
| When set to None (the default value), will split on any whitespace
| character (including \\n \\r \\t \\f and spaces) and will discard
| empty strings from the result.
| maxsplit
| Maximum number of splits (starting from the left).
| -1 (the default value) means no limit.
|
| Splitting starts at the end of the string and works to the front.
|
| rstrip(self, chars=None, /)
| Return a copy of the string with trailing whitespace removed.
|
| If chars is given and not None, remove characters in chars instead.
|
| split(self, /, sep=None, maxsplit=-1)
| Return a list of the substrings in the string, using sep as the separator string.
|
| sep
| The separator used to split the string.
|
| When set to None (the default value), will split on any whitespace
| character (including \\n \\r \\t \\f and spaces) and will discard
| empty strings from the result.
| maxsplit
| Maximum number of splits (starting from the left).
| -1 (the default value) means no limit.
|
| Note, str.split() is mainly useful for data that has been intentionally
| delimited. With natural text that includes punctuation, consider using
| the regular expression module.
|
| splitlines(self, /, keepends=False)
| Return a list of the lines in the string, breaking at line boundaries.
|
| Line breaks are not included in the resulting list unless keepends is given and
| true.
|
| startswith(...)
| S.startswith(prefix[, start[, end]]) -> bool
|
| Return True if S starts with the specified prefix, False otherwise.
| With optional start, test S beginning at that position.
| With optional end, stop comparing S at that position.
| prefix can also be a tuple of strings to try.
|
| strip(self, chars=None, /)
| Return a copy of the string with leading and trailing whitespace removed.
|
| If chars is given and not None, remove characters in chars instead.
|
| swapcase(self, /)
| Convert uppercase characters to lowercase and lowercase characters to uppercase.
|
| title(self, /)
| Return a version of the string where each word is titlecased.
|
| More specifically, words start with uppercased characters and all remaining
| cased characters have lower case.
|
| translate(self, table, /)
| Replace each character in the string using the given translation table.
|
| table
| Translation table, which must be a mapping of Unicode ordinals to
| Unicode ordinals, strings, or None.
|
| The table must implement lookup/indexing via __getitem__, for instance a
| dictionary or list. If this operation raises LookupError, the character is
| left untouched. Characters mapped to None are deleted.
|
| upper(self, /)
| Return a copy of the string converted to uppercase.
|
| zfill(self, width, /)
| Pad a numeric string with zeros on the left, to fill a field of the given width.
|
| The string is never truncated.
|
| ----------------------------------------------------------------------
| Static methods defined here:
|
| __new__(*args, **kwargs) from builtins.type
| Create and return a new object. See help(type) for accurate signature.
|
| maketrans(...)
| Return a translation table usable for str.translate().
|
| If there is only one argument, it must be a dictionary mapping Unicode
| ordinals (integers) or characters to Unicode ordinals, strings or None.
| Character keys will be then converted to ordinals.
| If there are two arguments, they must be strings of equal length, and
| in the resulting dictionary, each character in x will be mapped to the
| character at the same position in y. If there is a third argument, it
| must be a string, whose characters will be mapped to None in the result.
# Text typed by a user often carries unwanted spaces around it, so it is
# trimmed with strip() before any further processing.
name = input("Enter your name: ")
name_trim = name.strip()
# NOTE: the double space in front of each length is deliberate; it keeps the
# printed line identical to the multi-argument print() form.
print(f"Before trimming : {name} And the length of name =  {len(name)}")
print(f"After trimming : {name_trim} And the length of name_trim =  {len(name_trim)}")
Enter your name: Bhargavi Before trimming : Bhargavi And the length of name = 11 After trimming : Bhargavi And the length of name_trim = 8
# Example of lstrip(): only the whitespace on the left side is removed
padded_spaces = ' I have spaces prefixed'
trimmed_string = str.lstrip(padded_spaces)
print(f'String with spaces --- {padded_spaces}')
print(f'String after trimming --- {trimmed_string}')
String with spaces --- I have spaces prefixed String after trimming --- I have spaces prefixed
# Example of rstrip(): only the whitespace on the right side is removed
padded_spaces = 'I have spaces suffixed '
trimmed_string = str.rstrip(padded_spaces)
print(f'String with spaces --- {padded_spaces}')
print(f'String after trimming --- {trimmed_string}')
# Trailing spaces are invisible when printed, hence the remark below
print('Only smart people can find the difference in the output!')
String with spaces --- I have spaces suffixed String after trimming --- I have spaces suffixed Only smart people can find the difference in the output!
ord() is used to get the code point (e.g., its ASCII byte value) of a one-character string. chr() is used to get the one-character string of an integer code point.
ord('A')
65
# chr() goes the other way from ord(): integer code point -> one-character string
chr(65)
'A'
# Let's now check how each character in your register number is stored internally
register_number = '21MIA1234'
code = []           # code point of every character, in order
reconstructed = ''  # starts empty; the reverse (chr) conversion rebuilds the text
for each in register_number:
    code_point = ord(each)  # look the code point up once, use it twice
    code.append(code_point)
    print('{} is represented as {}'.format(each, code_point))
2 is represented as 50 1 is represented as 49 M is represented as 77 I is represented as 73 A is represented as 65 1 is represented as 49 2 is represented as 50 3 is represented as 51 4 is represented as 52
# Let's now do the reverse conversion and see
for each in code:
    print('character representation of {} is {}'.format(each, chr(each)))
# Build the result in one join instead of += inside the loop; assigning
# (rather than appending) also keeps the result correct if this cell is re-run.
reconstructed = ''.join(chr(each) for each in code)
print("Reconstructed Register number is ", reconstructed)
character representation of 50 is 2 character representation of 49 is 1 character representation of 77 is M character representation of 73 is I character representation of 65 is A character representation of 49 is 1 character representation of 50 is 2 character representation of 51 is 3 character representation of 52 is 4 Reconstructed Register number is 21MIA1234
# Search for the first index of a substring
# Both sides are case-folded first so the search ignores letter case
text = 'Being a vitian, we are supposed to follow rules of VIT'.casefold()
look_for = 'VIT'.casefold()
print(text, look_for, sep='\n')
# index() reports the lowest position at which look_for is found
print(text.index(look_for))
being a vitian, we are supposed to follow rules of vit vit 8
# Let's now find the index of the last occurrence
last_index = text.rfind(look_for)
print(last_index)
51
split() turns a string into a list of smaller strings separated by a delimiter. join() takes a list of strings as input and stitches them together as a single string.
sentence = 'Here is my full sentence'
# split() with no argument breaks the sentence on whitespace
words = sentence.split()
print(words)
# Joining on the empty string glues the words together with nothing in between
reconstructed_sentence = str.join('', words)
print(f"Reconstructed Sentence is --- {reconstructed_sentence}")
['Here', 'is', 'my', 'full', 'sentence'] Reconstructed Sentence is --- Hereismyfullsentence
# Swap every space for '#', then split on that custom delimiter
sentence = 'Here is my space delimited sentence'.replace(' ', '#')
print(sentence)
words = sentence.split(sep='#')
print(words)
Here#is#my#space#delimited#sentence ['Here', 'is', 'my', 'space', 'delimited', 'sentence']