Reputation: 1
I have been facing this problem since this morning. I've tried several things, but the result remains the same; I always get the same error:
time data '2024-05-03T17:13:39.000Z' does not match format '%Y-%m-%dT%H:%M:%S.%f%z'
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-11-988e88e9cba4> in <module>
48 at=dt2.strftime("%m/%d/%Y, %H:%M:%S %Z")
49
---> 50 paginator = client.search_recent_tweets(query=query, tweet_fields=["lang", "context_annotations", "public_metrics", "created_at", "id", "text", "author_id", "entities", "geo"], max_results=maxtweets_persearch)
51
52 for page in paginator.flatten():
~/cluster-env/env/lib/python3.6/site-packages/tweepy/client.py in search_recent_tweets(self, query, user_auth, **params)
655 "since_id", "start_time", "tweet.fields", "until_id",
656 "user.fields"
--> 657 ), data_type=Tweet, user_auth=user_auth
658 )
659
~/cluster-env/env/lib/python3.6/site-packages/tweepy/client.py in _make_request(self, method, route, params, endpoint_parameters, json, data_type, user_auth)
174 if data_type is not None:
175 if isinstance(data, list):
--> 176 data = [data_type(result) for result in data]
177 elif data is not None:
178 data = data_type(data)
~/cluster-env/env/lib/python3.6/site-packages/tweepy/client.py in <listcomp>(.0)
174 if data_type is not None:
175 if isinstance(data, list):
--> 176 data = [data_type(result) for result in data]
177 elif data is not None:
178 data = data_type(data)
~/cluster-env/env/lib/python3.6/site-packages/tweepy/tweet.py in __init__(self, data)
39 if self.created_at is not None:
40 self.created_at = datetime.datetime.strptime(
---> 41 self.created_at, "%Y-%m-%dT%H:%M:%S.%f%z"
42 )
43
~/cluster-env/env/lib/python3.6/_strptime.py in _strptime_datetime(cls, data_string, format)
563 """Return a class cls instance based on the input string and the
564 format string."""
--> 565 tt, fraction = _strptime(data_string, format)
566 tzname, gmtoff = tt[-2:]
567 args = tt[:6] + (fraction,)
~/cluster-env/env/lib/python3.6/_strptime.py in _strptime(data_string, format)
360 if not found:
361 raise ValueError("time data %r does not match format %r" %
--> 362 (data_string, format))
363 if len(data_string) != found.end():
364 raise ValueError("unconverted data remains: %s" %
ValueError: time data '2024-05-03T17:13:39.000Z' does not match format '%Y-%m-%dT%H:%M:%S.%f%z'
The code generating this error is here:
def convert_date(date_str):
    """Convert an ISO-8601-style timestamp into a ``datetime``.

    The explicit formats listed below are tried in order with
    ``datetime.strptime``. If none of them matches, the value is handed to
    ``dateutil.parser.parse(..., fuzzy=True)`` as a last resort.

    Args:
        date_str: Timestamp text such as ``"2024-05-03T17:13:39.000Z"``.
            A value that already is a ``datetime`` is returned unchanged.

    Returns:
        The parsed ``datetime`` (timezone-aware when the text ends in ``Z``
        or carries a UTC offset), or ``None`` when the value is empty or
        cannot be parsed. A message is printed in the ``None`` case.
    """
    # Imported locally (and aliased) so the function does not depend on how
    # the surrounding script imported the datetime module.
    from datetime import datetime as _datetime, timezone as _timezone

    # Already parsed upstream: nothing to convert.
    if isinstance(date_str, _datetime):
        return date_str

    if date_str is None or date_str == "":
        print(f"Format de date inattendu : {date_str}")
        return None

    # (format, is_utc): formats with milliseconds come first. A literal "Z"
    # is matched as plain text by strptime, so the UTC zone is attached
    # explicitly afterwards.
    formats = (
        ("%Y-%m-%dT%H:%M:%S.%fZ", True),   # milliseconds, "Z" suffix (UTC)
        ("%Y-%m-%dT%H:%M:%S.%f", False),   # milliseconds, no time zone
        ("%Y-%m-%dT%H:%M:%S%z", False),    # whole seconds with a UTC offset
    )
    if isinstance(date_str, str):
        for fmt, is_utc in formats:
            try:
                parsed = _datetime.strptime(date_str, fmt)
            except ValueError:
                continue  # try the next format
            return parsed.replace(tzinfo=_timezone.utc) if is_utc else parsed

    # No explicit format matched: let dateutil make a best-effort guess.
    try:
        return dateutil.parser.parse(date_str, fuzzy=True)
    except (ValueError, TypeError, OverflowError):
        print(f"Format de date inattendu : {date_str}")
        return None
# Collects recent tweets matching `query` and flattens them into the
# all_tweets / all_users / all_hashtags / all_urls / all_media / all_handles
# lists of plain dicts.
#
# NOTE(review): the traceback quoted above is raised inside tweepy
# (tweepy/tweet.py, while the Tweet objects are being built), i.e. before any
# code in this script -- including convert_date -- sees the timestamp. The
# paths in the traceback show Python 3.6, whose strptime "%z" directive does
# not accept a literal "Z" (support was added in Python 3.7). Editing this
# script therefore cannot change that error; the Python/tweepy combination
# has to be fixed.
#
# NOTE(review): "author", "user", "id_str", "full_text", "is_quote_status",
# "retweet_count", "favorite_count", "favorited", "retweeted", "source",
# "place" and the "user_mentions"/"media" entity keys look like Twitter API
# v1.1 names, while search_recent_tweets is a v2 call and only the fields in
# tweet_fields are requested -- confirm every lookup below against the
# objects that are actually returned.
import tweepy  # for tweepy.Paginator


def _as_datetime(value):
    """Return *value* as a datetime, parsing it with convert_date if needed."""
    if isinstance(value, datetime):
        return value
    return convert_date(value)


def _month_year(moment):
    """Return "<month>_<year>" for *moment*, or None when it is None."""
    if moment is None:
        return None
    return f"{moment.month}_{moment.year}"


end_date = datetime.utcnow() - timedelta(days=max_days)
all_tweets = []
all_users = []
all_hashtags = []
all_urls = []
all_media = []
all_handles = []
count = 0
query_search = True

dt2 = datetime.now()
ts = int(time.mktime(dt2.timetuple()))
at = dt2.strftime("%m/%d/%Y, %H:%M:%S %Z")

# search_recent_tweets itself returns a single response; wrapping it in
# tweepy.Paginator is what yields one response per result page.
paginator = tweepy.Paginator(
    client.search_recent_tweets,
    query=query,
    tweet_fields=[
        "lang", "context_annotations", "public_metrics", "created_at",
        "id", "text", "author_id", "entities", "geo",
    ],
    max_results=maxtweets_persearch,
)

for page in paginator:
    # page.data is None when a page carries no tweets.
    for status in page.data or []:
        tweet = {}
        user = {}
        tweet_obj = status

        # Parse the creation time once; it is used for the "already seen"
        # filter and for the stored fields below.
        tweet_created_at = _as_datetime(tweet_obj["created_at"])

        # Skip tweets that are not newer than the last one already stored.
        if (
            last_created_at is not None
            and last_created_at != ''
            and tweet_created_at is not None
            and tweet_created_at.timestamp() <= last_created_at.timestamp()
        ):
            continue

        # ---- user document -------------------------------------------------
        user_obj = tweet_obj["author"]
        user["topickey"] = topic
        user["id"] = user_obj["id"]
        user["document_type"] = "user"
        user['inserted_at'] = at
        user['inserted_ts'] = ts
        user['name'] = user_obj["name"]
        user['screen_name'] = user_obj["username"]
        user['description'] = user_obj["description"]
        user['protected'] = user_obj["protected"]
        user['followers_count'] = user_obj["public_metrics"]["followers_count"]
        user['friends_count'] = user_obj["public_metrics"]["following_count"]
        user['listed_count'] = user_obj["public_metrics"]["listed_count"]
        user['profile_image_url'] = user_obj["profile_image_url"]
        user['verified'] = user_obj["verified"]
        user['created_at'] = _as_datetime(user_obj["created_at"])
        # None (instead of an AttributeError) when the date could not be parsed.
        user["month_year"] = _month_year(user['created_at'])
        user["country_azuremaps"] = ''
        user["country_code_azuremaps"] = ''

        # Resolve the free-text profile location to a country via the maps
        # lookup, unless it is empty/None or a known region name.
        user_location = tweet_obj["user"]["location"]
        if user_location and user_location not in regions:
            r_json = get_maps_response(user_location)
            if r_json and r_json["summary"]["numResults"] > 0:
                top_result = r_json['results'][0]
                if "address" in top_result:
                    top_match = top_result["address"]
                    if "country" in top_match and "countryCode" in top_match:
                        country = top_match["country"]
                        country_code = top_match["countryCode"]
                        user["country_azuremaps"] = country
                        user["country_code_azuremaps"] = country_code

        # ---- entities ------------------------------------------------------
        id_str = tweet_obj["id_str"]
        entities = tweet_obj['entities']
        if entities is not None:
            for h in entities.get('hashtags', []):
                hashtag = {}
                hashtag['id'] = id_str
                hashtag['text'] = h['text']
                hashtag['created_datetime'] = datetime.now()
                all_hashtags.append(hashtag)

            for um in entities.get('user_mentions', []):
                user_mention = {}
                user_mention['id'] = id_str
                user_mention['screen_name'] = um['screen_name']
                user_mention['created_datetime'] = datetime.now()
                # Store the record built above, not the raw mention entity.
                all_handles.append(user_mention)

            for u in entities.get('urls', []):
                urls = {}
                urls['id'] = id_str
                urls['url'] = u['url']
                urls['expanded_url'] = u['expanded_url']
                urls['display_url'] = u['display_url']
                urls['created_datetime'] = datetime.now()
                all_urls.append(urls)

            if 'media' in entities:
                for m in entities['media']:
                    media = {}
                    media['id'] = id_str
                    media['media_url'] = m['media_url']
                    media['created_datetime'] = datetime.now()
                    all_media.append(media)

        # ---- tweet document ------------------------------------------------
        tweet["userid"] = user["id"]
        dt2 = datetime.now()
        ts = int(time.mktime(dt2.timetuple()))
        at = dt2.strftime("%m/%d/%Y, %H:%M:%S %Z")
        tweet['inserted_at'] = at
        tweet['inserted_ts'] = ts
        tweet["originalid"] = tweet_obj["id"]
        # Artificially creating our own ID.
        # NOTE(review): hash() of a str is salted per interpreter process
        # unless PYTHONHASHSEED is fixed, so this ID differs between runs --
        # confirm that is acceptable or switch to a stable digest.
        tweet["id"] = str(int(tweet_obj["id_str"]) + abs(hash(topic)))
        tweet["topickey"] = topic
        tweet["subtopic"] = subtopic
        tweet["created_at"] = tweet_created_at
        # None (instead of an AttributeError) when the date could not be parsed.
        tweet["created_date"] = (
            tweet_created_at.date() if tweet_created_at is not None else None
        )
        tweet["month_year"] = _month_year(tweet_created_at)
        # Collapse line breaks into sentence separators and tidy the
        # punctuation that this produces.
        tmp_text = tweet_obj["full_text"].replace('\n','. ').replace('\r','.').replace('..','. ').replace(',.','. ').replace(';.','. ').replace('?.','. ').replace('!.','. ').replace(':.','. ').lstrip('.').lstrip(' ')
        tmp_text = remove_emojis(tmp_text)
        tweet["text"] = tmp_text
        tweet["document_type"] = "tweet"
        tweet["search_type"] = 'Topic Search'
        tweet["query"] = str(query)
        tweet["is_quote_status"] = tweet_obj["is_quote_status"]
        tweet["retweet_count"] = tweet_obj["retweet_count"]
        tweet["favorite_count"] = tweet_obj["favorite_count"]
        tweet["favorited"] = tweet_obj["favorited"]
        tweet["retweeted"] = tweet_obj["retweeted"]
        tweet["lang"] = tweet_obj["lang"]
        tweet["source"] = tweet_obj["source"]

        city = 'NA'
        country = 'NA'
        if tweet_obj['place'] is not None:
            city = tweet_obj["place"]['name']
            country = tweet_obj["place"]['country']
        tweet['city'] = city
        tweet['country'] = country

        all_tweets.append(tweet)
        all_users.append(user)
        count += 1
        # ">=" so that exactly num_tweets tweets are collected.
        if count >= num_tweets:
            break

    # The break above only leaves the inner loop; stop requesting further
    # pages once the limit has been reached.
    if count >= num_tweets:
        break
I've tried several date conversion methods, but I always get the same result, as if the code weren't taking my updates into account. Can anyone help me solve this problem, please?
Upvotes: 0
Views: 32