Received "too many values to unpack" error when performing zip

I am trying to follow a tutorial, but instead of using its dataset, I am using my own. The dataset is as follows.

Here are the links to the files (please ignore the "......"):

en = [1]: htt......ps ...... + ://...we.tl/t-cAKBwUjjsR
jp = [2]: htt......ps.......+ ://....we.tl/t-ck7PksqGfS

I then moved these two files into the folder "dataset/raw".

When I run my code:

from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import unicodedata
import re
import numpy as np
import os
import io
import time

english = open("dataset/raw/en", "r").read().splitlines()
japanese = open("dataset/raw/ja", "r").read().splitlines()


def preprocess_adding_startend(data):
    data = ["<SOE>" + d + "<EOS>" for d in data]
    return data

english = preprocess_adding_startend(english)
japanese = preprocess_adding_startend(japanese)
word_pairs = zip(japanese, english)

def max_length(tensor):
    return max(len(t) for t in tensor)

def tokenize(lang):
    lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        filters='')
    lang_tokenizer.fit_on_texts(lang)

    tensor = lang_tokenizer.texts_to_sequences(lang)

    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor,
                                                           padding='post')

    return tensor, lang_tokenizer

def load_dataset(word_pairs, num_examples=None):
    # creating cleaned input, output pairs
    targ_lang, inp_lang = word_pairs

    input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
    target_tensor, targ_lang_tokenizer = tokenize(targ_lang)

    return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer


# Try experimenting with the size of that dataset
num_examples = 30000
input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(word_pairs, num_examples)

# Calculate max_length of the target tensors
max_length_targ, max_length_inp = max_length(target_tensor), max_length(input_tensor)

I then receive the following error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-275b9a08d2b3> in <module>
      1 # Try experimenting with the size of that dataset
      2 num_examples = 30000
----> 3 input_tensor, target_tensor, inp_lang, targ_lang = load_dataset(word_pairs, num_examples)
      4 
      5 # Calculate max_length of the target tensors

<ipython-input-12-288e6edbeb9c> in load_dataset(word_pairs, num_examples)
     16 def load_dataset(word_pairs, num_examples=None):
     17     # creating cleaned input, output pairs
---> 18     targ_lang, inp_lang = word_pairs
     19 
     20     input_tensor, inp_lang_tokenizer = tokenize(inp_lang)

ValueError: too many values to unpack (expected 2)

Can anyone assist me? I really want to learn, and if I can't solve this error, I can't proceed.

Upvotes: 0

Views: 112

Answers (1)

razdi

Reputation: 1440

The error is self-explanatory: you zipped the word pairs together with zip(japanese, english), so to unpack them back into two sequences you need zip(*word_pairs):

def load_dataset(word_pairs, num_examples=None):
    # creating cleaned input, output pairs
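    # zip(*word_pairs) transposes the zipped pairs into two parallel tuples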
    targ_lang, inp_lang = zip(*word_pairs)

    input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
    target_tensor, targ_lang_tokenizer = tokenize(targ_lang)

    return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer
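
For intuition, here is a minimal sketch (using hypothetical toy sentence pairs, not your actual dataset) of what zip and zip(*...) do:

# Hypothetical toy data to illustrate the round trip:
japanese = ["konnichiwa", "arigatou", "sayounara"]
english = ["hello", "thanks", "goodbye"]
word_pairs = zip(japanese, english)
# word_pairs yields one 2-tuple per sentence pair, e.g. ("konnichiwa", "hello"),
# so "targ_lang, inp_lang = word_pairs" tries to unpack three pairs into two
# names and raises "ValueError: too many values to unpack (expected 2)".

# zip(*word_pairs) transposes the pairs back into two parallel tuples:
targ_lang, inp_lang = zip(*word_pairs)
print(targ_lang)  # ('konnichiwa', 'arigatou', 'sayounara')
print(inp_lang)   # ('hello', 'thanks', 'goodbye')

One caveat: in Python 3, zip returns a one-shot iterator, so word_pairs can only be unpacked once. If you need to reuse it (for example, when re-running the cell that calls load_dataset), build a list instead: word_pairs = list(zip(japanese, english)).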

Upvotes: 1
