PineappleThursday
PineappleThursday

Reputation: 123

ImportError: cannot import name 'cpu_count' from 'joblib.externals.loky' (unknown location)

I am attempting to add a column that represents the similarity of two segments of text based on where they are located in embedding space, using the openai library to embed text.

import openai
import requests
import PyPDF2
import re
import os
import requests
import pandas as pd
import tiktoken
import time
from io import StringIO

from sklearn.metrics.pairwise import cosine_similarity
import sklearn.external.joblib as extjoblib
from sklearn.manifold import TSNE

import numpy as np
import ast
import csv 
import json

def add_similarity(df, given_embedding):
    """Add a 'similarity' column holding cosine similarity to a reference embedding.

    Each value in ``df['embedding']`` is compared against ``given_embedding``
    and the result is written to ``df['similarity']``. The dataframe is
    modified in place and also returned.

    Args:
        df: DataFrame with an 'embedding' column. Each cell is either a
            sequence of numbers or a string such as ``"[0.1, 0.2]"``.
        given_embedding: The reference embedding, in either of the same two
            forms accepted for the column cells.

    Returns:
        The same ``df`` object, with the 'similarity' column added.

    Raises:
        ValueError: If a string embedding contains a non-numeric entry, or
            if an embedding's length differs from the reference embedding's.
    """
    def _as_vector(embedding):
        # Embeddings round-tripped through CSV arrive as strings like
        # "[0.1, 0.2]"; convert those to a list of floats first.
        if isinstance(embedding, str):
            embedding = [float(x) for x in embedding.strip('[]').split(',')]
        return np.asarray(embedding, dtype=float)

    # The reference vector and its norm are the same for every row, so
    # compute them once instead of once per row.
    target = _as_vector(given_embedding)
    target_norm = np.linalg.norm(target)

    def calculate_similarity(embedding):
        vector = _as_vector(embedding)
        denominator = np.linalg.norm(vector) * target_norm
        if denominator == 0:
            # A zero-length vector has no direction; report 0.0 rather
            # than dividing by zero and producing NaN.
            return 0.0
        return float(np.dot(vector, target) / denominator)

    df['similarity'] = df['embedding'].apply(calculate_similarity)
    return df

However, the three sklearn imports are all throwing the same ImportError. Here is the traceback:

{
    "name": "ImportError",
    "message": "cannot import name 'cpu_count' from 'joblib.externals.loky' (unknown location)",
    "stack": "---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
Cell In[46], line 15
     10 from io import StringIO
     12 # sys.modules['sklearn.externals.joblib'] = joblib
     13 
     14 # from sklearn.metrics.pairwise import cosine_similarity
---> 15 import sklearn.external.joblib as extjoblib
     16 from sklearn.manifold import TSNE
     18 import numpy as np

File ~/.../myenv3.11/lib/python3.11/site-packages/sklearn/__init__.py:87
     73     # We are not importing the rest of scikit-learn during the build
     74     # process, as it may not be compiled yet
     75 else:
   (...)
     81     # later is linked to the OpenMP runtime to make it possible to introspect
     82     # it and importing it first would fail if the OpenMP dll cannot be found.
     83     from . import (
     84         __check_build,  # noqa: F401
     85         _distributor_init,  # noqa: F401
     86     )
---> 87     from .base import clone
     88     from .utils._show_versions import show_versions
     90     __all__ = [
     91         \"calibration\",
     92         \"cluster\",
   (...)
    133         \"show_versions\",
    134     ]

File ~/.../myenv3.11/lib/python3.11/site-packages/sklearn/base.py:19
     17 from ._config import config_context, get_config
     18 from .exceptions import InconsistentVersionWarning
---> 19 from .utils import _IS_32BIT
     20 from .utils._estimator_html_repr import _HTMLDocumentationLinkMixin, estimator_html_repr
     21 from .utils._metadata_requests import _MetadataRequester, _routing_enabled

File ~/.../myenv3.11/lib/python3.11/site-packages/sklearn/utils/__init__.py:20
     18 from .. import get_config
     19 from ..exceptions import DataConversionWarning
---> 20 from . import _joblib, metadata_routing
     21 from ._bunch import Bunch
     22 from ._estimator_html_repr import estimator_html_repr

File ~/.../myenv3.11/lib/python3.11/site-packages/sklearn/utils/_joblib.py:7
      4     _warnings.simplefilter(\"ignore\")
      5     # joblib imports may raise DeprecationWarning on certain Python
      6     # versions
----> 7     import joblib
      8     from joblib import (
      9         Memory,
     10         Parallel,
   (...)
     20         register_parallel_backend,
     21     )
     24 __all__ = [
     25     \"parallel_backend\",
     26     \"register_parallel_backend\",
   (...)
     37     \"__version__\",
     38 ]

File ~/.../myenv3.11/lib/python3.11/site-packages/joblib/__init__.py:129
    125 from .numpy_pickle import load
    127 from .compressor import register_compressor
--> 129 from .parallel import Parallel
    130 from .parallel import delayed
    131 from .parallel import cpu_count

File ~/.../myenv3.11/lib/python3.11/site-packages/joblib/parallel.py:31
     29 from .logger import Logger, short_format_time
     30 from .disk import memstr_to_bytes
---> 31 from ._parallel_backends import (FallbackToBackend, MultiprocessingBackend,
     32                                  ThreadingBackend, SequentialBackend,
     33                                  LokyBackend)
     34 from ._utils import eval_expr, _Sentinel
     36 # Make sure that those two classes are part of the public joblib.parallel API
     37 # so that 3rd party backend implementers can import them from here.

File ~/.../myenv3.11/lib/python3.11/site-packages/joblib/_parallel_backends.py:25
     22     from .executor import get_memmapping_executor
     24     # Import loky only if multiprocessing is present
---> 25     from .externals.loky import process_executor, cpu_count
     26     from .externals.loky.process_executor import ShutdownExecutorError
     29 class ParallelBackendBase(metaclass=ABCMeta):

ImportError: cannot import name 'cpu_count' from 'joblib.externals.loky' (unknown location)"
}

Even when I simply try import sklearn on its own, I get the same error.

The error occurs whether I use scikit-learn version 1.4.0 or the latest version (1.5.1). I believe I am on the latest version of joblib, but I can't tell, because when I run pip show joblib the output is

Name: joblib
Version: None
Summary: 
Home-page: 
Author: 
Author-email: 
License: 
Location: /.../myenv3.11/lib/python3.11/site-packages
Requires: 
Required-by: scikit-learn

I am also unable to uninstall joblib. When I try pip install --force-reinstall joblib the output is

Collecting joblib
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Installing collected packages: joblib
  Attempting uninstall: joblib
    Found existing installation: joblib None
error: uninstall-no-record-file

× Cannot uninstall joblib None
╰─> The package's contents are unknown: no RECORD file was found for joblib.

This may be related to the cause of the error.

The code was working fine earlier; it seems to have broken only after I recently restarted my kernel, which I did after setting up my project as a GitHub repository.

How to solve this issue?

Upvotes: 0

Views: 46

Answers (0)

Related Questions